1 //===- ForwardOpTree.h ------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Move instructions between statements.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "polly/ForwardOpTree.h"
14 #include "polly/Options.h"
15 #include "polly/ScopBuilder.h"
16 #include "polly/ScopInfo.h"
17 #include "polly/ScopPass.h"
18 #include "polly/Support/GICHelper.h"
19 #include "polly/Support/ISLOStream.h"
20 #include "polly/Support/ISLTools.h"
21 #include "polly/Support/VirtualInstruction.h"
22 #include "polly/ZoneAlgo.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/SmallVector.h"
25 #include "llvm/ADT/Statistic.h"
26 #include "llvm/Analysis/LoopInfo.h"
27 #include "llvm/Analysis/ValueTracking.h"
28 #include "llvm/IR/Instruction.h"
29 #include "llvm/IR/Instructions.h"
30 #include "llvm/IR/Value.h"
31 #include "llvm/InitializePasses.h"
32 #include "llvm/Support/Casting.h"
33 #include "llvm/Support/CommandLine.h"
34 #include "llvm/Support/Compiler.h"
35 #include "llvm/Support/Debug.h"
36 #include "llvm/Support/ErrorHandling.h"
37 #include "llvm/Support/raw_ostream.h"
38 #include "isl/ctx.h"
39 #include "isl/isl-noexceptions.h"
40 #include <cassert>
41 #include <memory>
42 
43 #define DEBUG_TYPE "polly-optree"
44 
45 using namespace llvm;
46 using namespace polly;
47 
48 static cl::opt<bool>
49     AnalyzeKnown("polly-optree-analyze-known",
50                  cl::desc("Analyze array contents for load forwarding"),
51                  cl::cat(PollyCategory), cl::init(true), cl::Hidden);
52 
53 static cl::opt<bool>
54     NormalizePHIs("polly-optree-normalize-phi",
55                   cl::desc("Replace PHIs by their incoming values"),
56                   cl::cat(PollyCategory), cl::init(false), cl::Hidden);
57 
58 static cl::opt<unsigned>
59     MaxOps("polly-optree-max-ops",
60            cl::desc("Maximum number of ISL operations to invest for known "
61                     "analysis; 0=no limit"),
62            cl::init(1000000), cl::cat(PollyCategory), cl::Hidden);
63 
64 STATISTIC(KnownAnalyzed, "Number of successfully analyzed SCoPs");
65 STATISTIC(KnownOutOfQuota,
66           "Analyses aborted because max_operations was reached");
67 
68 STATISTIC(TotalInstructionsCopied, "Number of copied instructions");
69 STATISTIC(TotalKnownLoadsForwarded,
70           "Number of forwarded loads because their value was known");
71 STATISTIC(TotalReloads, "Number of reloaded values");
72 STATISTIC(TotalReadOnlyCopied, "Number of copied read-only accesses");
73 STATISTIC(TotalForwardedTrees, "Number of forwarded operand trees");
74 STATISTIC(TotalModifiedStmts,
75           "Number of statements with at least one forwarded tree");
76 
77 STATISTIC(ScopsModified, "Number of SCoPs with at least one forwarded tree");
78 
79 STATISTIC(NumValueWrites, "Number of scalar value writes after OpTree");
80 STATISTIC(NumValueWritesInLoops,
81           "Number of scalar value writes nested in affine loops after OpTree");
82 STATISTIC(NumPHIWrites, "Number of scalar phi writes after OpTree");
83 STATISTIC(NumPHIWritesInLoops,
84           "Number of scalar phi writes nested in affine loops after OpTree");
85 STATISTIC(NumSingletonWrites, "Number of singleton writes after OpTree");
86 STATISTIC(NumSingletonWritesInLoops,
87           "Number of singleton writes nested in affine loops after OpTree");
88 
89 namespace {
90 
91 /// The state of whether an operand tree was/can be forwarded.
92 ///
93 /// The items apply to an instructions and its operand tree with the instruction
94 /// as the root element. If the value in question is not an instruction in the
95 /// SCoP, it can be a leaf of an instruction's operand tree.
96 enum ForwardingDecision {
97   /// The root instruction or value cannot be forwarded at all.
98   FD_CannotForward,
99 
100   /// The root instruction or value can be forwarded as a leaf of a larger
101   /// operand tree.
102   /// It does not make sense to move the value itself, it would just replace it
103   /// by a use of itself. For instance, a constant "5" used in a statement can
104   /// be forwarded, but it would just replace it by the same constant "5".
105   /// However, it makes sense to move as an operand of
106   ///
107   ///   %add = add 5, 5
108   ///
109   /// where "5" is moved as part of a larger operand tree. "5" would be placed
110   /// (disregarding for a moment that literal constants don't have a location
111   /// and can be used anywhere) into the same statement as %add would.
112   FD_CanForwardLeaf,
113 
114   /// The root instruction can be forwarded and doing so avoids a scalar
115   /// dependency.
116   ///
117   /// This can be either because the operand tree can be moved to the target
118   /// statement, or a memory access is redirected to read from a different
119   /// location.
120   FD_CanForwardProfitably,
121 
122   /// Used to indicate that a forwarding has be carried out successfully, and
123   /// the forwarded memory access can be deleted.
124   FD_DidForwardTree,
125 
126   /// Used to indicate that a forwarding has be carried out successfully, and
127   /// the forwarded memory access is being reused.
128   FD_DidForwardLeaf,
129 
130   /// A forwarding method cannot be applied to the operand tree.
131   /// The difference to FD_CannotForward is that there might be other methods
132   /// that can handle it.
133   /// The conditions that make an operand tree applicable must be checked even
134   /// with DoIt==true because a method following the one that returned
135   /// FD_NotApplicable might have returned FD_CanForwardTree.
136   FD_NotApplicable
137 };
138 
139 /// Implementation of operand tree forwarding for a specific SCoP.
140 ///
141 /// For a statement that requires a scalar value (through a value read
142 /// MemoryAccess), see if its operand can be moved into the statement. If so,
143 /// the MemoryAccess is removed and the all the operand tree instructions are
144 /// moved into the statement. All original instructions are left in the source
145 /// statements. The simplification pass can clean these up.
146 class ForwardOpTreeImpl : ZoneAlgorithm {
147 private:
148   /// Scope guard to limit the number of isl operations for this pass.
149   IslMaxOperationsGuard &MaxOpGuard;
150 
151   /// How many instructions have been copied to other statements.
152   int NumInstructionsCopied = 0;
153 
154   /// Number of loads forwarded because their value was known.
155   int NumKnownLoadsForwarded = 0;
156 
157   /// Number of values reloaded from known array elements.
158   int NumReloads = 0;
159 
160   /// How many read-only accesses have been copied.
161   int NumReadOnlyCopied = 0;
162 
163   /// How many operand trees have been forwarded.
164   int NumForwardedTrees = 0;
165 
166   /// Number of statements with at least one forwarded operand tree.
167   int NumModifiedStmts = 0;
168 
169   /// Whether we carried out at least one change to the SCoP.
170   bool Modified = false;
171 
172   /// Contains the zones where array elements are known to contain a specific
173   /// value.
174   /// { [Element[] -> Zone[]] -> ValInst[] }
175   /// @see computeKnown()
176   isl::union_map Known;
177 
178   /// Translator for newly introduced ValInsts to already existing ValInsts such
179   /// that new introduced load instructions can reuse the Known analysis of its
180   /// original load. { ValInst[] -> ValInst[] }
181   isl::union_map Translator;
182 
183   /// Get list of array elements that do contain the same ValInst[] at Domain[].
184   ///
185   /// @param ValInst { Domain[] -> ValInst[] }
186   ///                The values for which we search for alternative locations,
187   ///                per statement instance.
188   ///
189   /// @return { Domain[] -> Element[] }
190   ///         For each statement instance, the array elements that contain the
191   ///         same ValInst.
192   isl::union_map findSameContentElements(isl::union_map ValInst) {
193     assert(!ValInst.is_single_valued().is_false());
194 
195     // { Domain[] }
196     isl::union_set Domain = ValInst.domain();
197 
198     // { Domain[] -> Scatter[] }
199     isl::union_map Schedule = getScatterFor(Domain);
200 
201     // { Element[] -> [Scatter[] -> ValInst[]] }
202     isl::union_map MustKnownCurried =
203         convertZoneToTimepoints(Known, isl::dim::in, false, true).curry();
204 
205     // { [Domain[] -> ValInst[]] -> Scatter[] }
206     isl::union_map DomValSched = ValInst.domain_map().apply_range(Schedule);
207 
208     // { [Scatter[] -> ValInst[]] -> [Domain[] -> ValInst[]] }
209     isl::union_map SchedValDomVal =
210         DomValSched.range_product(ValInst.range_map()).reverse();
211 
212     // { Element[] -> [Domain[] -> ValInst[]] }
213     isl::union_map MustKnownInst = MustKnownCurried.apply_range(SchedValDomVal);
214 
215     // { Domain[] -> Element[] }
216     isl::union_map MustKnownMap =
217         MustKnownInst.uncurry().domain().unwrap().reverse();
218     simplify(MustKnownMap);
219 
220     return MustKnownMap;
221   }
222 
223   /// Find a single array element for each statement instance, within a single
224   /// array.
225   ///
226   /// @param MustKnown { Domain[] -> Element[] }
227   ///                  Set of candidate array elements.
228   /// @param Domain    { Domain[] }
229   ///                  The statement instance for which we need elements for.
230   ///
231   /// @return { Domain[] -> Element[] }
232   ///         For each statement instance, an array element out of @p MustKnown.
233   ///         All array elements must be in the same array (Polly does not yet
234   ///         support reading from different accesses using the same
235   ///         MemoryAccess). If no mapping for all of @p Domain exists, returns
236   ///         null.
237   isl::map singleLocation(isl::union_map MustKnown, isl::set Domain) {
238     // { Domain[] -> Element[] }
239     isl::map Result;
240 
241     // MemoryAccesses can read only elements from a single array
242     // (i.e. not: { Dom[0] -> A[0]; Dom[1] -> B[1] }).
243     // Look through all spaces until we find one that contains at least the
244     // wanted statement instance.s
245     for (isl::map Map : MustKnown.get_map_list()) {
246       // Get the array this is accessing.
247       isl::id ArrayId = Map.get_tuple_id(isl::dim::out);
248       ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(ArrayId.get_user());
249 
250       // No support for generation of indirect array accesses.
251       if (SAI->getBasePtrOriginSAI())
252         continue;
253 
254       // Determine whether this map contains all wanted values.
255       isl::set MapDom = Map.domain();
256       if (!Domain.is_subset(MapDom).is_true())
257         continue;
258 
259       // There might be multiple array elements that contain the same value, but
260       // choose only one of them. lexmin is used because it returns a one-value
261       // mapping, we do not care about which one.
262       // TODO: Get the simplest access function.
263       Result = Map.lexmin();
264       break;
265     }
266 
267     return Result;
268   }
269 
270 public:
271   ForwardOpTreeImpl(Scop *S, LoopInfo *LI, IslMaxOperationsGuard &MaxOpGuard)
272       : ZoneAlgorithm("polly-optree", S, LI), MaxOpGuard(MaxOpGuard) {}
273 
274   /// Compute the zones of known array element contents.
275   ///
276   /// @return True if the computed #Known is usable.
277   bool computeKnownValues() {
278     isl::union_map MustKnown, KnownFromLoad, KnownFromInit;
279 
280     // Check that nothing strange occurs.
281     collectCompatibleElts();
282 
283     {
284       IslQuotaScope QuotaScope = MaxOpGuard.enter();
285 
286       computeCommon();
287       if (NormalizePHIs)
288         computeNormalizedPHIs();
289       Known = computeKnown(true, true);
290 
291       // Preexisting ValInsts use the known content analysis of themselves.
292       Translator = makeIdentityMap(Known.range(), false);
293     }
294 
295     if (!Known || !Translator || !NormalizeMap) {
296       assert(isl_ctx_last_error(IslCtx.get()) == isl_error_quota);
297       Known = nullptr;
298       Translator = nullptr;
299       NormalizeMap = nullptr;
300       LLVM_DEBUG(dbgs() << "Known analysis exceeded max_operations\n");
301       return false;
302     }
303 
304     KnownAnalyzed++;
305     LLVM_DEBUG(dbgs() << "All known: " << Known << "\n");
306 
307     return true;
308   }
309 
310   void printStatistics(raw_ostream &OS, int Indent = 0) {
311     OS.indent(Indent) << "Statistics {\n";
312     OS.indent(Indent + 4) << "Instructions copied: " << NumInstructionsCopied
313                           << '\n';
314     OS.indent(Indent + 4) << "Known loads forwarded: " << NumKnownLoadsForwarded
315                           << '\n';
316     OS.indent(Indent + 4) << "Reloads: " << NumReloads << '\n';
317     OS.indent(Indent + 4) << "Read-only accesses copied: " << NumReadOnlyCopied
318                           << '\n';
319     OS.indent(Indent + 4) << "Operand trees forwarded: " << NumForwardedTrees
320                           << '\n';
321     OS.indent(Indent + 4) << "Statements with forwarded operand trees: "
322                           << NumModifiedStmts << '\n';
323     OS.indent(Indent) << "}\n";
324   }
325 
326   void printStatements(raw_ostream &OS, int Indent = 0) const {
327     OS.indent(Indent) << "After statements {\n";
328     for (auto &Stmt : *S) {
329       OS.indent(Indent + 4) << Stmt.getBaseName() << "\n";
330       for (auto *MA : Stmt)
331         MA->print(OS);
332 
333       OS.indent(Indent + 12);
334       Stmt.printInstructions(OS);
335     }
336     OS.indent(Indent) << "}\n";
337   }
338 
339   /// Create a new MemoryAccess of type read and MemoryKind::Array.
340   ///
341   /// @param Stmt           The statement in which the access occurs.
342   /// @param LI             The instruction that does the access.
343   /// @param AccessRelation The array element that each statement instance
344   ///                       accesses.
345   ///
346   /// @param The newly created access.
347   MemoryAccess *makeReadArrayAccess(ScopStmt *Stmt, LoadInst *LI,
348                                     isl::map AccessRelation) {
349     isl::id ArrayId = AccessRelation.get_tuple_id(isl::dim::out);
350     ScopArrayInfo *SAI = reinterpret_cast<ScopArrayInfo *>(ArrayId.get_user());
351 
352     // Create a dummy SCEV access, to be replaced anyway.
353     SmallVector<const SCEV *, 4> Sizes;
354     Sizes.reserve(SAI->getNumberOfDimensions());
355     SmallVector<const SCEV *, 4> Subscripts;
356     Subscripts.reserve(SAI->getNumberOfDimensions());
357     for (unsigned i = 0; i < SAI->getNumberOfDimensions(); i += 1) {
358       Sizes.push_back(SAI->getDimensionSize(i));
359       Subscripts.push_back(nullptr);
360     }
361 
362     MemoryAccess *Access =
363         new MemoryAccess(Stmt, LI, MemoryAccess::READ, SAI->getBasePtr(),
364                          LI->getType(), true, {}, Sizes, LI, MemoryKind::Array);
365     S->addAccessFunction(Access);
366     Stmt->addAccess(Access, true);
367 
368     Access->setNewAccessRelation(AccessRelation);
369 
370     return Access;
371   }
372 
373   /// Forward a load by reading from an array element that contains the same
374   /// value. Typically the location it was loaded from.
375   ///
376   /// @param TargetStmt  The statement the operand tree will be copied to.
377   /// @param Inst        The (possibly speculatable) instruction to forward.
378   /// @param UseStmt     The statement that uses @p Inst.
379   /// @param UseLoop     The loop @p Inst is used in.
380   /// @param DefStmt     The statement @p Inst is defined in.
381   /// @param DefLoop     The loop which contains @p Inst.
382   /// @param DoIt        If false, only determine whether an operand tree can be
383   ///                    forwarded. If true, carry out the forwarding. Do not
384   ///                    use DoIt==true if an operand tree is not known to be
385   ///                    forwardable.
386   ///
387   /// @return FD_NotApplicable  if @p Inst cannot be forwarded by creating a new
388   ///                           load.
389   ///         FD_CannotForward  if the pointer operand cannot be forwarded.
390   ///         FD_CanForwardProfitably if @p Inst is forwardable.
391   ///         FD_DidForwardTree if @p DoIt was true.
392   ForwardingDecision forwardKnownLoad(ScopStmt *TargetStmt, Instruction *Inst,
393                                       ScopStmt *UseStmt, Loop *UseLoop,
394                                       ScopStmt *DefStmt, Loop *DefLoop,
395                                       bool DoIt) {
396     // Cannot do anything without successful known analysis.
397     if (Known.is_null() || Translator.is_null() ||
398         MaxOpGuard.hasQuotaExceeded())
399       return FD_NotApplicable;
400 
401     LoadInst *LI = dyn_cast<LoadInst>(Inst);
402     if (!LI)
403       return FD_NotApplicable;
404 
405     // If the load is already in the statement, no forwarding is necessary.
406     // However, it might happen that the LoadInst is already present in the
407     // statement's instruction list. In that case we do as follows:
408     // - For the evaluation (DoIt==false), we can trivially forward it as it is
409     //   benefit of forwarding an already present instruction.
410     // - For the execution (DoIt==true), prepend the instruction (to make it
411     //   available to all instructions following in the instruction list), but
412     //   do not add another MemoryAccess.
413     MemoryAccess *Access = TargetStmt->getArrayAccessOrNULLFor(LI);
414     if (Access && !DoIt)
415       return FD_CanForwardProfitably;
416 
417     ForwardingDecision OpDecision = forwardTree(
418         TargetStmt, LI->getPointerOperand(), DefStmt, DefLoop, DoIt);
419     switch (OpDecision) {
420     case FD_CannotForward:
421       assert(!DoIt);
422       return OpDecision;
423 
424     case FD_CanForwardLeaf:
425     case FD_CanForwardProfitably:
426       assert(!DoIt);
427       break;
428 
429     case FD_DidForwardLeaf:
430     case FD_DidForwardTree:
431       assert(DoIt);
432       break;
433 
434     default:
435       llvm_unreachable("Shouldn't return this");
436     }
437 
438     IslQuotaScope QuotaScope = MaxOpGuard.enter(!DoIt);
439 
440     // { DomainDef[] -> ValInst[] }
441     isl::map ExpectedVal = makeValInst(Inst, UseStmt, UseLoop);
442     assert(!isNormalized(ExpectedVal).is_false() &&
443            "LoadInsts are always normalized");
444 
445     // { DomainUse[] -> DomainTarget[] }
446     isl::map UseToTarget = getDefToTarget(UseStmt, TargetStmt);
447 
448     // { DomainTarget[] -> ValInst[] }
449     isl::map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget);
450     isl::union_map TranslatedExpectedVal =
451         isl::union_map(TargetExpectedVal).apply_range(Translator);
452 
453     // { DomainTarget[] -> Element[] }
454     isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal);
455 
456     isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt));
457     if (!SameVal)
458       return FD_NotApplicable;
459 
460     if (DoIt)
461       TargetStmt->prependInstruction(LI);
462 
463     if (!DoIt)
464       return FD_CanForwardProfitably;
465 
466     if (Access) {
467       LLVM_DEBUG(
468           dbgs() << "    forwarded known load with preexisting MemoryAccess"
469                  << Access << "\n");
470     } else {
471       Access = makeReadArrayAccess(TargetStmt, LI, SameVal);
472       LLVM_DEBUG(dbgs() << "    forwarded known load with new MemoryAccess"
473                         << Access << "\n");
474 
475       // { ValInst[] }
476       isl::space ValInstSpace = ExpectedVal.get_space().range();
477 
478       // After adding a new load to the SCoP, also update the Known content
479       // about it. The new load will have a known ValInst of
480       // { [DomainTarget[] -> Value[]] }
481       // but which -- because it is a copy of it -- has same value as the
482       // { [DomainDef[] -> Value[]] }
483       // that it replicates. Instead of  cloning the known content of
484       // [DomainDef[] -> Value[]]
485       // for DomainTarget[], we add a 'translator' that maps
486       // [DomainTarget[] -> Value[]] to [DomainDef[] -> Value[]]
487       // before comparing to the known content.
488       // TODO: 'Translator' could also be used to map PHINodes to their incoming
489       // ValInsts.
490       if (ValInstSpace.is_wrapping()) {
491         // { DefDomain[] -> Value[] }
492         isl::map ValInsts = ExpectedVal.range().unwrap();
493 
494         // { DefDomain[] }
495         isl::set DefDomain = ValInsts.domain();
496 
497         // { Value[] }
498         isl::space ValSpace = ValInstSpace.unwrap().range();
499 
500         // { Value[] -> Value[] }
501         isl::map ValToVal =
502             isl::map::identity(ValSpace.map_from_domain_and_range(ValSpace));
503 
504         // { DomainDef[] -> DomainTarget[] }
505         isl::map DefToTarget = getDefToTarget(DefStmt, TargetStmt);
506 
507         // { [TargetDomain[] -> Value[]] -> [DefDomain[] -> Value] }
508         isl::map LocalTranslator = DefToTarget.reverse().product(ValToVal);
509 
510         Translator = Translator.add_map(LocalTranslator);
511         LLVM_DEBUG(dbgs() << "      local translator is " << LocalTranslator
512                           << "\n");
513       }
514     }
515     LLVM_DEBUG(dbgs() << "      expected values where " << TargetExpectedVal
516                       << "\n");
517     LLVM_DEBUG(dbgs() << "      candidate elements where " << Candidates
518                       << "\n");
519     assert(Access);
520 
521     NumKnownLoadsForwarded++;
522     TotalKnownLoadsForwarded++;
523     return FD_DidForwardTree;
524   }
525 
526   /// Forward a scalar by redirecting the access to an array element that stores
527   /// the same value.
528   ///
529   /// @param TargetStmt  The statement the operand tree will be copied to.
530   /// @param Inst        The scalar to forward.
531   /// @param UseStmt     The statement that uses @p Inst.
532   /// @param UseLoop     The loop @p Inst is used in.
533   /// @param DefStmt     The statement @p Inst is defined in.
534   /// @param DefLoop     The loop which contains @p Inst.
535   /// @param DoIt        If false, only determine whether an operand tree can be
536   ///                    forwarded. If true, carry out the forwarding. Do not
537   ///                    use DoIt==true if an operand tree is not known to be
538   ///                    forwardable.
539   ///
540   /// @return FD_NotApplicable        if @p Inst cannot be reloaded.
541   ///         FD_CanForwardLeaf       if @p Inst can be reloaded.
542   ///         FD_CanForwardProfitably if @p Inst has been reloaded.
543   ///         FD_DidForwardLeaf       if @p DoIt was true.
544   ForwardingDecision reloadKnownContent(ScopStmt *TargetStmt, Instruction *Inst,
545                                         ScopStmt *UseStmt, Loop *UseLoop,
546                                         ScopStmt *DefStmt, Loop *DefLoop,
547                                         bool DoIt) {
548     // Cannot do anything without successful known analysis.
549     if (Known.is_null() || Translator.is_null() ||
550         MaxOpGuard.hasQuotaExceeded())
551       return FD_NotApplicable;
552 
553     MemoryAccess *Access = TargetStmt->lookupInputAccessOf(Inst);
554     if (Access && Access->isLatestArrayKind()) {
555       if (DoIt)
556         return FD_DidForwardLeaf;
557       return FD_CanForwardLeaf;
558     }
559 
560     // Don't spend too much time analyzing whether it can be reloaded. When
561     // carrying-out the forwarding, we cannot bail-out in the middle of the
562     // transformation. It also shouldn't take as long because some results are
563     // cached.
564     IslQuotaScope QuotaScope = MaxOpGuard.enter(!DoIt);
565 
566     // { DomainDef[] -> ValInst[] }
567     isl::union_map ExpectedVal = makeNormalizedValInst(Inst, UseStmt, UseLoop);
568 
569     // { DomainUse[] -> DomainTarget[] }
570     isl::map UseToTarget = getDefToTarget(UseStmt, TargetStmt);
571 
572     // { DomainTarget[] -> ValInst[] }
573     isl::union_map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget);
574     isl::union_map TranslatedExpectedVal =
575         TargetExpectedVal.apply_range(Translator);
576 
577     // { DomainTarget[] -> Element[] }
578     isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal);
579 
580     isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt));
581     if (!SameVal)
582       return FD_NotApplicable;
583 
584     if (!DoIt)
585       return FD_CanForwardProfitably;
586 
587     if (!Access)
588       Access = TargetStmt->ensureValueRead(Inst);
589 
590     simplify(SameVal);
591     Access->setNewAccessRelation(SameVal);
592 
593     TotalReloads++;
594     NumReloads++;
595     return FD_DidForwardLeaf;
596   }
597 
598   /// Forwards a speculatively executable instruction.
599   ///
600   /// @param TargetStmt  The statement the operand tree will be copied to.
601   /// @param UseInst     The (possibly speculatable) instruction to forward.
602   /// @param DefStmt     The statement @p UseInst is defined in.
603   /// @param DefLoop     The loop which contains @p UseInst.
604   /// @param DoIt        If false, only determine whether an operand tree can be
605   ///                    forwarded. If true, carry out the forwarding. Do not
606   ///                    use DoIt==true if an operand tree is not known to be
607   ///                    forwardable.
608   ///
609   /// @return FD_NotApplicable  if @p UseInst is not speculatable.
610   ///         FD_CannotForward  if one of @p UseInst's operands is not
611   ///                           forwardable.
612   ///         FD_CanForwardTree if @p UseInst is forwardable.
613   ///         FD_DidForward     if @p DoIt was true.
614   ForwardingDecision forwardSpeculatable(ScopStmt *TargetStmt,
615                                          Instruction *UseInst,
616                                          ScopStmt *DefStmt, Loop *DefLoop,
617                                          bool DoIt) {
618     // PHIs, unless synthesizable, are not yet supported.
619     if (isa<PHINode>(UseInst))
620       return FD_NotApplicable;
621 
622     // Compatible instructions must satisfy the following conditions:
623     // 1. Idempotent (instruction will be copied, not moved; although its
624     //    original instance might be removed by simplification)
625     // 2. Not access memory (There might be memory writes between)
626     // 3. Not cause undefined behaviour (we might copy to a location when the
627     //    original instruction was no executed; this is currently not possible
628     //    because we do not forward PHINodes)
629     // 4. Not leak memory if executed multiple times (i.e. malloc)
630     //
631     // Instruction::mayHaveSideEffects is not sufficient because it considers
632     // malloc to not have side-effects. llvm::isSafeToSpeculativelyExecute is
633     // not sufficient because it allows memory accesses.
634     if (mayBeMemoryDependent(*UseInst))
635       return FD_NotApplicable;
636 
637     if (DoIt) {
638       // To ensure the right order, prepend this instruction before its
639       // operands. This ensures that its operands are inserted before the
640       // instruction using them.
641       // TODO: The operand tree is not really a tree, but a DAG. We should be
642       // able to handle DAGs without duplication.
643       TargetStmt->prependInstruction(UseInst);
644       NumInstructionsCopied++;
645       TotalInstructionsCopied++;
646     }
647 
648     for (Value *OpVal : UseInst->operand_values()) {
649       ForwardingDecision OpDecision =
650           forwardTree(TargetStmt, OpVal, DefStmt, DefLoop, DoIt);
651       switch (OpDecision) {
652       case FD_CannotForward:
653         assert(!DoIt);
654         return FD_CannotForward;
655 
656       case FD_CanForwardLeaf:
657       case FD_CanForwardProfitably:
658         assert(!DoIt);
659         break;
660 
661       case FD_DidForwardLeaf:
662       case FD_DidForwardTree:
663         assert(DoIt);
664         break;
665 
666       case FD_NotApplicable:
667         llvm_unreachable("forwardTree should never return FD_NotApplicable");
668       }
669     }
670 
671     if (DoIt)
672       return FD_DidForwardTree;
673     return FD_CanForwardProfitably;
674   }
675 
676   /// Determines whether an operand tree can be forwarded or carries out a
677   /// forwarding, depending on the @p DoIt flag.
678   ///
679   /// @param TargetStmt  The statement the operand tree will be copied to.
680   /// @param UseVal      The value (usually an instruction) which is root of an
681   ///                    operand tree.
682   /// @param UseStmt     The statement that uses @p UseVal.
683   /// @param UseLoop     The loop @p UseVal is used in.
684   /// @param DoIt        If false, only determine whether an operand tree can be
685   ///                    forwarded. If true, carry out the forwarding. Do not
686   ///                    use DoIt==true if an operand tree is not known to be
687   ///                    forwardable.
688   ///
689   /// @return If DoIt==false, return whether the operand tree can be forwarded.
690   ///         If DoIt==true, return FD_DidForward.
691   ForwardingDecision forwardTree(ScopStmt *TargetStmt, Value *UseVal,
692                                  ScopStmt *UseStmt, Loop *UseLoop, bool DoIt) {
693     ScopStmt *DefStmt = nullptr;
694     Loop *DefLoop = nullptr;
695 
696     // { DefDomain[] -> TargetDomain[] }
697     isl::map DefToTarget;
698 
699     VirtualUse VUse = VirtualUse::create(UseStmt, UseLoop, UseVal, true);
700     switch (VUse.getKind()) {
701     case VirtualUse::Constant:
702     case VirtualUse::Block:
703     case VirtualUse::Hoisted:
704       // These can be used anywhere without special considerations.
705       if (DoIt)
706         return FD_DidForwardTree;
707       return FD_CanForwardLeaf;
708 
709     case VirtualUse::Synthesizable: {
710       // ScopExpander will take care for of generating the code at the new
711       // location.
712       if (DoIt)
713         return FD_DidForwardTree;
714 
715       // Check if the value is synthesizable at the new location as well. This
716       // might be possible when leaving a loop for which ScalarEvolution is
717       // unable to derive the exit value for.
718       // TODO: If there is a LCSSA PHI at the loop exit, use that one.
719       // If the SCEV contains a SCEVAddRecExpr, we currently depend on that we
720       // do not forward past its loop header. This would require us to use a
721       // previous loop induction variable instead the current one. We currently
722       // do not allow forwarding PHI nodes, thus this should never occur (the
723       // only exception where no phi is necessary being an unreachable loop
724       // without edge from the outside).
725       VirtualUse TargetUse = VirtualUse::create(
726           S, TargetStmt, TargetStmt->getSurroundingLoop(), UseVal, true);
727       if (TargetUse.getKind() == VirtualUse::Synthesizable)
728         return FD_CanForwardLeaf;
729 
730       LLVM_DEBUG(
731           dbgs() << "    Synthesizable would not be synthesizable anymore: "
732                  << *UseVal << "\n");
733       return FD_CannotForward;
734     }
735 
736     case VirtualUse::ReadOnly:
737       // Note that we cannot return FD_CanForwardTree here. With a operand tree
738       // depth of 0, UseVal is the use in TargetStmt that we try to replace.
739       // With -polly-analyze-read-only-scalars=true we would ensure the
740       // existence of a MemoryAccess (which already exists for a leaf) and be
741       // removed again by tryForwardTree because it's goal is to remove this
742       // scalar MemoryAccess. It interprets FD_CanForwardTree as the permission
743       // to do so.
744       if (!DoIt)
745         return FD_CanForwardLeaf;
746 
747       // If we model read-only scalars, we need to create a MemoryAccess for it.
748       if (ModelReadOnlyScalars)
749         TargetStmt->ensureValueRead(UseVal);
750 
751       NumReadOnlyCopied++;
752       TotalReadOnlyCopied++;
753       return FD_DidForwardLeaf;
754 
755     case VirtualUse::Intra:
756       // Knowing that UseStmt and DefStmt are the same statement instance, just
757       // reuse the information about UseStmt for DefStmt
758       DefStmt = UseStmt;
759 
760       LLVM_FALLTHROUGH;
761     case VirtualUse::Inter:
762       Instruction *Inst = cast<Instruction>(UseVal);
763 
764       if (!DefStmt) {
765         DefStmt = S->getStmtFor(Inst);
766         if (!DefStmt)
767           return FD_CannotForward;
768       }
769 
770       DefLoop = LI->getLoopFor(Inst->getParent());
771 
772       ForwardingDecision SpeculativeResult =
773           forwardSpeculatable(TargetStmt, Inst, DefStmt, DefLoop, DoIt);
774       if (SpeculativeResult != FD_NotApplicable)
775         return SpeculativeResult;
776 
777       ForwardingDecision KnownResult = forwardKnownLoad(
778           TargetStmt, Inst, UseStmt, UseLoop, DefStmt, DefLoop, DoIt);
779       if (KnownResult != FD_NotApplicable)
780         return KnownResult;
781 
782       ForwardingDecision ReloadResult = reloadKnownContent(
783           TargetStmt, Inst, UseStmt, UseLoop, DefStmt, DefLoop, DoIt);
784       if (ReloadResult != FD_NotApplicable)
785         return ReloadResult;
786 
787       // When no method is found to forward the operand tree, we effectively
788       // cannot handle it.
789       LLVM_DEBUG(dbgs() << "    Cannot forward instruction: " << *Inst << "\n");
790       return FD_CannotForward;
791     }
792 
793     llvm_unreachable("Case unhandled");
794   }
795 
796   /// Try to forward an operand tree rooted in @p RA.
797   bool tryForwardTree(MemoryAccess *RA) {
798     assert(RA->isLatestScalarKind());
799     LLVM_DEBUG(dbgs() << "Trying to forward operand tree " << RA << "...\n");
800 
801     ScopStmt *Stmt = RA->getStatement();
802     Loop *InLoop = Stmt->getSurroundingLoop();
803 
804     isl::map TargetToUse;
805     if (!Known.is_null()) {
806       isl::space DomSpace = Stmt->getDomainSpace();
807       TargetToUse =
808           isl::map::identity(DomSpace.map_from_domain_and_range(DomSpace));
809     }
810 
811     ForwardingDecision Assessment =
812         forwardTree(Stmt, RA->getAccessValue(), Stmt, InLoop, false);
813     assert(Assessment != FD_DidForwardTree && Assessment != FD_DidForwardLeaf);
814     if (Assessment != FD_CanForwardProfitably)
815       return false;
816 
817     ForwardingDecision Execution =
818         forwardTree(Stmt, RA->getAccessValue(), Stmt, InLoop, true);
819     assert(((Execution == FD_DidForwardTree) ||
820             (Execution == FD_DidForwardLeaf)) &&
821            "A previous positive assessment must also be executable");
822 
823     if (Execution == FD_DidForwardTree)
824       Stmt->removeSingleMemoryAccess(RA);
825     return true;
826   }
827 
828   /// Return which SCoP this instance is processing.
829   Scop *getScop() const { return S; }
830 
831   /// Run the algorithm: Use value read accesses as operand tree roots and try
832   /// to forward them into the statement.
833   bool forwardOperandTrees() {
834     for (ScopStmt &Stmt : *S) {
835       bool StmtModified = false;
836 
837       // Because we are modifying the MemoryAccess list, collect them first to
838       // avoid iterator invalidation.
839       SmallVector<MemoryAccess *, 16> Accs;
840       for (MemoryAccess *RA : Stmt) {
841         if (!RA->isRead())
842           continue;
843         if (!RA->isLatestScalarKind())
844           continue;
845 
846         Accs.push_back(RA);
847       }
848 
849       for (MemoryAccess *RA : Accs) {
850         if (tryForwardTree(RA)) {
851           Modified = true;
852           StmtModified = true;
853           NumForwardedTrees++;
854           TotalForwardedTrees++;
855         }
856       }
857 
858       if (StmtModified) {
859         NumModifiedStmts++;
860         TotalModifiedStmts++;
861       }
862     }
863 
864     if (Modified) {
865       ScopsModified++;
866       S->realignParams();
867     }
868     return Modified;
869   }
870 
871   /// Print the pass result, performed transformations and the SCoP after the
872   /// transformation.
873   void print(raw_ostream &OS, int Indent = 0) {
874     printStatistics(OS, Indent);
875 
876     if (!Modified) {
877       // This line can easily be checked in regression tests.
878       OS << "ForwardOpTree executed, but did not modify anything\n";
879       return;
880     }
881 
882     printStatements(OS, Indent);
883   }
884 };
885 
886 /// Pass that redirects scalar reads to array elements that are known to contain
887 /// the same value.
888 ///
889 /// This reduces the number of scalar accesses and therefore potentially
890 /// increases the freedom of the scheduler. In the ideal case, all reads of a
891 /// scalar definition are redirected (We currently do not care about removing
892 /// the write in this case).  This is also useful for the main DeLICM pass as
893 /// there are less scalars to be mapped.
894 class ForwardOpTree : public ScopPass {
895 private:
896   /// The pass implementation, also holding per-scop data.
897   std::unique_ptr<ForwardOpTreeImpl> Impl;
898 
899 public:
900   static char ID;
901 
902   explicit ForwardOpTree() : ScopPass(ID) {}
903   ForwardOpTree(const ForwardOpTree &) = delete;
904   ForwardOpTree &operator=(const ForwardOpTree &) = delete;
905 
906   void getAnalysisUsage(AnalysisUsage &AU) const override {
907     AU.addRequiredTransitive<ScopInfoRegionPass>();
908     AU.addRequired<LoopInfoWrapperPass>();
909     AU.setPreservesAll();
910   }
911 
912   bool runOnScop(Scop &S) override {
913     // Free resources for previous SCoP's computation, if not yet done.
914     releaseMemory();
915 
916     LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
917 
918     {
919       IslMaxOperationsGuard MaxOpGuard(S.getIslCtx().get(), MaxOps, false);
920       Impl = std::make_unique<ForwardOpTreeImpl>(&S, &LI, MaxOpGuard);
921 
922       if (AnalyzeKnown) {
923         LLVM_DEBUG(dbgs() << "Prepare forwarders...\n");
924         Impl->computeKnownValues();
925       }
926 
927       LLVM_DEBUG(dbgs() << "Forwarding operand trees...\n");
928       Impl->forwardOperandTrees();
929 
930       if (MaxOpGuard.hasQuotaExceeded()) {
931         LLVM_DEBUG(dbgs() << "Not all operations completed because of "
932                              "max_operations exceeded\n");
933         KnownOutOfQuota++;
934       }
935     }
936 
937     LLVM_DEBUG(dbgs() << "\nFinal Scop:\n");
938     LLVM_DEBUG(dbgs() << S);
939 
940     // Update statistics
941     auto ScopStats = S.getStatistics();
942     NumValueWrites += ScopStats.NumValueWrites;
943     NumValueWritesInLoops += ScopStats.NumValueWritesInLoops;
944     NumPHIWrites += ScopStats.NumPHIWrites;
945     NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops;
946     NumSingletonWrites += ScopStats.NumSingletonWrites;
947     NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops;
948 
949     return false;
950   }
951 
952   void printScop(raw_ostream &OS, Scop &S) const override {
953     if (!Impl)
954       return;
955 
956     assert(Impl->getScop() == &S);
957     Impl->print(OS);
958   }
959 
960   void releaseMemory() override { Impl.reset(); }
961 }; // class ForwardOpTree
962 
963 char ForwardOpTree::ID;
964 } // namespace
965 
966 ScopPass *polly::createForwardOpTreePass() { return new ForwardOpTree(); }
967 
968 INITIALIZE_PASS_BEGIN(ForwardOpTree, "polly-optree",
969                       "Polly - Forward operand tree", false, false)
970 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
971 INITIALIZE_PASS_END(ForwardOpTree, "polly-optree",
972                     "Polly - Forward operand tree", false, false)
973