//===- ScopBuilder.cpp ----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Create a polyhedral description for a static control flow region.
//
// The pass creates a polyhedral description of the Scops detected by the SCoP
// detection derived from their LLVM-IR code.
//
//===----------------------------------------------------------------------===//

#include "polly/ScopBuilder.h"
#include "polly/Options.h"
#include "polly/ScopDetection.h"
#include "polly/ScopInfo.h"
#include "polly/Support/GICHelper.h"
#include "polly/Support/ISLTools.h"
#include "polly/Support/SCEVValidator.h"
#include "polly/Support/ScopHelper.h"
#include "polly/Support/VirtualInstruction.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/Delinearization.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/RegionIterator.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>

using namespace llvm;
using namespace polly;

#define DEBUG_TYPE "polly-scops"

STATISTIC(ScopFound, "Number of valid Scops");
STATISTIC(RichScopFound, "Number of Scops containing a loop");
STATISTIC(InfeasibleScops,
          "Number of SCoPs with statically infeasible context.");

bool polly::ModelReadOnlyScalars;

// The maximal number of dimensions we allow during invariant load construction.
// More complex access ranges will result in very high compile time and are also
// unlikely to result in good code. This value is very high and should only
// trigger for corner cases (e.g., the "dct_luma" function in h264, SPEC2006).
static unsigned const MaxDimensionsInAccessRange = 9;

static cl::opt<bool, true> XModelReadOnlyScalars(
    "polly-analyze-read-only-scalars",
    cl::desc("Model read-only scalar values in the scop description"),
    cl::location(ModelReadOnlyScalars), cl::Hidden, cl::init(true),
    cl::cat(PollyCategory));

static cl::opt<int>
    OptComputeOut("polly-analysis-computeout",
                  cl::desc("Bound the scop analysis by a maximal amount of "
                           "computational steps (0 means no bound)"),
                  cl::Hidden, cl::init(800000), cl::cat(PollyCategory));

static cl::opt<bool> PollyAllowDereferenceOfAllFunctionParams(
    "polly-allow-dereference-of-all-function-parameters",
    cl::desc(
        "Treat all parameters to functions that are pointers as dereferenceable."
        " This is useful for invariant load hoisting, since we can generate"
        " fewer runtime checks. This is only valid if all pointer parameters"
        " are always initialized, so that Polly can choose to hoist"
        " their loads."),
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));

static cl::opt<bool>
    PollyIgnoreInbounds("polly-ignore-inbounds",
                        cl::desc("Do not take inbounds assumptions at all"),
                        cl::Hidden, cl::init(false), cl::cat(PollyCategory));

static cl::opt<unsigned> RunTimeChecksMaxArraysPerGroup(
    "polly-rtc-max-arrays-per-group",
    cl::desc("The maximal number of arrays to compare in each alias group."),
    cl::Hidden, cl::init(20), cl::cat(PollyCategory));

static cl::opt<unsigned> RunTimeChecksMaxAccessDisjuncts(
    "polly-rtc-max-array-disjuncts",
    cl::desc("The maximal number of disjuncts allowed in memory accesses to "
             "build RTCs."),
    cl::Hidden, cl::init(8), cl::cat(PollyCategory));

static cl::opt<unsigned> RunTimeChecksMaxParameters(
    "polly-rtc-max-parameters",
    cl::desc("The maximal number of parameters allowed in RTCs."), cl::Hidden,
    cl::init(8), cl::cat(PollyCategory));

static cl::opt<bool> UnprofitableScalarAccs(
    "polly-unprofitable-scalar-accs",
    cl::desc("Count statements with scalar accesses as not optimizable"),
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));

static cl::opt<std::string> UserContextStr(
    "polly-context", cl::value_desc("isl parameter set"),
    cl::desc("Provide additional constraints on the context parameters"),
    cl::init(""), cl::cat(PollyCategory));

static cl::opt<bool> DetectReductions("polly-detect-reductions",
                                      cl::desc("Detect and exploit reductions"),
                                      cl::Hidden, cl::init(true),
                                      cl::cat(PollyCategory));

// Multiplicative reductions can be disabled separately as these kinds of
// operations can overflow easily. Additive reductions and bit operations
// are, in contrast, pretty stable.
static cl::opt<bool> DisableMultiplicativeReductions(
    "polly-disable-multiplicative-reductions",
    cl::desc("Disable multiplicative reductions"), cl::Hidden,
    cl::cat(PollyCategory));

enum class GranularityChoice { BasicBlocks, ScalarIndependence, Stores };

static cl::opt<GranularityChoice> StmtGranularity(
    "polly-stmt-granularity",
    cl::desc(
        "Algorithm to use for splitting basic blocks into multiple statements"),
    cl::values(clEnumValN(GranularityChoice::BasicBlocks, "bb",
                          "One statement per basic block"),
               clEnumValN(GranularityChoice::ScalarIndependence, "scalar-indep",
                          "Scalar independence heuristic"),
               clEnumValN(GranularityChoice::Stores, "store",
                          "Store-level granularity")),
    cl::init(GranularityChoice::ScalarIndependence), cl::cat(PollyCategory));

/// Helper to treat non-affine regions and basic blocks the same.
///
///{

/// Return the block that is the representing block for @p RN.
static inline BasicBlock *getRegionNodeBasicBlock(RegionNode *RN) {
  return RN->isSubRegion() ? RN->getNodeAs<Region>()->getEntry()
                           : RN->getNodeAs<BasicBlock>();
}

/// Return the @p idx'th block that is executed after @p RN.
static inline BasicBlock *
getRegionNodeSuccessor(RegionNode *RN, Instruction *TI, unsigned idx) {
  if (RN->isSubRegion()) {
    assert(idx == 0);
    return RN->getNodeAs<Region>()->getExit();
  }
  return TI->getSuccessor(idx);
}

static bool containsErrorBlock(RegionNode *RN, const Region &R,
                               ScopDetection *SD) {
  if (!RN->isSubRegion())
    return SD->isErrorBlock(*RN->getNodeAs<BasicBlock>(), R);
  for (BasicBlock *BB : RN->getNodeAs<Region>()->blocks())
    if (SD->isErrorBlock(*BB, R))
      return true;
  return false;
}

///}

/// Create a map to map from a given iteration to a subsequent iteration.
///
/// This map maps from SetSpace -> SetSpace where the dimension @p Dim
/// is incremented by one and all other dimensions are equal, e.g.,
///             [i0, i1, i2, i3] -> [i0, i1, i2 + 1, i3]
///
/// if @p Dim is 2 and @p SetSpace has 4 dimensions.
static isl::map createNextIterationMap(isl::space SetSpace, unsigned Dim) {
  isl::space MapSpace = SetSpace.map_from_set();
  isl::map NextIterationMap = isl::map::universe(MapSpace);
  for (unsigned u : rangeIslSize(0, NextIterationMap.domain_tuple_dim()))
    if (u != Dim)
      NextIterationMap =
          NextIterationMap.equate(isl::dim::in, u, isl::dim::out, u);
  isl::constraint C =
      isl::constraint::alloc_equality(isl::local_space(MapSpace));
  C = C.set_constant_si(1);
  C = C.set_coefficient_si(isl::dim::in, Dim, 1);
  C = C.set_coefficient_si(isl::dim::out, Dim, -1);
  NextIterationMap = NextIterationMap.add_constraint(C);
  return NextIterationMap;
}

/// Collect the bounded parts of @p S, i.e., the union of all of its bounded
/// basic sets.
static isl::set collectBoundedParts(isl::set S) {
  isl::set BoundedParts = isl::set::empty(S.get_space());
  for (isl::basic_set BSet : S.get_basic_set_list())
    if (BSet.is_bounded())
      BoundedParts = BoundedParts.unite(isl::set(BSet));
  return BoundedParts;
}

/// Compute the (un)bounded parts of @p S with respect to dimension @p Dim.
///
/// @returns A separation of @p S into first an unbounded then a bounded
///          subset, both with regard to the dimension @p Dim.
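///
/// For illustration (not part of the upstream documentation): with
///   S = { [i, j] : 0 <= i and 0 <= j } and Dim = 1,
/// dimension j stays unbounded even after i is limited by an artificial
/// parameter, so the result is (S, {}). With an additional upper bound
/// j <= n, every basic set is bounded and the result is ({}, S).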
static std::pair<isl::set, isl::set> partitionSetParts(isl::set S,
                                                       unsigned Dim) {
  for (unsigned u : rangeIslSize(0, S.tuple_dim()))
    S = S.lower_bound_si(isl::dim::set, u, 0);

  unsigned NumDimsS = unsignedFromIslSize(S.tuple_dim());
  isl::set OnlyDimS = S;

  // Remove dimensions that are greater than Dim as they are not interesting.
  assert(NumDimsS >= Dim + 1);
  OnlyDimS = OnlyDimS.project_out(isl::dim::set, Dim + 1, NumDimsS - Dim - 1);

  // Create artificial parametric upper bounds for dimensions smaller than Dim
  // as we are not interested in them.
  OnlyDimS = OnlyDimS.insert_dims(isl::dim::param, 0, Dim);

  for (unsigned u = 0; u < Dim; u++) {
    isl::constraint C = isl::constraint::alloc_inequality(
        isl::local_space(OnlyDimS.get_space()));
    C = C.set_coefficient_si(isl::dim::param, u, 1);
    C = C.set_coefficient_si(isl::dim::set, u, -1);
    OnlyDimS = OnlyDimS.add_constraint(C);
  }

  // Collect all bounded parts of OnlyDimS.
  isl::set BoundedParts = collectBoundedParts(OnlyDimS);

  // Create the dimensions greater than Dim again.
  BoundedParts =
      BoundedParts.insert_dims(isl::dim::set, Dim + 1, NumDimsS - Dim - 1);

  // Remove the artificial upper bound parameters again.
  BoundedParts = BoundedParts.remove_dims(isl::dim::param, 0, Dim);

  isl::set UnboundedParts = S.subtract(BoundedParts);
  return std::make_pair(UnboundedParts, BoundedParts);
}

/// Create the conditions under which @p L @p Pred @p R is true.
static isl::set buildConditionSet(ICmpInst::Predicate Pred, isl::pw_aff L,
                                  isl::pw_aff R) {
  switch (Pred) {
  case ICmpInst::ICMP_EQ:
    return L.eq_set(R);
  case ICmpInst::ICMP_NE:
    return L.ne_set(R);
  case ICmpInst::ICMP_SLT:
    return L.lt_set(R);
  case ICmpInst::ICMP_SLE:
    return L.le_set(R);
  case ICmpInst::ICMP_SGT:
    return L.gt_set(R);
  case ICmpInst::ICMP_SGE:
    return L.ge_set(R);
  case ICmpInst::ICMP_ULT:
    return L.lt_set(R);
  case ICmpInst::ICMP_UGT:
    return L.gt_set(R);
  case ICmpInst::ICMP_ULE:
    return L.le_set(R);
  case ICmpInst::ICMP_UGE:
    return L.ge_set(R);
  default:
    llvm_unreachable("Non integer predicate not supported");
  }
}

isl::set ScopBuilder::adjustDomainDimensions(isl::set Dom, Loop *OldL,
                                             Loop *NewL) {
  // If the loops are the same there is nothing to do.
  if (NewL == OldL)
    return Dom;

  int OldDepth = scop->getRelativeLoopDepth(OldL);
  int NewDepth = scop->getRelativeLoopDepth(NewL);
  // If both loops are non-affine loops there is nothing to do.
  if (OldDepth == -1 && NewDepth == -1)
    return Dom;

  // Distinguish three cases:
  //   1) The depth is the same but the loops are not.
  //      => One loop was left, one was entered.
  //   2) The depth increased from OldL to NewL.
  //      => One loop was entered, none was left.
  //   3) The depth decreased from OldL to NewL.
  //      => Loops were left; the difference of the depths defines how many.
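  // For illustration, with Dom = { [i0, i1] }: case 1 drops the constraints
  // on i1 and adds a fresh unconstrained dimension in its place, case 2
  // appends a new innermost dimension, giving { [i0, i1, i2] }, and case 3
  // with a depth difference of one projects i1 out entirely, giving { [i0] }.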
  if (OldDepth == NewDepth) {
    assert(OldL->getParentLoop() == NewL->getParentLoop());
    Dom = Dom.project_out(isl::dim::set, NewDepth, 1);
    Dom = Dom.add_dims(isl::dim::set, 1);
  } else if (OldDepth < NewDepth) {
    assert(OldDepth + 1 == NewDepth);
    auto &R = scop->getRegion();
    (void)R;
    assert(NewL->getParentLoop() == OldL ||
           ((!OldL || !R.contains(OldL)) && R.contains(NewL)));
    Dom = Dom.add_dims(isl::dim::set, 1);
  } else {
    assert(OldDepth > NewDepth);
    unsigned Diff = OldDepth - NewDepth;
    unsigned NumDim = unsignedFromIslSize(Dom.tuple_dim());
    assert(NumDim >= Diff);
    Dom = Dom.project_out(isl::dim::set, NumDim - Diff, Diff);
  }

  return Dom;
}

/// Compute the isl representation for the SCEV @p E in this BB.
///
/// @param BB               The BB for which isl representation is to be
/// computed.
/// @param InvalidDomainMap A map of BB to their invalid domains.
/// @param E                The SCEV that should be translated.
/// @param NonNegative      Flag to indicate that @p E has to be non-negative.
///
/// Note that this function will also adjust the invalid context accordingly.

__isl_give isl_pw_aff *
ScopBuilder::getPwAff(BasicBlock *BB,
                      DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
                      const SCEV *E, bool NonNegative) {
  PWACtx PWAC = scop->getPwAff(E, BB, NonNegative, &RecordedAssumptions);
  InvalidDomainMap[BB] = InvalidDomainMap[BB].unite(PWAC.second);
  return PWAC.first.release();
}

/// Build condition sets for unsigned ICmpInst(s).
/// Special handling is required for unsigned operands to ensure that if the
/// MSB (aka the sign bit) is set for an operand in an unsigned ICmpInst, it
/// wraps around as expected.
///
/// @param IsStrictUpperBound holds information on the predicate relation
/// between TestVal and UpperBound, i.e.,
/// TestVal < UpperBound  OR  TestVal <= UpperBound
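///
/// For illustration (not part of the upstream documentation): the unsigned
/// comparison 'i <u N' is modeled below as the two signed constraints
/// 0 <= i and i < N, with a non-negativity assumption taken on N.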
__isl_give isl_set *ScopBuilder::buildUnsignedConditionSets(
    BasicBlock *BB, Value *Condition, __isl_keep isl_set *Domain,
    const SCEV *SCEV_TestVal, const SCEV *SCEV_UpperBound,
    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
    bool IsStrictUpperBound) {
  // Do not take NonNeg assumption on TestVal
  // as it might have MSB (Sign bit) set.
  isl_pw_aff *TestVal = getPwAff(BB, InvalidDomainMap, SCEV_TestVal, false);
  // Take NonNeg assumption on UpperBound.
  isl_pw_aff *UpperBound =
      getPwAff(BB, InvalidDomainMap, SCEV_UpperBound, true);

  // 0 <= TestVal
  isl_set *First =
      isl_pw_aff_le_set(isl_pw_aff_zero_on_domain(isl_local_space_from_space(
                            isl_pw_aff_get_domain_space(TestVal))),
                        isl_pw_aff_copy(TestVal));

  isl_set *Second;
  if (IsStrictUpperBound)
    // TestVal < UpperBound
    Second = isl_pw_aff_lt_set(TestVal, UpperBound);
  else
    // TestVal <= UpperBound
    Second = isl_pw_aff_le_set(TestVal, UpperBound);

  isl_set *ConsequenceCondSet = isl_set_intersect(First, Second);
  return ConsequenceCondSet;
}

bool ScopBuilder::buildConditionSets(
    BasicBlock *BB, SwitchInst *SI, Loop *L, __isl_keep isl_set *Domain,
    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
    SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
  Value *Condition = getConditionFromTerminator(SI);
  assert(Condition && "No condition for switch");

  isl_pw_aff *LHS, *RHS;
  LHS = getPwAff(BB, InvalidDomainMap, SE.getSCEVAtScope(Condition, L));

  unsigned NumSuccessors = SI->getNumSuccessors();
  ConditionSets.resize(NumSuccessors);
  for (auto &Case : SI->cases()) {
    unsigned Idx = Case.getSuccessorIndex();
    ConstantInt *CaseValue = Case.getCaseValue();

    RHS = getPwAff(BB, InvalidDomainMap, SE.getSCEV(CaseValue));
    isl_set *CaseConditionSet =
        buildConditionSet(ICmpInst::ICMP_EQ, isl::manage_copy(LHS),
                          isl::manage(RHS))
            .release();
    ConditionSets[Idx] = isl_set_coalesce(
        isl_set_intersect(CaseConditionSet, isl_set_copy(Domain)));
  }

  assert(ConditionSets[0] == nullptr && "Default condition set was set");
  isl_set *ConditionSetUnion = isl_set_copy(ConditionSets[1]);
  for (unsigned u = 2; u < NumSuccessors; u++)
    ConditionSetUnion =
        isl_set_union(ConditionSetUnion, isl_set_copy(ConditionSets[u]));
  ConditionSets[0] = isl_set_subtract(isl_set_copy(Domain), ConditionSetUnion);

  isl_pw_aff_free(LHS);

  return true;
}

bool ScopBuilder::buildConditionSets(
    BasicBlock *BB, Value *Condition, Instruction *TI, Loop *L,
    __isl_keep isl_set *Domain,
    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
    SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
  isl_set *ConsequenceCondSet = nullptr;

  if (auto Load = dyn_cast<LoadInst>(Condition)) {
    const SCEV *LHSSCEV = SE.getSCEVAtScope(Load, L);
    const SCEV *RHSSCEV = SE.getZero(LHSSCEV->getType());
    bool NonNeg = false;
    isl_pw_aff *LHS = getPwAff(BB, InvalidDomainMap, LHSSCEV, NonNeg);
    isl_pw_aff *RHS = getPwAff(BB, InvalidDomainMap, RHSSCEV, NonNeg);
    ConsequenceCondSet = buildConditionSet(ICmpInst::ICMP_SLE, isl::manage(LHS),
                                           isl::manage(RHS))
                             .release();
  } else if (auto *PHI = dyn_cast<PHINode>(Condition)) {
    auto *Unique = dyn_cast<ConstantInt>(
        getUniqueNonErrorValue(PHI, &scop->getRegion(), &SD));
    assert(Unique &&
           "A PHINode condition should only be accepted by ScopDetection if "
           "getUniqueNonErrorValue returns non-NULL");

    if (Unique->isZero())
      ConsequenceCondSet = isl_set_empty(isl_set_get_space(Domain));
    else
      ConsequenceCondSet = isl_set_universe(isl_set_get_space(Domain));
  } else if (auto *CCond = dyn_cast<ConstantInt>(Condition)) {
    if (CCond->isZero())
      ConsequenceCondSet = isl_set_empty(isl_set_get_space(Domain));
    else
      ConsequenceCondSet = isl_set_universe(isl_set_get_space(Domain));
  } else if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) {
    auto Opcode = BinOp->getOpcode();
    assert(Opcode == Instruction::And || Opcode == Instruction::Or);
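    // For illustration (not part of the upstream comments): for a branch on
    // '(x < n) & (y < m)' the consequence set is the intersection of the two
    // operands' consequence sets, for '|' it is their union; the alternative
    // set is derived further down as the domain minus this consequence set.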

    bool Valid = buildConditionSets(BB, BinOp->getOperand(0), TI, L, Domain,
                                    InvalidDomainMap, ConditionSets) &&
                 buildConditionSets(BB, BinOp->getOperand(1), TI, L, Domain,
                                    InvalidDomainMap, ConditionSets);
    if (!Valid) {
      while (!ConditionSets.empty())
        isl_set_free(ConditionSets.pop_back_val());
      return false;
    }

    isl_set_free(ConditionSets.pop_back_val());
    isl_set *ConsCondPart0 = ConditionSets.pop_back_val();
    isl_set_free(ConditionSets.pop_back_val());
    isl_set *ConsCondPart1 = ConditionSets.pop_back_val();

    if (Opcode == Instruction::And)
      ConsequenceCondSet = isl_set_intersect(ConsCondPart0, ConsCondPart1);
    else
      ConsequenceCondSet = isl_set_union(ConsCondPart0, ConsCondPart1);
  } else {
    auto *ICond = dyn_cast<ICmpInst>(Condition);
    assert(ICond &&
           "Condition of exiting branch was neither constant nor ICmp!");

    Region &R = scop->getRegion();

    isl_pw_aff *LHS, *RHS;
    // For unsigned comparisons we assumed the sign bit of neither operand
    // to be set. The comparison is equal to a signed comparison under this
    // assumption.
    bool NonNeg = ICond->isUnsigned();
    const SCEV *LeftOperand = SE.getSCEVAtScope(ICond->getOperand(0), L),
               *RightOperand = SE.getSCEVAtScope(ICond->getOperand(1), L);

    LeftOperand = tryForwardThroughPHI(LeftOperand, R, SE, &SD);
    RightOperand = tryForwardThroughPHI(RightOperand, R, SE, &SD);

    switch (ICond->getPredicate()) {
    case ICmpInst::ICMP_ULT:
      ConsequenceCondSet =
          buildUnsignedConditionSets(BB, Condition, Domain, LeftOperand,
                                     RightOperand, InvalidDomainMap, true);
      break;
    case ICmpInst::ICMP_ULE:
      ConsequenceCondSet =
          buildUnsignedConditionSets(BB, Condition, Domain, LeftOperand,
                                     RightOperand, InvalidDomainMap, false);
      break;
    case ICmpInst::ICMP_UGT:
      ConsequenceCondSet =
          buildUnsignedConditionSets(BB, Condition, Domain, RightOperand,
                                     LeftOperand, InvalidDomainMap, true);
      break;
    case ICmpInst::ICMP_UGE:
      ConsequenceCondSet =
          buildUnsignedConditionSets(BB, Condition, Domain, RightOperand,
                                     LeftOperand, InvalidDomainMap, false);
      break;
    default:
      LHS = getPwAff(BB, InvalidDomainMap, LeftOperand, NonNeg);
      RHS = getPwAff(BB, InvalidDomainMap, RightOperand, NonNeg);
      ConsequenceCondSet = buildConditionSet(ICond->getPredicate(),
                                             isl::manage(LHS), isl::manage(RHS))
                               .release();
      break;
    }
  }

  // If no terminator was given we are only looking for parameter constraints
  // under which @p Condition is true/false.
  if (!TI)
    ConsequenceCondSet = isl_set_params(ConsequenceCondSet);
  assert(ConsequenceCondSet);
  ConsequenceCondSet = isl_set_coalesce(
      isl_set_intersect(ConsequenceCondSet, isl_set_copy(Domain)));

  isl_set *AlternativeCondSet = nullptr;
  bool TooComplex =
      isl_set_n_basic_set(ConsequenceCondSet) >= (int)MaxDisjunctsInDomain;

  if (!TooComplex) {
    AlternativeCondSet = isl_set_subtract(isl_set_copy(Domain),
                                          isl_set_copy(ConsequenceCondSet));
    TooComplex =
        isl_set_n_basic_set(AlternativeCondSet) >= (int)MaxDisjunctsInDomain;
  }

  if (TooComplex) {
    scop->invalidate(COMPLEXITY, TI ? TI->getDebugLoc() : DebugLoc(),
                     TI ? TI->getParent() : nullptr /* BasicBlock */);
    isl_set_free(AlternativeCondSet);
    isl_set_free(ConsequenceCondSet);
    return false;
  }

  ConditionSets.push_back(ConsequenceCondSet);
  ConditionSets.push_back(isl_set_coalesce(AlternativeCondSet));

  return true;
}

bool ScopBuilder::buildConditionSets(
    BasicBlock *BB, Instruction *TI, Loop *L, __isl_keep isl_set *Domain,
    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
    SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
  if (SwitchInst *SI = dyn_cast<SwitchInst>(TI))
    return buildConditionSets(BB, SI, L, Domain, InvalidDomainMap,
                              ConditionSets);

  assert(isa<BranchInst>(TI) && "Terminator was neither branch nor switch.");

  if (TI->getNumSuccessors() == 1) {
    ConditionSets.push_back(isl_set_copy(Domain));
    return true;
  }

  Value *Condition = getConditionFromTerminator(TI);
  assert(Condition && "No condition for Terminator");

  return buildConditionSets(BB, Condition, TI, L, Domain, InvalidDomainMap,
                            ConditionSets);
}

bool ScopBuilder::propagateDomainConstraints(
    Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  // Iterate over the region R and propagate the domain constraints from the
  // predecessors to the current node. In contrast to the
  // buildDomainsWithBranchConstraints function, this one will pull the domain
  // information from the predecessors instead of pushing it to the successors.
  // Additionally, we assume the domains to be already present in the domain
  // map here. However, we iterate again in reverse post order so we know all
  // predecessors have been visited before a block or non-affine subregion is
  // visited.

  ReversePostOrderTraversal<Region *> RTraversal(R);
  for (auto *RN : RTraversal) {
    // Recurse for affine subregions but go on for basic blocks and non-affine
    // subregions.
    if (RN->isSubRegion()) {
      Region *SubRegion = RN->getNodeAs<Region>();
      if (!scop->isNonAffineSubRegion(SubRegion)) {
        if (!propagateDomainConstraints(SubRegion, InvalidDomainMap))
          return false;
        continue;
      }
    }

    BasicBlock *BB = getRegionNodeBasicBlock(RN);
    isl::set &Domain = scop->getOrInitEmptyDomain(BB);
    assert(!Domain.is_null());

    // Under the union of all predecessor conditions we can reach this block.
    isl::set PredDom = getPredecessorDomainConstraints(BB, Domain);
    Domain = Domain.intersect(PredDom).coalesce();
    Domain = Domain.align_params(scop->getParamSpace());

    Loop *BBLoop = getRegionNodeLoop(RN, LI);
    if (BBLoop && BBLoop->getHeader() == BB && scop->contains(BBLoop))
      if (!addLoopBoundsToHeaderDomain(BBLoop, InvalidDomainMap))
        return false;
  }

  return true;
}

void ScopBuilder::propagateDomainConstraintsToRegionExit(
    BasicBlock *BB, Loop *BBLoop,
    SmallPtrSetImpl<BasicBlock *> &FinishedExitBlocks,
    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  // Check if the block @p BB is the entry of a region. If so we propagate its
  // domain to the exit block of the region. Otherwise we are done.
  auto *RI = scop->getRegion().getRegionInfo();
  auto *BBReg = RI ? RI->getRegionFor(BB) : nullptr;
  auto *ExitBB = BBReg ? BBReg->getExit() : nullptr;
  if (!BBReg || BBReg->getEntry() != BB || !scop->contains(ExitBB))
    return;

  // Do not propagate the domain if there is a loop backedge inside the region
  // that would prevent the exit block from being executed.
  auto *L = BBLoop;
  while (L && scop->contains(L)) {
    SmallVector<BasicBlock *, 4> LatchBBs;
    BBLoop->getLoopLatches(LatchBBs);
    for (auto *LatchBB : LatchBBs)
      if (BB != LatchBB && BBReg->contains(LatchBB))
        return;
    L = L->getParentLoop();
  }

  isl::set Domain = scop->getOrInitEmptyDomain(BB);
  assert(!Domain.is_null() && "Cannot propagate a nullptr");

  Loop *ExitBBLoop = getFirstNonBoxedLoopFor(ExitBB, LI, scop->getBoxedLoops());

  // Since the dimensions of @p BB and @p ExitBB might be different we have to
  // adjust the domain before we can propagate it.
  isl::set AdjustedDomain = adjustDomainDimensions(Domain, BBLoop, ExitBBLoop);
  isl::set &ExitDomain = scop->getOrInitEmptyDomain(ExitBB);

  // If the exit domain is not yet created we set it otherwise we "add" the
  // current domain.
  ExitDomain =
      !ExitDomain.is_null() ? AdjustedDomain.unite(ExitDomain) : AdjustedDomain;

  // Initialize the invalid domain.
  InvalidDomainMap[ExitBB] = ExitDomain.empty(ExitDomain.get_space());

  FinishedExitBlocks.insert(ExitBB);
}

isl::set ScopBuilder::getPredecessorDomainConstraints(BasicBlock *BB,
                                                      isl::set Domain) {
  // If @p BB is the ScopEntry we are done
  if (scop->getRegion().getEntry() == BB)
    return isl::set::universe(Domain.get_space());

  // The region info of this function.
  auto &RI = *scop->getRegion().getRegionInfo();

  Loop *BBLoop = getFirstNonBoxedLoopFor(BB, LI, scop->getBoxedLoops());

  // A domain to collect all predecessor domains, thus all conditions under
  // which the block is executed. To this end we start with the empty domain.
  isl::set PredDom = isl::set::empty(Domain.get_space());
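  // For illustration (not part of the upstream comments): if BB has two
  // predecessors reached under i < N and i >= N respectively, uniting their
  // (adjusted) domains makes PredDom unrestricted in that dimension again.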

  // Set of regions of which the entry block domain has been propagated to BB.
  // All predecessors inside any of the regions can be skipped.
  SmallSet<Region *, 8> PropagatedRegions;

  for (auto *PredBB : predecessors(BB)) {
    // Skip backedges.
    if (DT.dominates(BB, PredBB))
      continue;

    // If the predecessor is in a region we used for propagation we can skip it.
    auto PredBBInRegion = [PredBB](Region *PR) { return PR->contains(PredBB); };
    if (llvm::any_of(PropagatedRegions, PredBBInRegion)) {
      continue;
    }

    // Check if there is a valid region we can use for propagation, thus look
    // for a region that contains the predecessor and has @p BB as exit block.
    // FIXME: This was a side-effect-free (and possibly infinite) loop when
    //        committed and seems not to be needed.
    auto *PredR = RI.getRegionFor(PredBB);
    while (PredR->getExit() != BB && !PredR->contains(BB))
      PredR = PredR->getParent();

    // If a valid region for propagation was found use the entry of that region
    // for propagation, otherwise the PredBB directly.
    if (PredR->getExit() == BB) {
      PredBB = PredR->getEntry();
      PropagatedRegions.insert(PredR);
    }

    isl::set PredBBDom = scop->getDomainConditions(PredBB);
    Loop *PredBBLoop =
        getFirstNonBoxedLoopFor(PredBB, LI, scop->getBoxedLoops());
    PredBBDom = adjustDomainDimensions(PredBBDom, PredBBLoop, BBLoop);
    PredDom = PredDom.unite(PredBBDom);
  }

  return PredDom;
}

bool ScopBuilder::addLoopBoundsToHeaderDomain(
    Loop *L, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  int LoopDepth = scop->getRelativeLoopDepth(L);
  assert(LoopDepth >= 0 && "Loop in region should have at least depth one");

  BasicBlock *HeaderBB = L->getHeader();
  assert(scop->isDomainDefined(HeaderBB));
  isl::set &HeaderBBDom = scop->getOrInitEmptyDomain(HeaderBB);

  isl::map NextIterationMap =
      createNextIterationMap(HeaderBBDom.get_space(), LoopDepth);

  isl::set UnionBackedgeCondition = HeaderBBDom.empty(HeaderBBDom.get_space());

  SmallVector<BasicBlock *, 4> LatchBlocks;
  L->getLoopLatches(LatchBlocks);

  for (BasicBlock *LatchBB : LatchBlocks) {
    // If the latch is only reachable via error statements we skip it.
    if (!scop->isDomainDefined(LatchBB))
      continue;

    isl::set LatchBBDom = scop->getDomainConditions(LatchBB);

    isl::set BackedgeCondition;

    Instruction *TI = LatchBB->getTerminator();
    BranchInst *BI = dyn_cast<BranchInst>(TI);
    assert(BI && "Only branch instructions allowed in loop latches");

    if (BI->isUnconditional())
      BackedgeCondition = LatchBBDom;
    else {
      SmallVector<isl_set *, 8> ConditionSets;
      int idx = BI->getSuccessor(0) != HeaderBB;
      if (!buildConditionSets(LatchBB, TI, L, LatchBBDom.get(),
                              InvalidDomainMap, ConditionSets))
        return false;

      // Free the non back edge condition set as we do not need it.
      isl_set_free(ConditionSets[1 - idx]);

      BackedgeCondition = isl::manage(ConditionSets[idx]);
    }

    int LatchLoopDepth = scop->getRelativeLoopDepth(LI.getLoopFor(LatchBB));
    assert(LatchLoopDepth >= LoopDepth);
    BackedgeCondition = BackedgeCondition.project_out(
        isl::dim::set, LoopDepth + 1, LatchLoopDepth - LoopDepth);
    UnionBackedgeCondition = UnionBackedgeCondition.unite(BackedgeCondition);
  }

  isl::map ForwardMap = isl::map::lex_le(HeaderBBDom.get_space());
  for (int i = 0; i < LoopDepth; i++)
    ForwardMap = ForwardMap.equate(isl::dim::in, i, isl::dim::out, i);

  isl::set UnionBackedgeConditionComplement =
      UnionBackedgeCondition.complement();
  UnionBackedgeConditionComplement =
      UnionBackedgeConditionComplement.lower_bound_si(isl::dim::set, LoopDepth,
                                                      0);
  UnionBackedgeConditionComplement =
      UnionBackedgeConditionComplement.apply(ForwardMap);
  HeaderBBDom = HeaderBBDom.subtract(UnionBackedgeConditionComplement);
  HeaderBBDom = HeaderBBDom.apply(NextIterationMap);

  auto Parts = partitionSetParts(HeaderBBDom, LoopDepth);
  HeaderBBDom = Parts.second;
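  // Illustrative sketch (not part of the upstream comments): for a typical
  // rotated loop whose latch takes the backedge while 'i < N - 1' holds, the
  // header domain now roughly becomes { [i] : 0 <= i < N } (outer dimensions
  // omitted); the unbounded part is handled by the assumption recorded below.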

  // Check if there is a <nsw> tagged AddRec for this loop and if so do not
  // require a runtime check. The assumption is already implied by the <nsw>
  // tag.
  bool RequiresRTC = !scop->hasNSWAddRecForLoop(L);

  isl::set UnboundedCtx = Parts.first.params();
  recordAssumption(&RecordedAssumptions, INFINITELOOP, UnboundedCtx,
                   HeaderBB->getTerminator()->getDebugLoc(), AS_RESTRICTION,
                   nullptr, RequiresRTC);
  return true;
}

void ScopBuilder::buildInvariantEquivalenceClasses() {
  DenseMap<std::pair<const SCEV *, Type *>, LoadInst *> EquivClasses;

  const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
  for (LoadInst *LInst : RIL) {
    const SCEV *PointerSCEV = SE.getSCEV(LInst->getPointerOperand());

    Type *Ty = LInst->getType();
    LoadInst *&ClassRep = EquivClasses[std::make_pair(PointerSCEV, Ty)];
    if (ClassRep) {
      scop->addInvariantLoadMapping(LInst, ClassRep);
      continue;
    }

    ClassRep = LInst;
    scop->addInvariantEquivClass(
        InvariantEquivClassTy{PointerSCEV, MemoryAccessList(), {}, Ty});
  }
}

bool ScopBuilder::buildDomains(
    Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  bool IsOnlyNonAffineRegion = scop->isNonAffineSubRegion(R);
  auto *EntryBB = R->getEntry();
  auto *L = IsOnlyNonAffineRegion ? nullptr : LI.getLoopFor(EntryBB);
  int LD = scop->getRelativeLoopDepth(L);
  auto *S =
      isl_set_universe(isl_space_set_alloc(scop->getIslCtx().get(), 0, LD + 1));

  InvalidDomainMap[EntryBB] = isl::manage(isl_set_empty(isl_set_get_space(S)));
  isl::set Domain = isl::manage(S);
  scop->setDomain(EntryBB, Domain);

  if (IsOnlyNonAffineRegion)
    return !containsErrorBlock(R->getNode(), *R, &SD);

  if (!buildDomainsWithBranchConstraints(R, InvalidDomainMap))
    return false;

  if (!propagateDomainConstraints(R, InvalidDomainMap))
    return false;

  // Error blocks and blocks dominated by them have been assumed to never be
  // executed. Representing them in the Scop does not add any value. In fact,
  // it is likely to cause issues during construction of the ScopStmts. The
  // contents of error blocks have not been verified to be expressible and
  // will cause problems when building up a ScopStmt for them.
  // Furthermore, basic blocks dominated by error blocks may reference
  // instructions in the error block which, if the error block is not modeled,
  // can themselves not be constructed properly. To this end we will replace
  // the domains of error blocks and those only reachable via error blocks
  // with an empty set. Additionally, we will record for each block under which
  // parameter combination it would be reached via an error block in its
  // InvalidDomain. This information is needed during load hoisting.
  if (!propagateInvalidStmtDomains(R, InvalidDomainMap))
    return false;

  return true;
}

bool ScopBuilder::buildDomainsWithBranchConstraints(
    Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  // To create the domain for each block in R we iterate over all blocks and
  // subregions in R and propagate the conditions under which the current region
  // element is executed. To this end we iterate in reverse post order over R as
  // it ensures that we first visit all predecessors of a region node (either a
  // basic block or a subregion) before we visit the region node itself.
  // Initially, only the domain for the SCoP region entry block is set and from
  // there we propagate the current domain to all successors, however we add the
  // condition that the successor is actually executed next.
  // As we are only interested in non-loop carried constraints here we can
  // simply skip loop back edges.
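  // For illustration (not part of the upstream comments): in a simple diamond
  // 'entry -> (then | else) -> join' within the SCoP, the 'then' and 'else'
  // blocks receive the respective branch condition intersected with the entry
  // domain, and the join block receives the union of both, i.e., the entry
  // domain again.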

  SmallPtrSet<BasicBlock *, 8> FinishedExitBlocks;
  ReversePostOrderTraversal<Region *> RTraversal(R);
  for (auto *RN : RTraversal) {
    // Recurse for affine subregions but go on for basic blocks and non-affine
    // subregions.
    if (RN->isSubRegion()) {
      Region *SubRegion = RN->getNodeAs<Region>();
      if (!scop->isNonAffineSubRegion(SubRegion)) {
        if (!buildDomainsWithBranchConstraints(SubRegion, InvalidDomainMap))
          return false;
        continue;
      }
    }

    if (containsErrorBlock(RN, scop->getRegion(), &SD))
      scop->notifyErrorBlock();

    BasicBlock *BB = getRegionNodeBasicBlock(RN);
    Instruction *TI = BB->getTerminator();

    if (isa<UnreachableInst>(TI))
      continue;

    if (!scop->isDomainDefined(BB))
      continue;
    isl::set Domain = scop->getDomainConditions(BB);

    scop->updateMaxLoopDepth(unsignedFromIslSize(Domain.tuple_dim()));

    auto *BBLoop = getRegionNodeLoop(RN, LI);
    // Propagate the domain from BB directly to blocks that have a superset
    // domain, at the moment only region exit nodes of regions that start in BB.
    propagateDomainConstraintsToRegionExit(BB, BBLoop, FinishedExitBlocks,
                                           InvalidDomainMap);

    // If all successors of BB have had a domain set through the propagation
    // above, we do not need to build condition sets but can just skip this
    // block. However, it is important to note that this is a local property
    // with regards to the region @p R. To this end FinishedExitBlocks is a
    // local variable.
    auto IsFinishedRegionExit = [&FinishedExitBlocks](BasicBlock *SuccBB) {
      return FinishedExitBlocks.count(SuccBB);
    };
    if (std::all_of(succ_begin(BB), succ_end(BB), IsFinishedRegionExit))
      continue;

    // Build the condition sets for the successor nodes of the current region
    // node. If it is a non-affine subregion we will always execute the single
    // exit node, hence the single entry node domain is the condition set. For
    // basic blocks we use the helper function buildConditionSets.
    SmallVector<isl_set *, 8> ConditionSets;
    if (RN->isSubRegion())
      ConditionSets.push_back(Domain.copy());
    else if (!buildConditionSets(BB, TI, BBLoop, Domain.get(), InvalidDomainMap,
                                 ConditionSets))
      return false;

    // Now iterate over the successors and set their initial domain based on
    // their condition set. We skip back edges here and have to be careful when
    // we leave a loop not to keep constraints over a dimension that doesn't
    // exist anymore.
    assert(RN->isSubRegion() || TI->getNumSuccessors() == ConditionSets.size());
    for (unsigned u = 0, e = ConditionSets.size(); u < e; u++) {
      isl::set CondSet = isl::manage(ConditionSets[u]);
      BasicBlock *SuccBB = getRegionNodeSuccessor(RN, TI, u);

      // Skip blocks outside the region.
      if (!scop->contains(SuccBB))
        continue;

      // If we propagate the domain of some block to "SuccBB" we do not have to
      // adjust the domain.
      if (FinishedExitBlocks.count(SuccBB))
        continue;

      // Skip back edges.
      if (DT.dominates(SuccBB, BB))
        continue;

      Loop *SuccBBLoop =
          getFirstNonBoxedLoopFor(SuccBB, LI, scop->getBoxedLoops());

      CondSet = adjustDomainDimensions(CondSet, BBLoop, SuccBBLoop);

      // Set the domain for the successor or merge it with an existing domain in
      // case there are multiple paths (without loop back edges) to the
      // successor block.
      isl::set &SuccDomain = scop->getOrInitEmptyDomain(SuccBB);

      if (!SuccDomain.is_null()) {
        SuccDomain = SuccDomain.unite(CondSet).coalesce();
      } else {
        // Initialize the invalid domain.
        InvalidDomainMap[SuccBB] = CondSet.empty(CondSet.get_space());
        SuccDomain = CondSet;
      }

      SuccDomain = SuccDomain.detect_equalities();

      // Check if the maximal number of domain disjunctions was reached.
      // In case this happens we will clean up and bail.
      if (unsignedFromIslSize(SuccDomain.n_basic_set()) < MaxDisjunctsInDomain)
        continue;

      scop->invalidate(COMPLEXITY, DebugLoc());
      while (++u < ConditionSets.size())
        isl_set_free(ConditionSets[u]);
      return false;
    }
  }

  return true;
}

bool ScopBuilder::propagateInvalidStmtDomains(
    Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  ReversePostOrderTraversal<Region *> RTraversal(R);
  for (auto *RN : RTraversal) {

    // Recurse for affine subregions but go on for basic blocks and non-affine
    // subregions.
    if (RN->isSubRegion()) {
      Region *SubRegion = RN->getNodeAs<Region>();
      if (!scop->isNonAffineSubRegion(SubRegion)) {
        propagateInvalidStmtDomains(SubRegion, InvalidDomainMap);
        continue;
      }
    }

    bool ContainsErrorBlock = containsErrorBlock(RN, scop->getRegion(), &SD);
    BasicBlock *BB = getRegionNodeBasicBlock(RN);
    isl::set &Domain = scop->getOrInitEmptyDomain(BB);
    assert(!Domain.is_null() && "Cannot propagate a nullptr");

    isl::set InvalidDomain = InvalidDomainMap[BB];

    bool IsInvalidBlock = ContainsErrorBlock || Domain.is_subset(InvalidDomain);

    if (!IsInvalidBlock) {
      InvalidDomain = InvalidDomain.intersect(Domain);
    } else {
      InvalidDomain = Domain;
      isl::set DomPar = Domain.params();
      recordAssumption(&RecordedAssumptions, ERRORBLOCK, DomPar,
                       BB->getTerminator()->getDebugLoc(), AS_RESTRICTION);
      Domain = isl::set::empty(Domain.get_space());
    }

    if (InvalidDomain.is_empty()) {
      InvalidDomainMap[BB] = InvalidDomain;
      continue;
    }

    auto *BBLoop = getRegionNodeLoop(RN, LI);
    auto *TI = BB->getTerminator();
    unsigned NumSuccs = RN->isSubRegion() ? 1 : TI->getNumSuccessors();
    for (unsigned u = 0; u < NumSuccs; u++) {
      auto *SuccBB = getRegionNodeSuccessor(RN, TI, u);

      // Skip successors outside the SCoP.
      if (!scop->contains(SuccBB))
        continue;

      // Skip backedges.
      if (DT.dominates(SuccBB, BB))
        continue;

      Loop *SuccBBLoop =
          getFirstNonBoxedLoopFor(SuccBB, LI, scop->getBoxedLoops());

      auto AdjustedInvalidDomain =
          adjustDomainDimensions(InvalidDomain, BBLoop, SuccBBLoop);

      isl::set SuccInvalidDomain = InvalidDomainMap[SuccBB];
      SuccInvalidDomain = SuccInvalidDomain.unite(AdjustedInvalidDomain);
      SuccInvalidDomain = SuccInvalidDomain.coalesce();

      InvalidDomainMap[SuccBB] = SuccInvalidDomain;

      // Check if the maximal number of domain disjunctions was reached.
      // In case this happens we will bail.
      if (unsignedFromIslSize(SuccInvalidDomain.n_basic_set()) <
          MaxDisjunctsInDomain)
        continue;

      InvalidDomainMap.erase(BB);
      scop->invalidate(COMPLEXITY, TI->getDebugLoc(), TI->getParent());
      return false;
    }

    InvalidDomainMap[BB] = InvalidDomain;
  }

  return true;
}

void ScopBuilder::buildPHIAccesses(ScopStmt *PHIStmt, PHINode *PHI,
                                   Region *NonAffineSubRegion,
                                   bool IsExitBlock) {
  // PHI nodes that are in the exit block of the region, hence if IsExitBlock is
  // true, are not modeled as ordinary PHI nodes as they are not part of the
  // region. However, we model the operands in the predecessor blocks that are
  // part of the region as regular scalar accesses.

  // If we can synthesize a PHI we can skip it, however only if it is in
  // the region. If it is not it can only be in the exit block of the region.
  // In this case we model the operands but not the PHI itself.
  auto *Scope = LI.getLoopFor(PHI->getParent());
  if (!IsExitBlock && canSynthesize(PHI, *scop, &SE, Scope))
    return;

  // PHI nodes are modeled as if they had been demoted prior to the SCoP
  // detection. Hence, the PHI is a load of a new memory location in which the
  // incoming value was written at the end of the incoming basic block.
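  // For illustration (not part of the upstream comments): a PHI such as
  // '%p = phi [ %a, %bb1 ], [ %b, %bb2 ]' is modeled as a write of %a at the
  // end of %bb1 and a write of %b at the end of %bb2 (ensurePHIWrite below),
  // plus a read of that location in the PHI's own statement
  // (addPHIReadAccess).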
  bool OnlyNonAffineSubRegionOperands = true;
  for (unsigned u = 0; u < PHI->getNumIncomingValues(); u++) {
    Value *Op = PHI->getIncomingValue(u);
    BasicBlock *OpBB = PHI->getIncomingBlock(u);
    ScopStmt *OpStmt = scop->getIncomingStmtFor(PHI->getOperandUse(u));

    // Do not build PHI dependences inside a non-affine subregion, but make
    // sure that the necessary scalar values are still made available.
    if (NonAffineSubRegion && NonAffineSubRegion->contains(OpBB)) {
      auto *OpInst = dyn_cast<Instruction>(Op);
      if (!OpInst || !NonAffineSubRegion->contains(OpInst))
        ensureValueRead(Op, OpStmt);
      continue;
    }

    OnlyNonAffineSubRegionOperands = false;
    ensurePHIWrite(PHI, OpStmt, OpBB, Op, IsExitBlock);
  }

  if (!OnlyNonAffineSubRegionOperands && !IsExitBlock) {
    addPHIReadAccess(PHIStmt, PHI);
  }
}

void ScopBuilder::buildScalarDependences(ScopStmt *UserStmt,
                                         Instruction *Inst) {
  assert(!isa<PHINode>(Inst));

  // Pull-in required operands.
  for (Use &Op : Inst->operands())
    ensureValueRead(Op.get(), UserStmt);
}

// Create a sequence of two schedules. Either argument may be null and is
// interpreted as the empty schedule. Can also return null if both schedules are
// empty.
static isl::schedule combineInSequence(isl::schedule Prev, isl::schedule Succ) {
  if (Prev.is_null())
    return Succ;
  if (Succ.is_null())
    return Prev;

  return Prev.sequence(Succ);
}

// Create an isl_multi_union_aff that defines an identity mapping from the
// elements of USet to their N-th dimension.
//
// # Example:
//
//            Domain: { A[i,j]; B[i,j,k] }
//                 N: 1
//
// Resulting Mapping: { A[i,j] -> [(j)]; B[i,j,k] -> [(j)] }
//
// @param USet   A union set describing the elements for which to generate a
//               mapping.
// @param N      The dimension to map to.
// @returns      A mapping from USet to its N-th dimension.
static isl::multi_union_pw_aff mapToDimension(isl::union_set USet, unsigned N) {
  assert(!USet.is_null());
  assert(!USet.is_empty());

  auto Result = isl::union_pw_multi_aff::empty(USet.get_space());

  for (isl::set S : USet.get_set_list()) {
    unsigned Dim = unsignedFromIslSize(S.tuple_dim());
    assert(Dim >= N);
    auto PMA = isl::pw_multi_aff::project_out_map(S.get_space(), isl::dim::set,
                                                  N, Dim - N);
    if (N > 1)
      PMA = PMA.drop_dims(isl::dim::out, 0, N - 1);

    Result = Result.add_pw_multi_aff(PMA);
  }

  return isl::multi_union_pw_aff(isl::union_pw_multi_aff(Result));
}

void ScopBuilder::buildSchedule() {
  Loop *L = getLoopSurroundingScop(*scop, LI);
  LoopStackTy LoopStack({LoopStackElementTy(L, {}, 0)});
  buildSchedule(scop->getRegion().getNode(), LoopStack);
  assert(LoopStack.size() == 1 && LoopStack.back().L == L);
  scop->setScheduleTree(LoopStack[0].Schedule);
}

/// To generate a schedule for the elements in a Region we traverse the Region
/// in reverse-post-order and add the contained RegionNodes in traversal order
/// to the schedule of the loop that is currently at the top of the LoopStack.
/// For loop-free codes, this results in a correct sequential ordering.
///
/// Example:
///           bb1(0)
///         /     \.
///      bb2(1)   bb3(2)
///         \    /  \.
///          bb4(3)  bb5(4)
///             \   /
///              bb6(5)
///
/// Including loops requires additional processing. Whenever a loop header is
/// encountered, the corresponding loop is added to the @p LoopStack. Starting
/// from an empty schedule, we first process all RegionNodes that are within
/// this loop and complete the sequential schedule at this loop-level before
/// processing any other nodes. To implement this
/// loop-nodes-first-processing, the reverse post-order traversal is
/// insufficient. Hence, we additionally check if the traversal yields
/// sub-regions or blocks that are outside the last loop on the @p LoopStack.
/// These region-nodes are then queued and only traversed after all nodes
/// within the current loop have been processed.
void ScopBuilder::buildSchedule(Region *R, LoopStackTy &LoopStack) {
  Loop *OuterScopLoop = getLoopSurroundingScop(*scop, LI);

  ReversePostOrderTraversal<Region *> RTraversal(R);
  std::deque<RegionNode *> WorkList(RTraversal.begin(), RTraversal.end());
  std::deque<RegionNode *> DelayList;
  bool LastRNWaiting = false;

  // Iterate over the region @p R in reverse post-order but queue
  // sub-regions/blocks iff they are not part of the last encountered but not
  // completely traversed loop. The variable LastRNWaiting is a flag to indicate
  // that we queued the last sub-region/block from the reverse post-order
  // iterator. If it is set we have to explore the next sub-region/block from
  // the iterator (if any) to guarantee progress. If it is not set we first try
  // the next queued sub-region/blocks.
  while (!WorkList.empty() || !DelayList.empty()) {
    RegionNode *RN;

    if ((LastRNWaiting && !WorkList.empty()) || DelayList.empty()) {
      RN = WorkList.front();
      WorkList.pop_front();
      LastRNWaiting = false;
    } else {
      RN = DelayList.front();
      DelayList.pop_front();
    }

    Loop *L = getRegionNodeLoop(RN, LI);
    if (!scop->contains(L))
      L = OuterScopLoop;

    Loop *LastLoop = LoopStack.back().L;
    if (LastLoop != L) {
      if (LastLoop && !LastLoop->contains(L)) {
        LastRNWaiting = true;
        DelayList.push_back(RN);
        continue;
      }
      LoopStack.push_back({L, {}, 0});
    }
    buildSchedule(RN, LoopStack);
  }
}

buildSchedule(RegionNode * RN,LoopStackTy & LoopStack)1253 void ScopBuilder::buildSchedule(RegionNode *RN, LoopStackTy &LoopStack) {
1254   if (RN->isSubRegion()) {
1255     auto *LocalRegion = RN->getNodeAs<Region>();
1256     if (!scop->isNonAffineSubRegion(LocalRegion)) {
1257       buildSchedule(LocalRegion, LoopStack);
1258       return;
1259     }
1260   }
1261 
1262   assert(LoopStack.rbegin() != LoopStack.rend());
1263   auto LoopData = LoopStack.rbegin();
1264   LoopData->NumBlocksProcessed += getNumBlocksInRegionNode(RN);
1265 
1266   for (auto *Stmt : scop->getStmtListFor(RN)) {
1267     isl::union_set UDomain{Stmt->getDomain()};
1268     auto StmtSchedule = isl::schedule::from_domain(UDomain);
1269     LoopData->Schedule = combineInSequence(LoopData->Schedule, StmtSchedule);
1270   }
1271 
1272   // Check if we just processed the last node in this loop. If we did, finalize
1273   // the loop by:
1274   //
1275   //   - adding new schedule dimensions
1276   //   - folding the resulting schedule into the parent loop schedule
1277   //   - dropping the loop schedule from the LoopStack.
1278   //
1279   // Then continue to check surrounding loops, which might also have been
1280   // completed by this node.
1281   size_t Dimension = LoopStack.size();
1282   while (LoopData->L &&
1283          LoopData->NumBlocksProcessed == getNumBlocksInLoop(LoopData->L)) {
1284     isl::schedule Schedule = LoopData->Schedule;
1285     auto NumBlocksProcessed = LoopData->NumBlocksProcessed;
1286 
1287     assert(std::next(LoopData) != LoopStack.rend());
1288     Loop *L = LoopData->L;
1289     ++LoopData;
1290     --Dimension;
1291 
1292     if (!Schedule.is_null()) {
1293       isl::union_set Domain = Schedule.get_domain();
1294       isl::multi_union_pw_aff MUPA = mapToDimension(Domain, Dimension);
1295       Schedule = Schedule.insert_partial_schedule(MUPA);
1296 
1297       if (hasDisableAllTransformsHint(L)) {
1298         // If any of the loops has a disable_nonforced heuristic, mark the
1299         // entire SCoP as such. The ISL rescheduler can only reschedule the
1300         // SCoP in its entirety.
1301         // TODO: ScopDetection could avoid including such loops or wrap them as
1302         // boxed loops. It still needs to pass through loops with user-defined
1303         // metadata.
1304         scop->markDisableHeuristics();
1305       }
1306 
1307       // It is easier to insert the marks here than to do it retroactively.
1308       isl::id IslLoopId = createIslLoopAttr(scop->getIslCtx(), L);
1309       if (!IslLoopId.is_null())
1310         Schedule =
1311             Schedule.get_root().child(0).insert_mark(IslLoopId).get_schedule();
1312 
1313       LoopData->Schedule = combineInSequence(LoopData->Schedule, Schedule);
1314     }
1315 
1316     LoopData->NumBlocksProcessed += NumBlocksProcessed;
1317   }
1318   // Now pop all loops processed above from the LoopStack.
1319   LoopStack.erase(LoopStack.begin() + Dimension, LoopStack.end());
1320 }
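
// Illustrative sketch (assumed names, simplified to one statement in one
// loop): once the innermost loop around Stmt_body[i] is complete, the code
// above inserts a band for the loop dimension into the sequence built so far,
// conceptually:
//
//   isl::ctx Ctx = isl::ctx(isl_ctx_alloc());
//   isl::union_set Domain(Ctx, "{ Stmt_body[i] : 0 <= i < 100 }");
//   isl::schedule Sched = isl::schedule::from_domain(Domain);
//   // Result of mapToDimension() for the single loop dimension.
//   isl::multi_union_pw_aff IV(Ctx, "[{ Stmt_body[i] -> [(i)] }]");
//   Sched = Sched.insert_partial_schedule(IV); // band { Stmt_body[i] -> [i] }
//
// The finished loop schedule is then folded into the parent loop's schedule
// via combineInSequence() and the LoopStack entry is popped.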
1321 
1322 void ScopBuilder::buildEscapingDependences(Instruction *Inst) {
1323   // Check for uses of this instruction outside the scop. Because we do not
1324   // iterate over such instructions and therefore did not "ensure" the existence
1325   // of a write, we must determine such use here.
1326   if (scop->isEscaping(Inst))
1327     ensureValueWrite(Inst);
1328 }
1329 
1330 void ScopBuilder::addRecordedAssumptions() {
1331   for (auto &AS : llvm::reverse(RecordedAssumptions)) {
1332 
1333     if (!AS.BB) {
1334       scop->addAssumption(AS.Kind, AS.Set, AS.Loc, AS.Sign,
1335                           nullptr /* BasicBlock */, AS.RequiresRTC);
1336       continue;
1337     }
1338 
1339     // If the domain was deleted the assumptions are void.
1340     isl_set *Dom = scop->getDomainConditions(AS.BB).release();
1341     if (!Dom)
1342       continue;
1343 
1344     // If a basic block was given use its domain to simplify the assumption.
1345     // In case of restrictions we know they only have to hold on the domain,
1346     // thus we can intersect them with the domain of the block. However, for
1347     // assumptions the domain has to imply them, thus:
1348     //                     _              _____
1349     //   Dom => S   <==>   A v B   <==>   A - B
1350     //
1351     // To avoid the complement we will register A - B as a restriction not an
1352     // assumption.
1353     isl_set *S = AS.Set.copy();
1354     if (AS.Sign == AS_RESTRICTION)
1355       S = isl_set_params(isl_set_intersect(S, Dom));
1356     else /* (AS.Sign == AS_ASSUMPTION) */
1357       S = isl_set_params(isl_set_subtract(Dom, S));
1358 
1359     scop->addAssumption(AS.Kind, isl::manage(S), AS.Loc, AS_RESTRICTION, AS.BB,
1360                         AS.RequiresRTC);
1361   }
1362 }
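
// Worked example (assumed sets): suppose AS.BB has the domain
//   Dom = { S[i] : 0 <= i < N }
// and the recorded assumption set is
//   A   = { S[i] : i < 100 }.
// Registering "Dom implies A" directly would require a complement; instead
// the code above registers the parameter set of Dom - A,
//   params(Dom - A) = { : N >= 101 },
// as a restriction, which is equivalent to assuming N <= 100 for this SCoP.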
1363 
1364 void ScopBuilder::addUserAssumptions(
1365     AssumptionCache &AC, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
1366   for (auto &Assumption : AC.assumptions()) {
1367     auto *CI = dyn_cast_or_null<CallInst>(Assumption);
1368     if (!CI || CI->arg_size() != 1)
1369       continue;
1370 
1371     bool InScop = scop->contains(CI);
1372     if (!InScop && !scop->isDominatedBy(DT, CI->getParent()))
1373       continue;
1374 
1375     auto *L = LI.getLoopFor(CI->getParent());
1376     auto *Val = CI->getArgOperand(0);
1377     ParameterSetTy DetectedParams;
1378     auto &R = scop->getRegion();
1379     if (!isAffineConstraint(Val, &R, L, SE, DetectedParams)) {
1380       ORE.emit(
1381           OptimizationRemarkAnalysis(DEBUG_TYPE, "IgnoreUserAssumption", CI)
1382           << "Non-affine user assumption ignored.");
1383       continue;
1384     }
1385 
1386     // Collect all newly introduced parameters.
1387     ParameterSetTy NewParams;
1388     for (auto *Param : DetectedParams) {
1389       Param = extractConstantFactor(Param, SE).second;
1390       Param = scop->getRepresentingInvariantLoadSCEV(Param);
1391       if (scop->isParam(Param))
1392         continue;
1393       NewParams.insert(Param);
1394     }
1395 
1396     SmallVector<isl_set *, 2> ConditionSets;
1397     auto *TI = InScop ? CI->getParent()->getTerminator() : nullptr;
1398     BasicBlock *BB = InScop ? CI->getParent() : R.getEntry();
1399     auto *Dom = InScop ? isl_set_copy(scop->getDomainConditions(BB).get())
1400                        : isl_set_copy(scop->getContext().get());
1401     assert(Dom && "Cannot propagate a nullptr.");
1402     bool Valid = buildConditionSets(BB, Val, TI, L, Dom, InvalidDomainMap,
1403                                     ConditionSets);
1404     isl_set_free(Dom);
1405 
1406     if (!Valid)
1407       continue;
1408 
1409     isl_set *AssumptionCtx = nullptr;
1410     if (InScop) {
1411       AssumptionCtx = isl_set_complement(isl_set_params(ConditionSets[1]));
1412       isl_set_free(ConditionSets[0]);
1413     } else {
1414       AssumptionCtx = isl_set_complement(ConditionSets[1]);
1415       AssumptionCtx = isl_set_intersect(AssumptionCtx, ConditionSets[0]);
1416     }
1417 
1418     // Project out newly introduced parameters as they are not otherwise useful.
1419     if (!NewParams.empty()) {
1420       for (isl_size u = 0; u < isl_set_n_param(AssumptionCtx); u++) {
1421         auto *Id = isl_set_get_dim_id(AssumptionCtx, isl_dim_param, u);
1422         auto *Param = static_cast<const SCEV *>(isl_id_get_user(Id));
1423         isl_id_free(Id);
1424 
1425         if (!NewParams.count(Param))
1426           continue;
1427 
1428         AssumptionCtx =
1429             isl_set_project_out(AssumptionCtx, isl_dim_param, u--, 1);
1430       }
1431     }
1432     ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "UserAssumption", CI)
1433              << "Use user assumption: "
1434              << stringFromIslObj(AssumptionCtx, "null"));
1435     isl::set newContext =
1436         scop->getContext().intersect(isl::manage(AssumptionCtx));
1437     scop->setContext(newContext);
1438   }
1439 }
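
// Illustrative example (assumed IR): a user assumption that dominates the
// SCoP, e.g.
//
//   %c = icmp sgt i64 %n, 0
//   call void @llvm.assume(i1 %c)
//
// is translated into the condition set { : n > 0 }; after removing the
// complement, the assumed context becomes [n] -> { : n > 0 }. Parameters
// that only occur in the assumption itself are projected out again.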
1440 
1441 bool ScopBuilder::buildAccessMultiDimFixed(MemAccInst Inst, ScopStmt *Stmt) {
1442   Value *Val = Inst.getValueOperand();
1443   Type *ElementType = Val->getType();
1444   Value *Address = Inst.getPointerOperand();
1445   const SCEV *AccessFunction =
1446       SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
1447   const SCEVUnknown *BasePointer =
1448       dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
1449   enum MemoryAccess::AccessType AccType =
1450       isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;
1451 
1452   if (auto *BitCast = dyn_cast<BitCastInst>(Address))
1453     Address = BitCast->getOperand(0);
1454 
1455   auto *GEP = dyn_cast<GetElementPtrInst>(Address);
1456   if (!GEP || DL.getTypeAllocSize(GEP->getResultElementType()) !=
1457                   DL.getTypeAllocSize(ElementType))
1458     return false;
1459 
1460   SmallVector<const SCEV *, 4> Subscripts;
1461   SmallVector<int, 4> Sizes;
1462   getIndexExpressionsFromGEP(SE, GEP, Subscripts, Sizes);
1463   auto *BasePtr = GEP->getOperand(0);
1464 
1465   if (auto *BasePtrCast = dyn_cast<BitCastInst>(BasePtr))
1466     BasePtr = BasePtrCast->getOperand(0);
1467 
1468   // Check for identical base pointers to ensure that we do not miss index
1469   // offsets that have been added before this GEP is applied.
1470   if (BasePtr != BasePointer->getValue())
1471     return false;
1472 
1473   std::vector<const SCEV *> SizesSCEV;
1474 
1475   const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
1476 
1477   Loop *SurroundingLoop = Stmt->getSurroundingLoop();
1478   for (auto *Subscript : Subscripts) {
1479     InvariantLoadsSetTy AccessILS;
1480     if (!isAffineExpr(&scop->getRegion(), SurroundingLoop, Subscript, SE,
1481                       &AccessILS))
1482       return false;
1483 
1484     for (LoadInst *LInst : AccessILS)
1485       if (!ScopRIL.count(LInst))
1486         return false;
1487   }
1488 
1489   if (Sizes.empty())
1490     return false;
1491 
1492   SizesSCEV.push_back(nullptr);
1493 
1494   for (auto V : Sizes)
1495     SizesSCEV.push_back(SE.getSCEV(
1496         ConstantInt::get(IntegerType::getInt64Ty(BasePtr->getContext()), V)));
1497 
1498   addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
1499                  true, Subscripts, SizesSCEV, Val);
1500   return true;
1501 }
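
// Illustrative example (assumed source): for an access to a fixed-size
// multi-dimensional array, e.g.
//
//   float A[1024][512];
//   ... = A[i][j];
//
// the GEP directly encodes the inner dimension size, so the access is
// modeled as the two-dimensional relation
//   { Stmt[i, j] -> MemRef_A[i, j] }
// with sizes (*, 512), where the outermost extent stays unknown.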
1502 
1503 bool ScopBuilder::buildAccessMultiDimParam(MemAccInst Inst, ScopStmt *Stmt) {
1504   if (!PollyDelinearize)
1505     return false;
1506 
1507   Value *Address = Inst.getPointerOperand();
1508   Value *Val = Inst.getValueOperand();
1509   Type *ElementType = Val->getType();
1510   unsigned ElementSize = DL.getTypeAllocSize(ElementType);
1511   enum MemoryAccess::AccessType AccType =
1512       isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;
1513 
1514   const SCEV *AccessFunction =
1515       SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
1516   const SCEVUnknown *BasePointer =
1517       dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
1518 
1519   assert(BasePointer && "Could not find base pointer");
1520 
1521   auto &InsnToMemAcc = scop->getInsnToMemAccMap();
1522   auto AccItr = InsnToMemAcc.find(Inst);
1523   if (AccItr == InsnToMemAcc.end())
1524     return false;
1525 
1526   std::vector<const SCEV *> Sizes = {nullptr};
1527 
1528   Sizes.insert(Sizes.end(), AccItr->second.Shape->DelinearizedSizes.begin(),
1529                AccItr->second.Shape->DelinearizedSizes.end());
1530 
1531   // In case only the element size is contained in the 'Sizes' array, the
1532   // access does not access a real multi-dimensional array. Hence, we allow
1533   // the normal single-dimensional access construction to handle this.
1534   if (Sizes.size() == 1)
1535     return false;
1536 
1537   // Remove the element size. This information is already provided by the
1538   // ElementSize parameter. In case the element size of this access and the
1539   // element size used for delinearization differs the delinearization is
1540   // incorrect. Hence, we invalidate the scop.
1541   //
1542   // TODO: Handle delinearization with differing element sizes.
1543   auto DelinearizedSize =
1544       cast<SCEVConstant>(Sizes.back())->getAPInt().getSExtValue();
1545   Sizes.pop_back();
1546   if (ElementSize != DelinearizedSize)
1547     scop->invalidate(DELINEARIZATION, Inst->getDebugLoc(), Inst->getParent());
1548 
1549   addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
1550                  true, AccItr->second.DelinearizedSubscripts, Sizes, Val);
1551   return true;
1552 }
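
// Illustrative example (assumed source): with -polly-delinearize, an access
// with a parametric row size such as
//
//   void f(long n, long m, double A[][m]) { ... A[i][j] = ...; }
//
// appears in LLVM-IR as the single subscript i*m + j. The delinearization
// recorded by ScopDetection provides DelinearizedSubscripts = {i, j} and
// DelinearizedSizes = {m, 8} (element size last); after dropping the element
// size above, the access becomes { Stmt[i, j] -> MemRef_A[i, j] } with an
// outer dimension of parametric size m.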
1553 
1554 bool ScopBuilder::buildAccessMemIntrinsic(MemAccInst Inst, ScopStmt *Stmt) {
1555   auto *MemIntr = dyn_cast_or_null<MemIntrinsic>(Inst);
1556 
1557   if (MemIntr == nullptr)
1558     return false;
1559 
1560   auto *L = LI.getLoopFor(Inst->getParent());
1561   auto *LengthVal = SE.getSCEVAtScope(MemIntr->getLength(), L);
1562   assert(LengthVal);
1563 
1564   // Check if the length value is actually affine or if we overapproximate it.
1565   InvariantLoadsSetTy AccessILS;
1566   const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
1567 
1568   Loop *SurroundingLoop = Stmt->getSurroundingLoop();
1569   bool LengthIsAffine = isAffineExpr(&scop->getRegion(), SurroundingLoop,
1570                                      LengthVal, SE, &AccessILS);
1571   for (LoadInst *LInst : AccessILS)
1572     if (!ScopRIL.count(LInst))
1573       LengthIsAffine = false;
1574   if (!LengthIsAffine)
1575     LengthVal = nullptr;
1576 
1577   auto *DestPtrVal = MemIntr->getDest();
1578   assert(DestPtrVal);
1579 
1580   auto *DestAccFunc = SE.getSCEVAtScope(DestPtrVal, L);
1581   assert(DestAccFunc);
1582   // Ignore accesses to "NULL".
1583   // TODO: We could use this to optimize the region further, e.g., intersect
1584   //       the context with
1585   //          isl_set_complement(isl_set_params(getDomain()))
1586   //       as we know it would be undefined to execute this instruction anyway.
1587   if (DestAccFunc->isZero())
1588     return true;
1589 
1590   if (auto *U = dyn_cast<SCEVUnknown>(DestAccFunc)) {
1591     if (isa<ConstantPointerNull>(U->getValue()))
1592       return true;
1593   }
1594 
1595   auto *DestPtrSCEV = dyn_cast<SCEVUnknown>(SE.getPointerBase(DestAccFunc));
1596   assert(DestPtrSCEV);
1597   DestAccFunc = SE.getMinusSCEV(DestAccFunc, DestPtrSCEV);
1598   addArrayAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, DestPtrSCEV->getValue(),
1599                  IntegerType::getInt8Ty(DestPtrVal->getContext()),
1600                  LengthIsAffine, {DestAccFunc, LengthVal}, {nullptr},
1601                  Inst.getValueOperand());
1602 
1603   auto *MemTrans = dyn_cast<MemTransferInst>(MemIntr);
1604   if (!MemTrans)
1605     return true;
1606 
1607   auto *SrcPtrVal = MemTrans->getSource();
1608   assert(SrcPtrVal);
1609 
1610   auto *SrcAccFunc = SE.getSCEVAtScope(SrcPtrVal, L);
1611   assert(SrcAccFunc);
1612   // Ignore accesses to "NULL".
1613   // TODO: See above TODO
1614   if (SrcAccFunc->isZero())
1615     return true;
1616 
1617   auto *SrcPtrSCEV = dyn_cast<SCEVUnknown>(SE.getPointerBase(SrcAccFunc));
1618   assert(SrcPtrSCEV);
1619   SrcAccFunc = SE.getMinusSCEV(SrcAccFunc, SrcPtrSCEV);
1620   addArrayAccess(Stmt, Inst, MemoryAccess::READ, SrcPtrSCEV->getValue(),
1621                  IntegerType::getInt8Ty(SrcPtrVal->getContext()),
1622                  LengthIsAffine, {SrcAccFunc, LengthVal}, {nullptr},
1623                  Inst.getValueOperand());
1624 
1625   return true;
1626 }
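
// Illustrative example (assumed call): a transfer intrinsic such as
//
//   memcpy(Dst, Src, n);   // llvm.memcpy with length %n
//
// is modeled as two byte-granular accesses on i8 arrays,
//   MUST_WRITE { Stmt[] -> MemRef_Dst[o] : 0 <= o < n }
//   READ       { Stmt[] -> MemRef_Src[o] : 0 <= o < n }
// provided the length is affine; otherwise (LengthVal == nullptr above) the
// accessed ranges are over-approximated.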
1627 
1628 bool ScopBuilder::buildAccessCallInst(MemAccInst Inst, ScopStmt *Stmt) {
1629   auto *CI = dyn_cast_or_null<CallInst>(Inst);
1630 
1631   if (CI == nullptr)
1632     return false;
1633 
1634   if (CI->doesNotAccessMemory() || isIgnoredIntrinsic(CI) || isDebugCall(CI))
1635     return true;
1636 
1637   bool ReadOnly = false;
1638   auto *AF = SE.getConstant(IntegerType::getInt64Ty(CI->getContext()), 0);
1639   auto *CalledFunction = CI->getCalledFunction();
1640   switch (AA.getModRefBehavior(CalledFunction)) {
1641   case FMRB_UnknownModRefBehavior:
1642     llvm_unreachable("Unknown mod ref behaviour cannot be represented.");
1643   case FMRB_DoesNotAccessMemory:
1644     return true;
1645   case FMRB_OnlyWritesMemory:
1646   case FMRB_OnlyWritesInaccessibleMem:
1647   case FMRB_OnlyWritesInaccessibleOrArgMem:
1648   case FMRB_OnlyAccessesInaccessibleMem:
1649   case FMRB_OnlyAccessesInaccessibleOrArgMem:
1650     return false;
1651   case FMRB_OnlyReadsMemory:
1652   case FMRB_OnlyReadsInaccessibleMem:
1653   case FMRB_OnlyReadsInaccessibleOrArgMem:
1654     GlobalReads.emplace_back(Stmt, CI);
1655     return true;
1656   case FMRB_OnlyReadsArgumentPointees:
1657     ReadOnly = true;
1658     LLVM_FALLTHROUGH;
1659   case FMRB_OnlyWritesArgumentPointees:
1660   case FMRB_OnlyAccessesArgumentPointees: {
1661     auto AccType = ReadOnly ? MemoryAccess::READ : MemoryAccess::MAY_WRITE;
1662     Loop *L = LI.getLoopFor(Inst->getParent());
1663     for (const auto &Arg : CI->args()) {
1664       if (!Arg->getType()->isPointerTy())
1665         continue;
1666 
1667       auto *ArgSCEV = SE.getSCEVAtScope(Arg, L);
1668       if (ArgSCEV->isZero())
1669         continue;
1670 
1671       if (auto *U = dyn_cast<SCEVUnknown>(ArgSCEV)) {
1672         if (isa<ConstantPointerNull>(U->getValue()))
1673           return true;
1674       }
1675 
1676       auto *ArgBasePtr = cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV));
1677       addArrayAccess(Stmt, Inst, AccType, ArgBasePtr->getValue(),
1678                      ArgBasePtr->getType(), false, {AF}, {nullptr}, CI);
1679     }
1680     return true;
1681   }
1682   }
1683 
1684   return true;
1685 }
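
// Illustrative example (assumed attributes): for a call that is known to only
// read the memory its pointer arguments point to, e.g. a function declared
// with the equivalent of 'readonly' + 'argmemonly',
//
//   %s = call double @dot(ptr %A, ptr %B, i64 %n)
//
// every pointer argument yields a non-affine READ of its whole underlying
// array (subscript {AF} = 0, unknown extent); the argument-writing cases are
// handled the same way with MAY_WRITE.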
1686 
1687 void ScopBuilder::buildAccessSingleDim(MemAccInst Inst, ScopStmt *Stmt) {
1688   Value *Address = Inst.getPointerOperand();
1689   Value *Val = Inst.getValueOperand();
1690   Type *ElementType = Val->getType();
1691   enum MemoryAccess::AccessType AccType =
1692       isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;
1693 
1694   const SCEV *AccessFunction =
1695       SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
1696   const SCEVUnknown *BasePointer =
1697       dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
1698 
1699   assert(BasePointer && "Could not find base pointer");
1700   AccessFunction = SE.getMinusSCEV(AccessFunction, BasePointer);
1701 
1702   // Check if the access depends on a loop contained in a non-affine subregion.
1703   bool isVariantInNonAffineLoop = false;
1704   SetVector<const Loop *> Loops;
1705   findLoops(AccessFunction, Loops);
1706   for (const Loop *L : Loops)
1707     if (Stmt->contains(L)) {
1708       isVariantInNonAffineLoop = true;
1709       break;
1710     }
1711 
1712   InvariantLoadsSetTy AccessILS;
1713 
1714   Loop *SurroundingLoop = Stmt->getSurroundingLoop();
1715   bool IsAffine = !isVariantInNonAffineLoop &&
1716                   isAffineExpr(&scop->getRegion(), SurroundingLoop,
1717                                AccessFunction, SE, &AccessILS);
1718 
1719   const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
1720   for (LoadInst *LInst : AccessILS)
1721     if (!ScopRIL.count(LInst))
1722       IsAffine = false;
1723 
1724   if (!IsAffine && AccType == MemoryAccess::MUST_WRITE)
1725     AccType = MemoryAccess::MAY_WRITE;
1726 
1727   addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
1728                  IsAffine, {AccessFunction}, {nullptr}, Val);
1729 }
1730 
1731 void ScopBuilder::buildMemoryAccess(MemAccInst Inst, ScopStmt *Stmt) {
1732   if (buildAccessMemIntrinsic(Inst, Stmt))
1733     return;
1734 
1735   if (buildAccessCallInst(Inst, Stmt))
1736     return;
1737 
1738   if (buildAccessMultiDimFixed(Inst, Stmt))
1739     return;
1740 
1741   if (buildAccessMultiDimParam(Inst, Stmt))
1742     return;
1743 
1744   buildAccessSingleDim(Inst, Stmt);
1745 }
1746 
1747 void ScopBuilder::buildAccessFunctions() {
1748   for (auto &Stmt : *scop) {
1749     if (Stmt.isBlockStmt()) {
1750       buildAccessFunctions(&Stmt, *Stmt.getBasicBlock());
1751       continue;
1752     }
1753 
1754     Region *R = Stmt.getRegion();
1755     for (BasicBlock *BB : R->blocks())
1756       buildAccessFunctions(&Stmt, *BB, R);
1757   }
1758 
1759   // Build write accesses for values that are used after the SCoP.
1760   // The instructions defining them might be synthesizable and therefore not
1761   // contained in any statement, hence we iterate over the original instructions
1762   // to identify all escaping values.
1763   for (BasicBlock *BB : scop->getRegion().blocks()) {
1764     for (Instruction &Inst : *BB)
1765       buildEscapingDependences(&Inst);
1766   }
1767 }
1768 
1769 bool ScopBuilder::shouldModelInst(Instruction *Inst, Loop *L) {
1770   return !Inst->isTerminator() && !isIgnoredIntrinsic(Inst) &&
1771          !canSynthesize(Inst, *scop, &SE, L);
1772 }
1773 
1774 /// Generate a name for a statement.
1775 ///
1776 /// @param BB     The basic block the statement will represent.
1777 /// @param BBIdx  The index of the @p BB relative to other BBs/regions.
1778 /// @param Count  The index of the created statement in @p BB.
1779 /// @param IsMain Whether this is the main statement for @p BB. If true,
1780 ///               no suffix will be added.
1781 /// @param IsLast Uses a special indicator for the last statement of a BB.
1782 static std::string makeStmtName(BasicBlock *BB, long BBIdx, int Count,
1783                                 bool IsMain, bool IsLast = false) {
1784   std::string Suffix;
1785   if (!IsMain) {
1786     if (UseInstructionNames)
1787       Suffix = '_';
1788     if (IsLast)
1789       Suffix += "last";
1790     else if (Count < 26)
1791       Suffix += 'a' + Count;
1792     else
1793       Suffix += std::to_string(Count);
1794   }
1795   return getIslCompatibleName("Stmt", BB, BBIdx, Suffix, UseInstructionNames);
1796 }
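
// Illustrative examples (assumed block name %for.body): with instruction
// names enabled, the main statement of the block is called "Stmt_for_body"
// (no suffix), further statements get suffixes like "_a", "_b", ..., and the
// epilogue statement becomes "Stmt_for_body_last". With instruction names
// disabled, the running BBIdx is used instead, e.g. "Stmt3".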
1797 
1798 /// Generate a name for a statement that represents a non-affine subregion.
1799 ///
1800 /// @param R    The region the statement will represent.
1801 /// @param RIdx The index of the @p R relative to other BBs/regions.
1802 static std::string makeStmtName(Region *R, long RIdx) {
1803   return getIslCompatibleName("Stmt", R->getNameStr(), RIdx, "",
1804                               UseInstructionNames);
1805 }
1806 
1807 void ScopBuilder::buildSequentialBlockStmts(BasicBlock *BB, bool SplitOnStore) {
1808   Loop *SurroundingLoop = LI.getLoopFor(BB);
1809 
1810   int Count = 0;
1811   long BBIdx = scop->getNextStmtIdx();
1812   std::vector<Instruction *> Instructions;
1813   for (Instruction &Inst : *BB) {
1814     if (shouldModelInst(&Inst, SurroundingLoop))
1815       Instructions.push_back(&Inst);
1816     if (Inst.getMetadata("polly_split_after") ||
1817         (SplitOnStore && isa<StoreInst>(Inst))) {
1818       std::string Name = makeStmtName(BB, BBIdx, Count, Count == 0);
1819       scop->addScopStmt(BB, Name, SurroundingLoop, Instructions);
1820       Count++;
1821       Instructions.clear();
1822     }
1823   }
1824 
1825   std::string Name = makeStmtName(BB, BBIdx, Count, Count == 0);
1826   scop->addScopStmt(BB, Name, SurroundingLoop, Instructions);
1827 }
1828 
1829 /// Is @p Inst an ordered instruction?
1830 ///
1831 /// An unordered instruction is an instruction such that a sequence of
1832 /// unordered instructions can be permuted without changing semantics. Any
1833 /// instruction for which this is not always the case is ordered.
1834 static bool isOrderedInstruction(Instruction *Inst) {
1835   return Inst->mayHaveSideEffects() || Inst->mayReadOrWriteMemory();
1836 }
1837 
1838 /// Join instructions to the same statement if one uses the scalar result of the
1839 /// other.
1840 static void joinOperandTree(EquivalenceClasses<Instruction *> &UnionFind,
1841                             ArrayRef<Instruction *> ModeledInsts) {
1842   for (Instruction *Inst : ModeledInsts) {
1843     if (isa<PHINode>(Inst))
1844       continue;
1845 
1846     for (Use &Op : Inst->operands()) {
1847       Instruction *OpInst = dyn_cast<Instruction>(Op.get());
1848       if (!OpInst)
1849         continue;
1850 
1851       // Check if OpInst is in the BB and is a modeled instruction.
1852       auto OpVal = UnionFind.findValue(OpInst);
1853       if (OpVal == UnionFind.end())
1854         continue;
1855 
1856       UnionFind.unionSets(Inst, OpInst);
1857     }
1858   }
1859 }
1860 
1861 /// Ensure that the order of ordered instructions does not change.
1862 ///
1863 /// If we encounter an ordered instruction enclosed in instructions belonging to
1864 /// a different statement (which might as well contain ordered instructions, but
1865 /// this is not tested here), join them.
1866 static void
1867 joinOrderedInstructions(EquivalenceClasses<Instruction *> &UnionFind,
1868                         ArrayRef<Instruction *> ModeledInsts) {
1869   SetVector<Instruction *> SeenLeaders;
1870   for (Instruction *Inst : ModeledInsts) {
1871     if (!isOrderedInstruction(Inst))
1872       continue;
1873 
1874     Instruction *Leader = UnionFind.getLeaderValue(Inst);
1875     // Since previous iterations might have merged sets, some items in
1876     // SeenLeaders are not leaders anymore. However, the new leader of
1877     // previously merged instructions must be one of the former leaders of
1878     // these merged instructions.
1879     bool Inserted = SeenLeaders.insert(Leader);
1880     if (Inserted)
1881       continue;
1882 
1883     // Merge statements to close holes. Say, we have already seen statements A
1884     // and B, in this order. Then we see an instruction of A again and we would
1885     // see the pattern "A B A". This loop joins all statements seen since the
1886     // earlier occurrence of A back into A, closing the hole.
1887     for (Instruction *Prev : reverse(SeenLeaders)) {
1888       // We are backtracking from the last element until we see Inst's leader
1889       // in SeenLeaders and merge all into one set. Although leaders of
1890       // instructions change during the execution of this loop, it's irrelevant
1891       // as we are just searching for the element that we already confirmed is
1892       // in the list.
1893       if (Prev == Leader)
1894         break;
1895       UnionFind.unionSets(Prev, Leader);
1896     }
1897   }
1898 }
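
// Illustrative example (assumed instructions): consider a block where the
// operand-tree join has produced the preliminary grouping
//
//   %v = load double, ptr %p      ; leader A (ordered)
//   store double %x, ptr %q       ; leader B (ordered)
//   store double %v, ptr %r       ; leader A again (uses %v from A)
//
// The scan above sees the leader pattern "A B A" and therefore merges B into
// A; otherwise the two parts of A could not be emitted as a single statement
// without reordering its memory accesses relative to B.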
1899 
1900 /// If the BasicBlock has an edge from itself, ensure that the PHI WRITEs for
1901 /// the incoming values from this block are executed after the PHI READ.
1902 ///
1903 /// Otherwise it could overwrite the incoming value from before the BB with the
1904 /// value for the next execution. This can happen if the PHI WRITE is added to
1905 /// the statement with the instruction that defines the incoming value (instead
1906 /// of the last statement of the same BB). To ensure that the PHI READ and WRITE
1907 /// are in order, we put both into the statement. PHI WRITEs are always executed
1908 /// after PHI READs when they are in the same statement.
1909 ///
1910 /// TODO: This is an overpessimization. We only have to ensure that the PHI
1911 /// WRITE is not put into a statement containing the PHI itself. That could also
1912 /// be done by
1913 /// - having all (strongly connected) PHIs in a single statement,
1914 /// - unite only the PHIs in the operand tree of the PHI WRITE (because it only
1915 ///   has a chance of being lifted before a PHI by being in a statement with a
1916 ///   PHI that comes before in the basic block), or
1917 /// - when uniting statements, ensure that no (relevant) PHIs are overtaken.
1918 static void joinOrderedPHIs(EquivalenceClasses<Instruction *> &UnionFind,
1919                             ArrayRef<Instruction *> ModeledInsts) {
1920   for (Instruction *Inst : ModeledInsts) {
1921     PHINode *PHI = dyn_cast<PHINode>(Inst);
1922     if (!PHI)
1923       continue;
1924 
1925     int Idx = PHI->getBasicBlockIndex(PHI->getParent());
1926     if (Idx < 0)
1927       continue;
1928 
1929     Instruction *IncomingVal =
1930         dyn_cast<Instruction>(PHI->getIncomingValue(Idx));
1931     if (!IncomingVal)
1932       continue;
1933 
1934     UnionFind.unionSets(PHI, IncomingVal);
1935   }
1936 }
1937 
1938 void ScopBuilder::buildEqivClassBlockStmts(BasicBlock *BB) {
1939   Loop *L = LI.getLoopFor(BB);
1940 
1941   // Extracting out modeled instructions saves us from checking
1942   // shouldModelInst() repeatedly.
1943   SmallVector<Instruction *, 32> ModeledInsts;
1944   EquivalenceClasses<Instruction *> UnionFind;
1945   Instruction *MainInst = nullptr, *MainLeader = nullptr;
1946   for (Instruction &Inst : *BB) {
1947     if (!shouldModelInst(&Inst, L))
1948       continue;
1949     ModeledInsts.push_back(&Inst);
1950     UnionFind.insert(&Inst);
1951 
1952     // When a BB is split into multiple statements, the main statement is the
1953     // one containing the 'main' instruction. We select the first instruction
1954     // that is unlikely to be removed (because it has side-effects) as the main
1955     // one. It is used to ensure that at least one statement from the bb has the
1956     // same name as with -polly-stmt-granularity=bb.
1957     if (!MainInst && (isa<StoreInst>(Inst) ||
1958                       (isa<CallInst>(Inst) && !isa<IntrinsicInst>(Inst))))
1959       MainInst = &Inst;
1960   }
1961 
1962   joinOperandTree(UnionFind, ModeledInsts);
1963   joinOrderedInstructions(UnionFind, ModeledInsts);
1964   joinOrderedPHIs(UnionFind, ModeledInsts);
1965 
1966   // The list of instructions per statement (each statement is represented by
1967   // its leader instruction).
1968   MapVector<Instruction *, std::vector<Instruction *>> LeaderToInstList;
1969 
1970   // The order of statements must be preserved w.r.t. their ordered
1971   // instructions. Without this explicit scan, we would also use non-ordered
1972   // instructions (whose order is arbitrary) to determine statement order.
1973   for (Instruction *Inst : ModeledInsts) {
1974     if (!isOrderedInstruction(Inst))
1975       continue;
1976 
1977     auto LeaderIt = UnionFind.findLeader(Inst);
1978     if (LeaderIt == UnionFind.member_end())
1979       continue;
1980 
1981     // Insert element for the leader instruction.
1982     (void)LeaderToInstList[*LeaderIt];
1983   }
1984 
1985   // Collect the instructions of all leaders. UnionFind's member iterators
1986   // are unfortunately not in any specific order.
1987   for (Instruction *Inst : ModeledInsts) {
1988     auto LeaderIt = UnionFind.findLeader(Inst);
1989     if (LeaderIt == UnionFind.member_end())
1990       continue;
1991 
1992     if (Inst == MainInst)
1993       MainLeader = *LeaderIt;
1994     std::vector<Instruction *> &InstList = LeaderToInstList[*LeaderIt];
1995     InstList.push_back(Inst);
1996   }
1997 
1998   // Finally build the statements.
1999   int Count = 0;
2000   long BBIdx = scop->getNextStmtIdx();
2001   for (auto &Instructions : LeaderToInstList) {
2002     std::vector<Instruction *> &InstList = Instructions.second;
2003 
2004     // If there is no main instruction, make the first statement the main.
2005     bool IsMain = (MainInst ? MainLeader == Instructions.first : Count == 0);
2006 
2007     std::string Name = makeStmtName(BB, BBIdx, Count, IsMain);
2008     scop->addScopStmt(BB, Name, L, std::move(InstList));
2009     Count += 1;
2010   }
2011 
2012   // Unconditionally add an epilogue (last statement). It contains no
2013   // instructions, but holds the PHI write accesses for successor basic blocks,
2014   // if the incoming value is not defined in another statement of the same BB.
2015   // The epilogue becomes the main statement only if there is no other
2016   // statement that could become main.
2017   // The epilogue will be removed if no PHIWrite is added to it.
2018   std::string EpilogueName = makeStmtName(BB, BBIdx, Count, Count == 0, true);
2019   scop->addScopStmt(BB, EpilogueName, L, {});
2020 }
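
// Illustrative example (assumed block): with
// -polly-stmt-granularity=scalar-indep, a block with two independent
// computations
//
//   %a = fadd double %x, %y
//   store double %a, ptr %P
//   %b = fmul double %u, %v
//   store double %b, ptr %Q
//
// is split into two statements: the one containing the first store becomes
// the main statement (MainInst) and keeps the plain name, the second one gets
// a suffixed name, and the empty epilogue statement is dropped again later if
// no PHI write is attached to it.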
2021 
2022 void ScopBuilder::buildStmts(Region &SR) {
2023   if (scop->isNonAffineSubRegion(&SR)) {
2024     std::vector<Instruction *> Instructions;
2025     Loop *SurroundingLoop =
2026         getFirstNonBoxedLoopFor(SR.getEntry(), LI, scop->getBoxedLoops());
2027     for (Instruction &Inst : *SR.getEntry())
2028       if (shouldModelInst(&Inst, SurroundingLoop))
2029         Instructions.push_back(&Inst);
2030     long RIdx = scop->getNextStmtIdx();
2031     std::string Name = makeStmtName(&SR, RIdx);
2032     scop->addScopStmt(&SR, Name, SurroundingLoop, Instructions);
2033     return;
2034   }
2035 
2036   for (auto I = SR.element_begin(), E = SR.element_end(); I != E; ++I)
2037     if (I->isSubRegion())
2038       buildStmts(*I->getNodeAs<Region>());
2039     else {
2040       BasicBlock *BB = I->getNodeAs<BasicBlock>();
2041       switch (StmtGranularity) {
2042       case GranularityChoice::BasicBlocks:
2043         buildSequentialBlockStmts(BB);
2044         break;
2045       case GranularityChoice::ScalarIndependence:
2046         buildEqivClassBlockStmts(BB);
2047         break;
2048       case GranularityChoice::Stores:
2049         buildSequentialBlockStmts(BB, true);
2050         break;
2051       }
2052     }
2053 }
2054 
2055 void ScopBuilder::buildAccessFunctions(ScopStmt *Stmt, BasicBlock &BB,
2056                                        Region *NonAffineSubRegion) {
2057   assert(
2058       Stmt &&
2059       "The exit BB is the only one that cannot be represented by a statement");
2060   assert(Stmt->represents(&BB));
2061 
2062   // We do not build access functions for error blocks, as they may contain
2063   // instructions we cannot model.
2064   if (SD.isErrorBlock(BB, scop->getRegion()))
2065     return;
2066 
2067   auto BuildAccessesForInst = [this, Stmt,
2068                                NonAffineSubRegion](Instruction *Inst) {
2069     PHINode *PHI = dyn_cast<PHINode>(Inst);
2070     if (PHI)
2071       buildPHIAccesses(Stmt, PHI, NonAffineSubRegion, false);
2072 
2073     if (auto MemInst = MemAccInst::dyn_cast(*Inst)) {
2074       assert(Stmt && "Cannot build access function in non-existing statement");
2075       buildMemoryAccess(MemInst, Stmt);
2076     }
2077 
2078     // PHI nodes have already been modeled above and terminators that are
2079     // not part of a non-affine subregion are fully modeled and regenerated
2080     // from the polyhedral domains. Hence, they do not need to be modeled as
2081     // explicit data dependences.
2082     if (!PHI)
2083       buildScalarDependences(Stmt, Inst);
2084   };
2085 
2086   const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
2087   bool IsEntryBlock = (Stmt->getEntryBlock() == &BB);
2088   if (IsEntryBlock) {
2089     for (Instruction *Inst : Stmt->getInstructions())
2090       BuildAccessesForInst(Inst);
2091     if (Stmt->isRegionStmt())
2092       BuildAccessesForInst(BB.getTerminator());
2093   } else {
2094     for (Instruction &Inst : BB) {
2095       if (isIgnoredIntrinsic(&Inst))
2096         continue;
2097 
2098       // Invariant loads already have been processed.
2099       if (isa<LoadInst>(Inst) && RIL.count(cast<LoadInst>(&Inst)))
2100         continue;
2101 
2102       BuildAccessesForInst(&Inst);
2103     }
2104   }
2105 }
2106 
2107 MemoryAccess *ScopBuilder::addMemoryAccess(
2108     ScopStmt *Stmt, Instruction *Inst, MemoryAccess::AccessType AccType,
2109     Value *BaseAddress, Type *ElementType, bool Affine, Value *AccessValue,
2110     ArrayRef<const SCEV *> Subscripts, ArrayRef<const SCEV *> Sizes,
2111     MemoryKind Kind) {
2112   bool isKnownMustAccess = false;
2113 
2114   // Accesses in single-basic block statements are always executed.
2115   if (Stmt->isBlockStmt())
2116     isKnownMustAccess = true;
2117 
2118   if (Stmt->isRegionStmt()) {
2119     // Accesses that dominate the exit block of a non-affine region are always
2120     // executed. In non-affine regions there may exist accesses that do not
2121     // dominate the exit. MemoryKind::Value writes always dominate the exit,
2122     // and MemoryKind::PHI writes do so only if there is at most one
2123     // PHI_WRITE in the non-affine region.
2124     if (Inst && DT.dominates(Inst->getParent(), Stmt->getRegion()->getExit()))
2125       isKnownMustAccess = true;
2126   }
2127 
2128   // Non-affine PHI writes do not "happen" at a particular instruction, but
2129   // after exiting the statement. Therefore they are guaranteed to execute and
2130   // overwrite the old value.
2131   if (Kind == MemoryKind::PHI || Kind == MemoryKind::ExitPHI)
2132     isKnownMustAccess = true;
2133 
2134   if (!isKnownMustAccess && AccType == MemoryAccess::MUST_WRITE)
2135     AccType = MemoryAccess::MAY_WRITE;
2136 
2137   auto *Access = new MemoryAccess(Stmt, Inst, AccType, BaseAddress, ElementType,
2138                                   Affine, Subscripts, Sizes, AccessValue, Kind);
2139 
2140   scop->addAccessFunction(Access);
2141   Stmt->addAccess(Access);
2142   return Access;
2143 }
2144 
2145 void ScopBuilder::addArrayAccess(ScopStmt *Stmt, MemAccInst MemAccInst,
2146                                  MemoryAccess::AccessType AccType,
2147                                  Value *BaseAddress, Type *ElementType,
2148                                  bool IsAffine,
2149                                  ArrayRef<const SCEV *> Subscripts,
2150                                  ArrayRef<const SCEV *> Sizes,
2151                                  Value *AccessValue) {
2152   ArrayBasePointers.insert(BaseAddress);
2153   addMemoryAccess(Stmt, MemAccInst, AccType, BaseAddress, ElementType, IsAffine,
2154                   AccessValue, Subscripts, Sizes, MemoryKind::Array);
2155 }
2156 
2157 /// Check if @p Expr is divisible by @p Size.
2158 static bool isDivisible(const SCEV *Expr, unsigned Size, ScalarEvolution &SE) {
2159   assert(Size != 0);
2160   if (Size == 1)
2161     return true;
2162 
2163   // Only one factor needs to be divisible.
2164   if (auto *MulExpr = dyn_cast<SCEVMulExpr>(Expr)) {
2165     for (auto *FactorExpr : MulExpr->operands())
2166       if (isDivisible(FactorExpr, Size, SE))
2167         return true;
2168     return false;
2169   }
2170 
2171   // For other n-ary expressions (Add, AddRec, Max,...) all operands need
2172   // to be divisible.
2173   if (auto *NAryExpr = dyn_cast<SCEVNAryExpr>(Expr)) {
2174     for (auto *OpExpr : NAryExpr->operands())
2175       if (!isDivisible(OpExpr, Size, SE))
2176         return false;
2177     return true;
2178   }
2179 
2180   auto *SizeSCEV = SE.getConstant(Expr->getType(), Size);
2181   auto *UDivSCEV = SE.getUDivExpr(Expr, SizeSCEV);
2182   auto *MulSCEV = SE.getMulExpr(UDivSCEV, SizeSCEV);
2183   return MulSCEV == Expr;
2184 }
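
// Worked example: for Expr = 4*i + 8*j and Size = 4, each addend is
// divisible (a product only needs one divisible factor, and 8 is a multiple
// of 4), so the sum is divisible as well. For Expr = 4*i + 6 and Size = 4,
// the constant addend fails the final check (6 /u 4 * 4 == 4 != 6), so the
// function returns false.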
2185 
2186 void ScopBuilder::foldSizeConstantsToRight() {
2187   isl::union_set Accessed = scop->getAccesses().range();
2188 
2189   for (auto Array : scop->arrays()) {
2190     if (Array->getNumberOfDimensions() <= 1)
2191       continue;
2192 
2193     isl::space Space = Array->getSpace();
2194     Space = Space.align_params(Accessed.get_space());
2195 
2196     if (!Accessed.contains(Space))
2197       continue;
2198 
2199     isl::set Elements = Accessed.extract_set(Space);
2200     isl::map Transform = isl::map::universe(Array->getSpace().map_from_set());
2201 
2202     std::vector<int> Int;
2203     unsigned Dims = unsignedFromIslSize(Elements.tuple_dim());
2204     for (unsigned i = 0; i < Dims; i++) {
2205       isl::set DimOnly = isl::set(Elements).project_out(isl::dim::set, 0, i);
2206       DimOnly = DimOnly.project_out(isl::dim::set, 1, Dims - i - 1);
2207       DimOnly = DimOnly.lower_bound_si(isl::dim::set, 0, 0);
2208 
2209       isl::basic_set DimHull = DimOnly.affine_hull();
2210 
2211       if (i == Dims - 1) {
2212         Int.push_back(1);
2213         Transform = Transform.equate(isl::dim::in, i, isl::dim::out, i);
2214         continue;
2215       }
2216 
2217       if (unsignedFromIslSize(DimHull.dim(isl::dim::div)) == 1) {
2218         isl::aff Diff = DimHull.get_div(0);
2219         isl::val Val = Diff.get_denominator_val();
2220 
2221         int ValInt = 1;
2222         if (Val.is_int()) {
2223           auto ValAPInt = APIntFromVal(Val);
2224           if (ValAPInt.isSignedIntN(32))
2225             ValInt = ValAPInt.getSExtValue();
2226         }
2228 
2229         Int.push_back(ValInt);
2230         isl::constraint C = isl::constraint::alloc_equality(
2231             isl::local_space(Transform.get_space()));
2232         C = C.set_coefficient_si(isl::dim::out, i, ValInt);
2233         C = C.set_coefficient_si(isl::dim::in, i, -1);
2234         Transform = Transform.add_constraint(C);
2235         continue;
2236       }
2237 
2238       isl::basic_set ZeroSet = isl::basic_set(DimHull);
2239       ZeroSet = ZeroSet.fix_si(isl::dim::set, 0, 0);
2240 
2241       int ValInt = 1;
2242       if (ZeroSet.is_equal(DimHull)) {
2243         ValInt = 0;
2244       }
2245 
2246       Int.push_back(ValInt);
2247       Transform = Transform.equate(isl::dim::in, i, isl::dim::out, i);
2248     }
2249 
2250     isl::set MappedElements = isl::map(Transform).domain();
2251     if (!Elements.is_subset(MappedElements))
2252       continue;
2253 
2254     bool CanFold = true;
2255     if (Int[0] <= 1)
2256       CanFold = false;
2257 
2258     unsigned NumDims = Array->getNumberOfDimensions();
2259     for (unsigned i = 1; i < NumDims - 1; i++)
2260       if (Int[0] != Int[i] && Int[i])
2261         CanFold = false;
2262 
2263     if (!CanFold)
2264       continue;
2265 
2266     for (auto &Access : scop->access_functions())
2267       if (Access->getScopArrayInfo() == Array)
2268         Access->setAccessRelation(
2269             Access->getAccessRelation().apply_range(Transform));
2270 
2271     std::vector<const SCEV *> Sizes;
2272     for (unsigned i = 0; i < NumDims; i++) {
2273       auto Size = Array->getDimensionSize(i);
2274 
2275       if (i == NumDims - 1)
2276         Size = SE.getMulExpr(Size, SE.getConstant(Size->getType(), Int[0]));
2277       Sizes.push_back(Size);
2278     }
2279 
2280     Array->updateSizes(Sizes, false /* CheckConsistency */);
2281   }
2282 }
2283 
2284 void ScopBuilder::finalizeAccesses() {
2285   updateAccessDimensionality();
2286   foldSizeConstantsToRight();
2287   foldAccessRelations();
2288   assumeNoOutOfBounds();
2289 }
2290 
2291 void ScopBuilder::updateAccessDimensionality() {
2292   // Check all array accesses for each base pointer and find a (virtual) element
2293   // size for the base pointer that divides all access functions.
2294   for (ScopStmt &Stmt : *scop)
2295     for (MemoryAccess *Access : Stmt) {
2296       if (!Access->isArrayKind())
2297         continue;
2298       ScopArrayInfo *Array =
2299           const_cast<ScopArrayInfo *>(Access->getScopArrayInfo());
2300 
2301       if (Array->getNumberOfDimensions() != 1)
2302         continue;
2303       unsigned DivisibleSize = Array->getElemSizeInBytes();
2304       const SCEV *Subscript = Access->getSubscript(0);
2305       while (!isDivisible(Subscript, DivisibleSize, SE))
2306         DivisibleSize /= 2;
2307       auto *Ty = IntegerType::get(SE.getContext(), DivisibleSize * 8);
2308       Array->updateElementType(Ty);
2309     }
2310 
2311   for (auto &Stmt : *scop)
2312     for (auto &Access : Stmt)
2313       Access->updateDimensionality();
2314 }
2315 
2316 void ScopBuilder::foldAccessRelations() {
2317   for (auto &Stmt : *scop)
2318     for (auto &Access : Stmt)
2319       Access->foldAccessRelation();
2320 }
2321 
2322 void ScopBuilder::assumeNoOutOfBounds() {
2323   if (PollyIgnoreInbounds)
2324     return;
2325   for (auto &Stmt : *scop)
2326     for (auto &Access : Stmt) {
2327       isl::set Outside = Access->assumeNoOutOfBound();
2328       const auto &Loc = Access->getAccessInstruction()
2329                             ? Access->getAccessInstruction()->getDebugLoc()
2330                             : DebugLoc();
2331       recordAssumption(&RecordedAssumptions, INBOUNDS, Outside, Loc,
2332                        AS_ASSUMPTION);
2333     }
2334 }
2335 
2336 void ScopBuilder::ensureValueWrite(Instruction *Inst) {
2337   // Find the statement that defines the value of Inst. That statement has to
2338   // write the value to make it available to those statements that read it.
2339   ScopStmt *Stmt = scop->getStmtFor(Inst);
2340 
2341   // It is possible that the value is synthesizable within a loop (such that it
2342   // is not part of any statement), but not after the loop (where you need the
2343   // number of loop round-trips to synthesize it). In LCSSA-form a PHI node will
2344   // avoid this. In case the IR has no such PHI, use the last statement (where
2345   // the value is synthesizable) to write the value.
2346   if (!Stmt)
2347     Stmt = scop->getLastStmtFor(Inst->getParent());
2348 
2349   // Inst not defined within this SCoP.
2350   if (!Stmt)
2351     return;
2352 
2353   // Do not process further if the instruction is already written.
2354   if (Stmt->lookupValueWriteOf(Inst))
2355     return;
2356 
2357   addMemoryAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, Inst, Inst->getType(),
2358                   true, Inst, ArrayRef<const SCEV *>(),
2359                   ArrayRef<const SCEV *>(), MemoryKind::Value);
2360 }
2361 
2362 void ScopBuilder::ensureValueRead(Value *V, ScopStmt *UserStmt) {
2363   // TODO: Make ScopStmt::ensureValueRead(Value*) offer the same functionality
2364   // to be able to replace this one. Currently, there is a split responsibility.
2365   // In a first step, the MemoryAccess is created, but without the
2366   // AccessRelation. In the second step by ScopStmt::buildAccessRelations(), the
2367   // AccessRelation is created. At least for scalar accesses, there is no new
2368   // information available at ScopStmt::buildAccessRelations(), so we could
2369   // create the AccessRelation right away. This is what
2370   // ScopStmt::ensureValueRead(Value*) does.
2371 
2372   auto *Scope = UserStmt->getSurroundingLoop();
2373   auto VUse = VirtualUse::create(scop.get(), UserStmt, Scope, V, false);
2374   switch (VUse.getKind()) {
2375   case VirtualUse::Constant:
2376   case VirtualUse::Block:
2377   case VirtualUse::Synthesizable:
2378   case VirtualUse::Hoisted:
2379   case VirtualUse::Intra:
2380     // Uses of these kinds do not need a MemoryAccess.
2381     break;
2382 
2383   case VirtualUse::ReadOnly:
2384     // Add MemoryAccess for invariant values only if requested.
2385     if (!ModelReadOnlyScalars)
2386       break;
2387 
2388     LLVM_FALLTHROUGH;
2389   case VirtualUse::Inter:
2390 
2391     // Do not create another MemoryAccess for reloading the value if one already
2392     // exists.
2393     if (UserStmt->lookupValueReadOf(V))
2394       break;
2395 
2396     addMemoryAccess(UserStmt, nullptr, MemoryAccess::READ, V, V->getType(),
2397                     true, V, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
2398                     MemoryKind::Value);
2399 
2400     // Inter-statement uses need to write the value in their defining statement.
2401     if (VUse.isInter())
2402       ensureValueWrite(cast<Instruction>(V));
2403     break;
2404   }
2405 }
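
// Illustrative example (assumed statements): if %val is defined in Stmt_a and
// used non-synthesizably in Stmt_b, the inter-statement use is modeled as a
// pair of scalar accesses on a zero-dimensional array:
//
//   Stmt_a: MUST_WRITE { Stmt_a[i] -> MemRef_val[] }   (ensureValueWrite)
//   Stmt_b: READ       { Stmt_b[i] -> MemRef_val[] }   (added here)
//
// Read-only uses of values defined outside the SCoP only get the READ side,
// and only if -polly-analyze-read-only-scalars is enabled.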
2406 
2407 void ScopBuilder::ensurePHIWrite(PHINode *PHI, ScopStmt *IncomingStmt,
2408                                  BasicBlock *IncomingBlock,
2409                                  Value *IncomingValue, bool IsExitBlock) {
2410   // As the incoming block might turn out to be an error statement, ensure we
2411   // will create an exit PHI SAI object. It is needed during code generation
2412   // and would be created later anyway.
2413   if (IsExitBlock)
2414     scop->getOrCreateScopArrayInfo(PHI, PHI->getType(), {},
2415                                    MemoryKind::ExitPHI);
2416 
2417   // This is possible if PHI is in the SCoP's entry block. The incoming blocks
2418   // from outside the SCoP's region have no statement representation.
2419   if (!IncomingStmt)
2420     return;
2421 
2422   // Take care for the incoming value being available in the incoming block.
2423   // This must be done before the check for multiple PHI writes because multiple
2424   // exiting edges from the subregion each can be the effective written value of the
2425   // subregion. As such, all of them must be made available in the subregion
2426   // statement.
2427   ensureValueRead(IncomingValue, IncomingStmt);
2428 
2429   // Do not add more than one MemoryAccess per PHINode and ScopStmt.
2430   if (MemoryAccess *Acc = IncomingStmt->lookupPHIWriteOf(PHI)) {
2431     assert(Acc->getAccessInstruction() == PHI);
2432     Acc->addIncoming(IncomingBlock, IncomingValue);
2433     return;
2434   }
2435 
2436   MemoryAccess *Acc = addMemoryAccess(
2437       IncomingStmt, PHI, MemoryAccess::MUST_WRITE, PHI, PHI->getType(), true,
2438       PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
2439       IsExitBlock ? MemoryKind::ExitPHI : MemoryKind::PHI);
2440   assert(Acc);
2441   Acc->addIncoming(IncomingBlock, IncomingValue);
2442 }
2443 
2444 void ScopBuilder::addPHIReadAccess(ScopStmt *PHIStmt, PHINode *PHI) {
2445   addMemoryAccess(PHIStmt, PHI, MemoryAccess::READ, PHI, PHI->getType(), true,
2446                   PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
2447                   MemoryKind::PHI);
2448 }
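
// Illustrative example (assumed IR and names): for a PHI in a loop header,
//
//   %phi = phi double [ %init, %preheader ], [ %next, %latch ]
//
// each in-SCoP incoming edge receives a MemoryKind::PHI MUST_WRITE in its
// incoming statement (ensurePHIWrite), and the statement containing the PHI
// gets the matching READ added here, e.g.
//
//   Stmt_latch:  MUST_WRITE { Stmt_latch[i]  -> MemRef_phi__phi[] }
//   Stmt_header: READ       { Stmt_header[i] -> MemRef_phi__phi[] }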
2449 
2450 void ScopBuilder::buildDomain(ScopStmt &Stmt) {
2451   isl::id Id = isl::id::alloc(scop->getIslCtx(), Stmt.getBaseName(), &Stmt);
2452 
2453   Stmt.Domain = scop->getDomainConditions(&Stmt);
2454   Stmt.Domain = Stmt.Domain.set_tuple_id(Id);
2455 }
2456 
2457 void ScopBuilder::collectSurroundingLoops(ScopStmt &Stmt) {
2458   isl::set Domain = Stmt.getDomain();
2459   BasicBlock *BB = Stmt.getEntryBlock();
2460 
2461   Loop *L = LI.getLoopFor(BB);
2462 
2463   while (L && Stmt.isRegionStmt() && Stmt.getRegion()->contains(L))
2464     L = L->getParentLoop();
2465 
2466   SmallVector<llvm::Loop *, 8> Loops;
2467 
2468   while (L && Stmt.getParent()->getRegion().contains(L)) {
2469     Loops.push_back(L);
2470     L = L->getParentLoop();
2471   }
2472 
2473   Stmt.NestLoops.insert(Stmt.NestLoops.begin(), Loops.rbegin(), Loops.rend());
2474 }
2475 
2476 /// Return the reduction type for a given binary operator.
2477 static MemoryAccess::ReductionType getReductionType(const BinaryOperator *BinOp,
2478                                                     const Instruction *Load) {
2479   if (!BinOp)
2480     return MemoryAccess::RT_NONE;
2481   switch (BinOp->getOpcode()) {
2482   case Instruction::FAdd:
2483     if (!BinOp->isFast())
2484       return MemoryAccess::RT_NONE;
2485     LLVM_FALLTHROUGH;
2486   case Instruction::Add:
2487     return MemoryAccess::RT_ADD;
2488   case Instruction::Or:
2489     return MemoryAccess::RT_BOR;
2490   case Instruction::Xor:
2491     return MemoryAccess::RT_BXOR;
2492   case Instruction::And:
2493     return MemoryAccess::RT_BAND;
2494   case Instruction::FMul:
2495     if (!BinOp->isFast())
2496       return MemoryAccess::RT_NONE;
2497     LLVM_FALLTHROUGH;
2498   case Instruction::Mul:
2499     if (DisableMultiplicativeReductions)
2500       return MemoryAccess::RT_NONE;
2501     return MemoryAccess::RT_MUL;
2502   default:
2503     return MemoryAccess::RT_NONE;
2504   }
2505 }
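
// Illustrative example (assumed loop body): the statement
//
//   %v = load double, ptr %sum
//   %a = fadd fast double %v, %x
//   store double %a, ptr %sum
//
// forms a load/store reduction chain. Because the fadd carries the 'fast'
// flag, the binary operator maps to MemoryAccess::RT_ADD, and
// checkForReductions() below can mark both accesses as reduction-like as long
// as no other access in the statement overlaps with the %sum location.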
2506 
2507 void ScopBuilder::checkForReductions(ScopStmt &Stmt) {
2508   SmallVector<MemoryAccess *, 2> Loads;
2509   SmallVector<std::pair<MemoryAccess *, MemoryAccess *>, 4> Candidates;
2510 
2511   // First collect candidate load-store reduction chains by iterating over all
2512   // stores and collecting possible reduction loads.
2513   for (MemoryAccess *StoreMA : Stmt) {
2514     if (StoreMA->isRead())
2515       continue;
2516 
2517     Loads.clear();
2518     collectCandidateReductionLoads(StoreMA, Loads);
2519     for (MemoryAccess *LoadMA : Loads)
2520       Candidates.push_back(std::make_pair(LoadMA, StoreMA));
2521   }
2522 
2523   // Then check each possible candidate pair.
2524   for (const auto &CandidatePair : Candidates) {
2525     bool Valid = true;
2526     isl::map LoadAccs = CandidatePair.first->getAccessRelation();
2527     isl::map StoreAccs = CandidatePair.second->getAccessRelation();
2528 
2529     // Skip those with obviously unequal base addresses.
2530     if (!LoadAccs.has_equal_space(StoreAccs)) {
2531       continue;
2532     }
2533 
2534     // Then check the remaining candidates for overlap with other memory accesses.
2535     isl::map AllAccsRel = LoadAccs.unite(StoreAccs);
2536     AllAccsRel = AllAccsRel.intersect_domain(Stmt.getDomain());
2537     isl::set AllAccs = AllAccsRel.range();
2538 
2539     for (MemoryAccess *MA : Stmt) {
2540       if (MA == CandidatePair.first || MA == CandidatePair.second)
2541         continue;
2542 
2543       isl::map AccRel =
2544           MA->getAccessRelation().intersect_domain(Stmt.getDomain());
2545       isl::set Accs = AccRel.range();
2546 
2547       if (AllAccs.has_equal_space(Accs)) {
2548         isl::set OverlapAccs = Accs.intersect(AllAccs);
2549         Valid = Valid && OverlapAccs.is_empty();
2550       }
2551     }
2552 
2553     if (!Valid)
2554       continue;
2555 
2556     const LoadInst *Load =
2557         dyn_cast<const LoadInst>(CandidatePair.first->getAccessInstruction());
2558     MemoryAccess::ReductionType RT =
2559         getReductionType(dyn_cast<BinaryOperator>(Load->user_back()), Load);
2560 
2561     // If no overlapping access was found we mark the load and store as
2562     // reduction like.
2563     CandidatePair.first->markAsReductionLike(RT);
2564     CandidatePair.second->markAsReductionLike(RT);
2565   }
2566 }
2567 
2568 void ScopBuilder::verifyInvariantLoads() {
2569   auto &RIL = scop->getRequiredInvariantLoads();
2570   for (LoadInst *LI : RIL) {
2571     assert(LI && scop->contains(LI));
2572     // If there exists a statement in the scop which has a memory access for
2573     // @p LI, then mark this scop as infeasible for optimization.
2574     for (ScopStmt &Stmt : *scop)
2575       if (Stmt.getArrayAccessOrNULLFor(LI)) {
2576         scop->invalidate(INVARIANTLOAD, LI->getDebugLoc(), LI->getParent());
2577         return;
2578       }
2579   }
2580 }
2581 
2582 void ScopBuilder::hoistInvariantLoads() {
2583   if (!PollyInvariantLoadHoisting)
2584     return;
2585 
2586   isl::union_map Writes = scop->getWrites();
2587   for (ScopStmt &Stmt : *scop) {
2588     InvariantAccessesTy InvariantAccesses;
2589 
2590     for (MemoryAccess *Access : Stmt) {
2591       isl::set NHCtx = getNonHoistableCtx(Access, Writes);
2592       if (!NHCtx.is_null())
2593         InvariantAccesses.push_back({Access, NHCtx});
2594     }
2595 
2596     // Transfer the memory access from the statement to the SCoP.
2597     for (auto InvMA : InvariantAccesses)
2598       Stmt.removeMemoryAccess(InvMA.MA);
2599     addInvariantLoads(Stmt, InvariantAccesses);
2600   }
2601 }
2602 
2603 /// Check if an access range is too complex.
2604 ///
2605 /// An access range is too complex if it contains either many disjuncts or
2606 /// very complex expressions. As a simple heuristic, we consider a set to
2607 /// be too complex if the sum of existentially quantified dimensions and
2608 /// set dimensions is larger than a threshold. This reliably detects both
2609 /// sets with many disjuncts as well as sets with many divisions as they
2610 /// arise in h264.
2611 ///
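/// For example (illustrative only), the set
///   { [i] : exists e : i = 4e and 0 <= i < 100 }
/// has one set dimension and one existentially quantified dimension, so it
/// contributes 2 towards the MaxDimensionsInAccessRange threshold.
///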
2612 /// @param AccessRange The range to check for complexity.
2613 ///
2614 /// @returns True if the access range is too complex.
2615 static bool isAccessRangeTooComplex(isl::set AccessRange) {
2616   unsigned NumTotalDims = 0;
2617 
2618   for (isl::basic_set BSet : AccessRange.get_basic_set_list()) {
2619     NumTotalDims += unsignedFromIslSize(BSet.dim(isl::dim::div));
2620     NumTotalDims += unsignedFromIslSize(BSet.dim(isl::dim::set));
2621   }
2622 
2623   if (NumTotalDims > MaxDimensionsInAccessRange)
2624     return true;
2625 
2626   return false;
2627 }
2628 
2629 bool ScopBuilder::hasNonHoistableBasePtrInScop(MemoryAccess *MA,
2630                                                isl::union_map Writes) {
2631   if (auto *BasePtrMA = scop->lookupBasePtrAccess(MA)) {
2632     return getNonHoistableCtx(BasePtrMA, Writes).is_null();
2633   }
2634 
2635   Value *BaseAddr = MA->getOriginalBaseAddr();
2636   if (auto *BasePtrInst = dyn_cast<Instruction>(BaseAddr))
2637     if (!isa<LoadInst>(BasePtrInst))
2638       return scop->contains(BasePtrInst);
2639 
2640   return false;
2641 }
2642 
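// Illustrative note (assumed example, not from the original source): the
// string passed via -polly-context must use the parameter space of the
// computed context, e.g. for a SCoP with a single parameter n one could pass
//
//   [n] -> { : n >= 128 }
//
// to restrict all further analysis and code generation to that case.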
2643 void ScopBuilder::addUserContext() {
2644   if (UserContextStr.empty())
2645     return;
2646 
2647   isl::set UserContext = isl::set(scop->getIslCtx(), UserContextStr.c_str());
2648   isl::space Space = scop->getParamSpace();
2649   isl::size SpaceParams = Space.dim(isl::dim::param);
2650   if (unsignedFromIslSize(SpaceParams) !=
2651       unsignedFromIslSize(UserContext.dim(isl::dim::param))) {
2652     std::string SpaceStr = stringFromIslObj(Space, "null");
2653     errs() << "Error: the context provided in -polly-context does not have "
2654            << "the same number of dimensions as the computed context. Due to "
2655            << "this mismatch, the -polly-context option is ignored. Please "
2656            << "provide the context in the parameter space: " << SpaceStr << ".\n";
2657     return;
2658   }
2659 
2660   for (auto i : rangeIslSize(0, SpaceParams)) {
2661     std::string NameContext =
2662         scop->getContext().get_dim_name(isl::dim::param, i);
2663     std::string NameUserContext = UserContext.get_dim_name(isl::dim::param, i);
2664 
2665     if (NameContext != NameUserContext) {
2666       std::string SpaceStr = stringFromIslObj(Space, "null");
2667       errs() << "Error: the name of dimension " << i
2668              << " provided in -polly-context "
2669              << "is '" << NameUserContext << "', but the name in the computed "
2670              << "context is '" << NameContext
2671              << "'. Due to this name mismatch, "
2672              << "the -polly-context option is ignored. Please provide "
2673              << "the context in the parameter space: " << SpaceStr << ".\n";
2674       return;
2675     }
2676 
2677     UserContext = UserContext.set_dim_id(isl::dim::param, i,
2678                                          Space.get_dim_id(isl::dim::param, i));
2679   }
2680   isl::set newContext = scop->getContext().intersect(UserContext);
2681   scop->setContext(newContext);
2682 }
2683 
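// Sketch of the return value (summarizing the implementation below, not an
// additional guarantee): the returned set describes parameter values under
// which the load's location might be overwritten inside the SCoP; these values
// are later excluded via a restriction assumption. An empty set means the load
// can be hoisted unconditionally, while a null set means it cannot be hoisted
// at all.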
2684 isl::set ScopBuilder::getNonHoistableCtx(MemoryAccess *Access,
2685                                          isl::union_map Writes) {
2686   // TODO: Loads that are not loop carried, hence are in a statement with
2687   //       zero iterators, are by construction invariant, though we
2688   //       currently "hoist" them anyway. This is necessary because we allow
2689   //       them to be treated as parameters (e.g., in conditions) and our code
2690   //       generation would otherwise use the old value.
2691 
2692   auto &Stmt = *Access->getStatement();
2693   BasicBlock *BB = Stmt.getEntryBlock();
2694 
2695   if (Access->isScalarKind() || Access->isWrite() || !Access->isAffine() ||
2696       Access->isMemoryIntrinsic())
2697     return {};
2698 
2699   // Skip accesses that have an invariant base pointer which is defined but
2700   // not loaded inside the SCoP. This can happen, e.g., if a readnone call
2701   // returns a pointer that is used as a base address. However, as we want
2702   // to hoist indirect pointers, we allow the base pointer to be defined in
2703   // the region if it is also a memory access. Each ScopArrayInfo object
2704   // that has a base pointer origin has a base pointer that is loaded and
2705   // that is invariant, thus it will be hoisted too. However, if there is
2706   // no base pointer origin we check that the base pointer is defined
2707   // outside the region.
2708   auto *LI = cast<LoadInst>(Access->getAccessInstruction());
2709   if (hasNonHoistableBasePtrInScop(Access, Writes))
2710     return {};
2711 
2712   isl::map AccessRelation = Access->getAccessRelation();
2713   assert(!AccessRelation.is_empty());
2714 
2715   if (AccessRelation.involves_dims(isl::dim::in, 0, Stmt.getNumIterators()))
2716     return {};
2717 
2718   AccessRelation = AccessRelation.intersect_domain(Stmt.getDomain());
2719   isl::set SafeToLoad;
2720 
2721   auto &DL = scop->getFunction().getParent()->getDataLayout();
2722   if (isSafeToLoadUnconditionally(LI->getPointerOperand(), LI->getType(),
2723                                   LI->getAlign(), DL)) {
2724     SafeToLoad = isl::set::universe(AccessRelation.get_space().range());
2725   } else if (BB != LI->getParent()) {
2726     // Skip accesses in non-affine subregions as they might not be executed
2727     // under the same condition as the entry of the non-affine subregion.
2728     return {};
2729   } else {
2730     SafeToLoad = AccessRelation.range();
2731   }
2732 
2733   if (isAccessRangeTooComplex(AccessRelation.range()))
2734     return {};
2735 
2736   isl::union_map Written = Writes.intersect_range(SafeToLoad);
2737   isl::set WrittenCtx = Written.params();
2738   bool IsWritten = !WrittenCtx.is_empty();
2739 
2740   if (!IsWritten)
2741     return WrittenCtx;
2742 
2743   WrittenCtx = WrittenCtx.remove_divs();
2744   bool TooComplex =
2745       unsignedFromIslSize(WrittenCtx.n_basic_set()) >= MaxDisjunctsInDomain;
2746   if (TooComplex || !isRequiredInvariantLoad(LI))
2747     return {};
2748 
2749   scop->addAssumption(INVARIANTLOAD, WrittenCtx, LI->getDebugLoc(),
2750                       AS_RESTRICTION, LI->getParent());
2751   return WrittenCtx;
2752 }
2753 
2754 static bool isAParameter(llvm::Value *maybeParam, const Function &F) {
2755   for (const llvm::Argument &Arg : F.args())
2756     if (&Arg == maybeParam)
2757       return true;
2758 
2759   return false;
2760 }
2761 
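// Note (summary of the checks below, no new semantics): a load can be hoisted
// unconditionally only if its pointer is known to be dereferenceable, its
// location is never overwritten in the SCoP, and either its statement is
// modeled precisely or all of its subscripts are constant. With
// -polly-allow-dereference-of-all-function-params, loads from function
// arguments are additionally allowed regardless of the other checks.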
2762 bool ScopBuilder::canAlwaysBeHoisted(MemoryAccess *MA,
2763                                      bool StmtInvalidCtxIsEmpty,
2764                                      bool MAInvalidCtxIsEmpty,
2765                                      bool NonHoistableCtxIsEmpty) {
2766   LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction());
2767   const DataLayout &DL = LInst->getParent()->getModule()->getDataLayout();
2768   if (PollyAllowDereferenceOfAllFunctionParams &&
2769       isAParameter(LInst->getPointerOperand(), scop->getFunction()))
2770     return true;
2771 
2772   // TODO: We can provide more information for better but more expensive
2773   //       results.
2774   if (!isDereferenceableAndAlignedPointer(
2775           LInst->getPointerOperand(), LInst->getType(), LInst->getAlign(), DL))
2776     return false;
2777 
2778   // If the location might be overwritten we do not hoist it unconditionally.
2779   //
2780   // TODO: This is probably too conservative.
2781   if (!NonHoistableCtxIsEmpty)
2782     return false;
2783 
2784   // If a dereferenceable load is in a statement that is modeled precisely we
2785   // can hoist it.
2786   if (StmtInvalidCtxIsEmpty && MAInvalidCtxIsEmpty)
2787     return true;
2788 
2789   // Even if the statement is not modeled precisely we can hoist the load if it
2790   // does not involve any parameters that might have been specialized by the
2791   // statement domain.
2792   for (const SCEV *Subscript : MA->subscripts())
2793     if (!isa<SCEVConstant>(Subscript))
2794       return false;
2795   return true;
2796 }
2797 
2798 void ScopBuilder::addInvariantLoads(ScopStmt &Stmt,
2799                                     InvariantAccessesTy &InvMAs) {
2800   if (InvMAs.empty())
2801     return;
2802 
2803   isl::set StmtInvalidCtx = Stmt.getInvalidContext();
2804   bool StmtInvalidCtxIsEmpty = StmtInvalidCtx.is_empty();
2805 
2806   // Get the context under which the statement is executed but remove the error
2807   // context under which this statement is reached.
2808   isl::set DomainCtx = Stmt.getDomain().params();
2809   DomainCtx = DomainCtx.subtract(StmtInvalidCtx);
2810 
2811   if (unsignedFromIslSize(DomainCtx.n_basic_set()) >= MaxDisjunctsInDomain) {
2812     auto *AccInst = InvMAs.front().MA->getAccessInstruction();
2813     scop->invalidate(COMPLEXITY, AccInst->getDebugLoc(), AccInst->getParent());
2814     return;
2815   }
2816 
2817   // Project out all parameters that relate to loads in the statement. Otherwise
2818   // we could have cyclic dependences on the constraints under which the
2819   // hoisted loads are executed and we could not determine an order in which to
2820   // pre-load them. This happens because not only lower bounds are part of the
2821   // domain but also upper bounds.
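  // For instance (illustrative only): if a hoisted load %n is itself used as a
  // parameter of this statement's domain, e.g. { Stmt[i] : 0 <= i < %n }, then
  // keeping %n in DomainCtx would make the execution context of the hoisted
  // load depend on the very value it produces, and no valid preload order
  // would exist.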
2822   for (auto &InvMA : InvMAs) {
2823     auto *MA = InvMA.MA;
2824     Instruction *AccInst = MA->getAccessInstruction();
2825     if (SE.isSCEVable(AccInst->getType())) {
2826       SetVector<Value *> Values;
2827       for (const SCEV *Parameter : scop->parameters()) {
2828         Values.clear();
2829         findValues(Parameter, SE, Values);
2830         if (!Values.count(AccInst))
2831           continue;
2832 
2833         isl::id ParamId = scop->getIdForParam(Parameter);
2834         if (!ParamId.is_null()) {
2835           int Dim = DomainCtx.find_dim_by_id(isl::dim::param, ParamId);
2836           if (Dim >= 0)
2837             DomainCtx = DomainCtx.eliminate(isl::dim::param, Dim, 1);
2838         }
2839       }
2840     }
2841   }
2842 
2843   for (auto &InvMA : InvMAs) {
2844     auto *MA = InvMA.MA;
2845     isl::set NHCtx = InvMA.NonHoistableCtx;
2846 
2847     // Check for another invariant access that accesses the same location as
2848     // MA and if found consolidate them. Otherwise create a new equivalence
2849     // class at the end of InvariantEquivClasses.
2850     LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction());
2851     Type *Ty = LInst->getType();
2852     const SCEV *PointerSCEV = SE.getSCEV(LInst->getPointerOperand());
2853 
2854     isl::set MAInvalidCtx = MA->getInvalidContext();
2855     bool NonHoistableCtxIsEmpty = NHCtx.is_empty();
2856     bool MAInvalidCtxIsEmpty = MAInvalidCtx.is_empty();
2857 
2858     isl::set MACtx;
2859     // Check if we know that this pointer can be speculatively accessed.
2860     if (canAlwaysBeHoisted(MA, StmtInvalidCtxIsEmpty, MAInvalidCtxIsEmpty,
2861                            NonHoistableCtxIsEmpty)) {
2862       MACtx = isl::set::universe(DomainCtx.get_space());
2863     } else {
2864       MACtx = DomainCtx;
2865       MACtx = MACtx.subtract(MAInvalidCtx.unite(NHCtx));
2866       MACtx = MACtx.gist_params(scop->getContext());
2867     }
2868 
2869     bool Consolidated = false;
2870     for (auto &IAClass : scop->invariantEquivClasses()) {
2871       if (PointerSCEV != IAClass.IdentifyingPointer || Ty != IAClass.AccessType)
2872         continue;
2873 
2874       // If the pointer and the type are equal, check if the access function
2875       // w.r.t. the domain is equal too. It can happen that the domain fixes
2876       // parameter values and these can be different for distinct parts of the
2877       // SCoP. If this happens we cannot consolidate the loads but need to
2878       // create a new invariant load equivalence class.
2879       auto &MAs = IAClass.InvariantAccesses;
2880       if (!MAs.empty()) {
2881         auto *LastMA = MAs.front();
2882 
2883         isl::set AR = MA->getAccessRelation().range();
2884         isl::set LastAR = LastMA->getAccessRelation().range();
2885         bool SameAR = AR.is_equal(LastAR);
2886 
2887         if (!SameAR)
2888           continue;
2889       }
2890 
2891       // Add MA to the list of accesses that are in this class.
2892       MAs.push_front(MA);
2893 
2894       Consolidated = true;
2895 
2896       // Unify the execution context of the class and this statement.
2897       isl::set IAClassDomainCtx = IAClass.ExecutionContext;
2898       if (!IAClassDomainCtx.is_null())
2899         IAClassDomainCtx = IAClassDomainCtx.unite(MACtx).coalesce();
2900       else
2901         IAClassDomainCtx = MACtx;
2902       IAClass.ExecutionContext = IAClassDomainCtx;
2903       break;
2904     }
2905 
2906     if (Consolidated)
2907       continue;
2908 
2909     MACtx = MACtx.coalesce();
2910 
2911     // If we did not consolidate MA, thus did not find an equivalence class
2912     // for it, we create a new one.
2913     scop->addInvariantEquivClass(
2914         InvariantEquivClassTy{PointerSCEV, MemoryAccessList{MA}, MACtx, Ty});
2915   }
2916 }
2917 
2918 void ScopBuilder::collectCandidateReductionLoads(
2919     MemoryAccess *StoreMA, SmallVectorImpl<MemoryAccess *> &Loads) {
2920   ScopStmt *Stmt = StoreMA->getStatement();
2921 
2922   auto *Store = dyn_cast<StoreInst>(StoreMA->getAccessInstruction());
2923   if (!Store)
2924     return;
2925 
2926   // Skip if there is not exactly one binary operator between the load and the store
2927   auto *BinOp = dyn_cast<BinaryOperator>(Store->getValueOperand());
2928   if (!BinOp)
2929     return;
2930 
2931   // Skip if the binary operator has multiple uses
2932   if (BinOp->getNumUses() != 1)
2933     return;
2934 
2935   // Skip if the opcode of the binary operator is not commutative/associative
2936   if (!BinOp->isCommutative() || !BinOp->isAssociative())
2937     return;
2938 
2939   // Skip if the binary operator is outside the current SCoP
2940   if (BinOp->getParent() != Store->getParent())
2941     return;
2942 
2943   // Skip if it is a multiplicative reduction and we disabled them
2944   if (DisableMultiplicativeReductions &&
2945       (BinOp->getOpcode() == Instruction::Mul ||
2946        BinOp->getOpcode() == Instruction::FMul))
2947     return;
2948 
2949   // Check the binary operator operands for a candidate load
2950   auto *PossibleLoad0 = dyn_cast<LoadInst>(BinOp->getOperand(0));
2951   auto *PossibleLoad1 = dyn_cast<LoadInst>(BinOp->getOperand(1));
2952   if (!PossibleLoad0 && !PossibleLoad1)
2953     return;
2954 
2955   // A load is only a candidate if it cannot escape (thus has only this use)
2956   if (PossibleLoad0 && PossibleLoad0->getNumUses() == 1)
2957     if (PossibleLoad0->getParent() == Store->getParent())
2958       Loads.push_back(&Stmt->getArrayAccessFor(PossibleLoad0));
2959   if (PossibleLoad1 && PossibleLoad1->getNumUses() == 1)
2960     if (PossibleLoad1->getParent() == Store->getParent())
2961       Loads.push_back(&Stmt->getArrayAccessFor(PossibleLoad1));
2962 }
2963 
2964 /// Find the canonical scop array info object for a set of invariant
2965 /// load-hoisted loads. The canonical array is the one that corresponds to the
2966 /// first load in the list of accesses which is used as base pointer of a
2967 /// scop array.
2968 static const ScopArrayInfo *findCanonicalArray(Scop &S,
2969                                                MemoryAccessList &Accesses) {
2970   for (MemoryAccess *Access : Accesses) {
2971     const ScopArrayInfo *CanonicalArray = S.getScopArrayInfoOrNull(
2972         Access->getAccessInstruction(), MemoryKind::Array);
2973     if (CanonicalArray)
2974       return CanonicalArray;
2975   }
2976   return nullptr;
2977 }
2978 
2979 /// Check if @p Array serves as base array in an invariant load.
2980 static bool isUsedForIndirectHoistedLoad(Scop &S, const ScopArrayInfo *Array) {
2981   for (InvariantEquivClassTy &EqClass2 : S.getInvariantAccesses())
2982     for (MemoryAccess *Access2 : EqClass2.InvariantAccesses)
2983       if (Access2->getScopArrayInfo() == Array)
2984         return true;
2985   return false;
2986 }
2987 
2988 /// Replace the base pointer arrays in all memory accesses referencing @p Old,
2989 /// with a reference to @p New.
2990 static void replaceBasePtrArrays(Scop &S, const ScopArrayInfo *Old,
2991                                  const ScopArrayInfo *New) {
2992   for (ScopStmt &Stmt : S)
2993     for (MemoryAccess *Access : Stmt) {
2994       if (Access->getLatestScopArrayInfo() != Old)
2995         continue;
2996 
2997       isl::id Id = New->getBasePtrId();
2998       isl::map Map = Access->getAccessRelation();
2999       Map = Map.set_tuple_id(isl::dim::out, Id);
3000       Access->setAccessRelation(Map);
3001     }
3002 }
3003 
3004 void ScopBuilder::canonicalizeDynamicBasePtrs() {
3005   for (InvariantEquivClassTy &EqClass : scop->InvariantEquivClasses) {
3006     MemoryAccessList &BasePtrAccesses = EqClass.InvariantAccesses;
3007 
3008     const ScopArrayInfo *CanonicalBasePtrSAI =
3009         findCanonicalArray(*scop, BasePtrAccesses);
3010 
3011     if (!CanonicalBasePtrSAI)
3012       continue;
3013 
3014     for (MemoryAccess *BasePtrAccess : BasePtrAccesses) {
3015       const ScopArrayInfo *BasePtrSAI = scop->getScopArrayInfoOrNull(
3016           BasePtrAccess->getAccessInstruction(), MemoryKind::Array);
3017       if (!BasePtrSAI || BasePtrSAI == CanonicalBasePtrSAI ||
3018           !BasePtrSAI->isCompatibleWith(CanonicalBasePtrSAI))
3019         continue;
3020 
3021       // We currently do not canonicalize arrays where some accesses are
3022       // hoisted as invariant loads. If we did, we would need to update the
3023       // access functions of the invariant loads as well. However, as this is
3024       // not a very common situation, we leave this for now to avoid further
3025       // complexity increases.
3026       if (isUsedForIndirectHoistedLoad(*scop, BasePtrSAI))
3027         continue;
3028 
3029       replaceBasePtrArrays(*scop, BasePtrSAI, CanonicalBasePtrSAI);
3030     }
3031   }
3032 }
3033 
3034 void ScopBuilder::buildAccessRelations(ScopStmt &Stmt) {
3035   for (MemoryAccess *Access : Stmt.MemAccs) {
3036     Type *ElementType = Access->getElementType();
3037 
3038     MemoryKind Ty;
3039     if (Access->isPHIKind())
3040       Ty = MemoryKind::PHI;
3041     else if (Access->isExitPHIKind())
3042       Ty = MemoryKind::ExitPHI;
3043     else if (Access->isValueKind())
3044       Ty = MemoryKind::Value;
3045     else
3046       Ty = MemoryKind::Array;
3047 
3048     // Create isl::pw_aff for SCEVs which describe sizes. Collect all
3049     // assumptions which are taken. isl::pw_aff objects are cached internally
3050     // and they are used later by scop.
3051     for (const SCEV *Size : Access->Sizes) {
3052       if (!Size)
3053         continue;
3054       scop->getPwAff(Size, nullptr, false, &RecordedAssumptions);
3055     }
3056     auto *SAI = scop->getOrCreateScopArrayInfo(Access->getOriginalBaseAddr(),
3057                                                ElementType, Access->Sizes, Ty);
3058 
3059     // Create isl::pw_aff for SCEVs which describe subscripts. Collect all
3060     // assumptions which are taken. isl::pw_aff objects are cached internally
3061     // and they are used later by scop.
3062     for (const SCEV *Subscript : Access->subscripts()) {
3063       if (!Access->isAffine() || !Subscript)
3064         continue;
3065       scop->getPwAff(Subscript, Stmt.getEntryBlock(), false,
3066                      &RecordedAssumptions);
3067     }
3068     Access->buildAccessRelation(SAI);
3069     scop->addAccessData(Access);
3070   }
3071 }
3072 
3073 /// Add the minimal/maximal access in @p Set to @p MinMaxAccesses.
3074 ///
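/// Illustrative example (not derived from the code below): for an access range
/// { A[i] : 0 <= i < n } the resulting pair is roughly (A[0], A[n]); the
/// maximal access is incremented by one so that the two accesses enclose the
/// touched memory as a half-open interval.
///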
3075 /// @return True if more accesses should be added, false if we reached the
3076 ///         maximal number of run-time checks to be generated.
3077 static bool buildMinMaxAccess(isl::set Set,
3078                               Scop::MinMaxVectorTy &MinMaxAccesses, Scop &S) {
3079   isl::pw_multi_aff MinPMA, MaxPMA;
3080   isl::pw_aff LastDimAff;
3081   isl::aff OneAff;
3082   unsigned Pos;
3083 
3084   Set = Set.remove_divs();
3085   polly::simplify(Set);
3086 
3087   if (unsignedFromIslSize(Set.n_basic_set()) > RunTimeChecksMaxAccessDisjuncts)
3088     Set = Set.simple_hull();
3089 
3090   // Restrict the number of parameters involved in the access as the lexmin/
3091   // lexmax computation will take too long if this number is high.
3092   //
3093   // Experiments with a simple test case using an i7 4800MQ:
3094   //
3095   //  #Parameters involved | Time (in sec)
3096   //            6          |     0.01
3097   //            7          |     0.04
3098   //            8          |     0.12
3099   //            9          |     0.40
3100   //           10          |     1.54
3101   //           11          |     6.78
3102   //           12          |    30.38
3103   //
3104   if (isl_set_n_param(Set.get()) >
3105       static_cast<isl_size>(RunTimeChecksMaxParameters)) {
3106     unsigned InvolvedParams = 0;
3107     for (unsigned u = 0, e = isl_set_n_param(Set.get()); u < e; u++)
3108       if (Set.involves_dims(isl::dim::param, u, 1))
3109         InvolvedParams++;
3110 
3111     if (InvolvedParams > RunTimeChecksMaxParameters)
3112       return false;
3113   }
3114 
3115   MinPMA = Set.lexmin_pw_multi_aff();
3116   MaxPMA = Set.lexmax_pw_multi_aff();
3117 
3118   MinPMA = MinPMA.coalesce();
3119   MaxPMA = MaxPMA.coalesce();
3120 
3121   if (MaxPMA.is_null())
3122     return false;
3123 
3124   unsigned MaxOutputSize = unsignedFromIslSize(MaxPMA.dim(isl::dim::out));
3125 
3126   // Adjust the last dimension of the maximal access by one as we want to
3127   // enclose the accessed memory region by MinPMA and MaxPMA. The pointer
3128   // we test during code generation might now point after the end of the
3129   // allocated array but we will never dereference it anyway.
3130   assert(MaxOutputSize >= 1 && "Assumed at least one output dimension");
3131 
3132   Pos = MaxOutputSize - 1;
3133   LastDimAff = MaxPMA.at(Pos);
3134   OneAff = isl::aff(isl::local_space(LastDimAff.get_domain_space()));
3135   OneAff = OneAff.add_constant_si(1);
3136   LastDimAff = LastDimAff.add(OneAff);
3137   MaxPMA = MaxPMA.set_pw_aff(Pos, LastDimAff);
3138 
3139   if (MinPMA.is_null() || MaxPMA.is_null())
3140     return false;
3141 
3142   MinMaxAccesses.push_back(std::make_pair(MinPMA, MaxPMA));
3143 
3144   return true;
3145 }
3146 
3147 /// Wrapper function to calculate minimal/maximal accesses to each array.
3148 bool ScopBuilder::calculateMinMaxAccess(AliasGroupTy AliasGroup,
3149                                         Scop::MinMaxVectorTy &MinMaxAccesses) {
3150   MinMaxAccesses.reserve(AliasGroup.size());
3151 
3152   isl::union_set Domains = scop->getDomains();
3153   isl::union_map Accesses = isl::union_map::empty(scop->getIslCtx());
3154 
3155   for (MemoryAccess *MA : AliasGroup)
3156     Accesses = Accesses.unite(MA->getAccessRelation());
3157 
3158   Accesses = Accesses.intersect_domain(Domains);
3159   isl::union_set Locations = Accesses.range();
3160 
3161   bool LimitReached = false;
3162   for (isl::set Set : Locations.get_set_list()) {
3163     LimitReached |= !buildMinMaxAccess(Set, MinMaxAccesses, *scop);
3164     if (LimitReached)
3165       break;
3166   }
3167 
3168   return !LimitReached;
3169 }
3170 
3171 static isl::set getAccessDomain(MemoryAccess *MA) {
3172   isl::set Domain = MA->getStatement()->getDomain();
3173   Domain = Domain.project_out(isl::dim::set, 0,
3174                               unsignedFromIslSize(Domain.tuple_dim()));
3175   return Domain.reset_tuple_id();
3176 }
3177 
3178 bool ScopBuilder::buildAliasChecks() {
3179   if (!PollyUseRuntimeAliasChecks)
3180     return true;
3181 
3182   if (buildAliasGroups()) {
3183     // Aliasing assumptions do not go through addAssumption but we still want to
3184     // collect statistics so we do it here explicitly.
3185     if (scop->getAliasGroups().size())
3186       Scop::incrementNumberOfAliasingAssumptions(1);
3187     return true;
3188   }
3189 
3190   // If a problem occurs while building the alias groups we need to delete
3191   // this SCoP and pretend it wasn't valid in the first place. To this end
3192   // we make the assumed context infeasible.
3193   scop->invalidate(ALIASING, DebugLoc());
3194 
3195   LLVM_DEBUG(dbgs() << "\n\nNOTE: Run time checks for " << scop->getNameStr()
3196                     << " could not be created. This SCoP has been dismissed.");
3197   return false;
3198 }
3199 
3200 std::tuple<ScopBuilder::AliasGroupVectorTy, DenseSet<const ScopArrayInfo *>>
3201 ScopBuilder::buildAliasGroupsForAccesses() {
3202   AliasSetTracker AST(AA);
3203 
3204   DenseMap<Value *, MemoryAccess *> PtrToAcc;
3205   DenseSet<const ScopArrayInfo *> HasWriteAccess;
3206   for (ScopStmt &Stmt : *scop) {
3207 
3208     isl::set StmtDomain = Stmt.getDomain();
3209     bool StmtDomainEmpty = StmtDomain.is_empty();
3210 
3211     // Statements with an empty domain will never be executed.
3212     if (StmtDomainEmpty)
3213       continue;
3214 
3215     for (MemoryAccess *MA : Stmt) {
3216       if (MA->isScalarKind())
3217         continue;
3218       if (!MA->isRead())
3219         HasWriteAccess.insert(MA->getScopArrayInfo());
3220       MemAccInst Acc(MA->getAccessInstruction());
3221       if (MA->isRead() && isa<MemTransferInst>(Acc))
3222         PtrToAcc[cast<MemTransferInst>(Acc)->getRawSource()] = MA;
3223       else
3224         PtrToAcc[Acc.getPointerOperand()] = MA;
3225       AST.add(Acc);
3226     }
3227   }
3228 
3229   AliasGroupVectorTy AliasGroups;
3230   for (AliasSet &AS : AST) {
3231     if (AS.isMustAlias() || AS.isForwardingAliasSet())
3232       continue;
3233     AliasGroupTy AG;
3234     for (auto &PR : AS)
3235       AG.push_back(PtrToAcc[PR.getValue()]);
3236     if (AG.size() < 2)
3237       continue;
3238     AliasGroups.push_back(std::move(AG));
3239   }
3240 
3241   return std::make_tuple(AliasGroups, HasWriteAccess);
3242 }
3243 
3244 bool ScopBuilder::buildAliasGroups() {
3245   // To create sound alias checks we perform the following steps:
3246   //   o) We partition each group into read only and non read only accesses.
3247   //   o) For each group with more than one base pointer we then compute minimal
3248   //      and maximal accesses to each array of a group in read only and non
3249   //      read only partitions separately.
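  // For example (illustrative only): if a group contains accesses to arrays A
  // (written) and B, C (only read), runtime checks compare the min/max range
  // of A against those of B and C, while B and C are never checked against
  // each other because two read-only arrays cannot conflict.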
3250   AliasGroupVectorTy AliasGroups;
3251   DenseSet<const ScopArrayInfo *> HasWriteAccess;
3252 
3253   std::tie(AliasGroups, HasWriteAccess) = buildAliasGroupsForAccesses();
3254 
3255   splitAliasGroupsByDomain(AliasGroups);
3256 
3257   for (AliasGroupTy &AG : AliasGroups) {
3258     if (!scop->hasFeasibleRuntimeContext())
3259       return false;
3260 
3261     {
3262       IslMaxOperationsGuard MaxOpGuard(scop->getIslCtx().get(), OptComputeOut);
3263       bool Valid = buildAliasGroup(AG, HasWriteAccess);
3264       if (!Valid)
3265         return false;
3266     }
3267     if (isl_ctx_last_error(scop->getIslCtx().get()) == isl_error_quota) {
3268       scop->invalidate(COMPLEXITY, DebugLoc());
3269       return false;
3270     }
3271   }
3272 
3273   return true;
3274 }
3275 
3276 bool ScopBuilder::buildAliasGroup(
3277     AliasGroupTy &AliasGroup, DenseSet<const ScopArrayInfo *> HasWriteAccess) {
3278   AliasGroupTy ReadOnlyAccesses;
3279   AliasGroupTy ReadWriteAccesses;
3280   SmallPtrSet<const ScopArrayInfo *, 4> ReadWriteArrays;
3281   SmallPtrSet<const ScopArrayInfo *, 4> ReadOnlyArrays;
3282 
3283   if (AliasGroup.size() < 2)
3284     return true;
3285 
3286   for (MemoryAccess *Access : AliasGroup) {
3287     ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "PossibleAlias",
3288                                         Access->getAccessInstruction())
3289              << "Possibly aliasing pointer, use restrict keyword.");
3290     const ScopArrayInfo *Array = Access->getScopArrayInfo();
3291     if (HasWriteAccess.count(Array)) {
3292       ReadWriteArrays.insert(Array);
3293       ReadWriteAccesses.push_back(Access);
3294     } else {
3295       ReadOnlyArrays.insert(Array);
3296       ReadOnlyAccesses.push_back(Access);
3297     }
3298   }
3299 
3300   // If there are no read-only pointers, and less than two read-write pointers,
3301   // no alias check is needed.
3302   if (ReadOnlyAccesses.empty() && ReadWriteArrays.size() <= 1)
3303     return true;
3304 
3305   // If there is no read-write pointer, no alias check is needed.
3306   if (ReadWriteArrays.empty())
3307     return true;
3308 
3309   // For non-affine accesses, no alias check can be generated as we cannot
3310   // compute a sufficiently tight lower and upper bound: bail out.
3311   for (MemoryAccess *MA : AliasGroup) {
3312     if (!MA->isAffine()) {
3313       scop->invalidate(ALIASING, MA->getAccessInstruction()->getDebugLoc(),
3314                        MA->getAccessInstruction()->getParent());
3315       return false;
3316     }
3317   }
3318 
3319   // Ensure that for all memory accesses for which we generate alias checks,
3320   // their base pointers are available.
3321   for (MemoryAccess *MA : AliasGroup) {
3322     if (MemoryAccess *BasePtrMA = scop->lookupBasePtrAccess(MA))
3323       scop->addRequiredInvariantLoad(
3324           cast<LoadInst>(BasePtrMA->getAccessInstruction()));
3325   }
3326 
3327   //  scop->getAliasGroups().emplace_back();
3328   //  Scop::MinMaxVectorPairTy &pair = scop->getAliasGroups().back();
3329   Scop::MinMaxVectorTy MinMaxAccessesReadWrite;
3330   Scop::MinMaxVectorTy MinMaxAccessesReadOnly;
3331 
3332   bool Valid;
3333 
3334   Valid = calculateMinMaxAccess(ReadWriteAccesses, MinMaxAccessesReadWrite);
3335 
3336   if (!Valid)
3337     return false;
3338 
3339   // Bail out if the number of values we need to compare is too large.
3340   // This is important as the number of comparisons grows quadratically with
3341   // the number of values we need to compare.
3342   if (MinMaxAccessesReadWrite.size() + ReadOnlyArrays.size() >
3343       RunTimeChecksMaxArraysPerGroup)
3344     return false;
3345 
3346   Valid = calculateMinMaxAccess(ReadOnlyAccesses, MinMaxAccessesReadOnly);
3347 
3348   scop->addAliasGroup(MinMaxAccessesReadWrite, MinMaxAccessesReadOnly);
3349   if (!Valid)
3350     return false;
3351 
3352   return true;
3353 }
3354 
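// Note (rationale, inferred from the implementation below): accesses whose
// statements have disjoint execution domains can never be executed together,
// so keeping them in one alias group would only create superfluous runtime
// checks. The split below is a greedy pass, not a full partitioning into
// independent subgroups.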
3355 void ScopBuilder::splitAliasGroupsByDomain(AliasGroupVectorTy &AliasGroups) {
3356   for (unsigned u = 0; u < AliasGroups.size(); u++) {
3357     AliasGroupTy NewAG;
3358     AliasGroupTy &AG = AliasGroups[u];
3359     AliasGroupTy::iterator AGI = AG.begin();
3360     isl::set AGDomain = getAccessDomain(*AGI);
3361     while (AGI != AG.end()) {
3362       MemoryAccess *MA = *AGI;
3363       isl::set MADomain = getAccessDomain(MA);
3364       if (AGDomain.is_disjoint(MADomain)) {
3365         NewAG.push_back(MA);
3366         AGI = AG.erase(AGI);
3367       } else {
3368         AGDomain = AGDomain.unite(MADomain);
3369         AGI++;
3370       }
3371     }
3372     if (NewAG.size() > 1)
3373       AliasGroups.push_back(std::move(NewAG));
3374   }
3375 }
3376 
3377 #ifndef NDEBUG
3378 static void verifyUse(Scop *S, Use &Op, LoopInfo &LI) {
3379   auto PhysUse = VirtualUse::create(S, Op, &LI, false);
3380   auto VirtUse = VirtualUse::create(S, Op, &LI, true);
3381   assert(PhysUse.getKind() == VirtUse.getKind());
3382 }
3383 
3384 /// Check the consistency of every statement's MemoryAccesses.
3385 ///
3386 /// The check is carried out by expecting the "physical" kind of use (derived
3387 /// from the BasicBlock the instruction resides in) to be the same as the
3388 /// "virtual" kind of use (derived from a statement's MemoryAccess).
3389 ///
3390 /// The "physical" uses are taken by ensureValueRead to determine whether to
3391 /// create MemoryAccesses. When done, the kind of scalar access should be the
3392 /// same no matter which way it was derived.
3393 ///
3394 /// The MemoryAccesses might be changed by later SCoP-modifying passes and hence
3395 /// can intentionally influence the kind of uses (no longer corresponding to the
3396 /// "physical" ones, hence called "virtual"). The CodeGenerator therefore has to
3397 /// pick up the virtual uses. Here, however, no SCoP-modifying pass has run yet,
3398 /// such that virtual and physical uses are still equivalent.
3399 static void verifyUses(Scop *S, LoopInfo &LI, DominatorTree &DT) {
3400   for (auto *BB : S->getRegion().blocks()) {
3401     for (auto &Inst : *BB) {
3402       auto *Stmt = S->getStmtFor(&Inst);
3403       if (!Stmt)
3404         continue;
3405 
3406       if (isIgnoredIntrinsic(&Inst))
3407         continue;
3408 
3409       // Branch conditions are encoded in the statement domains.
3410       if (Inst.isTerminator() && Stmt->isBlockStmt())
3411         continue;
3412 
3413       // Verify all uses.
3414       for (auto &Op : Inst.operands())
3415         verifyUse(S, Op, LI);
3416 
3417       // Stores do not produce values used by other statements.
3418       if (isa<StoreInst>(Inst))
3419         continue;
3420 
3421       // For every value defined in the block, also check that a use of that
3422       // value in the same statement would not be an inter-statement use. It can
3423       // still be synthesizable or load-hoisted, but these kinds of instructions
3424       // are not directly copied in code-generation.
3425       auto VirtDef =
3426           VirtualUse::create(S, Stmt, Stmt->getSurroundingLoop(), &Inst, true);
3427       assert(VirtDef.getKind() == VirtualUse::Synthesizable ||
3428              VirtDef.getKind() == VirtualUse::Intra ||
3429              VirtDef.getKind() == VirtualUse::Hoisted);
3430     }
3431   }
3432 
3433   if (S->hasSingleExitEdge())
3434     return;
3435 
3436   // PHINodes in the SCoP region's exit block are also uses to be checked.
3437   if (!S->getRegion().isTopLevelRegion()) {
3438     for (auto &Inst : *S->getRegion().getExit()) {
3439       if (!isa<PHINode>(Inst))
3440         break;
3441 
3442       for (auto &Op : Inst.operands())
3443         verifyUse(S, Op, LI);
3444     }
3445   }
3446 }
3447 #endif
3448 
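// High-level roadmap of the construction below (descriptive summary only):
// build statements and memory accesses, compute iteration domains, apply user
// assumptions, build per-statement access relations and detect reductions,
// check feasibility and profitability, build the schedule, add the user
// context and recorded assumptions, emit alias checks, hoist invariant loads,
// and finally re-verify that the runtime context is still feasible.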
3449 void ScopBuilder::buildScop(Region &R, AssumptionCache &AC) {
3450   scop.reset(new Scop(R, SE, LI, DT, *SD.getDetectionContext(&R), ORE,
3451                       SD.getNextID()));
3452 
3453   buildStmts(R);
3454 
3455   // Create all invariant load instructions first. These are categorized as
3456   // 'synthesizable', and therefore are not part of any ScopStmt, but they
3457   // still need to be created somewhere.
3458   const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
3459   for (BasicBlock *BB : scop->getRegion().blocks()) {
3460     if (SD.isErrorBlock(*BB, scop->getRegion()))
3461       continue;
3462 
3463     for (Instruction &Inst : *BB) {
3464       LoadInst *Load = dyn_cast<LoadInst>(&Inst);
3465       if (!Load)
3466         continue;
3467 
3468       if (!RIL.count(Load))
3469         continue;
3470 
3471       // Invariant loads require a MemoryAccess to be created in some statement.
3472       // It is not important to which statement the MemoryAccess is added
3473       // because it will later be removed from the ScopStmt again. We choose
3474       // the first statement of the basic block the LoadInst is in.
3475       ArrayRef<ScopStmt *> List = scop->getStmtListFor(BB);
3476       assert(!List.empty());
3477       ScopStmt *RILStmt = List.front();
3478       buildMemoryAccess(Load, RILStmt);
3479     }
3480   }
3481   buildAccessFunctions();
3482 
3483   // In case the region does not have an exiting block we will later (during
3484   // code generation) split the exit block. This will move potential PHI nodes
3485   // from the current exit block into the new region exiting block. Hence, PHI
3486   // nodes that are not part of the region at this point will become part of it.
3487   // To handle these PHI nodes later we will now model their operands as scalar
3488   // accesses. Note that we do not model anything in the exit block if we have
3489   // an exiting block in the region, as there will not be any splitting later.
3490   if (!R.isTopLevelRegion() && !scop->hasSingleExitEdge()) {
3491     for (Instruction &Inst : *R.getExit()) {
3492       PHINode *PHI = dyn_cast<PHINode>(&Inst);
3493       if (!PHI)
3494         break;
3495 
3496       buildPHIAccesses(nullptr, PHI, nullptr, true);
3497     }
3498   }
3499 
3500   // Create memory accesses for global reads since all arrays are now known.
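  // Each global read (typically a call instruction that only reads memory but
  // whose exact locations are unknown) is modeled conservatively: it receives a
  // read access, marked non-affine, to every known array base pointer, which in
  // effect treats each of those arrays as potentially read.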
3501   auto *AF = SE.getConstant(IntegerType::getInt64Ty(SE.getContext()), 0);
3502   for (auto GlobalReadPair : GlobalReads) {
3503     ScopStmt *GlobalReadStmt = GlobalReadPair.first;
3504     Instruction *GlobalRead = GlobalReadPair.second;
3505     for (auto *BP : ArrayBasePointers)
3506       addArrayAccess(GlobalReadStmt, MemAccInst(GlobalRead), MemoryAccess::READ,
3507                      BP, BP->getType(), false, {AF}, {nullptr}, GlobalRead);
3508   }
3509 
3510   buildInvariantEquivalenceClasses();
3511 
3512   /// A map from basic blocks to their invalid domains.
3513   DenseMap<BasicBlock *, isl::set> InvalidDomainMap;
3514 
3515   if (!buildDomains(&R, InvalidDomainMap)) {
3516     LLVM_DEBUG(
3517         dbgs() << "Bailing-out because buildDomains encountered problems\n");
3518     return;
3519   }
3520 
3521   addUserAssumptions(AC, InvalidDomainMap);
3522 
3523   // Initialize the invalid domain.
3524   for (ScopStmt &Stmt : scop->Stmts)
3525     if (Stmt.isBlockStmt())
3526       Stmt.setInvalidDomain(InvalidDomainMap[Stmt.getEntryBlock()]);
3527     else
3528       Stmt.setInvalidDomain(InvalidDomainMap[getRegionNodeBasicBlock(
3529           Stmt.getRegion()->getNode())]);
3530 
3531   // Remove empty statements.
3532   // Exit early in case there are no executable statements left in this scop.
3533   scop->removeStmtNotInDomainMap();
3534   scop->simplifySCoP(false);
3535   if (scop->isEmpty()) {
3536     LLVM_DEBUG(dbgs() << "Bailing-out because SCoP is empty\n");
3537     return;
3538   }
3539 
3540   // The ScopStmts now have enough information to initialize themselves.
3541   for (ScopStmt &Stmt : *scop) {
3542     collectSurroundingLoops(Stmt);
3543 
3544     buildDomain(Stmt);
3545     buildAccessRelations(Stmt);
3546 
3547     if (DetectReductions)
3548       checkForReductions(Stmt);
3549   }
3550 
3551   // Check early for a feasible runtime context.
3552   if (!scop->hasFeasibleRuntimeContext()) {
3553     LLVM_DEBUG(dbgs() << "Bailing-out because of unfeasible context (early)\n");
3554     return;
3555   }
3556 
3557   // Check early for profitability. Afterwards it cannot change anymore,
3558   // only the runtime context could become infeasible.
3559   if (!scop->isProfitable(UnprofitableScalarAccs)) {
3560     scop->invalidate(PROFITABLE, DebugLoc());
3561     LLVM_DEBUG(
3562         dbgs() << "Bailing-out because SCoP is not considered profitable\n");
3563     return;
3564   }
3565 
3566   buildSchedule();
3567 
3568   finalizeAccesses();
3569 
3570   scop->realignParams();
3571   addUserContext();
3572 
3573   // After the context was fully constructed, thus all our knowledge about
3574   // the parameters is in there, we add all recorded assumptions to the
3575   // assumed/invalid context.
3576   addRecordedAssumptions();
3577 
3578   scop->simplifyContexts();
3579   if (!buildAliasChecks()) {
3580     LLVM_DEBUG(dbgs() << "Bailing-out because could not build alias checks\n");
3581     return;
3582   }
3583 
3584   hoistInvariantLoads();
3585   canonicalizeDynamicBasePtrs();
3586   verifyInvariantLoads();
3587   scop->simplifySCoP(true);
3588 
3589   // Check late for a feasible runtime context because profitability did not
3590   // change.
3591   if (!scop->hasFeasibleRuntimeContext()) {
3592     LLVM_DEBUG(dbgs() << "Bailing-out because of unfeasible context (late)\n");
3593     return;
3594   }
3595 
3596 #ifndef NDEBUG
3597   verifyUses(scop.get(), LI, DT);
3598 #endif
3599 }
3600 
3601 ScopBuilder::ScopBuilder(Region *R, AssumptionCache &AC, AAResults &AA,
3602                          const DataLayout &DL, DominatorTree &DT, LoopInfo &LI,
3603                          ScopDetection &SD, ScalarEvolution &SE,
3604                          OptimizationRemarkEmitter &ORE)
3605     : AA(AA), DL(DL), DT(DT), LI(LI), SD(SD), SE(SE), ORE(ORE) {
3606   DebugLoc Beg, End;
3607   auto P = getBBPairForRegion(R);
3608   getDebugLocations(P, Beg, End);
3609 
3610   std::string Msg = "SCoP begins here.";
3611   ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEntry", Beg, P.first)
3612            << Msg);
3613 
3614   buildScop(*R, AC);
3615 
3616   LLVM_DEBUG(dbgs() << *scop);
3617 
3618   if (!scop->hasFeasibleRuntimeContext()) {
3619     InfeasibleScops++;
3620     Msg = "SCoP ends here but was dismissed.";
3621     LLVM_DEBUG(dbgs() << "SCoP detected but dismissed\n");
3622     RecordedAssumptions.clear();
3623     scop.reset();
3624   } else {
3625     Msg = "SCoP ends here.";
3626     ++ScopFound;
3627     if (scop->getMaxLoopDepth() > 0)
3628       ++RichScopFound;
3629   }
3630 
3631   if (R->isTopLevelRegion())
3632     ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.first)
3633              << Msg);
3634   else
3635     ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.second)
3636              << Msg);
3637 }
3638