1 //===- ScopBuilder.cpp ----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Create a polyhedral description for a static control flow region.
10 //
// The pass creates a polyhedral description of the SCoPs detected by SCoP
// detection, derived from their LLVM-IR code.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "polly/ScopBuilder.h"
17 #include "polly/Options.h"
18 #include "polly/ScopDetection.h"
19 #include "polly/ScopInfo.h"
20 #include "polly/Support/GICHelper.h"
21 #include "polly/Support/ISLTools.h"
22 #include "polly/Support/SCEVValidator.h"
23 #include "polly/Support/ScopHelper.h"
24 #include "polly/Support/VirtualInstruction.h"
25 #include "llvm/ADT/ArrayRef.h"
26 #include "llvm/ADT/EquivalenceClasses.h"
27 #include "llvm/ADT/PostOrderIterator.h"
28 #include "llvm/ADT/Sequence.h"
29 #include "llvm/ADT/SmallSet.h"
30 #include "llvm/ADT/Statistic.h"
31 #include "llvm/Analysis/AliasAnalysis.h"
32 #include "llvm/Analysis/AssumptionCache.h"
33 #include "llvm/Analysis/Delinearization.h"
34 #include "llvm/Analysis/Loads.h"
35 #include "llvm/Analysis/LoopInfo.h"
36 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
37 #include "llvm/Analysis/RegionInfo.h"
38 #include "llvm/Analysis/RegionIterator.h"
39 #include "llvm/Analysis/ScalarEvolution.h"
40 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
41 #include "llvm/IR/BasicBlock.h"
42 #include "llvm/IR/DataLayout.h"
43 #include "llvm/IR/DebugLoc.h"
44 #include "llvm/IR/DerivedTypes.h"
45 #include "llvm/IR/Dominators.h"
46 #include "llvm/IR/Function.h"
47 #include "llvm/IR/InstrTypes.h"
48 #include "llvm/IR/Instruction.h"
49 #include "llvm/IR/Instructions.h"
50 #include "llvm/IR/Type.h"
51 #include "llvm/IR/Use.h"
52 #include "llvm/IR/Value.h"
53 #include "llvm/Support/CommandLine.h"
54 #include "llvm/Support/Compiler.h"
55 #include "llvm/Support/Debug.h"
56 #include "llvm/Support/ErrorHandling.h"
57 #include "llvm/Support/raw_ostream.h"
58 #include <cassert>
59 
60 using namespace llvm;
61 using namespace polly;
62 
63 #define DEBUG_TYPE "polly-scops"
64 
65 STATISTIC(ScopFound, "Number of valid Scops");
66 STATISTIC(RichScopFound, "Number of Scops containing a loop");
67 STATISTIC(InfeasibleScops,
68           "Number of SCoPs with statically infeasible context.");
69 
70 bool polly::ModelReadOnlyScalars;
71 
72 // The maximal number of dimensions we allow during invariant load construction.
73 // More complex access ranges will result in very high compile time and are also
74 // unlikely to result in good code. This value is very high and should only
75 // trigger for corner cases (e.g., the "dct_luma" function in h264, SPEC2006).
76 static unsigned const MaxDimensionsInAccessRange = 9;
77 
78 static cl::opt<bool, true> XModelReadOnlyScalars(
79     "polly-analyze-read-only-scalars",
80     cl::desc("Model read-only scalar values in the scop description"),
81     cl::location(ModelReadOnlyScalars), cl::Hidden, cl::ZeroOrMore,
82     cl::init(true), cl::cat(PollyCategory));
83 
84 static cl::opt<int>
85     OptComputeOut("polly-analysis-computeout",
86                   cl::desc("Bound the scop analysis by a maximal amount of "
87                            "computational steps (0 means no bound)"),
88                   cl::Hidden, cl::init(800000), cl::ZeroOrMore,
89                   cl::cat(PollyCategory));
90 
91 static cl::opt<bool> PollyAllowDereferenceOfAllFunctionParams(
92     "polly-allow-dereference-of-all-function-parameters",
    cl::desc(
        "Treat all parameters to functions that are pointers as dereferenceable."
        " This is useful for invariant load hoisting, since we can generate"
        " fewer runtime checks. This is only valid if all pointers passed to"
        " functions are always initialized, so that Polly can choose to hoist"
        " their loads."),
99     cl::Hidden, cl::init(false), cl::cat(PollyCategory));
100 
101 static cl::opt<bool>
102     PollyIgnoreInbounds("polly-ignore-inbounds",
103                         cl::desc("Do not take inbounds assumptions at all"),
104                         cl::Hidden, cl::init(false), cl::cat(PollyCategory));
105 
106 static cl::opt<unsigned> RunTimeChecksMaxArraysPerGroup(
107     "polly-rtc-max-arrays-per-group",
108     cl::desc("The maximal number of arrays to compare in each alias group."),
109     cl::Hidden, cl::ZeroOrMore, cl::init(20), cl::cat(PollyCategory));
110 
111 static cl::opt<unsigned> RunTimeChecksMaxAccessDisjuncts(
112     "polly-rtc-max-array-disjuncts",
113     cl::desc("The maximal number of disjunts allowed in memory accesses to "
114              "to build RTCs."),
115     cl::Hidden, cl::ZeroOrMore, cl::init(8), cl::cat(PollyCategory));
116 
117 static cl::opt<unsigned> RunTimeChecksMaxParameters(
118     "polly-rtc-max-parameters",
119     cl::desc("The maximal number of parameters allowed in RTCs."), cl::Hidden,
120     cl::ZeroOrMore, cl::init(8), cl::cat(PollyCategory));
121 
122 static cl::opt<bool> UnprofitableScalarAccs(
123     "polly-unprofitable-scalar-accs",
124     cl::desc("Count statements with scalar accesses as not optimizable"),
125     cl::Hidden, cl::init(false), cl::cat(PollyCategory));
126 
127 static cl::opt<std::string> UserContextStr(
128     "polly-context", cl::value_desc("isl parameter set"),
129     cl::desc("Provide additional constraints on the context parameters"),
130     cl::init(""), cl::cat(PollyCategory));
131 
132 static cl::opt<bool> DetectReductions("polly-detect-reductions",
133                                       cl::desc("Detect and exploit reductions"),
134                                       cl::Hidden, cl::ZeroOrMore,
135                                       cl::init(true), cl::cat(PollyCategory));
136 
// Multiplicative reductions can be disabled separately as these kinds of
// operations can overflow easily. Additive reductions and bit operations
// are, in contrast, pretty stable.
140 static cl::opt<bool> DisableMultiplicativeReductions(
141     "polly-disable-multiplicative-reductions",
142     cl::desc("Disable multiplicative reductions"), cl::Hidden, cl::ZeroOrMore,
143     cl::init(false), cl::cat(PollyCategory));
144 
145 enum class GranularityChoice { BasicBlocks, ScalarIndependence, Stores };
146 
147 static cl::opt<GranularityChoice> StmtGranularity(
148     "polly-stmt-granularity",
149     cl::desc(
150         "Algorithm to use for splitting basic blocks into multiple statements"),
151     cl::values(clEnumValN(GranularityChoice::BasicBlocks, "bb",
152                           "One statement per basic block"),
153                clEnumValN(GranularityChoice::ScalarIndependence, "scalar-indep",
154                           "Scalar independence heuristic"),
155                clEnumValN(GranularityChoice::Stores, "store",
156                           "Store-level granularity")),
157     cl::init(GranularityChoice::ScalarIndependence), cl::cat(PollyCategory));
158 
159 /// Helper to treat non-affine regions and basic blocks the same.
160 ///
161 ///{
162 
163 /// Return the block that is the representing block for @p RN.
164 static inline BasicBlock *getRegionNodeBasicBlock(RegionNode *RN) {
165   return RN->isSubRegion() ? RN->getNodeAs<Region>()->getEntry()
166                            : RN->getNodeAs<BasicBlock>();
167 }
168 
169 /// Return the @p idx'th block that is executed after @p RN.
170 static inline BasicBlock *
171 getRegionNodeSuccessor(RegionNode *RN, Instruction *TI, unsigned idx) {
172   if (RN->isSubRegion()) {
173     assert(idx == 0);
174     return RN->getNodeAs<Region>()->getExit();
175   }
176   return TI->getSuccessor(idx);
177 }
178 
179 static bool containsErrorBlock(RegionNode *RN, const Region &R,
180                                ScopDetection *SD) {
181   if (!RN->isSubRegion())
182     return SD->isErrorBlock(*RN->getNodeAs<BasicBlock>(), R);
183   for (BasicBlock *BB : RN->getNodeAs<Region>()->blocks())
184     if (SD->isErrorBlock(*BB, R))
185       return true;
186   return false;
187 }
188 
189 ///}
190 
191 /// Create a map to map from a given iteration to a subsequent iteration.
192 ///
/// This map maps from SetSpace -> SetSpace where the dimension @p Dim
/// is incremented by one and all other dimensions are equal, e.g.,
195 ///             [i0, i1, i2, i3] -> [i0, i1, i2 + 1, i3]
196 ///
197 /// if @p Dim is 2 and @p SetSpace has 4 dimensions.
198 static isl::map createNextIterationMap(isl::space SetSpace, unsigned Dim) {
199   isl::space MapSpace = SetSpace.map_from_set();
200   isl::map NextIterationMap = isl::map::universe(MapSpace);
201   for (unsigned u : rangeIslSize(0, NextIterationMap.domain_tuple_dim()))
202     if (u != Dim)
203       NextIterationMap =
204           NextIterationMap.equate(isl::dim::in, u, isl::dim::out, u);
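  // Encode out[Dim] = in[Dim] + 1 as the equality constraint
  //   in[Dim] + 1 - out[Dim] = 0.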
205   isl::constraint C =
206       isl::constraint::alloc_equality(isl::local_space(MapSpace));
207   C = C.set_constant_si(1);
208   C = C.set_coefficient_si(isl::dim::in, Dim, 1);
209   C = C.set_coefficient_si(isl::dim::out, Dim, -1);
210   NextIterationMap = NextIterationMap.add_constraint(C);
211   return NextIterationMap;
212 }
213 
/// Collect the bounded parts (basic sets) of @p S and return their union.
215 static isl::set collectBoundedParts(isl::set S) {
216   isl::set BoundedParts = isl::set::empty(S.get_space());
217   for (isl::basic_set BSet : S.get_basic_set_list())
218     if (BSet.is_bounded())
219       BoundedParts = BoundedParts.unite(isl::set(BSet));
220   return BoundedParts;
221 }
222 
/// Compute the (un)bounded parts of @p S with respect to dimension @p Dim.
224 ///
225 /// @returns A separation of @p S into first an unbounded then a bounded subset,
226 ///          both with regards to the dimension @p Dim.
227 static std::pair<isl::set, isl::set> partitionSetParts(isl::set S,
228                                                        unsigned Dim) {
229   for (unsigned u : rangeIslSize(0, S.tuple_dim()))
230     S = S.lower_bound_si(isl::dim::set, u, 0);
231 
232   unsigned NumDimsS = unsignedFromIslSize(S.tuple_dim());
233   isl::set OnlyDimS = S;
234 
235   // Remove dimensions that are greater than Dim as they are not interesting.
236   assert(NumDimsS >= Dim + 1);
237   OnlyDimS = OnlyDimS.project_out(isl::dim::set, Dim + 1, NumDimsS - Dim - 1);
238 
239   // Create artificial parametric upper bounds for dimensions smaller than Dim
240   // as we are not interested in them.
241   OnlyDimS = OnlyDimS.insert_dims(isl::dim::param, 0, Dim);
242 
243   for (unsigned u = 0; u < Dim; u++) {
244     isl::constraint C = isl::constraint::alloc_inequality(
245         isl::local_space(OnlyDimS.get_space()));
246     C = C.set_coefficient_si(isl::dim::param, u, 1);
247     C = C.set_coefficient_si(isl::dim::set, u, -1);
248     OnlyDimS = OnlyDimS.add_constraint(C);
249   }
250 
251   // Collect all bounded parts of OnlyDimS.
252   isl::set BoundedParts = collectBoundedParts(OnlyDimS);
253 
254   // Create the dimensions greater than Dim again.
255   BoundedParts =
256       BoundedParts.insert_dims(isl::dim::set, Dim + 1, NumDimsS - Dim - 1);
257 
258   // Remove the artificial upper bound parameters again.
259   BoundedParts = BoundedParts.remove_dims(isl::dim::param, 0, Dim);
260 
261   isl::set UnboundedParts = S.subtract(BoundedParts);
262   return std::make_pair(UnboundedParts, BoundedParts);
263 }
264 
265 /// Create the conditions under which @p L @p Pred @p R is true.
266 static isl::set buildConditionSet(ICmpInst::Predicate Pred, isl::pw_aff L,
267                                   isl::pw_aff R) {
268   switch (Pred) {
269   case ICmpInst::ICMP_EQ:
270     return L.eq_set(R);
271   case ICmpInst::ICMP_NE:
272     return L.ne_set(R);
273   case ICmpInst::ICMP_SLT:
274     return L.lt_set(R);
275   case ICmpInst::ICMP_SLE:
276     return L.le_set(R);
277   case ICmpInst::ICMP_SGT:
278     return L.gt_set(R);
279   case ICmpInst::ICMP_SGE:
280     return L.ge_set(R);
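  // Note: the unsigned predicates below build the same sets as their signed
  // counterparts. This is sound only because the caller (buildConditionSets)
  // either routes unsigned comparisons through buildUnsignedConditionSets or
  // requests non-negative operands via getPwAff.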
281   case ICmpInst::ICMP_ULT:
282     return L.lt_set(R);
283   case ICmpInst::ICMP_UGT:
284     return L.gt_set(R);
285   case ICmpInst::ICMP_ULE:
286     return L.le_set(R);
287   case ICmpInst::ICMP_UGE:
288     return L.ge_set(R);
289   default:
290     llvm_unreachable("Non integer predicate not supported");
291   }
292 }
293 
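// Adjust the number of dimensions of @p Dom, which describes a domain nested
// in loop @p OldL, such that it describes a domain nested in loop @p NewL.
// The three cases below correspond to switching between sibling loops at the
// same depth, entering a loop, and leaving one or more loops.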
294 isl::set ScopBuilder::adjustDomainDimensions(isl::set Dom, Loop *OldL,
295                                              Loop *NewL) {
296   // If the loops are the same there is nothing to do.
297   if (NewL == OldL)
298     return Dom;
299 
300   int OldDepth = scop->getRelativeLoopDepth(OldL);
301   int NewDepth = scop->getRelativeLoopDepth(NewL);
302   // If both loops are non-affine loops there is nothing to do.
303   if (OldDepth == -1 && NewDepth == -1)
304     return Dom;
305 
306   // Distinguish three cases:
307   //   1) The depth is the same but the loops are not.
308   //      => One loop was left one was entered.
309   //   2) The depth increased from OldL to NewL.
310   //      => One loop was entered, none was left.
311   //   3) The depth decreased from OldL to NewL.
  //      => Loops were left; the difference of the depths defines how many.
313   if (OldDepth == NewDepth) {
314     assert(OldL->getParentLoop() == NewL->getParentLoop());
315     Dom = Dom.project_out(isl::dim::set, NewDepth, 1);
316     Dom = Dom.add_dims(isl::dim::set, 1);
317   } else if (OldDepth < NewDepth) {
318     assert(OldDepth + 1 == NewDepth);
319     auto &R = scop->getRegion();
320     (void)R;
321     assert(NewL->getParentLoop() == OldL ||
322            ((!OldL || !R.contains(OldL)) && R.contains(NewL)));
323     Dom = Dom.add_dims(isl::dim::set, 1);
324   } else {
325     assert(OldDepth > NewDepth);
326     unsigned Diff = OldDepth - NewDepth;
327     unsigned NumDim = unsignedFromIslSize(Dom.tuple_dim());
328     assert(NumDim >= Diff);
329     Dom = Dom.project_out(isl::dim::set, NumDim - Diff, Diff);
330   }
331 
332   return Dom;
333 }
334 
/// Compute the isl representation for the SCEV @p E in this BB.
///
/// @param BB               The BB for which the isl representation is to be
///                         computed.
/// @param InvalidDomainMap A map of BB to their invalid domains.
/// @param E                The SCEV that should be translated.
/// @param NonNegative      Flag to indicate that @p E has to be non-negative.
///
/// Note that this function will also adjust the invalid context accordingly.
345 __isl_give isl_pw_aff *
346 ScopBuilder::getPwAff(BasicBlock *BB,
347                       DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
348                       const SCEV *E, bool NonNegative) {
349   PWACtx PWAC = scop->getPwAff(E, BB, NonNegative, &RecordedAssumptions);
350   InvalidDomainMap[BB] = InvalidDomainMap[BB].unite(PWAC.second);
351   return PWAC.first.release();
352 }
353 
354 /// Build condition sets for unsigned ICmpInst(s).
/// Special handling is required for unsigned operands to ensure that if
/// the MSB (aka the sign bit) is set for an operand of an unsigned ICmpInst,
/// it wraps around.
///
/// @param IsStrictUpperBound holds information on the predicate relation
/// between TestVal and UpperBound, i.e.,
/// TestVal < UpperBound  OR  TestVal <= UpperBound
362 __isl_give isl_set *ScopBuilder::buildUnsignedConditionSets(
363     BasicBlock *BB, Value *Condition, __isl_keep isl_set *Domain,
364     const SCEV *SCEV_TestVal, const SCEV *SCEV_UpperBound,
365     DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
366     bool IsStrictUpperBound) {
367   // Do not take NonNeg assumption on TestVal
368   // as it might have MSB (Sign bit) set.
369   isl_pw_aff *TestVal = getPwAff(BB, InvalidDomainMap, SCEV_TestVal, false);
370   // Take NonNeg assumption on UpperBound.
371   isl_pw_aff *UpperBound =
372       getPwAff(BB, InvalidDomainMap, SCEV_UpperBound, true);
373 
374   // 0 <= TestVal
375   isl_set *First =
376       isl_pw_aff_le_set(isl_pw_aff_zero_on_domain(isl_local_space_from_space(
377                             isl_pw_aff_get_domain_space(TestVal))),
378                         isl_pw_aff_copy(TestVal));
379 
380   isl_set *Second;
381   if (IsStrictUpperBound)
382     // TestVal < UpperBound
383     Second = isl_pw_aff_lt_set(TestVal, UpperBound);
384   else
385     // TestVal <= UpperBound
386     Second = isl_pw_aff_le_set(TestVal, UpperBound);
387 
388   isl_set *ConsequenceCondSet = isl_set_intersect(First, Second);
389   return ConsequenceCondSet;
390 }
391 
392 bool ScopBuilder::buildConditionSets(
393     BasicBlock *BB, SwitchInst *SI, Loop *L, __isl_keep isl_set *Domain,
394     DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
395     SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
396   Value *Condition = getConditionFromTerminator(SI);
397   assert(Condition && "No condition for switch");
398 
399   isl_pw_aff *LHS, *RHS;
400   LHS = getPwAff(BB, InvalidDomainMap, SE.getSCEVAtScope(Condition, L));
401 
402   unsigned NumSuccessors = SI->getNumSuccessors();
403   ConditionSets.resize(NumSuccessors);
404   for (auto &Case : SI->cases()) {
405     unsigned Idx = Case.getSuccessorIndex();
406     ConstantInt *CaseValue = Case.getCaseValue();
407 
408     RHS = getPwAff(BB, InvalidDomainMap, SE.getSCEV(CaseValue));
409     isl_set *CaseConditionSet =
410         buildConditionSet(ICmpInst::ICMP_EQ, isl::manage_copy(LHS),
411                           isl::manage(RHS))
412             .release();
413     ConditionSets[Idx] = isl_set_coalesce(
414         isl_set_intersect(CaseConditionSet, isl_set_copy(Domain)));
415   }
416 
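  // The default successor (index 0) is executed under the remaining part of
  // the domain, i.e., the complement of the union of all case conditions.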
417   assert(ConditionSets[0] == nullptr && "Default condition set was set");
418   isl_set *ConditionSetUnion = isl_set_copy(ConditionSets[1]);
419   for (unsigned u = 2; u < NumSuccessors; u++)
420     ConditionSetUnion =
421         isl_set_union(ConditionSetUnion, isl_set_copy(ConditionSets[u]));
422   ConditionSets[0] = isl_set_subtract(isl_set_copy(Domain), ConditionSetUnion);
423 
424   isl_pw_aff_free(LHS);
425 
426   return true;
427 }
428 
429 bool ScopBuilder::buildConditionSets(
430     BasicBlock *BB, Value *Condition, Instruction *TI, Loop *L,
431     __isl_keep isl_set *Domain,
432     DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
433     SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
434   isl_set *ConsequenceCondSet = nullptr;
435 
436   if (auto Load = dyn_cast<LoadInst>(Condition)) {
437     const SCEV *LHSSCEV = SE.getSCEVAtScope(Load, L);
438     const SCEV *RHSSCEV = SE.getZero(LHSSCEV->getType());
439     bool NonNeg = false;
440     isl_pw_aff *LHS = getPwAff(BB, InvalidDomainMap, LHSSCEV, NonNeg);
441     isl_pw_aff *RHS = getPwAff(BB, InvalidDomainMap, RHSSCEV, NonNeg);
442     ConsequenceCondSet = buildConditionSet(ICmpInst::ICMP_SLE, isl::manage(LHS),
443                                            isl::manage(RHS))
444                              .release();
445   } else if (auto *PHI = dyn_cast<PHINode>(Condition)) {
446     auto *Unique = dyn_cast<ConstantInt>(
447         getUniqueNonErrorValue(PHI, &scop->getRegion(), &SD));
448     assert(Unique &&
449            "A PHINode condition should only be accepted by ScopDetection if "
450            "getUniqueNonErrorValue returns non-NULL");
451 
452     if (Unique->isZero())
453       ConsequenceCondSet = isl_set_empty(isl_set_get_space(Domain));
454     else
455       ConsequenceCondSet = isl_set_universe(isl_set_get_space(Domain));
456   } else if (auto *CCond = dyn_cast<ConstantInt>(Condition)) {
457     if (CCond->isZero())
458       ConsequenceCondSet = isl_set_empty(isl_set_get_space(Domain));
459     else
460       ConsequenceCondSet = isl_set_universe(isl_set_get_space(Domain));
461   } else if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) {
462     auto Opcode = BinOp->getOpcode();
463     assert(Opcode == Instruction::And || Opcode == Instruction::Or);
464 
465     bool Valid = buildConditionSets(BB, BinOp->getOperand(0), TI, L, Domain,
466                                     InvalidDomainMap, ConditionSets) &&
467                  buildConditionSets(BB, BinOp->getOperand(1), TI, L, Domain,
468                                     InvalidDomainMap, ConditionSets);
469     if (!Valid) {
470       while (!ConditionSets.empty())
471         isl_set_free(ConditionSets.pop_back_val());
472       return false;
473     }
474 
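    // Each recursive call above pushed a (consequence, alternative) pair onto
    // ConditionSets. Keep the consequence set of each operand and free the
    // alternative sets, which are not needed here.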
475     isl_set_free(ConditionSets.pop_back_val());
476     isl_set *ConsCondPart0 = ConditionSets.pop_back_val();
477     isl_set_free(ConditionSets.pop_back_val());
478     isl_set *ConsCondPart1 = ConditionSets.pop_back_val();
479 
480     if (Opcode == Instruction::And)
481       ConsequenceCondSet = isl_set_intersect(ConsCondPart0, ConsCondPart1);
482     else
483       ConsequenceCondSet = isl_set_union(ConsCondPart0, ConsCondPart1);
484   } else {
485     auto *ICond = dyn_cast<ICmpInst>(Condition);
486     assert(ICond &&
487            "Condition of exiting branch was neither constant nor ICmp!");
488 
489     Region &R = scop->getRegion();
490 
491     isl_pw_aff *LHS, *RHS;
    // For unsigned comparisons we assume the sign bit of neither operand to
    // be set. Under this assumption the comparison is equivalent to a signed
    // comparison.
495     bool NonNeg = ICond->isUnsigned();
496     const SCEV *LeftOperand = SE.getSCEVAtScope(ICond->getOperand(0), L),
497                *RightOperand = SE.getSCEVAtScope(ICond->getOperand(1), L);
498 
499     LeftOperand = tryForwardThroughPHI(LeftOperand, R, SE, &SD);
500     RightOperand = tryForwardThroughPHI(RightOperand, R, SE, &SD);
501 
502     switch (ICond->getPredicate()) {
503     case ICmpInst::ICMP_ULT:
504       ConsequenceCondSet =
505           buildUnsignedConditionSets(BB, Condition, Domain, LeftOperand,
506                                      RightOperand, InvalidDomainMap, true);
507       break;
508     case ICmpInst::ICMP_ULE:
509       ConsequenceCondSet =
510           buildUnsignedConditionSets(BB, Condition, Domain, LeftOperand,
511                                      RightOperand, InvalidDomainMap, false);
512       break;
513     case ICmpInst::ICMP_UGT:
514       ConsequenceCondSet =
515           buildUnsignedConditionSets(BB, Condition, Domain, RightOperand,
516                                      LeftOperand, InvalidDomainMap, true);
517       break;
518     case ICmpInst::ICMP_UGE:
519       ConsequenceCondSet =
520           buildUnsignedConditionSets(BB, Condition, Domain, RightOperand,
521                                      LeftOperand, InvalidDomainMap, false);
522       break;
523     default:
524       LHS = getPwAff(BB, InvalidDomainMap, LeftOperand, NonNeg);
525       RHS = getPwAff(BB, InvalidDomainMap, RightOperand, NonNeg);
526       ConsequenceCondSet = buildConditionSet(ICond->getPredicate(),
527                                              isl::manage(LHS), isl::manage(RHS))
528                                .release();
529       break;
530     }
531   }
532 
533   // If no terminator was given we are only looking for parameter constraints
534   // under which @p Condition is true/false.
535   if (!TI)
536     ConsequenceCondSet = isl_set_params(ConsequenceCondSet);
537   assert(ConsequenceCondSet);
538   ConsequenceCondSet = isl_set_coalesce(
539       isl_set_intersect(ConsequenceCondSet, isl_set_copy(Domain)));
540 
541   isl_set *AlternativeCondSet = nullptr;
542   bool TooComplex =
543       isl_set_n_basic_set(ConsequenceCondSet) >= (int)MaxDisjunctsInDomain;
544 
545   if (!TooComplex) {
546     AlternativeCondSet = isl_set_subtract(isl_set_copy(Domain),
547                                           isl_set_copy(ConsequenceCondSet));
548     TooComplex =
549         isl_set_n_basic_set(AlternativeCondSet) >= (int)MaxDisjunctsInDomain;
550   }
551 
552   if (TooComplex) {
553     scop->invalidate(COMPLEXITY, TI ? TI->getDebugLoc() : DebugLoc(),
554                      TI ? TI->getParent() : nullptr /* BasicBlock */);
555     isl_set_free(AlternativeCondSet);
556     isl_set_free(ConsequenceCondSet);
557     return false;
558   }
559 
560   ConditionSets.push_back(ConsequenceCondSet);
561   ConditionSets.push_back(isl_set_coalesce(AlternativeCondSet));
562 
563   return true;
564 }
565 
566 bool ScopBuilder::buildConditionSets(
567     BasicBlock *BB, Instruction *TI, Loop *L, __isl_keep isl_set *Domain,
568     DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
569     SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
570   if (SwitchInst *SI = dyn_cast<SwitchInst>(TI))
571     return buildConditionSets(BB, SI, L, Domain, InvalidDomainMap,
572                               ConditionSets);
573 
574   assert(isa<BranchInst>(TI) && "Terminator was neither branch nor switch.");
575 
576   if (TI->getNumSuccessors() == 1) {
577     ConditionSets.push_back(isl_set_copy(Domain));
578     return true;
579   }
580 
581   Value *Condition = getConditionFromTerminator(TI);
582   assert(Condition && "No condition for Terminator");
583 
584   return buildConditionSets(BB, Condition, TI, L, Domain, InvalidDomainMap,
585                             ConditionSets);
586 }
587 
588 bool ScopBuilder::propagateDomainConstraints(
589     Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  // Iterate over the region R and propagate the domain constraints from the
591   // predecessors to the current node. In contrast to the
592   // buildDomainsWithBranchConstraints function, this one will pull the domain
593   // information from the predecessors instead of pushing it to the successors.
594   // Additionally, we assume the domains to be already present in the domain
595   // map here. However, we iterate again in reverse post order so we know all
596   // predecessors have been visited before a block or non-affine subregion is
597   // visited.
598 
599   ReversePostOrderTraversal<Region *> RTraversal(R);
600   for (auto *RN : RTraversal) {
601     // Recurse for affine subregions but go on for basic blocks and non-affine
602     // subregions.
603     if (RN->isSubRegion()) {
604       Region *SubRegion = RN->getNodeAs<Region>();
605       if (!scop->isNonAffineSubRegion(SubRegion)) {
606         if (!propagateDomainConstraints(SubRegion, InvalidDomainMap))
607           return false;
608         continue;
609       }
610     }
611 
612     BasicBlock *BB = getRegionNodeBasicBlock(RN);
613     isl::set &Domain = scop->getOrInitEmptyDomain(BB);
614     assert(!Domain.is_null());
615 
616     // Under the union of all predecessor conditions we can reach this block.
617     isl::set PredDom = getPredecessorDomainConstraints(BB, Domain);
618     Domain = Domain.intersect(PredDom).coalesce();
619     Domain = Domain.align_params(scop->getParamSpace());
620 
621     Loop *BBLoop = getRegionNodeLoop(RN, LI);
622     if (BBLoop && BBLoop->getHeader() == BB && scop->contains(BBLoop))
623       if (!addLoopBoundsToHeaderDomain(BBLoop, InvalidDomainMap))
624         return false;
625   }
626 
627   return true;
628 }
629 
630 void ScopBuilder::propagateDomainConstraintsToRegionExit(
631     BasicBlock *BB, Loop *BBLoop,
632     SmallPtrSetImpl<BasicBlock *> &FinishedExitBlocks,
633     DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  // Check if the block @p BB is the entry of a region. If so we propagate its
635   // domain to the exit block of the region. Otherwise we are done.
636   auto *RI = scop->getRegion().getRegionInfo();
637   auto *BBReg = RI ? RI->getRegionFor(BB) : nullptr;
638   auto *ExitBB = BBReg ? BBReg->getExit() : nullptr;
639   if (!BBReg || BBReg->getEntry() != BB || !scop->contains(ExitBB))
640     return;
641 
642   // Do not propagate the domain if there is a loop backedge inside the region
643   // that would prevent the exit block from being executed.
644   auto *L = BBLoop;
645   while (L && scop->contains(L)) {
646     SmallVector<BasicBlock *, 4> LatchBBs;
647     BBLoop->getLoopLatches(LatchBBs);
648     for (auto *LatchBB : LatchBBs)
649       if (BB != LatchBB && BBReg->contains(LatchBB))
650         return;
651     L = L->getParentLoop();
652   }
653 
654   isl::set Domain = scop->getOrInitEmptyDomain(BB);
655   assert(!Domain.is_null() && "Cannot propagate a nullptr");
656 
657   Loop *ExitBBLoop = getFirstNonBoxedLoopFor(ExitBB, LI, scop->getBoxedLoops());
658 
659   // Since the dimensions of @p BB and @p ExitBB might be different we have to
660   // adjust the domain before we can propagate it.
661   isl::set AdjustedDomain = adjustDomainDimensions(Domain, BBLoop, ExitBBLoop);
662   isl::set &ExitDomain = scop->getOrInitEmptyDomain(ExitBB);
663 
  // If the exit domain is not yet created we set it; otherwise we "add" the
  // current domain.
666   ExitDomain =
667       !ExitDomain.is_null() ? AdjustedDomain.unite(ExitDomain) : AdjustedDomain;
668 
669   // Initialize the invalid domain.
670   InvalidDomainMap[ExitBB] = ExitDomain.empty(ExitDomain.get_space());
671 
672   FinishedExitBlocks.insert(ExitBB);
673 }
674 
675 isl::set ScopBuilder::getPredecessorDomainConstraints(BasicBlock *BB,
676                                                       isl::set Domain) {
677   // If @p BB is the ScopEntry we are done
678   if (scop->getRegion().getEntry() == BB)
679     return isl::set::universe(Domain.get_space());
680 
681   // The region info of this function.
682   auto &RI = *scop->getRegion().getRegionInfo();
683 
684   Loop *BBLoop = getFirstNonBoxedLoopFor(BB, LI, scop->getBoxedLoops());
685 
686   // A domain to collect all predecessor domains, thus all conditions under
687   // which the block is executed. To this end we start with the empty domain.
688   isl::set PredDom = isl::set::empty(Domain.get_space());
689 
  // Set of regions of which the entry block domain has been propagated to BB.
  // All predecessors inside any of these regions can be skipped.
692   SmallSet<Region *, 8> PropagatedRegions;
693 
694   for (auto *PredBB : predecessors(BB)) {
695     // Skip backedges.
696     if (DT.dominates(BB, PredBB))
697       continue;
698 
699     // If the predecessor is in a region we used for propagation we can skip it.
700     auto PredBBInRegion = [PredBB](Region *PR) { return PR->contains(PredBB); };
701     if (std::any_of(PropagatedRegions.begin(), PropagatedRegions.end(),
702                     PredBBInRegion)) {
703       continue;
704     }
705 
706     // Check if there is a valid region we can use for propagation, thus look
707     // for a region that contains the predecessor and has @p BB as exit block.
    // FIXME: This was a side-effect-free (and possibly infinite) loop when
709     //        committed and seems not to be needed.
710     auto *PredR = RI.getRegionFor(PredBB);
711     while (PredR->getExit() != BB && !PredR->contains(BB))
712       PredR = PredR->getParent();
713 
714     // If a valid region for propagation was found use the entry of that region
715     // for propagation, otherwise the PredBB directly.
716     if (PredR->getExit() == BB) {
717       PredBB = PredR->getEntry();
718       PropagatedRegions.insert(PredR);
719     }
720 
721     isl::set PredBBDom = scop->getDomainConditions(PredBB);
722     Loop *PredBBLoop =
723         getFirstNonBoxedLoopFor(PredBB, LI, scop->getBoxedLoops());
724     PredBBDom = adjustDomainDimensions(PredBBDom, PredBBLoop, BBLoop);
725     PredDom = PredDom.unite(PredBBDom);
726   }
727 
728   return PredDom;
729 }
730 
731 bool ScopBuilder::addLoopBoundsToHeaderDomain(
732     Loop *L, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
733   int LoopDepth = scop->getRelativeLoopDepth(L);
734   assert(LoopDepth >= 0 && "Loop in region should have at least depth one");
735 
736   BasicBlock *HeaderBB = L->getHeader();
737   assert(scop->isDomainDefined(HeaderBB));
738   isl::set &HeaderBBDom = scop->getOrInitEmptyDomain(HeaderBB);
739 
740   isl::map NextIterationMap =
741       createNextIterationMap(HeaderBBDom.get_space(), LoopDepth);
742 
743   isl::set UnionBackedgeCondition = HeaderBBDom.empty(HeaderBBDom.get_space());
744 
745   SmallVector<BasicBlock *, 4> LatchBlocks;
746   L->getLoopLatches(LatchBlocks);
747 
748   for (BasicBlock *LatchBB : LatchBlocks) {
749     // If the latch is only reachable via error statements we skip it.
750     if (!scop->isDomainDefined(LatchBB))
751       continue;
752 
753     isl::set LatchBBDom = scop->getDomainConditions(LatchBB);
754 
755     isl::set BackedgeCondition;
756 
757     Instruction *TI = LatchBB->getTerminator();
758     BranchInst *BI = dyn_cast<BranchInst>(TI);
759     assert(BI && "Only branch instructions allowed in loop latches");
760 
761     if (BI->isUnconditional())
762       BackedgeCondition = LatchBBDom;
763     else {
764       SmallVector<isl_set *, 8> ConditionSets;
765       int idx = BI->getSuccessor(0) != HeaderBB;
766       if (!buildConditionSets(LatchBB, TI, L, LatchBBDom.get(),
767                               InvalidDomainMap, ConditionSets))
768         return false;
769 
      // Free the non-backedge condition set as we do not need it.
771       isl_set_free(ConditionSets[1 - idx]);
772 
773       BackedgeCondition = isl::manage(ConditionSets[idx]);
774     }
775 
776     int LatchLoopDepth = scop->getRelativeLoopDepth(LI.getLoopFor(LatchBB));
777     assert(LatchLoopDepth >= LoopDepth);
778     BackedgeCondition = BackedgeCondition.project_out(
779         isl::dim::set, LoopDepth + 1, LatchLoopDepth - LoopDepth);
780     UnionBackedgeCondition = UnionBackedgeCondition.unite(BackedgeCondition);
781   }
782 
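  // ForwardMap relates each iteration to itself and to every lexicographically
  // later iteration while keeping the dimensions of the surrounding loops
  // fixed.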
  isl::map ForwardMap = isl::map::lex_le(HeaderBBDom.get_space());
784   for (int i = 0; i < LoopDepth; i++)
785     ForwardMap = ForwardMap.equate(isl::dim::in, i, isl::dim::out, i);
786 
787   isl::set UnionBackedgeConditionComplement =
788       UnionBackedgeCondition.complement();
789   UnionBackedgeConditionComplement =
790       UnionBackedgeConditionComplement.lower_bound_si(isl::dim::set, LoopDepth,
791                                                       0);
792   UnionBackedgeConditionComplement =
793       UnionBackedgeConditionComplement.apply(ForwardMap);
794   HeaderBBDom = HeaderBBDom.subtract(UnionBackedgeConditionComplement);
795   HeaderBBDom = HeaderBBDom.apply(NextIterationMap);
796 
797   auto Parts = partitionSetParts(HeaderBBDom, LoopDepth);
798   HeaderBBDom = Parts.second;
799 
800   // Check if there is a <nsw> tagged AddRec for this loop and if so do not
801   // require a runtime check. The assumption is already implied by the <nsw>
802   // tag.
803   bool RequiresRTC = !scop->hasNSWAddRecForLoop(L);
804 
805   isl::set UnboundedCtx = Parts.first.params();
806   recordAssumption(&RecordedAssumptions, INFINITELOOP, UnboundedCtx,
807                    HeaderBB->getTerminator()->getDebugLoc(), AS_RESTRICTION,
808                    nullptr, RequiresRTC);
809   return true;
810 }
811 
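// Group the required invariant loads into equivalence classes keyed by their
// (pointer SCEV, loaded type) pair. The first load encountered for a class
// becomes its representative; all later loads are mapped to it.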
812 void ScopBuilder::buildInvariantEquivalenceClasses() {
813   DenseMap<std::pair<const SCEV *, Type *>, LoadInst *> EquivClasses;
814 
815   const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
816   for (LoadInst *LInst : RIL) {
817     const SCEV *PointerSCEV = SE.getSCEV(LInst->getPointerOperand());
818 
819     Type *Ty = LInst->getType();
820     LoadInst *&ClassRep = EquivClasses[std::make_pair(PointerSCEV, Ty)];
821     if (ClassRep) {
822       scop->addInvariantLoadMapping(LInst, ClassRep);
823       continue;
824     }
825 
826     ClassRep = LInst;
827     scop->addInvariantEquivClass(
828         InvariantEquivClassTy{PointerSCEV, MemoryAccessList(), {}, Ty});
829   }
830 }
831 
832 bool ScopBuilder::buildDomains(
833     Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
834   bool IsOnlyNonAffineRegion = scop->isNonAffineSubRegion(R);
835   auto *EntryBB = R->getEntry();
836   auto *L = IsOnlyNonAffineRegion ? nullptr : LI.getLoopFor(EntryBB);
837   int LD = scop->getRelativeLoopDepth(L);
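  // The entry domain gets one dimension per loop surrounding the entry block
  // within the SCoP; LD is -1 if there is no such loop, yielding a
  // zero-dimensional domain.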
838   auto *S =
839       isl_set_universe(isl_space_set_alloc(scop->getIslCtx().get(), 0, LD + 1));
840 
841   InvalidDomainMap[EntryBB] = isl::manage(isl_set_empty(isl_set_get_space(S)));
842   isl::set Domain = isl::manage(S);
843   scop->setDomain(EntryBB, Domain);
844 
845   if (IsOnlyNonAffineRegion)
846     return !containsErrorBlock(R->getNode(), *R, &SD);
847 
848   if (!buildDomainsWithBranchConstraints(R, InvalidDomainMap))
849     return false;
850 
851   if (!propagateDomainConstraints(R, InvalidDomainMap))
852     return false;
853 
854   // Error blocks and blocks dominated by them have been assumed to never be
855   // executed. Representing them in the Scop does not add any value. In fact,
856   // it is likely to cause issues during construction of the ScopStmts. The
857   // contents of error blocks have not been verified to be expressible and
858   // will cause problems when building up a ScopStmt for them.
859   // Furthermore, basic blocks dominated by error blocks may reference
860   // instructions in the error block which, if the error block is not modeled,
861   // can themselves not be constructed properly. To this end we will replace
862   // the domains of error blocks and those only reachable via error blocks
863   // with an empty set. Additionally, we will record for each block under which
864   // parameter combination it would be reached via an error block in its
865   // InvalidDomain. This information is needed during load hoisting.
866   if (!propagateInvalidStmtDomains(R, InvalidDomainMap))
867     return false;
868 
869   return true;
870 }
871 
872 bool ScopBuilder::buildDomainsWithBranchConstraints(
873     Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
874   // To create the domain for each block in R we iterate over all blocks and
875   // subregions in R and propagate the conditions under which the current region
876   // element is executed. To this end we iterate in reverse post order over R as
877   // it ensures that we first visit all predecessors of a region node (either a
878   // basic block or a subregion) before we visit the region node itself.
879   // Initially, only the domain for the SCoP region entry block is set and from
880   // there we propagate the current domain to all successors, however we add the
881   // condition that the successor is actually executed next.
882   // As we are only interested in non-loop carried constraints here we can
883   // simply skip loop back edges.
884 
885   SmallPtrSet<BasicBlock *, 8> FinishedExitBlocks;
886   ReversePostOrderTraversal<Region *> RTraversal(R);
887   for (auto *RN : RTraversal) {
888     // Recurse for affine subregions but go on for basic blocks and non-affine
889     // subregions.
890     if (RN->isSubRegion()) {
891       Region *SubRegion = RN->getNodeAs<Region>();
892       if (!scop->isNonAffineSubRegion(SubRegion)) {
893         if (!buildDomainsWithBranchConstraints(SubRegion, InvalidDomainMap))
894           return false;
895         continue;
896       }
897     }
898 
    if (containsErrorBlock(RN, scop->getRegion(), &SD))
      scop->notifyErrorBlock();
902 
903     BasicBlock *BB = getRegionNodeBasicBlock(RN);
904     Instruction *TI = BB->getTerminator();
905 
906     if (isa<UnreachableInst>(TI))
907       continue;
908 
909     if (!scop->isDomainDefined(BB))
910       continue;
911     isl::set Domain = scop->getDomainConditions(BB);
912 
913     scop->updateMaxLoopDepth(unsignedFromIslSize(Domain.tuple_dim()));
914 
915     auto *BBLoop = getRegionNodeLoop(RN, LI);
916     // Propagate the domain from BB directly to blocks that have a superset
917     // domain, at the moment only region exit nodes of regions that start in BB.
918     propagateDomainConstraintsToRegionExit(BB, BBLoop, FinishedExitBlocks,
919                                            InvalidDomainMap);
920 
    // If all successors of BB have been assigned a domain through the
    // propagation above, we do not need to build condition sets but can just
    // skip this block. However, it is important to note that this is a local
    // property with regards to the region @p R. To this end
    // FinishedExitBlocks is a local variable.
926     auto IsFinishedRegionExit = [&FinishedExitBlocks](BasicBlock *SuccBB) {
927       return FinishedExitBlocks.count(SuccBB);
928     };
929     if (std::all_of(succ_begin(BB), succ_end(BB), IsFinishedRegionExit))
930       continue;
931 
932     // Build the condition sets for the successor nodes of the current region
933     // node. If it is a non-affine subregion we will always execute the single
934     // exit node, hence the single entry node domain is the condition set. For
935     // basic blocks we use the helper function buildConditionSets.
936     SmallVector<isl_set *, 8> ConditionSets;
937     if (RN->isSubRegion())
938       ConditionSets.push_back(Domain.copy());
939     else if (!buildConditionSets(BB, TI, BBLoop, Domain.get(), InvalidDomainMap,
940                                  ConditionSets))
941       return false;
942 
943     // Now iterate over the successors and set their initial domain based on
944     // their condition set. We skip back edges here and have to be careful when
945     // we leave a loop not to keep constraints over a dimension that doesn't
946     // exist anymore.
947     assert(RN->isSubRegion() || TI->getNumSuccessors() == ConditionSets.size());
948     for (unsigned u = 0, e = ConditionSets.size(); u < e; u++) {
949       isl::set CondSet = isl::manage(ConditionSets[u]);
950       BasicBlock *SuccBB = getRegionNodeSuccessor(RN, TI, u);
951 
952       // Skip blocks outside the region.
953       if (!scop->contains(SuccBB))
954         continue;
955 
956       // If we propagate the domain of some block to "SuccBB" we do not have to
957       // adjust the domain.
958       if (FinishedExitBlocks.count(SuccBB))
959         continue;
960 
961       // Skip back edges.
962       if (DT.dominates(SuccBB, BB))
963         continue;
964 
965       Loop *SuccBBLoop =
966           getFirstNonBoxedLoopFor(SuccBB, LI, scop->getBoxedLoops());
967 
968       CondSet = adjustDomainDimensions(CondSet, BBLoop, SuccBBLoop);
969 
970       // Set the domain for the successor or merge it with an existing domain in
971       // case there are multiple paths (without loop back edges) to the
972       // successor block.
973       isl::set &SuccDomain = scop->getOrInitEmptyDomain(SuccBB);
974 
975       if (!SuccDomain.is_null()) {
976         SuccDomain = SuccDomain.unite(CondSet).coalesce();
977       } else {
978         // Initialize the invalid domain.
979         InvalidDomainMap[SuccBB] = CondSet.empty(CondSet.get_space());
980         SuccDomain = CondSet;
981       }
982 
983       SuccDomain = SuccDomain.detect_equalities();
984 
985       // Check if the maximal number of domain disjunctions was reached.
986       // In case this happens we will clean up and bail.
987       if (unsignedFromIslSize(SuccDomain.n_basic_set()) < MaxDisjunctsInDomain)
988         continue;
989 
990       scop->invalidate(COMPLEXITY, DebugLoc());
991       while (++u < ConditionSets.size())
992         isl_set_free(ConditionSets[u]);
993       return false;
994     }
995   }
996 
997   return true;
998 }
999 
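// Propagate invalid domains through the region in reverse post-order: the
// domains of error blocks and of blocks reachable only through error blocks
// are replaced by the empty set, and for every block we record the parameter
// context under which it would be reached via an error block (needed later
// for load hoisting, see buildDomains).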
1000 bool ScopBuilder::propagateInvalidStmtDomains(
1001     Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
1002   ReversePostOrderTraversal<Region *> RTraversal(R);
1003   for (auto *RN : RTraversal) {
1004 
1005     // Recurse for affine subregions but go on for basic blocks and non-affine
1006     // subregions.
1007     if (RN->isSubRegion()) {
1008       Region *SubRegion = RN->getNodeAs<Region>();
1009       if (!scop->isNonAffineSubRegion(SubRegion)) {
1010         propagateInvalidStmtDomains(SubRegion, InvalidDomainMap);
1011         continue;
1012       }
1013     }
1014 
1015     bool ContainsErrorBlock = containsErrorBlock(RN, scop->getRegion(), &SD);
1016     BasicBlock *BB = getRegionNodeBasicBlock(RN);
1017     isl::set &Domain = scop->getOrInitEmptyDomain(BB);
1018     assert(!Domain.is_null() && "Cannot propagate a nullptr");
1019 
1020     isl::set InvalidDomain = InvalidDomainMap[BB];
1021 
1022     bool IsInvalidBlock = ContainsErrorBlock || Domain.is_subset(InvalidDomain);
1023 
1024     if (!IsInvalidBlock) {
1025       InvalidDomain = InvalidDomain.intersect(Domain);
1026     } else {
1027       InvalidDomain = Domain;
1028       isl::set DomPar = Domain.params();
1029       recordAssumption(&RecordedAssumptions, ERRORBLOCK, DomPar,
1030                        BB->getTerminator()->getDebugLoc(), AS_RESTRICTION);
1031       Domain = isl::set::empty(Domain.get_space());
1032     }
1033 
1034     if (InvalidDomain.is_empty()) {
1035       InvalidDomainMap[BB] = InvalidDomain;
1036       continue;
1037     }
1038 
1039     auto *BBLoop = getRegionNodeLoop(RN, LI);
1040     auto *TI = BB->getTerminator();
1041     unsigned NumSuccs = RN->isSubRegion() ? 1 : TI->getNumSuccessors();
1042     for (unsigned u = 0; u < NumSuccs; u++) {
1043       auto *SuccBB = getRegionNodeSuccessor(RN, TI, u);
1044 
1045       // Skip successors outside the SCoP.
1046       if (!scop->contains(SuccBB))
1047         continue;
1048 
1049       // Skip backedges.
1050       if (DT.dominates(SuccBB, BB))
1051         continue;
1052 
1053       Loop *SuccBBLoop =
1054           getFirstNonBoxedLoopFor(SuccBB, LI, scop->getBoxedLoops());
1055 
1056       auto AdjustedInvalidDomain =
1057           adjustDomainDimensions(InvalidDomain, BBLoop, SuccBBLoop);
1058 
1059       isl::set SuccInvalidDomain = InvalidDomainMap[SuccBB];
1060       SuccInvalidDomain = SuccInvalidDomain.unite(AdjustedInvalidDomain);
1061       SuccInvalidDomain = SuccInvalidDomain.coalesce();
1062 
1063       InvalidDomainMap[SuccBB] = SuccInvalidDomain;
1064 
1065       // Check if the maximal number of domain disjunctions was reached.
1066       // In case this happens we will bail.
1067       if (unsignedFromIslSize(SuccInvalidDomain.n_basic_set()) <
1068           MaxDisjunctsInDomain)
1069         continue;
1070 
1071       InvalidDomainMap.erase(BB);
1072       scop->invalidate(COMPLEXITY, TI->getDebugLoc(), TI->getParent());
1073       return false;
1074     }
1075 
1076     InvalidDomainMap[BB] = InvalidDomain;
1077   }
1078 
1079   return true;
1080 }
1081 
1082 void ScopBuilder::buildPHIAccesses(ScopStmt *PHIStmt, PHINode *PHI,
1083                                    Region *NonAffineSubRegion,
1084                                    bool IsExitBlock) {
  // PHI nodes in the exit block of the region (i.e., when IsExitBlock is
  // true) are not modeled as ordinary PHI nodes because they are not part of
  // the region. However, we model the operands in the predecessor blocks that
  // are part of the region as regular scalar accesses.
1089 
1090   // If we can synthesize a PHI we can skip it, however only if it is in
1091   // the region. If it is not it can only be in the exit block of the region.
1092   // In this case we model the operands but not the PHI itself.
1093   auto *Scope = LI.getLoopFor(PHI->getParent());
1094   if (!IsExitBlock && canSynthesize(PHI, *scop, &SE, Scope))
1095     return;
1096 
1097   // PHI nodes are modeled as if they had been demoted prior to the SCoP
1098   // detection. Hence, the PHI is a load of a new memory location in which the
1099   // incoming value was written at the end of the incoming basic block.
1100   bool OnlyNonAffineSubRegionOperands = true;
1101   for (unsigned u = 0; u < PHI->getNumIncomingValues(); u++) {
1102     Value *Op = PHI->getIncomingValue(u);
1103     BasicBlock *OpBB = PHI->getIncomingBlock(u);
1104     ScopStmt *OpStmt = scop->getIncomingStmtFor(PHI->getOperandUse(u));
1105 
1106     // Do not build PHI dependences inside a non-affine subregion, but make
1107     // sure that the necessary scalar values are still made available.
1108     if (NonAffineSubRegion && NonAffineSubRegion->contains(OpBB)) {
1109       auto *OpInst = dyn_cast<Instruction>(Op);
1110       if (!OpInst || !NonAffineSubRegion->contains(OpInst))
1111         ensureValueRead(Op, OpStmt);
1112       continue;
1113     }
1114 
1115     OnlyNonAffineSubRegionOperands = false;
1116     ensurePHIWrite(PHI, OpStmt, OpBB, Op, IsExitBlock);
1117   }
1118 
1119   if (!OnlyNonAffineSubRegionOperands && !IsExitBlock) {
1120     addPHIReadAccess(PHIStmt, PHI);
1121   }
1122 }
1123 
1124 void ScopBuilder::buildScalarDependences(ScopStmt *UserStmt,
1125                                          Instruction *Inst) {
1126   assert(!isa<PHINode>(Inst));
1127 
1128   // Pull-in required operands.
1129   for (Use &Op : Inst->operands())
1130     ensureValueRead(Op.get(), UserStmt);
1131 }
1132 
1133 // Create a sequence of two schedules. Either argument may be null and is
1134 // interpreted as the empty schedule. Can also return null if both schedules are
1135 // empty.
1136 static isl::schedule combineInSequence(isl::schedule Prev, isl::schedule Succ) {
1137   if (Prev.is_null())
1138     return Succ;
1139   if (Succ.is_null())
1140     return Prev;
1141 
1142   return Prev.sequence(Succ);
1143 }
1144 
1145 // Create an isl_multi_union_aff that defines an identity mapping from the
1146 // elements of USet to their N-th dimension.
1147 //
1148 // # Example:
1149 //
1150 //            Domain: { A[i,j]; B[i,j,k] }
//                 N: 2
//
// Resulting Mapping: { A[i,j] -> [(j)]; B[i,j,k] -> [(j)] }
1154 //
1155 // @param USet   A union set describing the elements for which to generate a
1156 //               mapping.
1157 // @param N      The dimension to map to.
1158 // @returns      A mapping from USet to its N-th dimension.
1159 static isl::multi_union_pw_aff mapToDimension(isl::union_set USet, unsigned N) {
1160   assert(!USet.is_null());
1161   assert(!USet.is_empty());
1162 
1163   auto Result = isl::union_pw_multi_aff::empty(USet.get_space());
1164 
1165   for (isl::set S : USet.get_set_list()) {
1166     unsigned Dim = unsignedFromIslSize(S.tuple_dim());
1167     assert(Dim >= N);
1168     auto PMA = isl::pw_multi_aff::project_out_map(S.get_space(), isl::dim::set,
1169                                                   N, Dim - N);
1170     if (N > 1)
1171       PMA = PMA.drop_dims(isl::dim::out, 0, N - 1);
1172 
1173     Result = Result.add_pw_multi_aff(PMA);
1174   }
1175 
1176   return isl::multi_union_pw_aff(isl::union_pw_multi_aff(Result));
1177 }
1178 
1179 void ScopBuilder::buildSchedule() {
1180   Loop *L = getLoopSurroundingScop(*scop, LI);
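  // The loop surrounding the SCoP (or nullptr if there is none) forms the
  // bottom element of the stack; its schedule entry ends up holding the
  // schedule tree of the whole SCoP.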
1181   LoopStackTy LoopStack({LoopStackElementTy(L, {}, 0)});
1182   buildSchedule(scop->getRegion().getNode(), LoopStack);
1183   assert(LoopStack.size() == 1 && LoopStack.back().L == L);
1184   scop->setScheduleTree(LoopStack[0].Schedule);
1185 }
1186 
1187 /// To generate a schedule for the elements in a Region we traverse the Region
1188 /// in reverse-post-order and add the contained RegionNodes in traversal order
1189 /// to the schedule of the loop that is currently at the top of the LoopStack.
1190 /// For loop-free codes, this results in a correct sequential ordering.
1191 ///
1192 /// Example:
1193 ///           bb1(0)
1194 ///         /     \.
1195 ///      bb2(1)   bb3(2)
1196 ///         \    /  \.
1197 ///          bb4(3)  bb5(4)
1198 ///             \   /
1199 ///              bb6(5)
1200 ///
1201 /// Including loops requires additional processing. Whenever a loop header is
1202 /// encountered, the corresponding loop is added to the @p LoopStack. Starting
/// from an empty schedule, we first process all RegionNodes that are within
/// this loop and complete the sequential schedule at this loop-level before
/// processing any other nodes. To implement this loop-nodes-first-processing,
/// the reverse post-order traversal is insufficient. Hence, we additionally
/// check if the traversal yields sub-regions or blocks that are outside the
/// last loop on the @p LoopStack. These region-nodes are then queued and only
/// traversed after all the nodes within the current loop have been processed.
1211 void ScopBuilder::buildSchedule(Region *R, LoopStackTy &LoopStack) {
1212   Loop *OuterScopLoop = getLoopSurroundingScop(*scop, LI);
1213 
1214   ReversePostOrderTraversal<Region *> RTraversal(R);
1215   std::deque<RegionNode *> WorkList(RTraversal.begin(), RTraversal.end());
1216   std::deque<RegionNode *> DelayList;
1217   bool LastRNWaiting = false;
1218 
1219   // Iterate over the region @p R in reverse post-order but queue
1220   // sub-regions/blocks iff they are not part of the last encountered but not
1221   // completely traversed loop. The variable LastRNWaiting is a flag to indicate
1222   // that we queued the last sub-region/block from the reverse post-order
1223   // iterator. If it is set we have to explore the next sub-region/block from
1224   // the iterator (if any) to guarantee progress. If it is not set we first try
1225   // the next queued sub-region/blocks.
1226   while (!WorkList.empty() || !DelayList.empty()) {
1227     RegionNode *RN;
1228 
1229     if ((LastRNWaiting && !WorkList.empty()) || DelayList.empty()) {
1230       RN = WorkList.front();
1231       WorkList.pop_front();
1232       LastRNWaiting = false;
1233     } else {
1234       RN = DelayList.front();
1235       DelayList.pop_front();
1236     }
1237 
1238     Loop *L = getRegionNodeLoop(RN, LI);
1239     if (!scop->contains(L))
1240       L = OuterScopLoop;
1241 
1242     Loop *LastLoop = LoopStack.back().L;
1243     if (LastLoop != L) {
1244       if (LastLoop && !LastLoop->contains(L)) {
1245         LastRNWaiting = true;
1246         DelayList.push_back(RN);
1247         continue;
1248       }
1249       LoopStack.push_back({L, {}, 0});
1250     }
1251     buildSchedule(RN, LoopStack);
1252   }
1253 }
1254 
1255 void ScopBuilder::buildSchedule(RegionNode *RN, LoopStackTy &LoopStack) {
1256   if (RN->isSubRegion()) {
1257     auto *LocalRegion = RN->getNodeAs<Region>();
1258     if (!scop->isNonAffineSubRegion(LocalRegion)) {
1259       buildSchedule(LocalRegion, LoopStack);
1260       return;
1261     }
1262   }
1263 
1264   assert(LoopStack.rbegin() != LoopStack.rend());
1265   auto LoopData = LoopStack.rbegin();
1266   LoopData->NumBlocksProcessed += getNumBlocksInRegionNode(RN);
1267 
1268   for (auto *Stmt : scop->getStmtListFor(RN)) {
1269     isl::union_set UDomain{Stmt->getDomain()};
1270     auto StmtSchedule = isl::schedule::from_domain(UDomain);
1271     LoopData->Schedule = combineInSequence(LoopData->Schedule, StmtSchedule);
1272   }
1273 
1274   // Check if we just processed the last node in this loop. If we did, finalize
1275   // the loop by:
1276   //
1277   //   - adding new schedule dimensions
1278   //   - folding the resulting schedule into the parent loop schedule
1279   //   - dropping the loop schedule from the LoopStack.
1280   //
1281   // Then continue to check surrounding loops, which might also have been
1282   // completed by this node.
1283   size_t Dimension = LoopStack.size();
1284   while (LoopData->L &&
1285          LoopData->NumBlocksProcessed == getNumBlocksInLoop(LoopData->L)) {
1286     isl::schedule Schedule = LoopData->Schedule;
1287     auto NumBlocksProcessed = LoopData->NumBlocksProcessed;
1288 
1289     assert(std::next(LoopData) != LoopStack.rend());
1290     Loop *L = LoopData->L;
1291     ++LoopData;
1292     --Dimension;
1293 
1294     if (!Schedule.is_null()) {
1295       isl::union_set Domain = Schedule.get_domain();
1296       isl::multi_union_pw_aff MUPA = mapToDimension(Domain, Dimension);
1297       Schedule = Schedule.insert_partial_schedule(MUPA);
1298 
1299       if (hasDisableAllTransformsHint(L)) {
        /// If any of the loops has a disable_nonforced heuristic, mark the
        /// entire SCoP as such. The ISL rescheduler can only reschedule the
        /// SCoP in its entirety.
        /// TODO: ScopDetection could avoid including such loops or wrap them
        /// as boxed loops. It still needs to pass through loops with
        /// user-defined metadata.
1306         scop->markDisableHeuristics();
1307       }
1308 
      // It is easier to insert the marks here than to do it retroactively.
1310       isl::id IslLoopId = createIslLoopAttr(scop->getIslCtx(), L);
1311       if (!IslLoopId.is_null())
1312         Schedule =
1313             Schedule.get_root().child(0).insert_mark(IslLoopId).get_schedule();
1314 
1315       LoopData->Schedule = combineInSequence(LoopData->Schedule, Schedule);
1316     }
1317 
1318     LoopData->NumBlocksProcessed += NumBlocksProcessed;
1319   }
  // Now pop all loops processed above from the LoopStack.
1321   LoopStack.erase(LoopStack.begin() + Dimension, LoopStack.end());
1322 }
1323 
1324 void ScopBuilder::buildEscapingDependences(Instruction *Inst) {
  // Check for uses of this instruction outside the SCoP. Because we do not
  // iterate over such uses and therefore have not "ensured" the existence of
  // a write, we must determine such uses here.
1328   if (scop->isEscaping(Inst))
1329     ensureValueWrite(Inst);
1330 }
1331 
1332 void ScopBuilder::addRecordedAssumptions() {
1333   for (auto &AS : llvm::reverse(RecordedAssumptions)) {
1334 
1335     if (!AS.BB) {
1336       scop->addAssumption(AS.Kind, AS.Set, AS.Loc, AS.Sign,
1337                           nullptr /* BasicBlock */, AS.RequiresRTC);
1338       continue;
1339     }
1340 
    // If the domain was deleted, the assumption is void.
1342     isl_set *Dom = scop->getDomainConditions(AS.BB).release();
1343     if (!Dom)
1344       continue;
1345 
1346     // If a basic block was given use its domain to simplify the assumption.
1347     // In case of restrictions we know they only have to hold on the domain,
1348     // thus we can intersect them with the domain of the block. However, for
1349     // assumptions the domain has to imply them, thus:
1350     //                     _              _____
1351     //   Dom => S   <==>   A v B   <==>   A - B
1352     //
    // To avoid computing the complement, we register A - B as a restriction,
    // not as an assumption.
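    //
    // For illustration (hypothetical sets): with Dom = { [i] : 0 <= i < N }
    // and an assumption S = { [i] : N <= 100 }, we register the restriction
    // params(Dom - S) = { : N > 100 } instead of the complement of (Dom - S).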
1355     isl_set *S = AS.Set.copy();
1356     if (AS.Sign == AS_RESTRICTION)
1357       S = isl_set_params(isl_set_intersect(S, Dom));
1358     else /* (AS.Sign == AS_ASSUMPTION) */
1359       S = isl_set_params(isl_set_subtract(Dom, S));
1360 
1361     scop->addAssumption(AS.Kind, isl::manage(S), AS.Loc, AS_RESTRICTION, AS.BB,
1362                         AS.RequiresRTC);
1363   }
1364 }
1365 
1366 void ScopBuilder::addUserAssumptions(
1367     AssumptionCache &AC, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
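  // For illustration (hypothetical IR): an assumption such as
  //
  //   %cmp = icmp sgt i64 %n, 0
  //   call void @llvm.assume(i1 %cmp)
  //
  // that is contained in or dominates the SCoP adds the constraint n > 0 to
  // the context, provided %cmp is an affine condition over the SCoP's
  // parameters.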
1368   for (auto &Assumption : AC.assumptions()) {
1369     auto *CI = dyn_cast_or_null<CallInst>(Assumption);
1370     if (!CI || CI->arg_size() != 1)
1371       continue;
1372 
1373     bool InScop = scop->contains(CI);
1374     if (!InScop && !scop->isDominatedBy(DT, CI->getParent()))
1375       continue;
1376 
1377     auto *L = LI.getLoopFor(CI->getParent());
1378     auto *Val = CI->getArgOperand(0);
1379     ParameterSetTy DetectedParams;
1380     auto &R = scop->getRegion();
1381     if (!isAffineConstraint(Val, &R, L, SE, DetectedParams)) {
1382       ORE.emit(
1383           OptimizationRemarkAnalysis(DEBUG_TYPE, "IgnoreUserAssumption", CI)
1384           << "Non-affine user assumption ignored.");
1385       continue;
1386     }
1387 
1388     // Collect all newly introduced parameters.
1389     ParameterSetTy NewParams;
1390     for (auto *Param : DetectedParams) {
1391       Param = extractConstantFactor(Param, SE).second;
1392       Param = scop->getRepresentingInvariantLoadSCEV(Param);
1393       if (scop->isParam(Param))
1394         continue;
1395       NewParams.insert(Param);
1396     }
1397 
1398     SmallVector<isl_set *, 2> ConditionSets;
1399     auto *TI = InScop ? CI->getParent()->getTerminator() : nullptr;
1400     BasicBlock *BB = InScop ? CI->getParent() : R.getEntry();
1401     auto *Dom = InScop ? isl_set_copy(scop->getDomainConditions(BB).get())
1402                        : isl_set_copy(scop->getContext().get());
1403     assert(Dom && "Cannot propagate a nullptr.");
1404     bool Valid = buildConditionSets(BB, Val, TI, L, Dom, InvalidDomainMap,
1405                                     ConditionSets);
1406     isl_set_free(Dom);
1407 
1408     if (!Valid)
1409       continue;
1410 
1411     isl_set *AssumptionCtx = nullptr;
1412     if (InScop) {
1413       AssumptionCtx = isl_set_complement(isl_set_params(ConditionSets[1]));
1414       isl_set_free(ConditionSets[0]);
1415     } else {
1416       AssumptionCtx = isl_set_complement(ConditionSets[1]);
1417       AssumptionCtx = isl_set_intersect(AssumptionCtx, ConditionSets[0]);
1418     }
1419 
1420     // Project out newly introduced parameters as they are not otherwise useful.
1421     if (!NewParams.empty()) {
1422       for (isl_size u = 0; u < isl_set_n_param(AssumptionCtx); u++) {
1423         auto *Id = isl_set_get_dim_id(AssumptionCtx, isl_dim_param, u);
1424         auto *Param = static_cast<const SCEV *>(isl_id_get_user(Id));
1425         isl_id_free(Id);
1426 
1427         if (!NewParams.count(Param))
1428           continue;
1429 
1430         AssumptionCtx =
1431             isl_set_project_out(AssumptionCtx, isl_dim_param, u--, 1);
1432       }
1433     }
1434     ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "UserAssumption", CI)
1435              << "Use user assumption: "
1436              << stringFromIslObj(AssumptionCtx, "null"));
1437     isl::set newContext =
1438         scop->getContext().intersect(isl::manage(AssumptionCtx));
1439     scop->setContext(newContext);
1440   }
1441 }
1442 
1443 bool ScopBuilder::buildAccessMultiDimFixed(MemAccInst Inst, ScopStmt *Stmt) {
1444   Value *Val = Inst.getValueOperand();
1445   Type *ElementType = Val->getType();
1446   Value *Address = Inst.getPointerOperand();
1447   const SCEV *AccessFunction =
1448       SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
1449   const SCEVUnknown *BasePointer =
1450       dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
1451   enum MemoryAccess::AccessType AccType =
1452       isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;
1453 
1454   if (auto *BitCast = dyn_cast<BitCastInst>(Address)) {
1455     auto *Src = BitCast->getOperand(0);
1456     auto *SrcTy = Src->getType();
1457     auto *DstTy = BitCast->getType();
1458     // Do not try to delinearize non-sized (opaque) pointers.
1459     if ((SrcTy->isPointerTy() && !SrcTy->getPointerElementType()->isSized()) ||
1460         (DstTy->isPointerTy() && !DstTy->getPointerElementType()->isSized())) {
1461       return false;
1462     }
1463     if (SrcTy->isPointerTy() && DstTy->isPointerTy() &&
1464         DL.getTypeAllocSize(SrcTy->getPointerElementType()) ==
1465             DL.getTypeAllocSize(DstTy->getPointerElementType()))
1466       Address = Src;
1467   }
1468 
1469   auto *GEP = dyn_cast<GetElementPtrInst>(Address);
1470   if (!GEP)
1471     return false;
1472 
1473   SmallVector<const SCEV *, 4> Subscripts;
1474   SmallVector<int, 4> Sizes;
1475   getIndexExpressionsFromGEP(SE, GEP, Subscripts, Sizes);
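  // For illustration (hypothetical IR): for a GEP such as
  //
  //   %gep = getelementptr [512 x float], ptr %A, i64 %i, i64 %j
  //
  // the call above typically yields Subscripts = {%i, %j} and Sizes = {512};
  // the outermost dimension has no statically known size and is represented
  // by a leading nullptr in SizesSCEV below.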
1476   auto *BasePtr = GEP->getOperand(0);
1477 
1478   if (auto *BasePtrCast = dyn_cast<BitCastInst>(BasePtr))
1479     BasePtr = BasePtrCast->getOperand(0);
1480 
1481   // Check for identical base pointers to ensure that we do not miss index
1482   // offsets that have been added before this GEP is applied.
1483   if (BasePtr != BasePointer->getValue())
1484     return false;
1485 
1486   std::vector<const SCEV *> SizesSCEV;
1487 
1488   const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
1489 
1490   Loop *SurroundingLoop = Stmt->getSurroundingLoop();
1491   for (auto *Subscript : Subscripts) {
1492     InvariantLoadsSetTy AccessILS;
1493     if (!isAffineExpr(&scop->getRegion(), SurroundingLoop, Subscript, SE,
1494                       &AccessILS))
1495       return false;
1496 
1497     for (LoadInst *LInst : AccessILS)
1498       if (!ScopRIL.count(LInst))
1499         return false;
1500   }
1501 
1502   if (Sizes.empty())
1503     return false;
1504 
1505   SizesSCEV.push_back(nullptr);
1506 
1507   for (auto V : Sizes)
1508     SizesSCEV.push_back(SE.getSCEV(
1509         ConstantInt::get(IntegerType::getInt64Ty(BasePtr->getContext()), V)));
1510 
1511   addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
1512                  true, Subscripts, SizesSCEV, Val);
1513   return true;
1514 }
1515 
1516 bool ScopBuilder::buildAccessMultiDimParam(MemAccInst Inst, ScopStmt *Stmt) {
1517   if (!PollyDelinearize)
1518     return false;
1519 
1520   Value *Address = Inst.getPointerOperand();
1521   Value *Val = Inst.getValueOperand();
1522   Type *ElementType = Val->getType();
1523   unsigned ElementSize = DL.getTypeAllocSize(ElementType);
1524   enum MemoryAccess::AccessType AccType =
1525       isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;
1526 
1527   const SCEV *AccessFunction =
1528       SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
1529   const SCEVUnknown *BasePointer =
1530       dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
1531 
1532   assert(BasePointer && "Could not find base pointer");
1533 
1534   auto &InsnToMemAcc = scop->getInsnToMemAccMap();
1535   auto AccItr = InsnToMemAcc.find(Inst);
1536   if (AccItr == InsnToMemAcc.end())
1537     return false;
1538 
1539   std::vector<const SCEV *> Sizes = {nullptr};
1540 
1541   Sizes.insert(Sizes.end(), AccItr->second.Shape->DelinearizedSizes.begin(),
1542                AccItr->second.Shape->DelinearizedSizes.end());
1543 
1544   // In case only the element size is contained in the 'Sizes' array, the
1545   // access does not access a real multi-dimensional array. Hence, we allow
1546   // the normal single-dimensional access construction to handle this.
1547   if (Sizes.size() == 1)
1548     return false;
1549 
  // Remove the element size. This information is already provided by the
  // ElementSize parameter. In case the element size of this access and the
  // element size used for delinearization differ, the delinearization is
  // incorrect. Hence, we invalidate the scop.
1554   //
1555   // TODO: Handle delinearization with differing element sizes.
1556   auto DelinearizedSize =
1557       cast<SCEVConstant>(Sizes.back())->getAPInt().getSExtValue();
1558   Sizes.pop_back();
1559   if (ElementSize != DelinearizedSize)
1560     scop->invalidate(DELINEARIZATION, Inst->getDebugLoc(), Inst->getParent());
1561 
1562   addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
1563                  true, AccItr->second.DelinearizedSubscripts, Sizes, Val);
1564   return true;
1565 }
1566 
1567 bool ScopBuilder::buildAccessMemIntrinsic(MemAccInst Inst, ScopStmt *Stmt) {
1568   auto *MemIntr = dyn_cast_or_null<MemIntrinsic>(Inst);
1569 
1570   if (MemIntr == nullptr)
1571     return false;
1572 
1573   auto *L = LI.getLoopFor(Inst->getParent());
1574   auto *LengthVal = SE.getSCEVAtScope(MemIntr->getLength(), L);
1575   assert(LengthVal);
1576 
  // Check if the length value is actually affine or if we overapproximate it.
1578   InvariantLoadsSetTy AccessILS;
1579   const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
1580 
1581   Loop *SurroundingLoop = Stmt->getSurroundingLoop();
1582   bool LengthIsAffine = isAffineExpr(&scop->getRegion(), SurroundingLoop,
1583                                      LengthVal, SE, &AccessILS);
1584   for (LoadInst *LInst : AccessILS)
1585     if (!ScopRIL.count(LInst))
1586       LengthIsAffine = false;
1587   if (!LengthIsAffine)
1588     LengthVal = nullptr;
1589 
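  // For illustration (hypothetical call): memset(p, 0, n) with an affine n is
  // modeled below as a MUST_WRITE of n bytes (i8 elements) based at p;
  // memcpy/memmove additionally get a READ of the same length based at the
  // source pointer.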
1590   auto *DestPtrVal = MemIntr->getDest();
1591   assert(DestPtrVal);
1592 
1593   auto *DestAccFunc = SE.getSCEVAtScope(DestPtrVal, L);
1594   assert(DestAccFunc);
1595   // Ignore accesses to "NULL".
1596   // TODO: We could use this to optimize the region further, e.g., intersect
1597   //       the context with
1598   //          isl_set_complement(isl_set_params(getDomain()))
1599   //       as we know it would be undefined to execute this instruction anyway.
1600   if (DestAccFunc->isZero())
1601     return true;
1602 
1603   if (auto *U = dyn_cast<SCEVUnknown>(DestAccFunc)) {
1604     if (isa<ConstantPointerNull>(U->getValue()))
1605       return true;
1606   }
1607 
1608   auto *DestPtrSCEV = dyn_cast<SCEVUnknown>(SE.getPointerBase(DestAccFunc));
1609   assert(DestPtrSCEV);
1610   DestAccFunc = SE.getMinusSCEV(DestAccFunc, DestPtrSCEV);
1611   addArrayAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, DestPtrSCEV->getValue(),
1612                  IntegerType::getInt8Ty(DestPtrVal->getContext()),
1613                  LengthIsAffine, {DestAccFunc, LengthVal}, {nullptr},
1614                  Inst.getValueOperand());
1615 
1616   auto *MemTrans = dyn_cast<MemTransferInst>(MemIntr);
1617   if (!MemTrans)
1618     return true;
1619 
1620   auto *SrcPtrVal = MemTrans->getSource();
1621   assert(SrcPtrVal);
1622 
1623   auto *SrcAccFunc = SE.getSCEVAtScope(SrcPtrVal, L);
1624   assert(SrcAccFunc);
1625   // Ignore accesses to "NULL".
1626   // TODO: See above TODO
1627   if (SrcAccFunc->isZero())
1628     return true;
1629 
1630   auto *SrcPtrSCEV = dyn_cast<SCEVUnknown>(SE.getPointerBase(SrcAccFunc));
1631   assert(SrcPtrSCEV);
1632   SrcAccFunc = SE.getMinusSCEV(SrcAccFunc, SrcPtrSCEV);
1633   addArrayAccess(Stmt, Inst, MemoryAccess::READ, SrcPtrSCEV->getValue(),
1634                  IntegerType::getInt8Ty(SrcPtrVal->getContext()),
1635                  LengthIsAffine, {SrcAccFunc, LengthVal}, {nullptr},
1636                  Inst.getValueOperand());
1637 
1638   return true;
1639 }
1640 
1641 bool ScopBuilder::buildAccessCallInst(MemAccInst Inst, ScopStmt *Stmt) {
1642   auto *CI = dyn_cast_or_null<CallInst>(Inst);
1643 
1644   if (CI == nullptr)
1645     return false;
1646 
1647   if (CI->doesNotAccessMemory() || isIgnoredIntrinsic(CI) || isDebugCall(CI))
1648     return true;
1649 
1650   bool ReadOnly = false;
1651   auto *AF = SE.getConstant(IntegerType::getInt64Ty(CI->getContext()), 0);
1652   auto *CalledFunction = CI->getCalledFunction();
1653   switch (AA.getModRefBehavior(CalledFunction)) {
1654   case FMRB_UnknownModRefBehavior:
1655     llvm_unreachable("Unknown mod ref behaviour cannot be represented.");
1656   case FMRB_DoesNotAccessMemory:
1657     return true;
1658   case FMRB_OnlyWritesMemory:
1659   case FMRB_OnlyWritesInaccessibleMem:
1660   case FMRB_OnlyWritesInaccessibleOrArgMem:
1661   case FMRB_OnlyAccessesInaccessibleMem:
1662   case FMRB_OnlyAccessesInaccessibleOrArgMem:
1663     return false;
1664   case FMRB_OnlyReadsMemory:
1665   case FMRB_OnlyReadsInaccessibleMem:
1666   case FMRB_OnlyReadsInaccessibleOrArgMem:
1667     GlobalReads.emplace_back(Stmt, CI);
1668     return true;
1669   case FMRB_OnlyReadsArgumentPointees:
1670     ReadOnly = true;
1671     LLVM_FALLTHROUGH;
1672   case FMRB_OnlyWritesArgumentPointees:
1673   case FMRB_OnlyAccessesArgumentPointees: {
1674     auto AccType = ReadOnly ? MemoryAccess::READ : MemoryAccess::MAY_WRITE;
1675     Loop *L = LI.getLoopFor(Inst->getParent());
1676     for (const auto &Arg : CI->args()) {
1677       if (!Arg->getType()->isPointerTy())
1678         continue;
1679 
1680       auto *ArgSCEV = SE.getSCEVAtScope(Arg, L);
1681       if (ArgSCEV->isZero())
1682         continue;
1683 
1684       if (auto *U = dyn_cast<SCEVUnknown>(ArgSCEV)) {
1685         if (isa<ConstantPointerNull>(U->getValue()))
1686           return true;
1687       }
1688 
1689       auto *ArgBasePtr = cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV));
1690       addArrayAccess(Stmt, Inst, AccType, ArgBasePtr->getValue(),
1691                      ArgBasePtr->getType(), false, {AF}, {nullptr}, CI);
1692     }
1693     return true;
1694   }
1695   }
1696 
1697   return true;
1698 }
1699 
1700 void ScopBuilder::buildAccessSingleDim(MemAccInst Inst, ScopStmt *Stmt) {
1701   Value *Address = Inst.getPointerOperand();
1702   Value *Val = Inst.getValueOperand();
1703   Type *ElementType = Val->getType();
1704   enum MemoryAccess::AccessType AccType =
1705       isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;
1706 
1707   const SCEV *AccessFunction =
1708       SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
1709   const SCEVUnknown *BasePointer =
1710       dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
1711 
1712   assert(BasePointer && "Could not find base pointer");
1713   AccessFunction = SE.getMinusSCEV(AccessFunction, BasePointer);
1714 
1715   // Check if the access depends on a loop contained in a non-affine subregion.
1716   bool isVariantInNonAffineLoop = false;
1717   SetVector<const Loop *> Loops;
1718   findLoops(AccessFunction, Loops);
1719   for (const Loop *L : Loops)
1720     if (Stmt->contains(L)) {
1721       isVariantInNonAffineLoop = true;
1722       break;
1723     }
1724 
1725   InvariantLoadsSetTy AccessILS;
1726 
1727   Loop *SurroundingLoop = Stmt->getSurroundingLoop();
1728   bool IsAffine = !isVariantInNonAffineLoop &&
1729                   isAffineExpr(&scop->getRegion(), SurroundingLoop,
1730                                AccessFunction, SE, &AccessILS);
1731 
1732   const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
1733   for (LoadInst *LInst : AccessILS)
1734     if (!ScopRIL.count(LInst))
1735       IsAffine = false;
1736 
1737   if (!IsAffine && AccType == MemoryAccess::MUST_WRITE)
1738     AccType = MemoryAccess::MAY_WRITE;
1739 
1740   addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
1741                  IsAffine, {AccessFunction}, {nullptr}, Val);
1742 }
1743 
1744 void ScopBuilder::buildMemoryAccess(MemAccInst Inst, ScopStmt *Stmt) {
1745   if (buildAccessMemIntrinsic(Inst, Stmt))
1746     return;
1747 
1748   if (buildAccessCallInst(Inst, Stmt))
1749     return;
1750 
1751   if (buildAccessMultiDimFixed(Inst, Stmt))
1752     return;
1753 
1754   if (buildAccessMultiDimParam(Inst, Stmt))
1755     return;
1756 
1757   buildAccessSingleDim(Inst, Stmt);
1758 }
1759 
1760 void ScopBuilder::buildAccessFunctions() {
1761   for (auto &Stmt : *scop) {
1762     if (Stmt.isBlockStmt()) {
1763       buildAccessFunctions(&Stmt, *Stmt.getBasicBlock());
1764       continue;
1765     }
1766 
1767     Region *R = Stmt.getRegion();
1768     for (BasicBlock *BB : R->blocks())
1769       buildAccessFunctions(&Stmt, *BB, R);
1770   }
1771 
1772   // Build write accesses for values that are used after the SCoP.
1773   // The instructions defining them might be synthesizable and therefore not
1774   // contained in any statement, hence we iterate over the original instructions
1775   // to identify all escaping values.
1776   for (BasicBlock *BB : scop->getRegion().blocks()) {
1777     for (Instruction &Inst : *BB)
1778       buildEscapingDependences(&Inst);
1779   }
1780 }
1781 
1782 bool ScopBuilder::shouldModelInst(Instruction *Inst, Loop *L) {
1783   return !Inst->isTerminator() && !isIgnoredIntrinsic(Inst) &&
1784          !canSynthesize(Inst, *scop, &SE, L);
1785 }
1786 
1787 /// Generate a name for a statement.
1788 ///
1789 /// @param BB     The basic block the statement will represent.
/// @param BBIdx  The index of @p BB relative to other BBs/regions.
/// @param Count  The index of the created statement in @p BB.
/// @param IsMain Whether this is the main statement of all statements for
///               @p BB. If true, no suffix will be added.
1794 /// @param IsLast Uses a special indicator for the last statement of a BB.
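///
/// For illustration (hypothetical names): with instruction names enabled, the
/// statements of a block %bb3 might be named "Stmt_bb3" (main statement),
/// "Stmt_bb3_b" (second statement) and "Stmt_bb3_last" (epilogue); the exact
/// form depends on getIslCompatibleName().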
1795 static std::string makeStmtName(BasicBlock *BB, long BBIdx, int Count,
1796                                 bool IsMain, bool IsLast = false) {
1797   std::string Suffix;
1798   if (!IsMain) {
1799     if (UseInstructionNames)
1800       Suffix = '_';
1801     if (IsLast)
1802       Suffix += "last";
1803     else if (Count < 26)
1804       Suffix += 'a' + Count;
1805     else
1806       Suffix += std::to_string(Count);
1807   }
1808   return getIslCompatibleName("Stmt", BB, BBIdx, Suffix, UseInstructionNames);
1809 }
1810 
1811 /// Generate a name for a statement that represents a non-affine subregion.
1812 ///
1813 /// @param R    The region the statement will represent.
/// @param RIdx The index of @p R relative to other BBs/regions.
1815 static std::string makeStmtName(Region *R, long RIdx) {
1816   return getIslCompatibleName("Stmt", R->getNameStr(), RIdx, "",
1817                               UseInstructionNames);
1818 }
1819 
1820 void ScopBuilder::buildSequentialBlockStmts(BasicBlock *BB, bool SplitOnStore) {
1821   Loop *SurroundingLoop = LI.getLoopFor(BB);
1822 
1823   int Count = 0;
1824   long BBIdx = scop->getNextStmtIdx();
1825   std::vector<Instruction *> Instructions;
1826   for (Instruction &Inst : *BB) {
1827     if (shouldModelInst(&Inst, SurroundingLoop))
1828       Instructions.push_back(&Inst);
1829     if (Inst.getMetadata("polly_split_after") ||
1830         (SplitOnStore && isa<StoreInst>(Inst))) {
1831       std::string Name = makeStmtName(BB, BBIdx, Count, Count == 0);
1832       scop->addScopStmt(BB, Name, SurroundingLoop, Instructions);
1833       Count++;
1834       Instructions.clear();
1835     }
1836   }
1837 
1838   std::string Name = makeStmtName(BB, BBIdx, Count, Count == 0);
1839   scop->addScopStmt(BB, Name, SurroundingLoop, Instructions);
1840 }
1841 
1842 /// Is @p Inst an ordered instruction?
1843 ///
/// An unordered instruction is an instruction such that a sequence of
1845 /// unordered instructions can be permuted without changing semantics. Any
1846 /// instruction for which this is not always the case is ordered.
1847 static bool isOrderedInstruction(Instruction *Inst) {
1848   return Inst->mayHaveSideEffects() || Inst->mayReadOrWriteMemory();
1849 }
1850 
1851 /// Join instructions to the same statement if one uses the scalar result of the
1852 /// other.
1853 static void joinOperandTree(EquivalenceClasses<Instruction *> &UnionFind,
1854                             ArrayRef<Instruction *> ModeledInsts) {
1855   for (Instruction *Inst : ModeledInsts) {
1856     if (isa<PHINode>(Inst))
1857       continue;
1858 
1859     for (Use &Op : Inst->operands()) {
1860       Instruction *OpInst = dyn_cast<Instruction>(Op.get());
1861       if (!OpInst)
1862         continue;
1863 
1864       // Check if OpInst is in the BB and is a modeled instruction.
1865       auto OpVal = UnionFind.findValue(OpInst);
1866       if (OpVal == UnionFind.end())
1867         continue;
1868 
1869       UnionFind.unionSets(Inst, OpInst);
1870     }
1871   }
1872 }
1873 
1874 /// Ensure that the order of ordered instructions does not change.
1875 ///
1876 /// If we encounter an ordered instruction enclosed in instructions belonging to
1877 /// a different statement (which might as well contain ordered instructions, but
1878 /// this is not tested here), join them.
1879 static void
1880 joinOrderedInstructions(EquivalenceClasses<Instruction *> &UnionFind,
1881                         ArrayRef<Instruction *> ModeledInsts) {
1882   SetVector<Instruction *> SeenLeaders;
1883   for (Instruction *Inst : ModeledInsts) {
1884     if (!isOrderedInstruction(Inst))
1885       continue;
1886 
1887     Instruction *Leader = UnionFind.getLeaderValue(Inst);
1888     // Since previous iterations might have merged sets, some items in
    // SeenLeaders are not leaders anymore. However, the new leader of
1890     // previously merged instructions must be one of the former leaders of
1891     // these merged instructions.
1892     bool Inserted = SeenLeaders.insert(Leader);
1893     if (Inserted)
1894       continue;
1895 
1896     // Merge statements to close holes. Say, we have already seen statements A
1897     // and B, in this order. Then we see an instruction of A again and we would
    // see the pattern "A B A". This function joins all statements up to the
    // previously seen occurrence of A.
1900     for (Instruction *Prev : reverse(SeenLeaders)) {
1901       // We are backtracking from the last element until we see Inst's leader
1902       // in SeenLeaders and merge all into one set. Although leaders of
1903       // instructions change during the execution of this loop, it's irrelevant
1904       // as we are just searching for the element that we already confirmed is
1905       // in the list.
1906       if (Prev == Leader)
1907         break;
1908       UnionFind.unionSets(Prev, Leader);
1909     }
1910   }
1911 }
1912 
1913 /// If the BasicBlock has an edge from itself, ensure that the PHI WRITEs for
1914 /// the incoming values from this block are executed after the PHI READ.
1915 ///
1916 /// Otherwise it could overwrite the incoming value from before the BB with the
1917 /// value for the next execution. This can happen if the PHI WRITE is added to
1918 /// the statement with the instruction that defines the incoming value (instead
1919 /// of the last statement of the same BB). To ensure that the PHI READ and WRITE
1920 /// are in order, we put both into the statement. PHI WRITEs are always executed
1921 /// after PHI READs when they are in the same statement.
1922 ///
1923 /// TODO: This is an overpessimization. We only have to ensure that the PHI
1924 /// WRITE is not put into a statement containing the PHI itself. That could also
1925 /// be done by
1926 /// - having all (strongly connected) PHIs in a single statement,
1927 /// - unite only the PHIs in the operand tree of the PHI WRITE (because it only
1928 ///   has a chance of being lifted before a PHI by being in a statement with a
1929 ///   PHI that comes before in the basic block), or
1930 /// - when uniting statements, ensure that no (relevant) PHIs are overtaken.
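///
/// For illustration (hypothetical IR): in a block %bb with a back edge to
/// itself,
///
///   %phi = phi i32 [ 0, %preheader ], [ %next, %bb ]
///   %next = add i32 %phi, 1
///
/// %phi and %next are joined into one statement, so the PHI WRITE of the
/// incoming value %next cannot end up in a statement executed before the PHI
/// READ of %phi.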
1931 static void joinOrderedPHIs(EquivalenceClasses<Instruction *> &UnionFind,
1932                             ArrayRef<Instruction *> ModeledInsts) {
1933   for (Instruction *Inst : ModeledInsts) {
1934     PHINode *PHI = dyn_cast<PHINode>(Inst);
1935     if (!PHI)
1936       continue;
1937 
1938     int Idx = PHI->getBasicBlockIndex(PHI->getParent());
1939     if (Idx < 0)
1940       continue;
1941 
1942     Instruction *IncomingVal =
1943         dyn_cast<Instruction>(PHI->getIncomingValue(Idx));
1944     if (!IncomingVal)
1945       continue;
1946 
1947     UnionFind.unionSets(PHI, IncomingVal);
1948   }
1949 }
1950 
1951 void ScopBuilder::buildEqivClassBlockStmts(BasicBlock *BB) {
1952   Loop *L = LI.getLoopFor(BB);
1953 
1954   // Extracting out modeled instructions saves us from checking
1955   // shouldModelInst() repeatedly.
1956   SmallVector<Instruction *, 32> ModeledInsts;
1957   EquivalenceClasses<Instruction *> UnionFind;
1958   Instruction *MainInst = nullptr, *MainLeader = nullptr;
1959   for (Instruction &Inst : *BB) {
1960     if (!shouldModelInst(&Inst, L))
1961       continue;
1962     ModeledInsts.push_back(&Inst);
1963     UnionFind.insert(&Inst);
1964 
1965     // When a BB is split into multiple statements, the main statement is the
1966     // one containing the 'main' instruction. We select the first instruction
1967     // that is unlikely to be removed (because it has side-effects) as the main
1968     // one. It is used to ensure that at least one statement from the bb has the
1969     // same name as with -polly-stmt-granularity=bb.
1970     if (!MainInst && (isa<StoreInst>(Inst) ||
1971                       (isa<CallInst>(Inst) && !isa<IntrinsicInst>(Inst))))
1972       MainInst = &Inst;
1973   }
1974 
1975   joinOperandTree(UnionFind, ModeledInsts);
1976   joinOrderedInstructions(UnionFind, ModeledInsts);
1977   joinOrderedPHIs(UnionFind, ModeledInsts);
1978 
  // The list of instructions for each statement (a statement is represented
  // by its leader instruction).
1981   MapVector<Instruction *, std::vector<Instruction *>> LeaderToInstList;
1982 
1983   // The order of statements must be preserved w.r.t. their ordered
1984   // instructions. Without this explicit scan, we would also use non-ordered
1985   // instructions (whose order is arbitrary) to determine statement order.
1986   for (Instruction *Inst : ModeledInsts) {
1987     if (!isOrderedInstruction(Inst))
1988       continue;
1989 
1990     auto LeaderIt = UnionFind.findLeader(Inst);
1991     if (LeaderIt == UnionFind.member_end())
1992       continue;
1993 
1994     // Insert element for the leader instruction.
1995     (void)LeaderToInstList[*LeaderIt];
1996   }
1997 
  // Collect the instructions of all leaders. UnionFind's member iterators
  // unfortunately are not in any specific order.
2000   for (Instruction *Inst : ModeledInsts) {
2001     auto LeaderIt = UnionFind.findLeader(Inst);
2002     if (LeaderIt == UnionFind.member_end())
2003       continue;
2004 
2005     if (Inst == MainInst)
2006       MainLeader = *LeaderIt;
2007     std::vector<Instruction *> &InstList = LeaderToInstList[*LeaderIt];
2008     InstList.push_back(Inst);
2009   }
2010 
2011   // Finally build the statements.
2012   int Count = 0;
2013   long BBIdx = scop->getNextStmtIdx();
2014   for (auto &Instructions : LeaderToInstList) {
2015     std::vector<Instruction *> &InstList = Instructions.second;
2016 
2017     // If there is no main instruction, make the first statement the main.
2018     bool IsMain = (MainInst ? MainLeader == Instructions.first : Count == 0);
2019 
2020     std::string Name = makeStmtName(BB, BBIdx, Count, IsMain);
2021     scop->addScopStmt(BB, Name, L, std::move(InstList));
2022     Count += 1;
2023   }
2024 
2025   // Unconditionally add an epilogue (last statement). It contains no
2026   // instructions, but holds the PHI write accesses for successor basic blocks,
  // if the incoming value is not defined in another statement of the same BB.
2028   // The epilogue becomes the main statement only if there is no other
2029   // statement that could become main.
2030   // The epilogue will be removed if no PHIWrite is added to it.
2031   std::string EpilogueName = makeStmtName(BB, BBIdx, Count, Count == 0, true);
2032   scop->addScopStmt(BB, EpilogueName, L, {});
2033 }
2034 
2035 void ScopBuilder::buildStmts(Region &SR) {
2036   if (scop->isNonAffineSubRegion(&SR)) {
2037     std::vector<Instruction *> Instructions;
2038     Loop *SurroundingLoop =
2039         getFirstNonBoxedLoopFor(SR.getEntry(), LI, scop->getBoxedLoops());
2040     for (Instruction &Inst : *SR.getEntry())
2041       if (shouldModelInst(&Inst, SurroundingLoop))
2042         Instructions.push_back(&Inst);
2043     long RIdx = scop->getNextStmtIdx();
2044     std::string Name = makeStmtName(&SR, RIdx);
2045     scop->addScopStmt(&SR, Name, SurroundingLoop, Instructions);
2046     return;
2047   }
2048 
2049   for (auto I = SR.element_begin(), E = SR.element_end(); I != E; ++I)
2050     if (I->isSubRegion())
2051       buildStmts(*I->getNodeAs<Region>());
2052     else {
2053       BasicBlock *BB = I->getNodeAs<BasicBlock>();
2054       switch (StmtGranularity) {
2055       case GranularityChoice::BasicBlocks:
2056         buildSequentialBlockStmts(BB);
2057         break;
2058       case GranularityChoice::ScalarIndependence:
2059         buildEqivClassBlockStmts(BB);
2060         break;
2061       case GranularityChoice::Stores:
2062         buildSequentialBlockStmts(BB, true);
2063         break;
2064       }
2065     }
2066 }
2067 
2068 void ScopBuilder::buildAccessFunctions(ScopStmt *Stmt, BasicBlock &BB,
2069                                        Region *NonAffineSubRegion) {
2070   assert(
2071       Stmt &&
2072       "The exit BB is the only one that cannot be represented by a statement");
2073   assert(Stmt->represents(&BB));
2074 
2075   // We do not build access functions for error blocks, as they may contain
  // instructions we cannot model.
2077   if (SD.isErrorBlock(BB, scop->getRegion()))
2078     return;
2079 
2080   auto BuildAccessesForInst = [this, Stmt,
2081                                NonAffineSubRegion](Instruction *Inst) {
2082     PHINode *PHI = dyn_cast<PHINode>(Inst);
2083     if (PHI)
2084       buildPHIAccesses(Stmt, PHI, NonAffineSubRegion, false);
2085 
2086     if (auto MemInst = MemAccInst::dyn_cast(*Inst)) {
2087       assert(Stmt && "Cannot build access function in non-existing statement");
2088       buildMemoryAccess(MemInst, Stmt);
2089     }
2090 
2091     // PHI nodes have already been modeled above and terminators that are
2092     // not part of a non-affine subregion are fully modeled and regenerated
2093     // from the polyhedral domains. Hence, they do not need to be modeled as
2094     // explicit data dependences.
2095     if (!PHI)
2096       buildScalarDependences(Stmt, Inst);
2097   };
2098 
2099   const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
2100   bool IsEntryBlock = (Stmt->getEntryBlock() == &BB);
2101   if (IsEntryBlock) {
2102     for (Instruction *Inst : Stmt->getInstructions())
2103       BuildAccessesForInst(Inst);
2104     if (Stmt->isRegionStmt())
2105       BuildAccessesForInst(BB.getTerminator());
2106   } else {
2107     for (Instruction &Inst : BB) {
2108       if (isIgnoredIntrinsic(&Inst))
2109         continue;
2110 
      // Invariant loads have already been processed.
2112       if (isa<LoadInst>(Inst) && RIL.count(cast<LoadInst>(&Inst)))
2113         continue;
2114 
2115       BuildAccessesForInst(&Inst);
2116     }
2117   }
2118 }
2119 
2120 MemoryAccess *ScopBuilder::addMemoryAccess(
2121     ScopStmt *Stmt, Instruction *Inst, MemoryAccess::AccessType AccType,
2122     Value *BaseAddress, Type *ElementType, bool Affine, Value *AccessValue,
2123     ArrayRef<const SCEV *> Subscripts, ArrayRef<const SCEV *> Sizes,
2124     MemoryKind Kind) {
2125   bool isKnownMustAccess = false;
2126 
2127   // Accesses in single-basic block statements are always executed.
2128   if (Stmt->isBlockStmt())
2129     isKnownMustAccess = true;
2130 
2131   if (Stmt->isRegionStmt()) {
2132     // Accesses that dominate the exit block of a non-affine region are always
    // executed. In non-affine regions there may exist scalar writes that do
    // not dominate the exit. MemoryKind::Value writes always dominate the
    // exit, whereas MemoryKind::PHI writes do so only if there is at most one
    // PHI_WRITE in the non-affine region.
2137     if (Inst && DT.dominates(Inst->getParent(), Stmt->getRegion()->getExit()))
2138       isKnownMustAccess = true;
2139   }
2140 
2141   // Non-affine PHI writes do not "happen" at a particular instruction, but
2142   // after exiting the statement. Therefore they are guaranteed to execute and
2143   // overwrite the old value.
2144   if (Kind == MemoryKind::PHI || Kind == MemoryKind::ExitPHI)
2145     isKnownMustAccess = true;
2146 
2147   if (!isKnownMustAccess && AccType == MemoryAccess::MUST_WRITE)
2148     AccType = MemoryAccess::MAY_WRITE;
2149 
2150   auto *Access = new MemoryAccess(Stmt, Inst, AccType, BaseAddress, ElementType,
2151                                   Affine, Subscripts, Sizes, AccessValue, Kind);
2152 
2153   scop->addAccessFunction(Access);
2154   Stmt->addAccess(Access);
2155   return Access;
2156 }
2157 
2158 void ScopBuilder::addArrayAccess(ScopStmt *Stmt, MemAccInst MemAccInst,
2159                                  MemoryAccess::AccessType AccType,
2160                                  Value *BaseAddress, Type *ElementType,
2161                                  bool IsAffine,
2162                                  ArrayRef<const SCEV *> Subscripts,
2163                                  ArrayRef<const SCEV *> Sizes,
2164                                  Value *AccessValue) {
2165   ArrayBasePointers.insert(BaseAddress);
2166   addMemoryAccess(Stmt, MemAccInst, AccType, BaseAddress, ElementType, IsAffine,
2167                   AccessValue, Subscripts, Sizes, MemoryKind::Array);
2168 }
2169 
2170 /// Check if @p Expr is divisible by @p Size.
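///
/// For illustration: isDivisible(4*i + 8, 4, SE) holds because every operand
/// of the add is divisible by 4, whereas isDivisible(4*i + 2, 4, SE) does
/// not.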
2171 static bool isDivisible(const SCEV *Expr, unsigned Size, ScalarEvolution &SE) {
2172   assert(Size != 0);
2173   if (Size == 1)
2174     return true;
2175 
2176   // Only one factor needs to be divisible.
2177   if (auto *MulExpr = dyn_cast<SCEVMulExpr>(Expr)) {
2178     for (auto *FactorExpr : MulExpr->operands())
2179       if (isDivisible(FactorExpr, Size, SE))
2180         return true;
2181     return false;
2182   }
2183 
2184   // For other n-ary expressions (Add, AddRec, Max,...) all operands need
2185   // to be divisible.
2186   if (auto *NAryExpr = dyn_cast<SCEVNAryExpr>(Expr)) {
2187     for (auto *OpExpr : NAryExpr->operands())
2188       if (!isDivisible(OpExpr, Size, SE))
2189         return false;
2190     return true;
2191   }
2192 
2193   auto *SizeSCEV = SE.getConstant(Expr->getType(), Size);
2194   auto *UDivSCEV = SE.getUDivExpr(Expr, SizeSCEV);
2195   auto *MulSCEV = SE.getMulExpr(UDivSCEV, SizeSCEV);
2196   return MulSCEV == Expr;
2197 }
2198 
2199 void ScopBuilder::foldSizeConstantsToRight() {
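  // For illustration (hypothetical accesses): if a two-dimensional array A is
  // only ever accessed as A[3*i][j], the constant factor 3 of the outer
  // dimension can be folded to the right: the accesses become A'[i][j] on an
  // array A' whose innermost dimension is three times as large.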
2200   isl::union_set Accessed = scop->getAccesses().range();
2201 
2202   for (auto Array : scop->arrays()) {
2203     if (Array->getNumberOfDimensions() <= 1)
2204       continue;
2205 
2206     isl::space Space = Array->getSpace();
2207     Space = Space.align_params(Accessed.get_space());
2208 
2209     if (!Accessed.contains(Space))
2210       continue;
2211 
2212     isl::set Elements = Accessed.extract_set(Space);
2213     isl::map Transform = isl::map::universe(Array->getSpace().map_from_set());
2214 
2215     std::vector<int> Int;
2216     unsigned Dims = unsignedFromIslSize(Elements.tuple_dim());
2217     for (unsigned i = 0; i < Dims; i++) {
2218       isl::set DimOnly = isl::set(Elements).project_out(isl::dim::set, 0, i);
2219       DimOnly = DimOnly.project_out(isl::dim::set, 1, Dims - i - 1);
2220       DimOnly = DimOnly.lower_bound_si(isl::dim::set, 0, 0);
2221 
2222       isl::basic_set DimHull = DimOnly.affine_hull();
2223 
2224       if (i == Dims - 1) {
2225         Int.push_back(1);
2226         Transform = Transform.equate(isl::dim::in, i, isl::dim::out, i);
2227         continue;
2228       }
2229 
2230       if (unsignedFromIslSize(DimHull.dim(isl::dim::div)) == 1) {
2231         isl::aff Diff = DimHull.get_div(0);
2232         isl::val Val = Diff.get_denominator_val();
2233 
2234         int ValInt = 1;
2235         if (Val.is_int()) {
2236           auto ValAPInt = APIntFromVal(Val);
2237           if (ValAPInt.isSignedIntN(32))
2238             ValInt = ValAPInt.getSExtValue();
        }
2241 
2242         Int.push_back(ValInt);
2243         isl::constraint C = isl::constraint::alloc_equality(
2244             isl::local_space(Transform.get_space()));
2245         C = C.set_coefficient_si(isl::dim::out, i, ValInt);
2246         C = C.set_coefficient_si(isl::dim::in, i, -1);
2247         Transform = Transform.add_constraint(C);
2248         continue;
2249       }
2250 
2251       isl::basic_set ZeroSet = isl::basic_set(DimHull);
2252       ZeroSet = ZeroSet.fix_si(isl::dim::set, 0, 0);
2253 
2254       int ValInt = 1;
2255       if (ZeroSet.is_equal(DimHull)) {
2256         ValInt = 0;
2257       }
2258 
2259       Int.push_back(ValInt);
2260       Transform = Transform.equate(isl::dim::in, i, isl::dim::out, i);
2261     }
2262 
2263     isl::set MappedElements = isl::map(Transform).domain();
2264     if (!Elements.is_subset(MappedElements))
2265       continue;
2266 
2267     bool CanFold = true;
2268     if (Int[0] <= 1)
2269       CanFold = false;
2270 
2271     unsigned NumDims = Array->getNumberOfDimensions();
2272     for (unsigned i = 1; i < NumDims - 1; i++)
2273       if (Int[0] != Int[i] && Int[i])
2274         CanFold = false;
2275 
2276     if (!CanFold)
2277       continue;
2278 
2279     for (auto &Access : scop->access_functions())
2280       if (Access->getScopArrayInfo() == Array)
2281         Access->setAccessRelation(
2282             Access->getAccessRelation().apply_range(Transform));
2283 
2284     std::vector<const SCEV *> Sizes;
2285     for (unsigned i = 0; i < NumDims; i++) {
2286       auto Size = Array->getDimensionSize(i);
2287 
2288       if (i == NumDims - 1)
2289         Size = SE.getMulExpr(Size, SE.getConstant(Size->getType(), Int[0]));
2290       Sizes.push_back(Size);
2291     }
2292 
2293     Array->updateSizes(Sizes, false /* CheckConsistency */);
2294   }
2295 }
2296 
2297 void ScopBuilder::finalizeAccesses() {
2298   updateAccessDimensionality();
2299   foldSizeConstantsToRight();
2300   foldAccessRelations();
2301   assumeNoOutOfBounds();
2302 }
2303 
2304 void ScopBuilder::updateAccessDimensionality() {
2305   // Check all array accesses for each base pointer and find a (virtual) element
2306   // size for the base pointer that divides all access functions.
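  // For illustration (hypothetical accesses): if an array with i64 (8 byte)
  // elements is also accessed through subscripts that are only divisible by
  // 4 bytes, the virtual element size is reduced to 4 bytes (i32) so that all
  // subscripts remain divisible by it.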
2307   for (ScopStmt &Stmt : *scop)
2308     for (MemoryAccess *Access : Stmt) {
2309       if (!Access->isArrayKind())
2310         continue;
2311       ScopArrayInfo *Array =
2312           const_cast<ScopArrayInfo *>(Access->getScopArrayInfo());
2313 
2314       if (Array->getNumberOfDimensions() != 1)
2315         continue;
2316       unsigned DivisibleSize = Array->getElemSizeInBytes();
2317       const SCEV *Subscript = Access->getSubscript(0);
2318       while (!isDivisible(Subscript, DivisibleSize, SE))
2319         DivisibleSize /= 2;
2320       auto *Ty = IntegerType::get(SE.getContext(), DivisibleSize * 8);
2321       Array->updateElementType(Ty);
2322     }
2323 
2324   for (auto &Stmt : *scop)
2325     for (auto &Access : Stmt)
2326       Access->updateDimensionality();
2327 }
2328 
2329 void ScopBuilder::foldAccessRelations() {
2330   for (auto &Stmt : *scop)
2331     for (auto &Access : Stmt)
2332       Access->foldAccessRelation();
2333 }
2334 
2335 void ScopBuilder::assumeNoOutOfBounds() {
2336   if (PollyIgnoreInbounds)
2337     return;
2338   for (auto &Stmt : *scop)
2339     for (auto &Access : Stmt) {
2340       isl::set Outside = Access->assumeNoOutOfBound();
2341       const auto &Loc = Access->getAccessInstruction()
2342                             ? Access->getAccessInstruction()->getDebugLoc()
2343                             : DebugLoc();
2344       recordAssumption(&RecordedAssumptions, INBOUNDS, Outside, Loc,
2345                        AS_ASSUMPTION);
2346     }
2347 }
2348 
2349 void ScopBuilder::ensureValueWrite(Instruction *Inst) {
2350   // Find the statement that defines the value of Inst. That statement has to
2351   // write the value to make it available to those statements that read it.
2352   ScopStmt *Stmt = scop->getStmtFor(Inst);
2353 
2354   // It is possible that the value is synthesizable within a loop (such that it
2355   // is not part of any statement), but not after the loop (where you need the
2356   // number of loop round-trips to synthesize it). In LCSSA-form a PHI node will
2357   // avoid this. In case the IR has no such PHI, use the last statement (where
2358   // the value is synthesizable) to write the value.
2359   if (!Stmt)
2360     Stmt = scop->getLastStmtFor(Inst->getParent());
2361 
2362   // Inst not defined within this SCoP.
2363   if (!Stmt)
2364     return;
2365 
2366   // Do not process further if the instruction is already written.
2367   if (Stmt->lookupValueWriteOf(Inst))
2368     return;
2369 
2370   addMemoryAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, Inst, Inst->getType(),
2371                   true, Inst, ArrayRef<const SCEV *>(),
2372                   ArrayRef<const SCEV *>(), MemoryKind::Value);
2373 }
2374 
2375 void ScopBuilder::ensureValueRead(Value *V, ScopStmt *UserStmt) {
2376   // TODO: Make ScopStmt::ensureValueRead(Value*) offer the same functionality
2377   // to be able to replace this one. Currently, there is a split responsibility.
2378   // In a first step, the MemoryAccess is created, but without the
2379   // AccessRelation. In the second step by ScopStmt::buildAccessRelations(), the
2380   // AccessRelation is created. At least for scalar accesses, there is no new
2381   // information available at ScopStmt::buildAccessRelations(), so we could
2382   // create the AccessRelation right away. This is what
2383   // ScopStmt::ensureValueRead(Value*) does.
2384 
2385   auto *Scope = UserStmt->getSurroundingLoop();
2386   auto VUse = VirtualUse::create(scop.get(), UserStmt, Scope, V, false);
2387   switch (VUse.getKind()) {
2388   case VirtualUse::Constant:
2389   case VirtualUse::Block:
2390   case VirtualUse::Synthesizable:
2391   case VirtualUse::Hoisted:
2392   case VirtualUse::Intra:
2393     // Uses of these kinds do not need a MemoryAccess.
2394     break;
2395 
2396   case VirtualUse::ReadOnly:
2397     // Add MemoryAccess for invariant values only if requested.
2398     if (!ModelReadOnlyScalars)
2399       break;
2400 
2401     LLVM_FALLTHROUGH;
2402   case VirtualUse::Inter:
2403 
2404     // Do not create another MemoryAccess for reloading the value if one already
2405     // exists.
2406     if (UserStmt->lookupValueReadOf(V))
2407       break;
2408 
2409     addMemoryAccess(UserStmt, nullptr, MemoryAccess::READ, V, V->getType(),
2410                     true, V, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
2411                     MemoryKind::Value);
2412 
2413     // Inter-statement uses need to write the value in their defining statement.
2414     if (VUse.isInter())
2415       ensureValueWrite(cast<Instruction>(V));
2416     break;
2417   }
2418 }
2419 
2420 void ScopBuilder::ensurePHIWrite(PHINode *PHI, ScopStmt *IncomingStmt,
2421                                  BasicBlock *IncomingBlock,
2422                                  Value *IncomingValue, bool IsExitBlock) {
  // As the incoming block might turn out to be an error statement, ensure we
2424   // will create an exit PHI SAI object. It is needed during code generation
2425   // and would be created later anyway.
2426   if (IsExitBlock)
2427     scop->getOrCreateScopArrayInfo(PHI, PHI->getType(), {},
2428                                    MemoryKind::ExitPHI);
2429 
2430   // This is possible if PHI is in the SCoP's entry block. The incoming blocks
2431   // from outside the SCoP's region have no statement representation.
2432   if (!IncomingStmt)
2433     return;
2434 
  // Ensure that the incoming value is available in the incoming block. This
  // must be done before the check for multiple PHI writes because multiple
  // exiting edges from a subregion can each be the effective written value of
  // the subregion. As such, all of them must be made available in the
  // subregion statement.
2440   ensureValueRead(IncomingValue, IncomingStmt);
2441 
2442   // Do not add more than one MemoryAccess per PHINode and ScopStmt.
2443   if (MemoryAccess *Acc = IncomingStmt->lookupPHIWriteOf(PHI)) {
2444     assert(Acc->getAccessInstruction() == PHI);
2445     Acc->addIncoming(IncomingBlock, IncomingValue);
2446     return;
2447   }
2448 
2449   MemoryAccess *Acc = addMemoryAccess(
2450       IncomingStmt, PHI, MemoryAccess::MUST_WRITE, PHI, PHI->getType(), true,
2451       PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
2452       IsExitBlock ? MemoryKind::ExitPHI : MemoryKind::PHI);
2453   assert(Acc);
2454   Acc->addIncoming(IncomingBlock, IncomingValue);
2455 }
2456 
2457 void ScopBuilder::addPHIReadAccess(ScopStmt *PHIStmt, PHINode *PHI) {
2458   addMemoryAccess(PHIStmt, PHI, MemoryAccess::READ, PHI, PHI->getType(), true,
2459                   PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
2460                   MemoryKind::PHI);
2461 }
2462 
2463 void ScopBuilder::buildDomain(ScopStmt &Stmt) {
2464   isl::id Id = isl::id::alloc(scop->getIslCtx(), Stmt.getBaseName(), &Stmt);
2465 
2466   Stmt.Domain = scop->getDomainConditions(&Stmt);
2467   Stmt.Domain = Stmt.Domain.set_tuple_id(Id);
2468 }
2469 
2470 void ScopBuilder::collectSurroundingLoops(ScopStmt &Stmt) {
2471   isl::set Domain = Stmt.getDomain();
2472   BasicBlock *BB = Stmt.getEntryBlock();
2473 
2474   Loop *L = LI.getLoopFor(BB);
2475 
2476   while (L && Stmt.isRegionStmt() && Stmt.getRegion()->contains(L))
2477     L = L->getParentLoop();
2478 
2479   SmallVector<llvm::Loop *, 8> Loops;
2480 
2481   while (L && Stmt.getParent()->getRegion().contains(L)) {
2482     Loops.push_back(L);
2483     L = L->getParentLoop();
2484   }
2485 
2486   Stmt.NestLoops.insert(Stmt.NestLoops.begin(), Loops.rbegin(), Loops.rend());
2487 }
2488 
2489 /// Return the reduction type for a given binary operator.
2490 static MemoryAccess::ReductionType getReductionType(const BinaryOperator *BinOp,
2491                                                     const Instruction *Load) {
2492   if (!BinOp)
2493     return MemoryAccess::RT_NONE;
2494   switch (BinOp->getOpcode()) {
2495   case Instruction::FAdd:
2496     if (!BinOp->isFast())
2497       return MemoryAccess::RT_NONE;
2498     LLVM_FALLTHROUGH;
2499   case Instruction::Add:
2500     return MemoryAccess::RT_ADD;
2501   case Instruction::Or:
2502     return MemoryAccess::RT_BOR;
2503   case Instruction::Xor:
2504     return MemoryAccess::RT_BXOR;
2505   case Instruction::And:
2506     return MemoryAccess::RT_BAND;
2507   case Instruction::FMul:
2508     if (!BinOp->isFast())
2509       return MemoryAccess::RT_NONE;
2510     LLVM_FALLTHROUGH;
2511   case Instruction::Mul:
2512     if (DisableMultiplicativeReductions)
2513       return MemoryAccess::RT_NONE;
2514     return MemoryAccess::RT_MUL;
2515   default:
2516     return MemoryAccess::RT_NONE;
2517   }
2518 }
2519 
2520 void ScopBuilder::checkForReductions(ScopStmt &Stmt) {
2521   SmallVector<MemoryAccess *, 2> Loads;
2522   SmallVector<std::pair<MemoryAccess *, MemoryAccess *>, 4> Candidates;
2523 
2524   // First collect candidate load-store reduction chains by iterating over all
2525   // stores and collecting possible reduction loads.
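  // For illustration (hypothetical input): in
  //
  //   for (i = 0; i < N; i++)
  //     sum[0] += A[i];
  //
  // the load of sum[0] and the store to sum[0] form a candidate pair; the
  // load of A[i] accesses a different array and therefore does not
  // invalidate the reduction.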
2526   for (MemoryAccess *StoreMA : Stmt) {
2527     if (StoreMA->isRead())
2528       continue;
2529 
2530     Loads.clear();
2531     collectCandidateReductionLoads(StoreMA, Loads);
2532     for (MemoryAccess *LoadMA : Loads)
2533       Candidates.push_back(std::make_pair(LoadMA, StoreMA));
2534   }
2535 
2536   // Then check each possible candidate pair.
2537   for (const auto &CandidatePair : Candidates) {
2538     bool Valid = true;
2539     isl::map LoadAccs = CandidatePair.first->getAccessRelation();
2540     isl::map StoreAccs = CandidatePair.second->getAccessRelation();
2541 
2542     // Skip those with obviously unequal base addresses.
2543     if (!LoadAccs.has_equal_space(StoreAccs)) {
2544       continue;
2545     }
2546 
    // Then check the candidate pair for overlap with the other memory
    // accesses of the statement.
2548     isl::map AllAccsRel = LoadAccs.unite(StoreAccs);
2549     AllAccsRel = AllAccsRel.intersect_domain(Stmt.getDomain());
2550     isl::set AllAccs = AllAccsRel.range();
2551 
2552     for (MemoryAccess *MA : Stmt) {
2553       if (MA == CandidatePair.first || MA == CandidatePair.second)
2554         continue;
2555 
2556       isl::map AccRel =
2557           MA->getAccessRelation().intersect_domain(Stmt.getDomain());
2558       isl::set Accs = AccRel.range();
2559 
2560       if (AllAccs.has_equal_space(Accs)) {
2561         isl::set OverlapAccs = Accs.intersect(AllAccs);
2562         Valid = Valid && OverlapAccs.is_empty();
2563       }
2564     }
2565 
2566     if (!Valid)
2567       continue;
2568 
2569     const LoadInst *Load =
2570         dyn_cast<const LoadInst>(CandidatePair.first->getAccessInstruction());
2571     MemoryAccess::ReductionType RT =
2572         getReductionType(dyn_cast<BinaryOperator>(Load->user_back()), Load);
2573 
    // If no overlapping access was found, we mark the load and store as
    // reduction-like.
2576     CandidatePair.first->markAsReductionLike(RT);
2577     CandidatePair.second->markAsReductionLike(RT);
2578   }
2579 }
2580 
2581 void ScopBuilder::verifyInvariantLoads() {
2582   auto &RIL = scop->getRequiredInvariantLoads();
2583   for (LoadInst *LI : RIL) {
2584     assert(LI && scop->contains(LI));
2585     // If there exists a statement in the scop which has a memory access for
2586     // @p LI, then mark this scop as infeasible for optimization.
2587     for (ScopStmt &Stmt : *scop)
2588       if (Stmt.getArrayAccessOrNULLFor(LI)) {
2589         scop->invalidate(INVARIANTLOAD, LI->getDebugLoc(), LI->getParent());
2590         return;
2591       }
2592   }
2593 }
2594 
2595 void ScopBuilder::hoistInvariantLoads() {
2596   if (!PollyInvariantLoadHoisting)
2597     return;
2598 
2599   isl::union_map Writes = scop->getWrites();
2600   for (ScopStmt &Stmt : *scop) {
2601     InvariantAccessesTy InvariantAccesses;
2602 
2603     for (MemoryAccess *Access : Stmt) {
2604       isl::set NHCtx = getNonHoistableCtx(Access, Writes);
2605       if (!NHCtx.is_null())
2606         InvariantAccesses.push_back({Access, NHCtx});
2607     }
2608 
2609     // Transfer the memory access from the statement to the SCoP.
2610     for (auto InvMA : InvariantAccesses)
2611       Stmt.removeMemoryAccess(InvMA.MA);
2612     addInvariantLoads(Stmt, InvariantAccesses);
2613   }
2614 }
2615 
2616 /// Check if an access range is too complex.
2617 ///
/// An access range is too complex if it contains either many disjuncts or
/// very complex expressions. As a simple heuristic, we assume a set to be
/// too complex if the sum of existentially quantified dimensions and
2621 /// set dimensions is larger than a threshold. This reliably detects both
2622 /// sets with many disjuncts as well as sets with many divisions as they
2623 /// arise in h264.
2624 ///
2625 /// @param AccessRange The range to check for complexity.
2626 ///
2627 /// @returns True if the access range is too complex.
2628 static bool isAccessRangeTooComplex(isl::set AccessRange) {
2629   unsigned NumTotalDims = 0;
2630 
2631   for (isl::basic_set BSet : AccessRange.get_basic_set_list()) {
2632     NumTotalDims += unsignedFromIslSize(BSet.dim(isl::dim::div));
2633     NumTotalDims += unsignedFromIslSize(BSet.dim(isl::dim::set));
2634   }
2635 
2636   if (NumTotalDims > MaxDimensionsInAccessRange)
2637     return true;
2638 
2639   return false;
2640 }
2641 
2642 bool ScopBuilder::hasNonHoistableBasePtrInScop(MemoryAccess *MA,
2643                                                isl::union_map Writes) {
2644   if (auto *BasePtrMA = scop->lookupBasePtrAccess(MA)) {
2645     return getNonHoistableCtx(BasePtrMA, Writes).is_null();
2646   }
2647 
2648   Value *BaseAddr = MA->getOriginalBaseAddr();
2649   if (auto *BasePtrInst = dyn_cast<Instruction>(BaseAddr))
2650     if (!isa<LoadInst>(BasePtrInst))
2651       return scop->contains(BasePtrInst);
2652 
2653   return false;
2654 }
2655 
2656 void ScopBuilder::addUserContext() {
2657   if (UserContextStr.empty())
2658     return;
2659 
2660   isl::set UserContext = isl::set(scop->getIslCtx(), UserContextStr.c_str());
2661   isl::space Space = scop->getParamSpace();
2662   isl::size SpaceParams = Space.dim(isl::dim::param);
2663   if (unsignedFromIslSize(SpaceParams) !=
2664       unsignedFromIslSize(UserContext.dim(isl::dim::param))) {
2665     std::string SpaceStr = stringFromIslObj(Space, "null");
    errs() << "Error: the context provided in -polly-context does not have "
           << "the same number of dimensions as the computed context. Due to "
           << "this "
2668            << "mismatch, the -polly-context option is ignored. Please provide "
2669            << "the context in the parameter space: " << SpaceStr << ".\n";
2670     return;
2671   }
2672 
2673   for (auto i : rangeIslSize(0, SpaceParams)) {
2674     std::string NameContext =
2675         scop->getContext().get_dim_name(isl::dim::param, i);
2676     std::string NameUserContext = UserContext.get_dim_name(isl::dim::param, i);
2677 
2678     if (NameContext != NameUserContext) {
2679       std::string SpaceStr = stringFromIslObj(Space, "null");
2680       errs() << "Error: the name of dimension " << i
2681              << " provided in -polly-context "
2682              << "is '" << NameUserContext << "', but the name in the computed "
2683              << "context is '" << NameContext
2684              << "'. Due to this name mismatch, "
2685              << "the -polly-context option is ignored. Please provide "
2686              << "the context in the parameter space: " << SpaceStr << ".\n";
2687       return;
2688     }
2689 
2690     UserContext = UserContext.set_dim_id(isl::dim::param, i,
2691                                          Space.get_dim_id(isl::dim::param, i));
2692   }
2693   isl::set newContext = scop->getContext().intersect(UserContext);
2694   scop->setContext(newContext);
2695 }
2696 
2697 isl::set ScopBuilder::getNonHoistableCtx(MemoryAccess *Access,
2698                                          isl::union_map Writes) {
2699   // TODO: Loads that are not loop carried, hence are in a statement with
2700   //       zero iterators, are by construction invariant, though we
2701   //       currently "hoist" them anyway. This is necessary because we allow
2702   //       them to be treated as parameters (e.g., in conditions) and our code
2703   //       generation would otherwise use the old value.
2704 
2705   auto &Stmt = *Access->getStatement();
2706   BasicBlock *BB = Stmt.getEntryBlock();
2707 
2708   if (Access->isScalarKind() || Access->isWrite() || !Access->isAffine() ||
2709       Access->isMemoryIntrinsic())
2710     return {};
2711 
  // Skip accesses that have an invariant base pointer which is defined but
  // not loaded inside the SCoP. This can happen e.g., if a readnone call
  // returns a pointer that is used as a base address. However, as we want
  // to hoist indirect pointers, we allow the base pointer to be defined in
  // the region if it is also a memory access. Each ScopArrayInfo object
  // that has a base pointer origin has a base pointer that is loaded and
  // that is invariant, thus it will be hoisted too. However, if there is
  // no base pointer origin we check that the base pointer is defined
  // outside the region.
2721   auto *LI = cast<LoadInst>(Access->getAccessInstruction());
2722   if (hasNonHoistableBasePtrInScop(Access, Writes))
2723     return {};
2724 
2725   isl::map AccessRelation = Access->getAccessRelation();
2726   assert(!AccessRelation.is_empty());
2727 
2728   if (AccessRelation.involves_dims(isl::dim::in, 0, Stmt.getNumIterators()))
2729     return {};
2730 
2731   AccessRelation = AccessRelation.intersect_domain(Stmt.getDomain());
2732   isl::set SafeToLoad;
2733 
2734   auto &DL = scop->getFunction().getParent()->getDataLayout();
2735   if (isSafeToLoadUnconditionally(LI->getPointerOperand(), LI->getType(),
2736                                   LI->getAlign(), DL)) {
2737     SafeToLoad = isl::set::universe(AccessRelation.get_space().range());
2738   } else if (BB != LI->getParent()) {
2739     // Skip accesses in non-affine subregions as they might not be executed
2740     // under the same condition as the entry of the non-affine subregion.
2741     return {};
2742   } else {
2743     SafeToLoad = AccessRelation.range();
2744   }
2745 
2746   if (isAccessRangeTooComplex(AccessRelation.range()))
2747     return {};
2748 
2749   isl::union_map Written = Writes.intersect_range(SafeToLoad);
2750   isl::set WrittenCtx = Written.params();
2751   bool IsWritten = !WrittenCtx.is_empty();
2752 
2753   if (!IsWritten)
2754     return WrittenCtx;
2755 
2756   WrittenCtx = WrittenCtx.remove_divs();
2757   bool TooComplex =
2758       unsignedFromIslSize(WrittenCtx.n_basic_set()) >= MaxDisjunctsInDomain;
2759   if (TooComplex || !isRequiredInvariantLoad(LI))
2760     return {};
2761 
2762   scop->addAssumption(INVARIANTLOAD, WrittenCtx, LI->getDebugLoc(),
2763                       AS_RESTRICTION, LI->getParent());
2764   return WrittenCtx;
2765 }
2766 
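/// Check if @p maybeParam is a formal argument of the function @p F.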
2767 static bool isAParameter(llvm::Value *maybeParam, const Function &F) {
2768   for (const llvm::Argument &Arg : F.args())
2769     if (&Arg == maybeParam)
2770       return true;
2771 
2772   return false;
2773 }
2774 
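/// Check if the invariant load access @p MA can be hoisted speculatively,
/// i.e., preloaded under the universe execution context instead of the
/// (possibly restricted) context of its statement's domain.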
2775 bool ScopBuilder::canAlwaysBeHoisted(MemoryAccess *MA,
2776                                      bool StmtInvalidCtxIsEmpty,
2777                                      bool MAInvalidCtxIsEmpty,
2778                                      bool NonHoistableCtxIsEmpty) {
2779   LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction());
2780   const DataLayout &DL = LInst->getParent()->getModule()->getDataLayout();
2781   if (PollyAllowDereferenceOfAllFunctionParams &&
2782       isAParameter(LInst->getPointerOperand(), scop->getFunction()))
2783     return true;
2784 
2785   // TODO: We can provide more information for better but more expensive
2786   //       results.
2787   if (!isDereferenceableAndAlignedPointer(
2788           LInst->getPointerOperand(), LInst->getType(), LInst->getAlign(), DL))
2789     return false;
2790 
2791   // If the location might be overwritten we do not hoist it unconditionally.
2792   //
2793   // TODO: This is probably too conservative.
2794   if (!NonHoistableCtxIsEmpty)
2795     return false;
2796 
2797   // If a dereferenceable load is in a statement that is modeled precisely we
2798   // can hoist it.
2799   if (StmtInvalidCtxIsEmpty && MAInvalidCtxIsEmpty)
2800     return true;
2801 
2802   // Even if the statement is not modeled precisely we can hoist the load if it
2803   // does not involve any parameters that might have been specialized by the
2804   // statement domain.
2805   for (const SCEV *Subscript : MA->subscripts())
2806     if (!isa<SCEVConstant>(Subscript))
2807       return false;
2808   return true;
2809 }
2810 
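/// Add the invariant loads in @p InvMAs, hoisted out of @p Stmt, to the
/// SCoP's invariant equivalence classes and compute the execution context
/// under which each preloaded value is valid.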
2811 void ScopBuilder::addInvariantLoads(ScopStmt &Stmt,
2812                                     InvariantAccessesTy &InvMAs) {
2813   if (InvMAs.empty())
2814     return;
2815 
2816   isl::set StmtInvalidCtx = Stmt.getInvalidContext();
2817   bool StmtInvalidCtxIsEmpty = StmtInvalidCtx.is_empty();
2818 
2819   // Get the context under which the statement is executed but remove the error
2820   // context under which this statement is reached.
2821   isl::set DomainCtx = Stmt.getDomain().params();
2822   DomainCtx = DomainCtx.subtract(StmtInvalidCtx);
2823 
2824   if (unsignedFromIslSize(DomainCtx.n_basic_set()) >= MaxDisjunctsInDomain) {
2825     auto *AccInst = InvMAs.front().MA->getAccessInstruction();
2826     scop->invalidate(COMPLEXITY, AccInst->getDebugLoc(), AccInst->getParent());
2827     return;
2828   }
2829 
  // Project out all parameters that relate to loads in the statement.
  // Otherwise we could have cyclic dependences on the constraints under which
  // the hoisted loads are executed and could not determine an order in which
  // to pre-load them. This happens because not only lower bounds are part of
  // the domain but also upper bounds.
2835   for (auto &InvMA : InvMAs) {
2836     auto *MA = InvMA.MA;
2837     Instruction *AccInst = MA->getAccessInstruction();
2838     if (SE.isSCEVable(AccInst->getType())) {
2839       SetVector<Value *> Values;
2840       for (const SCEV *Parameter : scop->parameters()) {
2841         Values.clear();
2842         findValues(Parameter, SE, Values);
2843         if (!Values.count(AccInst))
2844           continue;
2845 
2846         isl::id ParamId = scop->getIdForParam(Parameter);
2847         if (!ParamId.is_null()) {
2848           int Dim = DomainCtx.find_dim_by_id(isl::dim::param, ParamId);
2849           if (Dim >= 0)
2850             DomainCtx = DomainCtx.eliminate(isl::dim::param, Dim, 1);
2851         }
2852       }
2853     }
2854   }
2855 
2856   for (auto &InvMA : InvMAs) {
2857     auto *MA = InvMA.MA;
2858     isl::set NHCtx = InvMA.NonHoistableCtx;
2859 
2860     // Check for another invariant access that accesses the same location as
2861     // MA and if found consolidate them. Otherwise create a new equivalence
2862     // class at the end of InvariantEquivClasses.
2863     LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction());
2864     Type *Ty = LInst->getType();
2865     const SCEV *PointerSCEV = SE.getSCEV(LInst->getPointerOperand());
2866 
2867     isl::set MAInvalidCtx = MA->getInvalidContext();
2868     bool NonHoistableCtxIsEmpty = NHCtx.is_empty();
2869     bool MAInvalidCtxIsEmpty = MAInvalidCtx.is_empty();
2870 
2871     isl::set MACtx;
2872     // Check if we know that this pointer can be speculatively accessed.
2873     if (canAlwaysBeHoisted(MA, StmtInvalidCtxIsEmpty, MAInvalidCtxIsEmpty,
2874                            NonHoistableCtxIsEmpty)) {
2875       MACtx = isl::set::universe(DomainCtx.get_space());
2876     } else {
2877       MACtx = DomainCtx;
2878       MACtx = MACtx.subtract(MAInvalidCtx.unite(NHCtx));
2879       MACtx = MACtx.gist_params(scop->getContext());
2880     }
2881 
2882     bool Consolidated = false;
2883     for (auto &IAClass : scop->invariantEquivClasses()) {
2884       if (PointerSCEV != IAClass.IdentifyingPointer || Ty != IAClass.AccessType)
2885         continue;
2886 
      // If the pointer and the type are equal, check if the access function
      // wrt. the domain is equal too. It can happen that the domain fixes
      // parameter values and these can be different for distinct parts of the
      // SCoP. If this happens we cannot consolidate the loads but need to
      // create a new invariant load equivalence class.
2892       auto &MAs = IAClass.InvariantAccesses;
2893       if (!MAs.empty()) {
2894         auto *LastMA = MAs.front();
2895 
2896         isl::set AR = MA->getAccessRelation().range();
2897         isl::set LastAR = LastMA->getAccessRelation().range();
2898         bool SameAR = AR.is_equal(LastAR);
2899 
2900         if (!SameAR)
2901           continue;
2902       }
2903 
2904       // Add MA to the list of accesses that are in this class.
2905       MAs.push_front(MA);
2906 
2907       Consolidated = true;
2908 
2909       // Unify the execution context of the class and this statement.
2910       isl::set IAClassDomainCtx = IAClass.ExecutionContext;
2911       if (!IAClassDomainCtx.is_null())
2912         IAClassDomainCtx = IAClassDomainCtx.unite(MACtx).coalesce();
2913       else
2914         IAClassDomainCtx = MACtx;
2915       IAClass.ExecutionContext = IAClassDomainCtx;
2916       break;
2917     }
2918 
2919     if (Consolidated)
2920       continue;
2921 
2922     MACtx = MACtx.coalesce();
2923 
    // If we could not consolidate MA, i.e., we did not find an equivalence
    // class for it, create a new one.
2926     scop->addInvariantEquivClass(
2927         InvariantEquivClassTy{PointerSCEV, MemoryAccessList{MA}, MACtx, Ty});
2928   }
2929 }
2930 
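/// Collect loads that might form a reduction together with @p StoreMA, i.e.,
/// loads feeding a single commutative and associative binary operator whose
/// result is stored by @p StoreMA. For example, for 'sum += A[i]' the loads
/// of 'sum' and 'A[i]' are candidates for the store to 'sum'.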
2931 void ScopBuilder::collectCandidateReductionLoads(
2932     MemoryAccess *StoreMA, SmallVectorImpl<MemoryAccess *> &Loads) {
2933   ScopStmt *Stmt = StoreMA->getStatement();
2934 
2935   auto *Store = dyn_cast<StoreInst>(StoreMA->getAccessInstruction());
2936   if (!Store)
2937     return;
2938 
  // Skip if the value stored is not defined by a binary operator
2940   auto *BinOp = dyn_cast<BinaryOperator>(Store->getValueOperand());
2941   if (!BinOp)
2942     return;
2943 
  // Skip if the binary operator has multiple uses
2945   if (BinOp->getNumUses() != 1)
2946     return;
2947 
2948   // Skip if the opcode of the binary operator is not commutative/associative
2949   if (!BinOp->isCommutative() || !BinOp->isAssociative())
2950     return;
2951 
  // Skip if the binary operator is not in the same block as the store
2953   if (BinOp->getParent() != Store->getParent())
2954     return;
2955 
2956   // Skip if it is a multiplicative reduction and we disabled them
2957   if (DisableMultiplicativeReductions &&
2958       (BinOp->getOpcode() == Instruction::Mul ||
2959        BinOp->getOpcode() == Instruction::FMul))
2960     return;
2961 
2962   // Check the binary operator operands for a candidate load
2963   auto *PossibleLoad0 = dyn_cast<LoadInst>(BinOp->getOperand(0));
2964   auto *PossibleLoad1 = dyn_cast<LoadInst>(BinOp->getOperand(1));
2965   if (!PossibleLoad0 && !PossibleLoad1)
2966     return;
2967 
2968   // A load is only a candidate if it cannot escape (thus has only this use)
2969   if (PossibleLoad0 && PossibleLoad0->getNumUses() == 1)
2970     if (PossibleLoad0->getParent() == Store->getParent())
2971       Loads.push_back(&Stmt->getArrayAccessFor(PossibleLoad0));
2972   if (PossibleLoad1 && PossibleLoad1->getNumUses() == 1)
2973     if (PossibleLoad1->getParent() == Store->getParent())
2974       Loads.push_back(&Stmt->getArrayAccessFor(PossibleLoad1));
2975 }
2976 
/// Find the canonical ScopArrayInfo object for a set of invariant
/// load-hoisted loads. The canonical array is the one that corresponds to
/// the first load in the list of accesses that is used as the base pointer
/// of a scop array.
2981 static const ScopArrayInfo *findCanonicalArray(Scop &S,
2982                                                MemoryAccessList &Accesses) {
2983   for (MemoryAccess *Access : Accesses) {
2984     const ScopArrayInfo *CanonicalArray = S.getScopArrayInfoOrNull(
2985         Access->getAccessInstruction(), MemoryKind::Array);
2986     if (CanonicalArray)
2987       return CanonicalArray;
2988   }
2989   return nullptr;
2990 }
2991 
/// Check if @p Array serves as the base array in an invariant load.
2993 static bool isUsedForIndirectHoistedLoad(Scop &S, const ScopArrayInfo *Array) {
2994   for (InvariantEquivClassTy &EqClass2 : S.getInvariantAccesses())
2995     for (MemoryAccess *Access2 : EqClass2.InvariantAccesses)
2996       if (Access2->getScopArrayInfo() == Array)
2997         return true;
2998   return false;
2999 }
3000 
3001 /// Replace the base pointer arrays in all memory accesses referencing @p Old,
3002 /// with a reference to @p New.
3003 static void replaceBasePtrArrays(Scop &S, const ScopArrayInfo *Old,
3004                                  const ScopArrayInfo *New) {
3005   for (ScopStmt &Stmt : S)
3006     for (MemoryAccess *Access : Stmt) {
3007       if (Access->getLatestScopArrayInfo() != Old)
3008         continue;
3009 
3010       isl::id Id = New->getBasePtrId();
3011       isl::map Map = Access->getAccessRelation();
3012       Map = Map.set_tuple_id(isl::dim::out, Id);
3013       Access->setAccessRelation(Map);
3014     }
3015 }
3016 
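/// Canonicalize arrays whose base pointers are invariant load hoisted: if
/// several such base pointers refer to compatible arrays, redirect all
/// accesses to a single canonical ScopArrayInfo object.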
3017 void ScopBuilder::canonicalizeDynamicBasePtrs() {
3018   for (InvariantEquivClassTy &EqClass : scop->InvariantEquivClasses) {
3019     MemoryAccessList &BasePtrAccesses = EqClass.InvariantAccesses;
3020 
3021     const ScopArrayInfo *CanonicalBasePtrSAI =
3022         findCanonicalArray(*scop, BasePtrAccesses);
3023 
3024     if (!CanonicalBasePtrSAI)
3025       continue;
3026 
3027     for (MemoryAccess *BasePtrAccess : BasePtrAccesses) {
3028       const ScopArrayInfo *BasePtrSAI = scop->getScopArrayInfoOrNull(
3029           BasePtrAccess->getAccessInstruction(), MemoryKind::Array);
3030       if (!BasePtrSAI || BasePtrSAI == CanonicalBasePtrSAI ||
3031           !BasePtrSAI->isCompatibleWith(CanonicalBasePtrSAI))
3032         continue;
3033 
      // We currently do not canonicalize arrays for which some accesses are
      // hoisted as invariant loads. If we did, we would need to update the
      // access functions of the invariant loads as well. However, as this is
      // not a very common situation, we leave this for now to avoid further
      // complexity increases.
3039       if (isUsedForIndirectHoistedLoad(*scop, BasePtrSAI))
3040         continue;
3041 
3042       replaceBasePtrArrays(*scop, BasePtrSAI, CanonicalBasePtrSAI);
3043     }
3044   }
3045 }
3046 
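/// Build the polyhedral access relation for every memory access in @p Stmt
/// and bind each access to its ScopArrayInfo object.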
3047 void ScopBuilder::buildAccessRelations(ScopStmt &Stmt) {
3048   for (MemoryAccess *Access : Stmt.MemAccs) {
3049     Type *ElementType = Access->getElementType();
3050 
3051     MemoryKind Ty;
3052     if (Access->isPHIKind())
3053       Ty = MemoryKind::PHI;
3054     else if (Access->isExitPHIKind())
3055       Ty = MemoryKind::ExitPHI;
3056     else if (Access->isValueKind())
3057       Ty = MemoryKind::Value;
3058     else
3059       Ty = MemoryKind::Array;
3060 
    // Create isl::pw_aff objects for the SCEVs that describe the sizes and
    // collect all assumptions taken while doing so. The isl::pw_aff objects
    // are cached internally and used later by the Scop.
3064     for (const SCEV *Size : Access->Sizes) {
3065       if (!Size)
3066         continue;
3067       scop->getPwAff(Size, nullptr, false, &RecordedAssumptions);
3068     }
3069     auto *SAI = scop->getOrCreateScopArrayInfo(Access->getOriginalBaseAddr(),
3070                                                ElementType, Access->Sizes, Ty);
3071 
    // Create isl::pw_aff objects for the SCEVs that describe the subscripts
    // and collect all assumptions taken while doing so. The isl::pw_aff
    // objects are cached internally and used later by the Scop.
3075     for (const SCEV *Subscript : Access->subscripts()) {
3076       if (!Access->isAffine() || !Subscript)
3077         continue;
3078       scop->getPwAff(Subscript, Stmt.getEntryBlock(), false,
3079                      &RecordedAssumptions);
3080     }
3081     Access->buildAccessRelation(SAI);
3082     scop->addAccessData(Access);
3083   }
3084 }
3085 
3086 /// Add the minimal/maximal access in @p Set to @p User.
3087 ///
3088 /// @return True if more accesses should be added, false if we reached the
3089 ///         maximal number of run-time checks to be generated.
3090 static bool buildMinMaxAccess(isl::set Set,
3091                               Scop::MinMaxVectorTy &MinMaxAccesses, Scop &S) {
3092   isl::pw_multi_aff MinPMA, MaxPMA;
3093   isl::pw_aff LastDimAff;
3094   isl::aff OneAff;
3095   unsigned Pos;
3096 
3097   Set = Set.remove_divs();
3098   polly::simplify(Set);
3099 
3100   if (unsignedFromIslSize(Set.n_basic_set()) > RunTimeChecksMaxAccessDisjuncts)
3101     Set = Set.simple_hull();
3102 
3103   // Restrict the number of parameters involved in the access as the lexmin/
3104   // lexmax computation will take too long if this number is high.
3105   //
3106   // Experiments with a simple test case using an i7 4800MQ:
3107   //
3108   //  #Parameters involved | Time (in sec)
3109   //            6          |     0.01
3110   //            7          |     0.04
3111   //            8          |     0.12
3112   //            9          |     0.40
3113   //           10          |     1.54
3114   //           11          |     6.78
3115   //           12          |    30.38
3116   //
3117   if (isl_set_n_param(Set.get()) >
3118       static_cast<isl_size>(RunTimeChecksMaxParameters)) {
3119     unsigned InvolvedParams = 0;
3120     for (unsigned u = 0, e = isl_set_n_param(Set.get()); u < e; u++)
3121       if (Set.involves_dims(isl::dim::param, u, 1))
3122         InvolvedParams++;
3123 
3124     if (InvolvedParams > RunTimeChecksMaxParameters)
3125       return false;
3126   }
3127 
3128   MinPMA = Set.lexmin_pw_multi_aff();
3129   MaxPMA = Set.lexmax_pw_multi_aff();
3130 
3131   MinPMA = MinPMA.coalesce();
3132   MaxPMA = MaxPMA.coalesce();
3133 
3134   if (MaxPMA.is_null())
3135     return false;
3136 
3137   unsigned MaxOutputSize = unsignedFromIslSize(MaxPMA.dim(isl::dim::out));
3138 
3139   // Adjust the last dimension of the maximal access by one as we want to
3140   // enclose the accessed memory region by MinPMA and MaxPMA. The pointer
3141   // we test during code generation might now point after the end of the
3142   // allocated array but we will never dereference it anyway.
3143   assert(MaxOutputSize >= 1 && "Assumed at least one output dimension");
3144 
3145   Pos = MaxOutputSize - 1;
3146   LastDimAff = MaxPMA.at(Pos);
3147   OneAff = isl::aff(isl::local_space(LastDimAff.get_domain_space()));
3148   OneAff = OneAff.add_constant_si(1);
3149   LastDimAff = LastDimAff.add(OneAff);
3150   MaxPMA = MaxPMA.set_pw_aff(Pos, LastDimAff);
3151 
3152   if (MinPMA.is_null() || MaxPMA.is_null())
3153     return false;
3154 
3155   MinMaxAccesses.push_back(std::make_pair(MinPMA, MaxPMA));
3156 
3157   return true;
3158 }
3159 
3160 /// Wrapper function to calculate minimal/maximal accesses to each array.
3161 bool ScopBuilder::calculateMinMaxAccess(AliasGroupTy AliasGroup,
3162                                         Scop::MinMaxVectorTy &MinMaxAccesses) {
3163   MinMaxAccesses.reserve(AliasGroup.size());
3164 
3165   isl::union_set Domains = scop->getDomains();
3166   isl::union_map Accesses = isl::union_map::empty(scop->getIslCtx());
3167 
3168   for (MemoryAccess *MA : AliasGroup)
3169     Accesses = Accesses.unite(MA->getAccessRelation());
3170 
3171   Accesses = Accesses.intersect_domain(Domains);
3172   isl::union_set Locations = Accesses.range();
3173 
3174   bool LimitReached = false;
3175   for (isl::set Set : Locations.get_set_list()) {
3176     LimitReached |= !buildMinMaxAccess(Set, MinMaxAccesses, *scop);
3177     if (LimitReached)
3178       break;
3179   }
3180 
3181   return !LimitReached;
3182 }
3183 
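/// Return the execution domain of @p MA's statement with all set dimensions
/// projected out and the tuple id removed, so that domains of accesses from
/// different statements can be compared and united.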
3184 static isl::set getAccessDomain(MemoryAccess *MA) {
3185   isl::set Domain = MA->getStatement()->getDomain();
3186   Domain = Domain.project_out(isl::dim::set, 0,
3187                               unsignedFromIslSize(Domain.tuple_dim()));
3188   return Domain.reset_tuple_id();
3189 }
3190 
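/// Build run-time alias checks for the SCoP if requested. If the checks
/// cannot be built, invalidate the SCoP and return false.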
3191 bool ScopBuilder::buildAliasChecks() {
3192   if (!PollyUseRuntimeAliasChecks)
3193     return true;
3194 
3195   if (buildAliasGroups()) {
    // Aliasing assumptions do not go through addAssumption but we still want
    // to collect statistics, so we do it here explicitly.
3198     if (scop->getAliasGroups().size())
3199       Scop::incrementNumberOfAliasingAssumptions(1);
3200     return true;
3201   }
3202 
3203   // If a problem occurs while building the alias groups we need to delete
3204   // this SCoP and pretend it wasn't valid in the first place. To this end
3205   // we make the assumed context infeasible.
3206   scop->invalidate(ALIASING, DebugLoc());
3207 
3208   LLVM_DEBUG(dbgs() << "\n\nNOTE: Run time checks for " << scop->getNameStr()
3209                     << " could not be created. This SCoP has been dismissed.");
3210   return false;
3211 }
3212 
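/// Partition the array accesses of the SCoP into alias groups using LLVM's
/// alias analysis and record which arrays are written to. Only groups with
/// at least two accesses are kept, as smaller groups need no alias checks.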
3213 std::tuple<ScopBuilder::AliasGroupVectorTy, DenseSet<const ScopArrayInfo *>>
3214 ScopBuilder::buildAliasGroupsForAccesses() {
3215   AliasSetTracker AST(AA);
3216 
3217   DenseMap<Value *, MemoryAccess *> PtrToAcc;
3218   DenseSet<const ScopArrayInfo *> HasWriteAccess;
3219   for (ScopStmt &Stmt : *scop) {
3220 
3221     isl::set StmtDomain = Stmt.getDomain();
3222     bool StmtDomainEmpty = StmtDomain.is_empty();
3223 
3224     // Statements with an empty domain will never be executed.
3225     if (StmtDomainEmpty)
3226       continue;
3227 
3228     for (MemoryAccess *MA : Stmt) {
3229       if (MA->isScalarKind())
3230         continue;
3231       if (!MA->isRead())
3232         HasWriteAccess.insert(MA->getScopArrayInfo());
3233       MemAccInst Acc(MA->getAccessInstruction());
3234       if (MA->isRead() && isa<MemTransferInst>(Acc))
3235         PtrToAcc[cast<MemTransferInst>(Acc)->getRawSource()] = MA;
3236       else
3237         PtrToAcc[Acc.getPointerOperand()] = MA;
3238       AST.add(Acc);
3239     }
3240   }
3241 
3242   AliasGroupVectorTy AliasGroups;
3243   for (AliasSet &AS : AST) {
3244     if (AS.isMustAlias() || AS.isForwardingAliasSet())
3245       continue;
3246     AliasGroupTy AG;
3247     for (auto &PR : AS)
3248       AG.push_back(PtrToAcc[PR.getValue()]);
3249     if (AG.size() < 2)
3250       continue;
3251     AliasGroups.push_back(std::move(AG));
3252   }
3253 
3254   return std::make_tuple(AliasGroups, HasWriteAccess);
3255 }
3256 
3257 bool ScopBuilder::buildAliasGroups() {
3258   // To create sound alias checks we perform the following steps:
3259   //   o) We partition each group into read only and non read only accesses.
3260   //   o) For each group with more than one base pointer we then compute minimal
3261   //      and maximal accesses to each array of a group in read only and non
3262   //      read only partitions separately.
3263   AliasGroupVectorTy AliasGroups;
3264   DenseSet<const ScopArrayInfo *> HasWriteAccess;
3265 
3266   std::tie(AliasGroups, HasWriteAccess) = buildAliasGroupsForAccesses();
3267 
3268   splitAliasGroupsByDomain(AliasGroups);
3269 
3270   for (AliasGroupTy &AG : AliasGroups) {
3271     if (!scop->hasFeasibleRuntimeContext())
3272       return false;
3273 
3274     {
3275       IslMaxOperationsGuard MaxOpGuard(scop->getIslCtx().get(), OptComputeOut);
3276       bool Valid = buildAliasGroup(AG, HasWriteAccess);
3277       if (!Valid)
3278         return false;
3279     }
3280     if (isl_ctx_last_error(scop->getIslCtx().get()) == isl_error_quota) {
3281       scop->invalidate(COMPLEXITY, DebugLoc());
3282       return false;
3283     }
3284   }
3285 
3286   return true;
3287 }
3288 
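/// Compute the minimal/maximal accesses for one alias group, separated into
/// its read-only and read-write accesses, and register them as a run-time
/// alias check. Return false if valid checks could not be created.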
3289 bool ScopBuilder::buildAliasGroup(
3290     AliasGroupTy &AliasGroup, DenseSet<const ScopArrayInfo *> HasWriteAccess) {
3291   AliasGroupTy ReadOnlyAccesses;
3292   AliasGroupTy ReadWriteAccesses;
3293   SmallPtrSet<const ScopArrayInfo *, 4> ReadWriteArrays;
3294   SmallPtrSet<const ScopArrayInfo *, 4> ReadOnlyArrays;
3295 
3296   if (AliasGroup.size() < 2)
3297     return true;
3298 
3299   for (MemoryAccess *Access : AliasGroup) {
3300     ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "PossibleAlias",
3301                                         Access->getAccessInstruction())
3302              << "Possibly aliasing pointer, use restrict keyword.");
3303     const ScopArrayInfo *Array = Access->getScopArrayInfo();
3304     if (HasWriteAccess.count(Array)) {
3305       ReadWriteArrays.insert(Array);
3306       ReadWriteAccesses.push_back(Access);
3307     } else {
3308       ReadOnlyArrays.insert(Array);
3309       ReadOnlyAccesses.push_back(Access);
3310     }
3311   }
3312 
3313   // If there are no read-only pointers, and less than two read-write pointers,
3314   // no alias check is needed.
3315   if (ReadOnlyAccesses.empty() && ReadWriteArrays.size() <= 1)
3316     return true;
3317 
3318   // If there is no read-write pointer, no alias check is needed.
3319   if (ReadWriteArrays.empty())
3320     return true;
3321 
3322   // For non-affine accesses, no alias check can be generated as we cannot
3323   // compute a sufficiently tight lower and upper bound: bail out.
3324   for (MemoryAccess *MA : AliasGroup) {
3325     if (!MA->isAffine()) {
3326       scop->invalidate(ALIASING, MA->getAccessInstruction()->getDebugLoc(),
3327                        MA->getAccessInstruction()->getParent());
3328       return false;
3329     }
3330   }
3331 
3332   // Ensure that for all memory accesses for which we generate alias checks,
3333   // their base pointers are available.
3334   for (MemoryAccess *MA : AliasGroup) {
3335     if (MemoryAccess *BasePtrMA = scop->lookupBasePtrAccess(MA))
3336       scop->addRequiredInvariantLoad(
3337           cast<LoadInst>(BasePtrMA->getAccessInstruction()));
3338   }
3339 
3340   //  scop->getAliasGroups().emplace_back();
3341   //  Scop::MinMaxVectorPairTy &pair = scop->getAliasGroups().back();
3342   Scop::MinMaxVectorTy MinMaxAccessesReadWrite;
3343   Scop::MinMaxVectorTy MinMaxAccessesReadOnly;
3344 
3345   bool Valid;
3346 
3347   Valid = calculateMinMaxAccess(ReadWriteAccesses, MinMaxAccessesReadWrite);
3348 
3349   if (!Valid)
3350     return false;
3351 
3352   // Bail out if the number of values we need to compare is too large.
3353   // This is important as the number of comparisons grows quadratically with
3354   // the number of values we need to compare.
3355   if (MinMaxAccessesReadWrite.size() + ReadOnlyArrays.size() >
3356       RunTimeChecksMaxArraysPerGroup)
3357     return false;
3358 
3359   Valid = calculateMinMaxAccess(ReadOnlyAccesses, MinMaxAccessesReadOnly);
3360 
3361   scop->addAliasGroup(MinMaxAccessesReadWrite, MinMaxAccessesReadOnly);
3362   if (!Valid)
3363     return false;
3364 
3365   return true;
3366 }
3367 
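/// Split each alias group so that accesses whose execution domains are
/// disjoint from the rest of the group end up in a separate group; accesses
/// that never execute together need no mutual alias checks.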
3368 void ScopBuilder::splitAliasGroupsByDomain(AliasGroupVectorTy &AliasGroups) {
3369   for (unsigned u = 0; u < AliasGroups.size(); u++) {
3370     AliasGroupTy NewAG;
3371     AliasGroupTy &AG = AliasGroups[u];
3372     AliasGroupTy::iterator AGI = AG.begin();
3373     isl::set AGDomain = getAccessDomain(*AGI);
3374     while (AGI != AG.end()) {
3375       MemoryAccess *MA = *AGI;
3376       isl::set MADomain = getAccessDomain(MA);
3377       if (AGDomain.is_disjoint(MADomain)) {
3378         NewAG.push_back(MA);
3379         AGI = AG.erase(AGI);
3380       } else {
3381         AGDomain = AGDomain.unite(MADomain);
3382         AGI++;
3383       }
3384     }
3385     if (NewAG.size() > 1)
3386       AliasGroups.push_back(std::move(NewAG));
3387   }
3388 }
3389 
3390 #ifndef NDEBUG
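/// Check that the "physical" use kind of operand @p Op (derived from the IR)
/// matches its "virtual" use kind (derived from the statement's
/// MemoryAccesses).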
3391 static void verifyUse(Scop *S, Use &Op, LoopInfo &LI) {
3392   auto PhysUse = VirtualUse::create(S, Op, &LI, false);
3393   auto VirtUse = VirtualUse::create(S, Op, &LI, true);
3394   assert(PhysUse.getKind() == VirtUse.getKind());
3395 }
3396 
3397 /// Check the consistency of every statement's MemoryAccesses.
3398 ///
/// The check is carried out by expecting the "physical" kind of use (derived
/// from the BasicBlock the instruction resides in) to be the same as the
/// "virtual" kind of use (derived from a statement's MemoryAccess).
3402 ///
3403 /// The "physical" uses are taken by ensureValueRead to determine whether to
3404 /// create MemoryAccesses. When done, the kind of scalar access should be the
3405 /// same no matter which way it was derived.
3406 ///
/// The MemoryAccesses might be changed by later SCoP-modifying passes and
/// hence can intentionally deviate from the "physical" uses (which is why
/// they are called "virtual"). The code generator therefore has to pick up
/// the virtual uses. At this point in the ScopBuilder, however, no such
/// changes have happened yet, such that virtual and physical uses are
/// equivalent.
3412 static void verifyUses(Scop *S, LoopInfo &LI, DominatorTree &DT) {
3413   for (auto *BB : S->getRegion().blocks()) {
3414     for (auto &Inst : *BB) {
3415       auto *Stmt = S->getStmtFor(&Inst);
3416       if (!Stmt)
3417         continue;
3418 
3419       if (isIgnoredIntrinsic(&Inst))
3420         continue;
3421 
3422       // Branch conditions are encoded in the statement domains.
3423       if (Inst.isTerminator() && Stmt->isBlockStmt())
3424         continue;
3425 
3426       // Verify all uses.
3427       for (auto &Op : Inst.operands())
3428         verifyUse(S, Op, LI);
3429 
3430       // Stores do not produce values used by other statements.
3431       if (isa<StoreInst>(Inst))
3432         continue;
3433 
      // For every value defined in the block, also check that a use of that
      // value in the same statement would not be an inter-statement use. It
      // can still be synthesizable or load-hoisted, but these kinds of
      // instructions are not directly copied in code generation.
3438       auto VirtDef =
3439           VirtualUse::create(S, Stmt, Stmt->getSurroundingLoop(), &Inst, true);
3440       assert(VirtDef.getKind() == VirtualUse::Synthesizable ||
3441              VirtDef.getKind() == VirtualUse::Intra ||
3442              VirtDef.getKind() == VirtualUse::Hoisted);
3443     }
3444   }
3445 
3446   if (S->hasSingleExitEdge())
3447     return;
3448 
3449   // PHINodes in the SCoP region's exit block are also uses to be checked.
3450   if (!S->getRegion().isTopLevelRegion()) {
3451     for (auto &Inst : *S->getRegion().getExit()) {
3452       if (!isa<PHINode>(Inst))
3453         break;
3454 
3455       for (auto &Op : Inst.operands())
3456         verifyUse(S, Op, LI);
3457     }
3458   }
3459 }
3460 #endif
3461 
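/// Create the polyhedral description of the SCoP for region @p R: build
/// statements, domains, access relations and the schedule, add the context
/// and recorded assumptions, construct alias checks, and hoist invariant
/// loads.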
3462 void ScopBuilder::buildScop(Region &R, AssumptionCache &AC) {
3463   scop.reset(new Scop(R, SE, LI, DT, *SD.getDetectionContext(&R), ORE,
3464                       SD.getNextID()));
3465 
3466   buildStmts(R);
3467 
3468   // Create all invariant load instructions first. These are categorized as
3469   // 'synthesizable', therefore are not part of any ScopStmt but need to be
3470   // created somewhere.
3471   const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
3472   for (BasicBlock *BB : scop->getRegion().blocks()) {
3473     if (SD.isErrorBlock(*BB, scop->getRegion()))
3474       continue;
3475 
3476     for (Instruction &Inst : *BB) {
3477       LoadInst *Load = dyn_cast<LoadInst>(&Inst);
3478       if (!Load)
3479         continue;
3480 
3481       if (!RIL.count(Load))
3482         continue;
3483 
      // Invariant loads require a MemoryAccess to be created in some
      // statement. It is not important to which statement the MemoryAccess is
      // added because it will later be removed from the ScopStmt again. We
      // choose the first statement of the basic block the LoadInst is in.
3488       ArrayRef<ScopStmt *> List = scop->getStmtListFor(BB);
3489       assert(!List.empty());
3490       ScopStmt *RILStmt = List.front();
3491       buildMemoryAccess(Load, RILStmt);
3492     }
3493   }
3494   buildAccessFunctions();
3495 
  // In case the region does not have an exiting block we will later (during
  // code generation) split the exit block. This will move potential PHI nodes
  // from the current exit block into the new region exiting block. Hence, PHI
  // nodes that are not yet part of the region will become part of it once the
  // exit block is split.
  // To be able to handle these PHI nodes later, we now model their operands
  // as scalar accesses. Note that we do not model anything in the exit block
  // if we have an exiting block in the region, as there will not be any
  // splitting later.
3503   if (!R.isTopLevelRegion() && !scop->hasSingleExitEdge()) {
3504     for (Instruction &Inst : *R.getExit()) {
3505       PHINode *PHI = dyn_cast<PHINode>(&Inst);
3506       if (!PHI)
3507         break;
3508 
3509       buildPHIAccesses(nullptr, PHI, nullptr, true);
3510     }
3511   }
3512 
3513   // Create memory accesses for global reads since all arrays are now known.
3514   auto *AF = SE.getConstant(IntegerType::getInt64Ty(SE.getContext()), 0);
3515   for (auto GlobalReadPair : GlobalReads) {
3516     ScopStmt *GlobalReadStmt = GlobalReadPair.first;
3517     Instruction *GlobalRead = GlobalReadPair.second;
3518     for (auto *BP : ArrayBasePointers)
3519       addArrayAccess(GlobalReadStmt, MemAccInst(GlobalRead), MemoryAccess::READ,
3520                      BP, BP->getType(), false, {AF}, {nullptr}, GlobalRead);
3521   }
3522 
3523   buildInvariantEquivalenceClasses();
3524 
  // A map from basic blocks to their invalid domains.
3526   DenseMap<BasicBlock *, isl::set> InvalidDomainMap;
3527 
3528   if (!buildDomains(&R, InvalidDomainMap)) {
3529     LLVM_DEBUG(
3530         dbgs() << "Bailing-out because buildDomains encountered problems\n");
3531     return;
3532   }
3533 
3534   addUserAssumptions(AC, InvalidDomainMap);
3535 
3536   // Initialize the invalid domain.
3537   for (ScopStmt &Stmt : scop->Stmts)
3538     if (Stmt.isBlockStmt())
3539       Stmt.setInvalidDomain(InvalidDomainMap[Stmt.getEntryBlock()]);
3540     else
3541       Stmt.setInvalidDomain(InvalidDomainMap[getRegionNodeBasicBlock(
3542           Stmt.getRegion()->getNode())]);
3543 
3544   // Remove empty statements.
3545   // Exit early in case there are no executable statements left in this scop.
3546   scop->removeStmtNotInDomainMap();
3547   scop->simplifySCoP(false);
3548   if (scop->isEmpty()) {
3549     LLVM_DEBUG(dbgs() << "Bailing-out because SCoP is empty\n");
3550     return;
3551   }
3552 
3553   // The ScopStmts now have enough information to initialize themselves.
3554   for (ScopStmt &Stmt : *scop) {
3555     collectSurroundingLoops(Stmt);
3556 
3557     buildDomain(Stmt);
3558     buildAccessRelations(Stmt);
3559 
3560     if (DetectReductions)
3561       checkForReductions(Stmt);
3562   }
3563 
3564   // Check early for a feasible runtime context.
3565   if (!scop->hasFeasibleRuntimeContext()) {
3566     LLVM_DEBUG(dbgs() << "Bailing-out because of unfeasible context (early)\n");
3567     return;
3568   }
3569 
3570   // Check early for profitability. Afterwards it cannot change anymore,
3571   // only the runtime context could become infeasible.
3572   if (!scop->isProfitable(UnprofitableScalarAccs)) {
3573     scop->invalidate(PROFITABLE, DebugLoc());
3574     LLVM_DEBUG(
3575         dbgs() << "Bailing-out because SCoP is not considered profitable\n");
3576     return;
3577   }
3578 
3579   buildSchedule();
3580 
3581   finalizeAccesses();
3582 
3583   scop->realignParams();
3584   addUserContext();
3585 
3586   // After the context was fully constructed, thus all our knowledge about
3587   // the parameters is in there, we add all recorded assumptions to the
3588   // assumed/invalid context.
3589   addRecordedAssumptions();
3590 
3591   scop->simplifyContexts();
3592   if (!buildAliasChecks()) {
3593     LLVM_DEBUG(dbgs() << "Bailing-out because could not build alias checks\n");
3594     return;
3595   }
3596 
3597   hoistInvariantLoads();
3598   canonicalizeDynamicBasePtrs();
3599   verifyInvariantLoads();
3600   scop->simplifySCoP(true);
3601 
3602   // Check late for a feasible runtime context because profitability did not
3603   // change.
3604   if (!scop->hasFeasibleRuntimeContext()) {
3605     LLVM_DEBUG(dbgs() << "Bailing-out because of unfeasible context (late)\n");
3606     return;
3607   }
3608 
3609 #ifndef NDEBUG
3610   verifyUses(scop.get(), LI, DT);
3611 #endif
3612 }
3613 
3614 ScopBuilder::ScopBuilder(Region *R, AssumptionCache &AC, AliasAnalysis &AA,
3615                          const DataLayout &DL, DominatorTree &DT, LoopInfo &LI,
3616                          ScopDetection &SD, ScalarEvolution &SE,
3617                          OptimizationRemarkEmitter &ORE)
3618     : AA(AA), DL(DL), DT(DT), LI(LI), SD(SD), SE(SE), ORE(ORE) {
3619   DebugLoc Beg, End;
3620   auto P = getBBPairForRegion(R);
3621   getDebugLocations(P, Beg, End);
3622 
3623   std::string Msg = "SCoP begins here.";
3624   ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEntry", Beg, P.first)
3625            << Msg);
3626 
3627   buildScop(*R, AC);
3628 
3629   LLVM_DEBUG(dbgs() << *scop);
3630 
3631   if (!scop->hasFeasibleRuntimeContext()) {
3632     InfeasibleScops++;
3633     Msg = "SCoP ends here but was dismissed.";
3634     LLVM_DEBUG(dbgs() << "SCoP detected but dismissed\n");
3635     RecordedAssumptions.clear();
3636     scop.reset();
3637   } else {
3638     Msg = "SCoP ends here.";
3639     ++ScopFound;
3640     if (scop->getMaxLoopDepth() > 0)
3641       ++RichScopFound;
3642   }
3643 
3644   if (R->isTopLevelRegion())
3645     ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.first)
3646              << Msg);
3647   else
3648     ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.second)
3649              << Msg);
3650 }
3651