1 //===- ScopBuilder.cpp ----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Create a polyhedral description for a static control flow region.
10 //
// The pass creates a polyhedral description of the SCoPs detected by SCoP
// detection, derived from their LLVM-IR code.
13 //
14 //===----------------------------------------------------------------------===//
15
16 #include "polly/ScopBuilder.h"
17 #include "polly/Options.h"
18 #include "polly/ScopDetection.h"
19 #include "polly/ScopInfo.h"
20 #include "polly/Support/GICHelper.h"
21 #include "polly/Support/ISLTools.h"
22 #include "polly/Support/SCEVValidator.h"
23 #include "polly/Support/ScopHelper.h"
24 #include "polly/Support/VirtualInstruction.h"
25 #include "llvm/ADT/ArrayRef.h"
26 #include "llvm/ADT/EquivalenceClasses.h"
27 #include "llvm/ADT/PostOrderIterator.h"
28 #include "llvm/ADT/Sequence.h"
29 #include "llvm/ADT/SmallSet.h"
30 #include "llvm/ADT/Statistic.h"
31 #include "llvm/Analysis/AliasAnalysis.h"
32 #include "llvm/Analysis/AssumptionCache.h"
33 #include "llvm/Analysis/Delinearization.h"
34 #include "llvm/Analysis/Loads.h"
35 #include "llvm/Analysis/LoopInfo.h"
36 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
37 #include "llvm/Analysis/RegionInfo.h"
38 #include "llvm/Analysis/RegionIterator.h"
39 #include "llvm/Analysis/ScalarEvolution.h"
40 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
41 #include "llvm/IR/BasicBlock.h"
42 #include "llvm/IR/DataLayout.h"
43 #include "llvm/IR/DebugLoc.h"
44 #include "llvm/IR/DerivedTypes.h"
45 #include "llvm/IR/Dominators.h"
46 #include "llvm/IR/Function.h"
47 #include "llvm/IR/InstrTypes.h"
48 #include "llvm/IR/Instruction.h"
49 #include "llvm/IR/Instructions.h"
50 #include "llvm/IR/Type.h"
51 #include "llvm/IR/Use.h"
52 #include "llvm/IR/Value.h"
53 #include "llvm/Support/CommandLine.h"
54 #include "llvm/Support/Compiler.h"
55 #include "llvm/Support/Debug.h"
56 #include "llvm/Support/ErrorHandling.h"
57 #include "llvm/Support/raw_ostream.h"
58 #include <cassert>
59
60 using namespace llvm;
61 using namespace polly;
62
63 #define DEBUG_TYPE "polly-scops"
64
65 STATISTIC(ScopFound, "Number of valid Scops");
66 STATISTIC(RichScopFound, "Number of Scops containing a loop");
67 STATISTIC(InfeasibleScops,
68 "Number of SCoPs with statically infeasible context.");
69
70 bool polly::ModelReadOnlyScalars;
71
72 // The maximal number of dimensions we allow during invariant load construction.
73 // More complex access ranges will result in very high compile time and are also
74 // unlikely to result in good code. This value is very high and should only
75 // trigger for corner cases (e.g., the "dct_luma" function in h264, SPEC2006).
76 static unsigned const MaxDimensionsInAccessRange = 9;
77
78 static cl::opt<bool, true> XModelReadOnlyScalars(
79 "polly-analyze-read-only-scalars",
80 cl::desc("Model read-only scalar values in the scop description"),
81 cl::location(ModelReadOnlyScalars), cl::Hidden, cl::init(true),
82 cl::cat(PollyCategory));
83
84 static cl::opt<int>
85 OptComputeOut("polly-analysis-computeout",
86 cl::desc("Bound the scop analysis by a maximal amount of "
87 "computational steps (0 means no bound)"),
88 cl::Hidden, cl::init(800000), cl::cat(PollyCategory));
89
90 static cl::opt<bool> PollyAllowDereferenceOfAllFunctionParams(
91 "polly-allow-dereference-of-all-function-parameters",
92 cl::desc(
        "Treat all parameters to functions that are pointers as dereferenceable."
        " This is useful for invariant load hoisting, since we can generate"
        " fewer runtime checks. This is only valid if all pointers to functions"
        " are always initialized, so that Polly can choose to hoist"
        " their loads."),
98 cl::Hidden, cl::init(false), cl::cat(PollyCategory));
99
100 static cl::opt<bool>
101 PollyIgnoreInbounds("polly-ignore-inbounds",
102 cl::desc("Do not take inbounds assumptions at all"),
103 cl::Hidden, cl::init(false), cl::cat(PollyCategory));
104
105 static cl::opt<unsigned> RunTimeChecksMaxArraysPerGroup(
106 "polly-rtc-max-arrays-per-group",
107 cl::desc("The maximal number of arrays to compare in each alias group."),
108 cl::Hidden, cl::init(20), cl::cat(PollyCategory));
109
110 static cl::opt<unsigned> RunTimeChecksMaxAccessDisjuncts(
111 "polly-rtc-max-array-disjuncts",
    cl::desc("The maximal number of disjuncts allowed in memory accesses to "
             "build RTCs."),
114 cl::Hidden, cl::init(8), cl::cat(PollyCategory));
115
116 static cl::opt<unsigned> RunTimeChecksMaxParameters(
117 "polly-rtc-max-parameters",
118 cl::desc("The maximal number of parameters allowed in RTCs."), cl::Hidden,
119 cl::init(8), cl::cat(PollyCategory));
120
121 static cl::opt<bool> UnprofitableScalarAccs(
122 "polly-unprofitable-scalar-accs",
123 cl::desc("Count statements with scalar accesses as not optimizable"),
124 cl::Hidden, cl::init(false), cl::cat(PollyCategory));
125
126 static cl::opt<std::string> UserContextStr(
127 "polly-context", cl::value_desc("isl parameter set"),
128 cl::desc("Provide additional constraints on the context parameters"),
129 cl::init(""), cl::cat(PollyCategory));
130
131 static cl::opt<bool> DetectReductions("polly-detect-reductions",
132 cl::desc("Detect and exploit reductions"),
133 cl::Hidden, cl::init(true),
134 cl::cat(PollyCategory));
135
// Multiplicative reductions can be disabled separately as these kinds of
// operations can overflow easily. Additive reductions and bit operations
// are, in contrast, pretty stable.
139 static cl::opt<bool> DisableMultiplicativeReductions(
140 "polly-disable-multiplicative-reductions",
141 cl::desc("Disable multiplicative reductions"), cl::Hidden,
142 cl::cat(PollyCategory));
143
144 enum class GranularityChoice { BasicBlocks, ScalarIndependence, Stores };
145
146 static cl::opt<GranularityChoice> StmtGranularity(
147 "polly-stmt-granularity",
148 cl::desc(
149 "Algorithm to use for splitting basic blocks into multiple statements"),
150 cl::values(clEnumValN(GranularityChoice::BasicBlocks, "bb",
151 "One statement per basic block"),
152 clEnumValN(GranularityChoice::ScalarIndependence, "scalar-indep",
153 "Scalar independence heuristic"),
154 clEnumValN(GranularityChoice::Stores, "store",
155 "Store-level granularity")),
156 cl::init(GranularityChoice::ScalarIndependence), cl::cat(PollyCategory));
157
158 /// Helper to treat non-affine regions and basic blocks the same.
159 ///
160 ///{
161
162 /// Return the block that is the representing block for @p RN.
static inline BasicBlock *getRegionNodeBasicBlock(RegionNode *RN) {
164 return RN->isSubRegion() ? RN->getNodeAs<Region>()->getEntry()
165 : RN->getNodeAs<BasicBlock>();
166 }
167
168 /// Return the @p idx'th block that is executed after @p RN.
169 static inline BasicBlock *
getRegionNodeSuccessor(RegionNode *RN, Instruction *TI, unsigned idx) {
171 if (RN->isSubRegion()) {
172 assert(idx == 0);
173 return RN->getNodeAs<Region>()->getExit();
174 }
175 return TI->getSuccessor(idx);
176 }
177
static bool containsErrorBlock(RegionNode *RN, const Region &R,
179 ScopDetection *SD) {
180 if (!RN->isSubRegion())
181 return SD->isErrorBlock(*RN->getNodeAs<BasicBlock>(), R);
182 for (BasicBlock *BB : RN->getNodeAs<Region>()->blocks())
183 if (SD->isErrorBlock(*BB, R))
184 return true;
185 return false;
186 }
187
188 ///}
189
190 /// Create a map to map from a given iteration to a subsequent iteration.
191 ///
/// This map maps from SetSpace -> SetSpace where the dimension @p Dim
193 /// is incremented by one and all other dimensions are equal, e.g.,
194 /// [i0, i1, i2, i3] -> [i0, i1, i2 + 1, i3]
195 ///
196 /// if @p Dim is 2 and @p SetSpace has 4 dimensions.
static isl::map createNextIterationMap(isl::space SetSpace, unsigned Dim) {
198 isl::space MapSpace = SetSpace.map_from_set();
199 isl::map NextIterationMap = isl::map::universe(MapSpace);
200 for (unsigned u : rangeIslSize(0, NextIterationMap.domain_tuple_dim()))
201 if (u != Dim)
202 NextIterationMap =
203 NextIterationMap.equate(isl::dim::in, u, isl::dim::out, u);
204 isl::constraint C =
205 isl::constraint::alloc_equality(isl::local_space(MapSpace));
206 C = C.set_constant_si(1);
207 C = C.set_coefficient_si(isl::dim::in, Dim, 1);
208 C = C.set_coefficient_si(isl::dim::out, Dim, -1);
209 NextIterationMap = NextIterationMap.add_constraint(C);
210 return NextIterationMap;
211 }
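// As an illustration (a sketch, not part of the computation): for a
// 4-dimensional SetSpace and Dim == 2 the constructed map corresponds to the
// isl relation { [i0, i1, i2, i3] -> [i0, i1, i2 + 1, i3] }.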
212
213 /// Add @p BSet to set @p BoundedParts if @p BSet is bounded.
static isl::set collectBoundedParts(isl::set S) {
215 isl::set BoundedParts = isl::set::empty(S.get_space());
216 for (isl::basic_set BSet : S.get_basic_set_list())
217 if (BSet.is_bounded())
218 BoundedParts = BoundedParts.unite(isl::set(BSet));
219 return BoundedParts;
220 }
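// For intuition (illustrative only): a basic set such as
// { [i] : 0 <= i <= 10 } is bounded and is kept, whereas { [i] : i >= 0 } is
// unbounded and is dropped.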
221
/// Compute the (un)bounded parts of @p S with respect to dimension @p Dim.
223 ///
224 /// @returns A separation of @p S into first an unbounded then a bounded subset,
225 /// both with regards to the dimension @p Dim.
static std::pair<isl::set, isl::set> partitionSetParts(isl::set S,
227 unsigned Dim) {
228 for (unsigned u : rangeIslSize(0, S.tuple_dim()))
229 S = S.lower_bound_si(isl::dim::set, u, 0);
230
231 unsigned NumDimsS = unsignedFromIslSize(S.tuple_dim());
232 isl::set OnlyDimS = S;
233
234 // Remove dimensions that are greater than Dim as they are not interesting.
235 assert(NumDimsS >= Dim + 1);
236 OnlyDimS = OnlyDimS.project_out(isl::dim::set, Dim + 1, NumDimsS - Dim - 1);
237
238 // Create artificial parametric upper bounds for dimensions smaller than Dim
239 // as we are not interested in them.
240 OnlyDimS = OnlyDimS.insert_dims(isl::dim::param, 0, Dim);
241
242 for (unsigned u = 0; u < Dim; u++) {
243 isl::constraint C = isl::constraint::alloc_inequality(
244 isl::local_space(OnlyDimS.get_space()));
245 C = C.set_coefficient_si(isl::dim::param, u, 1);
246 C = C.set_coefficient_si(isl::dim::set, u, -1);
247 OnlyDimS = OnlyDimS.add_constraint(C);
248 }
249
250 // Collect all bounded parts of OnlyDimS.
251 isl::set BoundedParts = collectBoundedParts(OnlyDimS);
252
253 // Create the dimensions greater than Dim again.
254 BoundedParts =
255 BoundedParts.insert_dims(isl::dim::set, Dim + 1, NumDimsS - Dim - 1);
256
257 // Remove the artificial upper bound parameters again.
258 BoundedParts = BoundedParts.remove_dims(isl::dim::param, 0, Dim);
259
260 isl::set UnboundedParts = S.subtract(BoundedParts);
261 return std::make_pair(UnboundedParts, BoundedParts);
262 }
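// Example (a sketch): for S = { [i] : i >= 0 } and Dim == 0 the set is
// unbounded in dimension 0, so the result is (S, empty set). If S additionally
// carried a parametric upper bound such as i < N, the bounded part would be
// non-empty and the unbounded part empty.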
263
264 /// Create the conditions under which @p L @p Pred @p R is true.
static isl::set buildConditionSet(ICmpInst::Predicate Pred, isl::pw_aff L,
266 isl::pw_aff R) {
267 switch (Pred) {
268 case ICmpInst::ICMP_EQ:
269 return L.eq_set(R);
270 case ICmpInst::ICMP_NE:
271 return L.ne_set(R);
272 case ICmpInst::ICMP_SLT:
273 return L.lt_set(R);
274 case ICmpInst::ICMP_SLE:
275 return L.le_set(R);
276 case ICmpInst::ICMP_SGT:
277 return L.gt_set(R);
278 case ICmpInst::ICMP_SGE:
279 return L.ge_set(R);
280 case ICmpInst::ICMP_ULT:
281 return L.lt_set(R);
282 case ICmpInst::ICMP_UGT:
283 return L.gt_set(R);
284 case ICmpInst::ICMP_ULE:
285 return L.le_set(R);
286 case ICmpInst::ICMP_UGE:
287 return L.ge_set(R);
288 default:
289 llvm_unreachable("Non integer predicate not supported");
290 }
291 }
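// For example (illustrative only): with Pred == ICmpInst::ICMP_SLT,
// L = [N] -> { [i] -> [(i)] } and R = [N] -> { [i] -> [(N)] }, the returned
// set is [N] -> { [i] : i < N }.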
292
isl::set ScopBuilder::adjustDomainDimensions(isl::set Dom, Loop *OldL,
294 Loop *NewL) {
295 // If the loops are the same there is nothing to do.
296 if (NewL == OldL)
297 return Dom;
298
299 int OldDepth = scop->getRelativeLoopDepth(OldL);
300 int NewDepth = scop->getRelativeLoopDepth(NewL);
301 // If both loops are non-affine loops there is nothing to do.
302 if (OldDepth == -1 && NewDepth == -1)
303 return Dom;
304
305 // Distinguish three cases:
  //   1) The depth is the same but the loops are not.
  //      => One loop was left, one was entered.
  //   2) The depth increased from OldL to NewL.
  //      => One loop was entered, none was left.
  //   3) The depth decreased from OldL to NewL.
  //      => Loops were left; the difference of the depths defines how many.
312 if (OldDepth == NewDepth) {
313 assert(OldL->getParentLoop() == NewL->getParentLoop());
314 Dom = Dom.project_out(isl::dim::set, NewDepth, 1);
315 Dom = Dom.add_dims(isl::dim::set, 1);
316 } else if (OldDepth < NewDepth) {
317 assert(OldDepth + 1 == NewDepth);
318 auto &R = scop->getRegion();
319 (void)R;
320 assert(NewL->getParentLoop() == OldL ||
321 ((!OldL || !R.contains(OldL)) && R.contains(NewL)));
322 Dom = Dom.add_dims(isl::dim::set, 1);
323 } else {
324 assert(OldDepth > NewDepth);
325 unsigned Diff = OldDepth - NewDepth;
326 unsigned NumDim = unsignedFromIslSize(Dom.tuple_dim());
327 assert(NumDim >= Diff);
328 Dom = Dom.project_out(isl::dim::set, NumDim - Diff, Diff);
329 }
330
331 return Dom;
332 }
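// Sketch of the three cases handled above: 1) the dimension of the loop that
// was left is projected out and a fresh dimension is appended for the loop
// that was entered; 2) only a new dimension is appended; 3) the innermost
// OldDepth - NewDepth dimensions are projected out.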
333
334 /// Compute the isl representation for the SCEV @p E in this BB.
335 ///
336 /// @param BB The BB for which isl representation is to be
337 /// computed.
338 /// @param InvalidDomainMap A map of BB to their invalid domains.
339 /// @param E The SCEV that should be translated.
340 /// @param NonNegative Flag to indicate the @p E has to be non-negative.
341 ///
342 /// Note that this function will also adjust the invalid context accordingly.
343
344 __isl_give isl_pw_aff *
ScopBuilder::getPwAff(BasicBlock *BB,
346 DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
347 const SCEV *E, bool NonNegative) {
348 PWACtx PWAC = scop->getPwAff(E, BB, NonNegative, &RecordedAssumptions);
349 InvalidDomainMap[BB] = InvalidDomainMap[BB].unite(PWAC.second);
350 return PWAC.first.release();
351 }
352
353 /// Build condition sets for unsigned ICmpInst(s).
/// Special handling is required for unsigned operands to ensure that if the
/// MSB (aka the sign bit) is set for an operand in an unsigned ICmpInst it
/// wraps around.
///
/// @param IsStrictUpperBound holds information on the predicate relation
///        between TestVal and UpperBound, i.e.,
///        TestVal < UpperBound  OR  TestVal <= UpperBound
__isl_give isl_set *ScopBuilder::buildUnsignedConditionSets(
362 BasicBlock *BB, Value *Condition, __isl_keep isl_set *Domain,
363 const SCEV *SCEV_TestVal, const SCEV *SCEV_UpperBound,
364 DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
365 bool IsStrictUpperBound) {
366 // Do not take NonNeg assumption on TestVal
367 // as it might have MSB (Sign bit) set.
368 isl_pw_aff *TestVal = getPwAff(BB, InvalidDomainMap, SCEV_TestVal, false);
369 // Take NonNeg assumption on UpperBound.
370 isl_pw_aff *UpperBound =
371 getPwAff(BB, InvalidDomainMap, SCEV_UpperBound, true);
372
373 // 0 <= TestVal
374 isl_set *First =
375 isl_pw_aff_le_set(isl_pw_aff_zero_on_domain(isl_local_space_from_space(
376 isl_pw_aff_get_domain_space(TestVal))),
377 isl_pw_aff_copy(TestVal));
378
379 isl_set *Second;
380 if (IsStrictUpperBound)
381 // TestVal < UpperBound
382 Second = isl_pw_aff_lt_set(TestVal, UpperBound);
383 else
384 // TestVal <= UpperBound
385 Second = isl_pw_aff_le_set(TestVal, UpperBound);
386
387 isl_set *ConsequenceCondSet = isl_set_intersect(First, Second);
388 return ConsequenceCondSet;
389 }
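// In other words (a sketch): for a test such as "TestVal <u UpperBound" the
// consequence set built here is { 0 <= TestVal and TestVal < UpperBound },
// i.e., the unsigned predicate is modeled as a signed one plus an explicit
// non-negativity constraint on TestVal.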
390
bool ScopBuilder::buildConditionSets(
392 BasicBlock *BB, SwitchInst *SI, Loop *L, __isl_keep isl_set *Domain,
393 DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
394 SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
395 Value *Condition = getConditionFromTerminator(SI);
396 assert(Condition && "No condition for switch");
397
398 isl_pw_aff *LHS, *RHS;
399 LHS = getPwAff(BB, InvalidDomainMap, SE.getSCEVAtScope(Condition, L));
400
401 unsigned NumSuccessors = SI->getNumSuccessors();
402 ConditionSets.resize(NumSuccessors);
403 for (auto &Case : SI->cases()) {
404 unsigned Idx = Case.getSuccessorIndex();
405 ConstantInt *CaseValue = Case.getCaseValue();
406
407 RHS = getPwAff(BB, InvalidDomainMap, SE.getSCEV(CaseValue));
408 isl_set *CaseConditionSet =
409 buildConditionSet(ICmpInst::ICMP_EQ, isl::manage_copy(LHS),
410 isl::manage(RHS))
411 .release();
412 ConditionSets[Idx] = isl_set_coalesce(
413 isl_set_intersect(CaseConditionSet, isl_set_copy(Domain)));
414 }
415
416 assert(ConditionSets[0] == nullptr && "Default condition set was set");
417 isl_set *ConditionSetUnion = isl_set_copy(ConditionSets[1]);
418 for (unsigned u = 2; u < NumSuccessors; u++)
419 ConditionSetUnion =
420 isl_set_union(ConditionSetUnion, isl_set_copy(ConditionSets[u]));
421 ConditionSets[0] = isl_set_subtract(isl_set_copy(Domain), ConditionSetUnion);
422
423 isl_pw_aff_free(LHS);
424
425 return true;
426 }
427
bool ScopBuilder::buildConditionSets(
429 BasicBlock *BB, Value *Condition, Instruction *TI, Loop *L,
430 __isl_keep isl_set *Domain,
431 DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
432 SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
433 isl_set *ConsequenceCondSet = nullptr;
434
435 if (auto Load = dyn_cast<LoadInst>(Condition)) {
436 const SCEV *LHSSCEV = SE.getSCEVAtScope(Load, L);
437 const SCEV *RHSSCEV = SE.getZero(LHSSCEV->getType());
438 bool NonNeg = false;
439 isl_pw_aff *LHS = getPwAff(BB, InvalidDomainMap, LHSSCEV, NonNeg);
440 isl_pw_aff *RHS = getPwAff(BB, InvalidDomainMap, RHSSCEV, NonNeg);
441 ConsequenceCondSet = buildConditionSet(ICmpInst::ICMP_SLE, isl::manage(LHS),
442 isl::manage(RHS))
443 .release();
444 } else if (auto *PHI = dyn_cast<PHINode>(Condition)) {
445 auto *Unique = dyn_cast<ConstantInt>(
446 getUniqueNonErrorValue(PHI, &scop->getRegion(), &SD));
447 assert(Unique &&
448 "A PHINode condition should only be accepted by ScopDetection if "
449 "getUniqueNonErrorValue returns non-NULL");
450
451 if (Unique->isZero())
452 ConsequenceCondSet = isl_set_empty(isl_set_get_space(Domain));
453 else
454 ConsequenceCondSet = isl_set_universe(isl_set_get_space(Domain));
455 } else if (auto *CCond = dyn_cast<ConstantInt>(Condition)) {
456 if (CCond->isZero())
457 ConsequenceCondSet = isl_set_empty(isl_set_get_space(Domain));
458 else
459 ConsequenceCondSet = isl_set_universe(isl_set_get_space(Domain));
460 } else if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) {
461 auto Opcode = BinOp->getOpcode();
462 assert(Opcode == Instruction::And || Opcode == Instruction::Or);
463
464 bool Valid = buildConditionSets(BB, BinOp->getOperand(0), TI, L, Domain,
465 InvalidDomainMap, ConditionSets) &&
466 buildConditionSets(BB, BinOp->getOperand(1), TI, L, Domain,
467 InvalidDomainMap, ConditionSets);
468 if (!Valid) {
469 while (!ConditionSets.empty())
470 isl_set_free(ConditionSets.pop_back_val());
471 return false;
472 }
473
474 isl_set_free(ConditionSets.pop_back_val());
475 isl_set *ConsCondPart0 = ConditionSets.pop_back_val();
476 isl_set_free(ConditionSets.pop_back_val());
477 isl_set *ConsCondPart1 = ConditionSets.pop_back_val();
478
479 if (Opcode == Instruction::And)
480 ConsequenceCondSet = isl_set_intersect(ConsCondPart0, ConsCondPart1);
481 else
482 ConsequenceCondSet = isl_set_union(ConsCondPart0, ConsCondPart1);
483 } else {
484 auto *ICond = dyn_cast<ICmpInst>(Condition);
485 assert(ICond &&
486 "Condition of exiting branch was neither constant nor ICmp!");
487
488 Region &R = scop->getRegion();
489
490 isl_pw_aff *LHS, *RHS;
    // For unsigned comparisons we assumed the sign bit of neither operand
    // to be set. The comparison is equal to a signed comparison under this
    // assumption.
494 bool NonNeg = ICond->isUnsigned();
495 const SCEV *LeftOperand = SE.getSCEVAtScope(ICond->getOperand(0), L),
496 *RightOperand = SE.getSCEVAtScope(ICond->getOperand(1), L);
497
498 LeftOperand = tryForwardThroughPHI(LeftOperand, R, SE, &SD);
499 RightOperand = tryForwardThroughPHI(RightOperand, R, SE, &SD);
500
501 switch (ICond->getPredicate()) {
502 case ICmpInst::ICMP_ULT:
503 ConsequenceCondSet =
504 buildUnsignedConditionSets(BB, Condition, Domain, LeftOperand,
505 RightOperand, InvalidDomainMap, true);
506 break;
507 case ICmpInst::ICMP_ULE:
508 ConsequenceCondSet =
509 buildUnsignedConditionSets(BB, Condition, Domain, LeftOperand,
510 RightOperand, InvalidDomainMap, false);
511 break;
512 case ICmpInst::ICMP_UGT:
513 ConsequenceCondSet =
514 buildUnsignedConditionSets(BB, Condition, Domain, RightOperand,
515 LeftOperand, InvalidDomainMap, true);
516 break;
517 case ICmpInst::ICMP_UGE:
518 ConsequenceCondSet =
519 buildUnsignedConditionSets(BB, Condition, Domain, RightOperand,
520 LeftOperand, InvalidDomainMap, false);
521 break;
522 default:
523 LHS = getPwAff(BB, InvalidDomainMap, LeftOperand, NonNeg);
524 RHS = getPwAff(BB, InvalidDomainMap, RightOperand, NonNeg);
525 ConsequenceCondSet = buildConditionSet(ICond->getPredicate(),
526 isl::manage(LHS), isl::manage(RHS))
527 .release();
528 break;
529 }
530 }
531
532 // If no terminator was given we are only looking for parameter constraints
533 // under which @p Condition is true/false.
534 if (!TI)
535 ConsequenceCondSet = isl_set_params(ConsequenceCondSet);
536 assert(ConsequenceCondSet);
537 ConsequenceCondSet = isl_set_coalesce(
538 isl_set_intersect(ConsequenceCondSet, isl_set_copy(Domain)));
539
540 isl_set *AlternativeCondSet = nullptr;
541 bool TooComplex =
542 isl_set_n_basic_set(ConsequenceCondSet) >= (int)MaxDisjunctsInDomain;
543
544 if (!TooComplex) {
545 AlternativeCondSet = isl_set_subtract(isl_set_copy(Domain),
546 isl_set_copy(ConsequenceCondSet));
547 TooComplex =
548 isl_set_n_basic_set(AlternativeCondSet) >= (int)MaxDisjunctsInDomain;
549 }
550
551 if (TooComplex) {
552 scop->invalidate(COMPLEXITY, TI ? TI->getDebugLoc() : DebugLoc(),
553 TI ? TI->getParent() : nullptr /* BasicBlock */);
554 isl_set_free(AlternativeCondSet);
555 isl_set_free(ConsequenceCondSet);
556 return false;
557 }
558
559 ConditionSets.push_back(ConsequenceCondSet);
560 ConditionSets.push_back(isl_set_coalesce(AlternativeCondSet));
561
562 return true;
563 }
564
bool ScopBuilder::buildConditionSets(
566 BasicBlock *BB, Instruction *TI, Loop *L, __isl_keep isl_set *Domain,
567 DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
568 SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
569 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI))
570 return buildConditionSets(BB, SI, L, Domain, InvalidDomainMap,
571 ConditionSets);
572
573 assert(isa<BranchInst>(TI) && "Terminator was neither branch nor switch.");
574
575 if (TI->getNumSuccessors() == 1) {
576 ConditionSets.push_back(isl_set_copy(Domain));
577 return true;
578 }
579
580 Value *Condition = getConditionFromTerminator(TI);
581 assert(Condition && "No condition for Terminator");
582
583 return buildConditionSets(BB, Condition, TI, L, Domain, InvalidDomainMap,
584 ConditionSets);
585 }
586
bool ScopBuilder::propagateDomainConstraints(
588 Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  // Iterate over the region R and propagate the domain constraints from the
590 // predecessors to the current node. In contrast to the
591 // buildDomainsWithBranchConstraints function, this one will pull the domain
592 // information from the predecessors instead of pushing it to the successors.
593 // Additionally, we assume the domains to be already present in the domain
594 // map here. However, we iterate again in reverse post order so we know all
595 // predecessors have been visited before a block or non-affine subregion is
596 // visited.
597
598 ReversePostOrderTraversal<Region *> RTraversal(R);
599 for (auto *RN : RTraversal) {
600 // Recurse for affine subregions but go on for basic blocks and non-affine
601 // subregions.
602 if (RN->isSubRegion()) {
603 Region *SubRegion = RN->getNodeAs<Region>();
604 if (!scop->isNonAffineSubRegion(SubRegion)) {
605 if (!propagateDomainConstraints(SubRegion, InvalidDomainMap))
606 return false;
607 continue;
608 }
609 }
610
611 BasicBlock *BB = getRegionNodeBasicBlock(RN);
612 isl::set &Domain = scop->getOrInitEmptyDomain(BB);
613 assert(!Domain.is_null());
614
615 // Under the union of all predecessor conditions we can reach this block.
616 isl::set PredDom = getPredecessorDomainConstraints(BB, Domain);
617 Domain = Domain.intersect(PredDom).coalesce();
618 Domain = Domain.align_params(scop->getParamSpace());
619
620 Loop *BBLoop = getRegionNodeLoop(RN, LI);
621 if (BBLoop && BBLoop->getHeader() == BB && scop->contains(BBLoop))
622 if (!addLoopBoundsToHeaderDomain(BBLoop, InvalidDomainMap))
623 return false;
624 }
625
626 return true;
627 }
628
void ScopBuilder::propagateDomainConstraintsToRegionExit(
630 BasicBlock *BB, Loop *BBLoop,
631 SmallPtrSetImpl<BasicBlock *> &FinishedExitBlocks,
632 DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  // Check if the block @p BB is the entry of a region. If so we propagate its
634 // domain to the exit block of the region. Otherwise we are done.
635 auto *RI = scop->getRegion().getRegionInfo();
636 auto *BBReg = RI ? RI->getRegionFor(BB) : nullptr;
637 auto *ExitBB = BBReg ? BBReg->getExit() : nullptr;
638 if (!BBReg || BBReg->getEntry() != BB || !scop->contains(ExitBB))
639 return;
640
641 // Do not propagate the domain if there is a loop backedge inside the region
642 // that would prevent the exit block from being executed.
643 auto *L = BBLoop;
644 while (L && scop->contains(L)) {
645 SmallVector<BasicBlock *, 4> LatchBBs;
646 BBLoop->getLoopLatches(LatchBBs);
647 for (auto *LatchBB : LatchBBs)
648 if (BB != LatchBB && BBReg->contains(LatchBB))
649 return;
650 L = L->getParentLoop();
651 }
652
653 isl::set Domain = scop->getOrInitEmptyDomain(BB);
654 assert(!Domain.is_null() && "Cannot propagate a nullptr");
655
656 Loop *ExitBBLoop = getFirstNonBoxedLoopFor(ExitBB, LI, scop->getBoxedLoops());
657
658 // Since the dimensions of @p BB and @p ExitBB might be different we have to
659 // adjust the domain before we can propagate it.
660 isl::set AdjustedDomain = adjustDomainDimensions(Domain, BBLoop, ExitBBLoop);
661 isl::set &ExitDomain = scop->getOrInitEmptyDomain(ExitBB);
662
  // If the exit domain is not yet created we set it, otherwise we "add" the
664 // current domain.
665 ExitDomain =
666 !ExitDomain.is_null() ? AdjustedDomain.unite(ExitDomain) : AdjustedDomain;
667
668 // Initialize the invalid domain.
669 InvalidDomainMap[ExitBB] = ExitDomain.empty(ExitDomain.get_space());
670
671 FinishedExitBlocks.insert(ExitBB);
672 }
673
isl::set ScopBuilder::getPredecessorDomainConstraints(BasicBlock *BB,
675 isl::set Domain) {
676 // If @p BB is the ScopEntry we are done
677 if (scop->getRegion().getEntry() == BB)
678 return isl::set::universe(Domain.get_space());
679
680 // The region info of this function.
681 auto &RI = *scop->getRegion().getRegionInfo();
682
683 Loop *BBLoop = getFirstNonBoxedLoopFor(BB, LI, scop->getBoxedLoops());
684
685 // A domain to collect all predecessor domains, thus all conditions under
686 // which the block is executed. To this end we start with the empty domain.
687 isl::set PredDom = isl::set::empty(Domain.get_space());
688
689 // Set of regions of which the entry block domain has been propagated to BB.
  // All predecessors inside any of the regions can be skipped.
691 SmallSet<Region *, 8> PropagatedRegions;
692
693 for (auto *PredBB : predecessors(BB)) {
694 // Skip backedges.
695 if (DT.dominates(BB, PredBB))
696 continue;
697
698 // If the predecessor is in a region we used for propagation we can skip it.
699 auto PredBBInRegion = [PredBB](Region *PR) { return PR->contains(PredBB); };
700 if (llvm::any_of(PropagatedRegions, PredBBInRegion)) {
701 continue;
702 }
703
704 // Check if there is a valid region we can use for propagation, thus look
705 // for a region that contains the predecessor and has @p BB as exit block.
    // FIXME: This was a side-effect-free (and possibly infinite) loop when
707 // committed and seems not to be needed.
708 auto *PredR = RI.getRegionFor(PredBB);
709 while (PredR->getExit() != BB && !PredR->contains(BB))
710 PredR = PredR->getParent();
711
712 // If a valid region for propagation was found use the entry of that region
713 // for propagation, otherwise the PredBB directly.
714 if (PredR->getExit() == BB) {
715 PredBB = PredR->getEntry();
716 PropagatedRegions.insert(PredR);
717 }
718
719 isl::set PredBBDom = scop->getDomainConditions(PredBB);
720 Loop *PredBBLoop =
721 getFirstNonBoxedLoopFor(PredBB, LI, scop->getBoxedLoops());
722 PredBBDom = adjustDomainDimensions(PredBBDom, PredBBLoop, BBLoop);
723 PredDom = PredDom.unite(PredBBDom);
724 }
725
726 return PredDom;
727 }
728
bool ScopBuilder::addLoopBoundsToHeaderDomain(
730 Loop *L, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
731 int LoopDepth = scop->getRelativeLoopDepth(L);
732 assert(LoopDepth >= 0 && "Loop in region should have at least depth one");
733
734 BasicBlock *HeaderBB = L->getHeader();
735 assert(scop->isDomainDefined(HeaderBB));
736 isl::set &HeaderBBDom = scop->getOrInitEmptyDomain(HeaderBB);
737
738 isl::map NextIterationMap =
739 createNextIterationMap(HeaderBBDom.get_space(), LoopDepth);
740
741 isl::set UnionBackedgeCondition = HeaderBBDom.empty(HeaderBBDom.get_space());
742
743 SmallVector<BasicBlock *, 4> LatchBlocks;
744 L->getLoopLatches(LatchBlocks);
745
746 for (BasicBlock *LatchBB : LatchBlocks) {
747 // If the latch is only reachable via error statements we skip it.
748 if (!scop->isDomainDefined(LatchBB))
749 continue;
750
751 isl::set LatchBBDom = scop->getDomainConditions(LatchBB);
752
753 isl::set BackedgeCondition;
754
755 Instruction *TI = LatchBB->getTerminator();
756 BranchInst *BI = dyn_cast<BranchInst>(TI);
757 assert(BI && "Only branch instructions allowed in loop latches");
758
759 if (BI->isUnconditional())
760 BackedgeCondition = LatchBBDom;
761 else {
762 SmallVector<isl_set *, 8> ConditionSets;
763 int idx = BI->getSuccessor(0) != HeaderBB;
764 if (!buildConditionSets(LatchBB, TI, L, LatchBBDom.get(),
765 InvalidDomainMap, ConditionSets))
766 return false;
767
768 // Free the non back edge condition set as we do not need it.
769 isl_set_free(ConditionSets[1 - idx]);
770
771 BackedgeCondition = isl::manage(ConditionSets[idx]);
772 }
773
774 int LatchLoopDepth = scop->getRelativeLoopDepth(LI.getLoopFor(LatchBB));
775 assert(LatchLoopDepth >= LoopDepth);
776 BackedgeCondition = BackedgeCondition.project_out(
777 isl::dim::set, LoopDepth + 1, LatchLoopDepth - LoopDepth);
778 UnionBackedgeCondition = UnionBackedgeCondition.unite(BackedgeCondition);
779 }
780
  isl::map ForwardMap = isl::map::lex_le(HeaderBBDom.get_space());
782 for (int i = 0; i < LoopDepth; i++)
783 ForwardMap = ForwardMap.equate(isl::dim::in, i, isl::dim::out, i);
784
785 isl::set UnionBackedgeConditionComplement =
786 UnionBackedgeCondition.complement();
787 UnionBackedgeConditionComplement =
788 UnionBackedgeConditionComplement.lower_bound_si(isl::dim::set, LoopDepth,
789 0);
790 UnionBackedgeConditionComplement =
791 UnionBackedgeConditionComplement.apply(ForwardMap);
792 HeaderBBDom = HeaderBBDom.subtract(UnionBackedgeConditionComplement);
793 HeaderBBDom = HeaderBBDom.apply(NextIterationMap);
794
795 auto Parts = partitionSetParts(HeaderBBDom, LoopDepth);
796 HeaderBBDom = Parts.second;
797
798 // Check if there is a <nsw> tagged AddRec for this loop and if so do not
799 // require a runtime check. The assumption is already implied by the <nsw>
800 // tag.
801 bool RequiresRTC = !scop->hasNSWAddRecForLoop(L);
802
803 isl::set UnboundedCtx = Parts.first.params();
804 recordAssumption(&RecordedAssumptions, INFINITELOOP, UnboundedCtx,
805 HeaderBB->getTerminator()->getDebugLoc(), AS_RESTRICTION,
806 nullptr, RequiresRTC);
807 return true;
808 }
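// Informal summary of the computation above: a header iteration stays in the
// domain only if all of its preceding iterations satisfied some backedge
// condition; partitionSetParts then splits off the part that is unbounded in
// the loop dimension, which is recorded as an INFINITELOOP assumption (the
// runtime check is skipped when an <nsw>-tagged AddRec already implies it).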
809
void ScopBuilder::buildInvariantEquivalenceClasses() {
811 DenseMap<std::pair<const SCEV *, Type *>, LoadInst *> EquivClasses;
812
813 const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
814 for (LoadInst *LInst : RIL) {
815 const SCEV *PointerSCEV = SE.getSCEV(LInst->getPointerOperand());
816
817 Type *Ty = LInst->getType();
818 LoadInst *&ClassRep = EquivClasses[std::make_pair(PointerSCEV, Ty)];
819 if (ClassRep) {
820 scop->addInvariantLoadMapping(LInst, ClassRep);
821 continue;
822 }
823
824 ClassRep = LInst;
825 scop->addInvariantEquivClass(
826 InvariantEquivClassTy{PointerSCEV, MemoryAccessList(), {}, Ty});
827 }
828 }
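// For example (illustrative): two required invariant loads from the same
// pointer SCEV with the same result type fall into one equivalence class, and
// the second load is mapped to the first as its representative; a load of a
// different type from the same pointer starts its own class.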
829
bool ScopBuilder::buildDomains(
831 Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
832 bool IsOnlyNonAffineRegion = scop->isNonAffineSubRegion(R);
833 auto *EntryBB = R->getEntry();
834 auto *L = IsOnlyNonAffineRegion ? nullptr : LI.getLoopFor(EntryBB);
835 int LD = scop->getRelativeLoopDepth(L);
836 auto *S =
837 isl_set_universe(isl_space_set_alloc(scop->getIslCtx().get(), 0, LD + 1));
838
839 InvalidDomainMap[EntryBB] = isl::manage(isl_set_empty(isl_set_get_space(S)));
840 isl::set Domain = isl::manage(S);
841 scop->setDomain(EntryBB, Domain);
842
843 if (IsOnlyNonAffineRegion)
844 return !containsErrorBlock(R->getNode(), *R, &SD);
845
846 if (!buildDomainsWithBranchConstraints(R, InvalidDomainMap))
847 return false;
848
849 if (!propagateDomainConstraints(R, InvalidDomainMap))
850 return false;
851
852 // Error blocks and blocks dominated by them have been assumed to never be
853 // executed. Representing them in the Scop does not add any value. In fact,
854 // it is likely to cause issues during construction of the ScopStmts. The
855 // contents of error blocks have not been verified to be expressible and
856 // will cause problems when building up a ScopStmt for them.
857 // Furthermore, basic blocks dominated by error blocks may reference
858 // instructions in the error block which, if the error block is not modeled,
859 // can themselves not be constructed properly. To this end we will replace
860 // the domains of error blocks and those only reachable via error blocks
861 // with an empty set. Additionally, we will record for each block under which
862 // parameter combination it would be reached via an error block in its
863 // InvalidDomain. This information is needed during load hoisting.
864 if (!propagateInvalidStmtDomains(R, InvalidDomainMap))
865 return false;
866
867 return true;
868 }
869
bool ScopBuilder::buildDomainsWithBranchConstraints(
871 Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
872 // To create the domain for each block in R we iterate over all blocks and
873 // subregions in R and propagate the conditions under which the current region
874 // element is executed. To this end we iterate in reverse post order over R as
875 // it ensures that we first visit all predecessors of a region node (either a
876 // basic block or a subregion) before we visit the region node itself.
877 // Initially, only the domain for the SCoP region entry block is set and from
878 // there we propagate the current domain to all successors, however we add the
879 // condition that the successor is actually executed next.
880 // As we are only interested in non-loop carried constraints here we can
881 // simply skip loop back edges.
882
883 SmallPtrSet<BasicBlock *, 8> FinishedExitBlocks;
884 ReversePostOrderTraversal<Region *> RTraversal(R);
885 for (auto *RN : RTraversal) {
886 // Recurse for affine subregions but go on for basic blocks and non-affine
887 // subregions.
888 if (RN->isSubRegion()) {
889 Region *SubRegion = RN->getNodeAs<Region>();
890 if (!scop->isNonAffineSubRegion(SubRegion)) {
891 if (!buildDomainsWithBranchConstraints(SubRegion, InvalidDomainMap))
892 return false;
893 continue;
894 }
895 }
896
    if (containsErrorBlock(RN, scop->getRegion(), &SD))
      scop->notifyErrorBlock();
900
901 BasicBlock *BB = getRegionNodeBasicBlock(RN);
902 Instruction *TI = BB->getTerminator();
903
904 if (isa<UnreachableInst>(TI))
905 continue;
906
907 if (!scop->isDomainDefined(BB))
908 continue;
909 isl::set Domain = scop->getDomainConditions(BB);
910
911 scop->updateMaxLoopDepth(unsignedFromIslSize(Domain.tuple_dim()));
912
913 auto *BBLoop = getRegionNodeLoop(RN, LI);
914 // Propagate the domain from BB directly to blocks that have a superset
915 // domain, at the moment only region exit nodes of regions that start in BB.
916 propagateDomainConstraintsToRegionExit(BB, BBLoop, FinishedExitBlocks,
917 InvalidDomainMap);
918
919 // If all successors of BB have been set a domain through the propagation
920 // above we do not need to build condition sets but can just skip this
921 // block. However, it is important to note that this is a local property
922 // with regards to the region @p R. To this end FinishedExitBlocks is a
923 // local variable.
924 auto IsFinishedRegionExit = [&FinishedExitBlocks](BasicBlock *SuccBB) {
925 return FinishedExitBlocks.count(SuccBB);
926 };
927 if (std::all_of(succ_begin(BB), succ_end(BB), IsFinishedRegionExit))
928 continue;
929
930 // Build the condition sets for the successor nodes of the current region
931 // node. If it is a non-affine subregion we will always execute the single
932 // exit node, hence the single entry node domain is the condition set. For
933 // basic blocks we use the helper function buildConditionSets.
934 SmallVector<isl_set *, 8> ConditionSets;
935 if (RN->isSubRegion())
936 ConditionSets.push_back(Domain.copy());
937 else if (!buildConditionSets(BB, TI, BBLoop, Domain.get(), InvalidDomainMap,
938 ConditionSets))
939 return false;
940
941 // Now iterate over the successors and set their initial domain based on
942 // their condition set. We skip back edges here and have to be careful when
943 // we leave a loop not to keep constraints over a dimension that doesn't
944 // exist anymore.
945 assert(RN->isSubRegion() || TI->getNumSuccessors() == ConditionSets.size());
946 for (unsigned u = 0, e = ConditionSets.size(); u < e; u++) {
947 isl::set CondSet = isl::manage(ConditionSets[u]);
948 BasicBlock *SuccBB = getRegionNodeSuccessor(RN, TI, u);
949
950 // Skip blocks outside the region.
951 if (!scop->contains(SuccBB))
952 continue;
953
954 // If we propagate the domain of some block to "SuccBB" we do not have to
955 // adjust the domain.
956 if (FinishedExitBlocks.count(SuccBB))
957 continue;
958
959 // Skip back edges.
960 if (DT.dominates(SuccBB, BB))
961 continue;
962
963 Loop *SuccBBLoop =
964 getFirstNonBoxedLoopFor(SuccBB, LI, scop->getBoxedLoops());
965
966 CondSet = adjustDomainDimensions(CondSet, BBLoop, SuccBBLoop);
967
968 // Set the domain for the successor or merge it with an existing domain in
969 // case there are multiple paths (without loop back edges) to the
970 // successor block.
971 isl::set &SuccDomain = scop->getOrInitEmptyDomain(SuccBB);
972
973 if (!SuccDomain.is_null()) {
974 SuccDomain = SuccDomain.unite(CondSet).coalesce();
975 } else {
976 // Initialize the invalid domain.
977 InvalidDomainMap[SuccBB] = CondSet.empty(CondSet.get_space());
978 SuccDomain = CondSet;
979 }
980
981 SuccDomain = SuccDomain.detect_equalities();
982
983 // Check if the maximal number of domain disjunctions was reached.
984 // In case this happens we will clean up and bail.
985 if (unsignedFromIslSize(SuccDomain.n_basic_set()) < MaxDisjunctsInDomain)
986 continue;
987
988 scop->invalidate(COMPLEXITY, DebugLoc());
989 while (++u < ConditionSets.size())
990 isl_set_free(ConditionSets[u]);
991 return false;
992 }
993 }
994
995 return true;
996 }
997
bool ScopBuilder::propagateInvalidStmtDomains(
999 Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
1000 ReversePostOrderTraversal<Region *> RTraversal(R);
1001 for (auto *RN : RTraversal) {
1002
1003 // Recurse for affine subregions but go on for basic blocks and non-affine
1004 // subregions.
1005 if (RN->isSubRegion()) {
1006 Region *SubRegion = RN->getNodeAs<Region>();
1007 if (!scop->isNonAffineSubRegion(SubRegion)) {
1008 propagateInvalidStmtDomains(SubRegion, InvalidDomainMap);
1009 continue;
1010 }
1011 }
1012
1013 bool ContainsErrorBlock = containsErrorBlock(RN, scop->getRegion(), &SD);
1014 BasicBlock *BB = getRegionNodeBasicBlock(RN);
1015 isl::set &Domain = scop->getOrInitEmptyDomain(BB);
1016 assert(!Domain.is_null() && "Cannot propagate a nullptr");
1017
1018 isl::set InvalidDomain = InvalidDomainMap[BB];
1019
1020 bool IsInvalidBlock = ContainsErrorBlock || Domain.is_subset(InvalidDomain);
1021
1022 if (!IsInvalidBlock) {
1023 InvalidDomain = InvalidDomain.intersect(Domain);
1024 } else {
1025 InvalidDomain = Domain;
1026 isl::set DomPar = Domain.params();
1027 recordAssumption(&RecordedAssumptions, ERRORBLOCK, DomPar,
1028 BB->getTerminator()->getDebugLoc(), AS_RESTRICTION);
1029 Domain = isl::set::empty(Domain.get_space());
1030 }
1031
1032 if (InvalidDomain.is_empty()) {
1033 InvalidDomainMap[BB] = InvalidDomain;
1034 continue;
1035 }
1036
1037 auto *BBLoop = getRegionNodeLoop(RN, LI);
1038 auto *TI = BB->getTerminator();
1039 unsigned NumSuccs = RN->isSubRegion() ? 1 : TI->getNumSuccessors();
1040 for (unsigned u = 0; u < NumSuccs; u++) {
1041 auto *SuccBB = getRegionNodeSuccessor(RN, TI, u);
1042
1043 // Skip successors outside the SCoP.
1044 if (!scop->contains(SuccBB))
1045 continue;
1046
1047 // Skip backedges.
1048 if (DT.dominates(SuccBB, BB))
1049 continue;
1050
1051 Loop *SuccBBLoop =
1052 getFirstNonBoxedLoopFor(SuccBB, LI, scop->getBoxedLoops());
1053
1054 auto AdjustedInvalidDomain =
1055 adjustDomainDimensions(InvalidDomain, BBLoop, SuccBBLoop);
1056
1057 isl::set SuccInvalidDomain = InvalidDomainMap[SuccBB];
1058 SuccInvalidDomain = SuccInvalidDomain.unite(AdjustedInvalidDomain);
1059 SuccInvalidDomain = SuccInvalidDomain.coalesce();
1060
1061 InvalidDomainMap[SuccBB] = SuccInvalidDomain;
1062
1063 // Check if the maximal number of domain disjunctions was reached.
1064 // In case this happens we will bail.
1065 if (unsignedFromIslSize(SuccInvalidDomain.n_basic_set()) <
1066 MaxDisjunctsInDomain)
1067 continue;
1068
1069 InvalidDomainMap.erase(BB);
1070 scop->invalidate(COMPLEXITY, TI->getDebugLoc(), TI->getParent());
1071 return false;
1072 }
1073
1074 InvalidDomainMap[BB] = InvalidDomain;
1075 }
1076
1077 return true;
1078 }
1079
void ScopBuilder::buildPHIAccesses(ScopStmt *PHIStmt, PHINode *PHI,
1081 Region *NonAffineSubRegion,
1082 bool IsExitBlock) {
1083 // PHI nodes that are in the exit block of the region, hence if IsExitBlock is
1084 // true, are not modeled as ordinary PHI nodes as they are not part of the
1085 // region. However, we model the operands in the predecessor blocks that are
1086 // part of the region as regular scalar accesses.
1087
1088 // If we can synthesize a PHI we can skip it, however only if it is in
1089 // the region. If it is not it can only be in the exit block of the region.
1090 // In this case we model the operands but not the PHI itself.
1091 auto *Scope = LI.getLoopFor(PHI->getParent());
1092 if (!IsExitBlock && canSynthesize(PHI, *scop, &SE, Scope))
1093 return;
1094
1095 // PHI nodes are modeled as if they had been demoted prior to the SCoP
1096 // detection. Hence, the PHI is a load of a new memory location in which the
1097 // incoming value was written at the end of the incoming basic block.
1098 bool OnlyNonAffineSubRegionOperands = true;
1099 for (unsigned u = 0; u < PHI->getNumIncomingValues(); u++) {
1100 Value *Op = PHI->getIncomingValue(u);
1101 BasicBlock *OpBB = PHI->getIncomingBlock(u);
1102 ScopStmt *OpStmt = scop->getIncomingStmtFor(PHI->getOperandUse(u));
1103
1104 // Do not build PHI dependences inside a non-affine subregion, but make
1105 // sure that the necessary scalar values are still made available.
1106 if (NonAffineSubRegion && NonAffineSubRegion->contains(OpBB)) {
1107 auto *OpInst = dyn_cast<Instruction>(Op);
1108 if (!OpInst || !NonAffineSubRegion->contains(OpInst))
1109 ensureValueRead(Op, OpStmt);
1110 continue;
1111 }
1112
1113 OnlyNonAffineSubRegionOperands = false;
1114 ensurePHIWrite(PHI, OpStmt, OpBB, Op, IsExitBlock);
1115 }
1116
1117 if (!OnlyNonAffineSubRegionOperands && !IsExitBlock) {
1118 addPHIReadAccess(PHIStmt, PHI);
1119 }
1120 }
1121
void ScopBuilder::buildScalarDependences(ScopStmt *UserStmt,
1123 Instruction *Inst) {
1124 assert(!isa<PHINode>(Inst));
1125
1126 // Pull-in required operands.
1127 for (Use &Op : Inst->operands())
1128 ensureValueRead(Op.get(), UserStmt);
1129 }
1130
1131 // Create a sequence of two schedules. Either argument may be null and is
1132 // interpreted as the empty schedule. Can also return null if both schedules are
1133 // empty.
static isl::schedule combineInSequence(isl::schedule Prev, isl::schedule Succ) {
1135 if (Prev.is_null())
1136 return Succ;
1137 if (Succ.is_null())
1138 return Prev;
1139
1140 return Prev.sequence(Succ);
1141 }
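// Sketch: combineInSequence(A, B) corresponds to a schedule-tree sequence node
// in which A's statements are executed before B's; a null argument simply
// yields the other schedule.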
1142
1143 // Create an isl_multi_union_aff that defines an identity mapping from the
1144 // elements of USet to their N-th dimension.
1145 //
1146 // # Example:
1147 //
1148 // Domain: { A[i,j]; B[i,j,k] }
1149 // N: 1
1150 //
// Resulting Mapping: { A[i,j] -> [(j)]; B[i,j,k] -> [(j)] }
1152 //
1153 // @param USet A union set describing the elements for which to generate a
1154 // mapping.
1155 // @param N The dimension to map to.
1156 // @returns A mapping from USet to its N-th dimension.
static isl::multi_union_pw_aff mapToDimension(isl::union_set USet, unsigned N) {
1158 assert(!USet.is_null());
1159 assert(!USet.is_empty());
1160
1161 auto Result = isl::union_pw_multi_aff::empty(USet.get_space());
1162
1163 for (isl::set S : USet.get_set_list()) {
1164 unsigned Dim = unsignedFromIslSize(S.tuple_dim());
1165 assert(Dim >= N);
1166 auto PMA = isl::pw_multi_aff::project_out_map(S.get_space(), isl::dim::set,
1167 N, Dim - N);
1168 if (N > 1)
1169 PMA = PMA.drop_dims(isl::dim::out, 0, N - 1);
1170
1171 Result = Result.add_pw_multi_aff(PMA);
1172 }
1173
1174 return isl::multi_union_pw_aff(isl::union_pw_multi_aff(Result));
1175 }
1176
void ScopBuilder::buildSchedule() {
1178 Loop *L = getLoopSurroundingScop(*scop, LI);
1179 LoopStackTy LoopStack({LoopStackElementTy(L, {}, 0)});
1180 buildSchedule(scop->getRegion().getNode(), LoopStack);
1181 assert(LoopStack.size() == 1 && LoopStack.back().L == L);
1182 scop->setScheduleTree(LoopStack[0].Schedule);
1183 }
1184
1185 /// To generate a schedule for the elements in a Region we traverse the Region
1186 /// in reverse-post-order and add the contained RegionNodes in traversal order
1187 /// to the schedule of the loop that is currently at the top of the LoopStack.
1188 /// For loop-free codes, this results in a correct sequential ordering.
1189 ///
1190 /// Example:
1191 /// bb1(0)
1192 /// / \.
1193 /// bb2(1) bb3(2)
1194 /// \ / \.
1195 /// bb4(3) bb5(4)
1196 /// \ /
1197 /// bb6(5)
1198 ///
1199 /// Including loops requires additional processing. Whenever a loop header is
1200 /// encountered, the corresponding loop is added to the @p LoopStack. Starting
1201 /// from an empty schedule, we first process all RegionNodes that are within
1202 /// this loop and complete the sequential schedule at this loop-level before
/// processing any other nodes. To implement this
/// loop-nodes-first-processing, the reverse post-order traversal is
/// insufficient. Hence, we additionally check if the traversal yields
/// sub-regions or blocks that are outside the last loop on the @p LoopStack.
/// These region-nodes are then queued and only traversed after all nodes
/// within the current loop have been processed.
void ScopBuilder::buildSchedule(Region *R, LoopStackTy &LoopStack) {
1210 Loop *OuterScopLoop = getLoopSurroundingScop(*scop, LI);
1211
1212 ReversePostOrderTraversal<Region *> RTraversal(R);
1213 std::deque<RegionNode *> WorkList(RTraversal.begin(), RTraversal.end());
1214 std::deque<RegionNode *> DelayList;
1215 bool LastRNWaiting = false;
1216
1217 // Iterate over the region @p R in reverse post-order but queue
1218 // sub-regions/blocks iff they are not part of the last encountered but not
1219 // completely traversed loop. The variable LastRNWaiting is a flag to indicate
1220 // that we queued the last sub-region/block from the reverse post-order
1221 // iterator. If it is set we have to explore the next sub-region/block from
1222 // the iterator (if any) to guarantee progress. If it is not set we first try
1223 // the next queued sub-region/blocks.
1224 while (!WorkList.empty() || !DelayList.empty()) {
1225 RegionNode *RN;
1226
1227 if ((LastRNWaiting && !WorkList.empty()) || DelayList.empty()) {
1228 RN = WorkList.front();
1229 WorkList.pop_front();
1230 LastRNWaiting = false;
1231 } else {
1232 RN = DelayList.front();
1233 DelayList.pop_front();
1234 }
1235
1236 Loop *L = getRegionNodeLoop(RN, LI);
1237 if (!scop->contains(L))
1238 L = OuterScopLoop;
1239
1240 Loop *LastLoop = LoopStack.back().L;
1241 if (LastLoop != L) {
1242 if (LastLoop && !LastLoop->contains(L)) {
1243 LastRNWaiting = true;
1244 DelayList.push_back(RN);
1245 continue;
1246 }
1247 LoopStack.push_back({L, {}, 0});
1248 }
1249 buildSchedule(RN, LoopStack);
1250 }
1251 }
1252
void ScopBuilder::buildSchedule(RegionNode *RN, LoopStackTy &LoopStack) {
1254 if (RN->isSubRegion()) {
1255 auto *LocalRegion = RN->getNodeAs<Region>();
1256 if (!scop->isNonAffineSubRegion(LocalRegion)) {
1257 buildSchedule(LocalRegion, LoopStack);
1258 return;
1259 }
1260 }
1261
1262 assert(LoopStack.rbegin() != LoopStack.rend());
1263 auto LoopData = LoopStack.rbegin();
1264 LoopData->NumBlocksProcessed += getNumBlocksInRegionNode(RN);
1265
1266 for (auto *Stmt : scop->getStmtListFor(RN)) {
1267 isl::union_set UDomain{Stmt->getDomain()};
1268 auto StmtSchedule = isl::schedule::from_domain(UDomain);
1269 LoopData->Schedule = combineInSequence(LoopData->Schedule, StmtSchedule);
1270 }
1271
1272 // Check if we just processed the last node in this loop. If we did, finalize
1273 // the loop by:
1274 //
1275 // - adding new schedule dimensions
1276 // - folding the resulting schedule into the parent loop schedule
1277 // - dropping the loop schedule from the LoopStack.
1278 //
1279 // Then continue to check surrounding loops, which might also have been
1280 // completed by this node.
1281 size_t Dimension = LoopStack.size();
1282 while (LoopData->L &&
1283 LoopData->NumBlocksProcessed == getNumBlocksInLoop(LoopData->L)) {
1284 isl::schedule Schedule = LoopData->Schedule;
1285 auto NumBlocksProcessed = LoopData->NumBlocksProcessed;
1286
1287 assert(std::next(LoopData) != LoopStack.rend());
1288 Loop *L = LoopData->L;
1289 ++LoopData;
1290 --Dimension;
1291
1292 if (!Schedule.is_null()) {
1293 isl::union_set Domain = Schedule.get_domain();
1294 isl::multi_union_pw_aff MUPA = mapToDimension(Domain, Dimension);
1295 Schedule = Schedule.insert_partial_schedule(MUPA);
1296
1297 if (hasDisableAllTransformsHint(L)) {
1298 /// If any of the loops has a disable_nonforced heuristic, mark the
1299 /// entire SCoP as such. The ISL rescheduler can only reschedule the
1300 /// SCoP in its entirety.
        /// TODO: ScopDetection could avoid including such loops or wrap them
        /// as boxed loops. It still needs to pass through loops with
        /// user-defined metadata.
1304 scop->markDisableHeuristics();
1305 }
1306
      // It is easier to insert the marks here than to do it retroactively.
1308 isl::id IslLoopId = createIslLoopAttr(scop->getIslCtx(), L);
1309 if (!IslLoopId.is_null())
1310 Schedule =
1311 Schedule.get_root().child(0).insert_mark(IslLoopId).get_schedule();
1312
1313 LoopData->Schedule = combineInSequence(LoopData->Schedule, Schedule);
1314 }
1315
1316 LoopData->NumBlocksProcessed += NumBlocksProcessed;
1317 }
  // Now pop all loops processed above from the LoopStack.
1319 LoopStack.erase(LoopStack.begin() + Dimension, LoopStack.end());
1320 }
1321
void ScopBuilder::buildEscapingDependences(Instruction *Inst) {
1323 // Check for uses of this instruction outside the scop. Because we do not
1324 // iterate over such instructions and therefore did not "ensure" the existence
1325 // of a write, we must determine such use here.
1326 if (scop->isEscaping(Inst))
1327 ensureValueWrite(Inst);
1328 }
1329
void ScopBuilder::addRecordedAssumptions() {
1331 for (auto &AS : llvm::reverse(RecordedAssumptions)) {
1332
1333 if (!AS.BB) {
1334 scop->addAssumption(AS.Kind, AS.Set, AS.Loc, AS.Sign,
1335 nullptr /* BasicBlock */, AS.RequiresRTC);
1336 continue;
1337 }
1338
1339 // If the domain was deleted the assumptions are void.
1340 isl_set *Dom = scop->getDomainConditions(AS.BB).release();
1341 if (!Dom)
1342 continue;
1343
1344 // If a basic block was given use its domain to simplify the assumption.
1345 // In case of restrictions we know they only have to hold on the domain,
1346 // thus we can intersect them with the domain of the block. However, for
1347 // assumptions the domain has to imply them, thus:
    //                     _              _____
    //   Dom => S   <==>   A v B   <==>   A - B
1350 //
1351 // To avoid the complement we will register A - B as a restriction not an
1352 // assumption.
1353 isl_set *S = AS.Set.copy();
1354 if (AS.Sign == AS_RESTRICTION)
1355 S = isl_set_params(isl_set_intersect(S, Dom));
1356 else /* (AS.Sign == AS_ASSUMPTION) */
1357 S = isl_set_params(isl_set_subtract(Dom, S));
1358
1359 scop->addAssumption(AS.Kind, isl::manage(S), AS.Loc, AS_RESTRICTION, AS.BB,
1360 AS.RequiresRTC);
1361 }
1362 }
1363
void ScopBuilder::addUserAssumptions(
1365 AssumptionCache &AC, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
1366 for (auto &Assumption : AC.assumptions()) {
1367 auto *CI = dyn_cast_or_null<CallInst>(Assumption);
1368 if (!CI || CI->arg_size() != 1)
1369 continue;
1370
1371 bool InScop = scop->contains(CI);
1372 if (!InScop && !scop->isDominatedBy(DT, CI->getParent()))
1373 continue;
1374
1375 auto *L = LI.getLoopFor(CI->getParent());
1376 auto *Val = CI->getArgOperand(0);
1377 ParameterSetTy DetectedParams;
1378 auto &R = scop->getRegion();
1379 if (!isAffineConstraint(Val, &R, L, SE, DetectedParams)) {
1380 ORE.emit(
1381 OptimizationRemarkAnalysis(DEBUG_TYPE, "IgnoreUserAssumption", CI)
1382 << "Non-affine user assumption ignored.");
1383 continue;
1384 }
1385
1386 // Collect all newly introduced parameters.
1387 ParameterSetTy NewParams;
1388 for (auto *Param : DetectedParams) {
1389 Param = extractConstantFactor(Param, SE).second;
1390 Param = scop->getRepresentingInvariantLoadSCEV(Param);
1391 if (scop->isParam(Param))
1392 continue;
1393 NewParams.insert(Param);
1394 }
1395
1396 SmallVector<isl_set *, 2> ConditionSets;
1397 auto *TI = InScop ? CI->getParent()->getTerminator() : nullptr;
1398 BasicBlock *BB = InScop ? CI->getParent() : R.getEntry();
1399 auto *Dom = InScop ? isl_set_copy(scop->getDomainConditions(BB).get())
1400 : isl_set_copy(scop->getContext().get());
1401 assert(Dom && "Cannot propagate a nullptr.");
1402 bool Valid = buildConditionSets(BB, Val, TI, L, Dom, InvalidDomainMap,
1403 ConditionSets);
1404 isl_set_free(Dom);
1405
1406 if (!Valid)
1407 continue;
1408
1409 isl_set *AssumptionCtx = nullptr;
1410 if (InScop) {
1411 AssumptionCtx = isl_set_complement(isl_set_params(ConditionSets[1]));
1412 isl_set_free(ConditionSets[0]);
1413 } else {
1414 AssumptionCtx = isl_set_complement(ConditionSets[1]);
1415 AssumptionCtx = isl_set_intersect(AssumptionCtx, ConditionSets[0]);
1416 }
1417
1418 // Project out newly introduced parameters as they are not otherwise useful.
1419 if (!NewParams.empty()) {
1420 for (isl_size u = 0; u < isl_set_n_param(AssumptionCtx); u++) {
1421 auto *Id = isl_set_get_dim_id(AssumptionCtx, isl_dim_param, u);
1422 auto *Param = static_cast<const SCEV *>(isl_id_get_user(Id));
1423 isl_id_free(Id);
1424
1425 if (!NewParams.count(Param))
1426 continue;
1427
1428 AssumptionCtx =
1429 isl_set_project_out(AssumptionCtx, isl_dim_param, u--, 1);
1430 }
1431 }
1432 ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "UserAssumption", CI)
1433 << "Use user assumption: "
1434 << stringFromIslObj(AssumptionCtx, "null"));
1435 isl::set newContext =
1436 scop->getContext().intersect(isl::manage(AssumptionCtx));
1437 scop->setContext(newContext);
1438 }
1439 }
1440
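// Illustrative sketch (not from a specific test case): for a fixed-size array
//
//   float A[100][200];  ...  A[i][j] = v;
//
// the GEP provides the subscripts {i, j} and the inner dimension size 200, so
// the access is modeled as the two-dimensional A[i][j] rather than the
// flattened A[200 * i + j].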
1441 bool ScopBuilder::buildAccessMultiDimFixed(MemAccInst Inst, ScopStmt *Stmt) {
1442 Value *Val = Inst.getValueOperand();
1443 Type *ElementType = Val->getType();
1444 Value *Address = Inst.getPointerOperand();
1445 const SCEV *AccessFunction =
1446 SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
1447 const SCEVUnknown *BasePointer =
1448 dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
1449 enum MemoryAccess::AccessType AccType =
1450 isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;
1451
1452 if (auto *BitCast = dyn_cast<BitCastInst>(Address))
1453 Address = BitCast->getOperand(0);
1454
1455 auto *GEP = dyn_cast<GetElementPtrInst>(Address);
1456 if (!GEP || DL.getTypeAllocSize(GEP->getResultElementType()) !=
1457 DL.getTypeAllocSize(ElementType))
1458 return false;
1459
1460 SmallVector<const SCEV *, 4> Subscripts;
1461 SmallVector<int, 4> Sizes;
1462 getIndexExpressionsFromGEP(SE, GEP, Subscripts, Sizes);
1463 auto *BasePtr = GEP->getOperand(0);
1464
1465 if (auto *BasePtrCast = dyn_cast<BitCastInst>(BasePtr))
1466 BasePtr = BasePtrCast->getOperand(0);
1467
1468 // Check for identical base pointers to ensure that we do not miss index
1469 // offsets that have been added before this GEP is applied.
1470 if (BasePtr != BasePointer->getValue())
1471 return false;
1472
1473 std::vector<const SCEV *> SizesSCEV;
1474
1475 const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
1476
1477 Loop *SurroundingLoop = Stmt->getSurroundingLoop();
1478 for (auto *Subscript : Subscripts) {
1479 InvariantLoadsSetTy AccessILS;
1480 if (!isAffineExpr(&scop->getRegion(), SurroundingLoop, Subscript, SE,
1481 &AccessILS))
1482 return false;
1483
1484 for (LoadInst *LInst : AccessILS)
1485 if (!ScopRIL.count(LInst))
1486 return false;
1487 }
1488
1489 if (Sizes.empty())
1490 return false;
1491
1492 SizesSCEV.push_back(nullptr);
1493
1494 for (auto V : Sizes)
1495 SizesSCEV.push_back(SE.getSCEV(
1496 ConstantInt::get(IntegerType::getInt64Ty(BasePtr->getContext()), V)));
1497
1498 addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
1499 true, Subscripts, SizesSCEV, Val);
1500 return true;
1501 }
1502
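// Illustrative sketch (assumed input): for a parametric-size array accessed as
//
//   A[i * n + j]   // with parameter n and 0 <= j < n
//
// ScopDetection's delinearization records the shape sizes {n} and the
// subscripts {i, j}; this function reuses that information to emit a
// two-dimensional access A[i][j] instead of the linearized form.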
1503 bool ScopBuilder::buildAccessMultiDimParam(MemAccInst Inst, ScopStmt *Stmt) {
1504 if (!PollyDelinearize)
1505 return false;
1506
1507 Value *Address = Inst.getPointerOperand();
1508 Value *Val = Inst.getValueOperand();
1509 Type *ElementType = Val->getType();
1510 unsigned ElementSize = DL.getTypeAllocSize(ElementType);
1511 enum MemoryAccess::AccessType AccType =
1512 isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;
1513
1514 const SCEV *AccessFunction =
1515 SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
1516 const SCEVUnknown *BasePointer =
1517 dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
1518
1519 assert(BasePointer && "Could not find base pointer");
1520
1521 auto &InsnToMemAcc = scop->getInsnToMemAccMap();
1522 auto AccItr = InsnToMemAcc.find(Inst);
1523 if (AccItr == InsnToMemAcc.end())
1524 return false;
1525
1526 std::vector<const SCEV *> Sizes = {nullptr};
1527
1528 Sizes.insert(Sizes.end(), AccItr->second.Shape->DelinearizedSizes.begin(),
1529 AccItr->second.Shape->DelinearizedSizes.end());
1530
1531 // In case only the element size is contained in the 'Sizes' array, the
1532 // access does not access a real multi-dimensional array. Hence, we allow
1533 // the normal single-dimensional access construction to handle this.
1534 if (Sizes.size() == 1)
1535 return false;
1536
1537 // Remove the element size. This information is already provided by the
1538 // ElementSize parameter. In case the element size of this access and the
1539 // element size used for delinearization differ, the delinearization is
1540 // incorrect. Hence, we invalidate the scop.
1541 //
1542 // TODO: Handle delinearization with differing element sizes.
1543 auto DelinearizedSize =
1544 cast<SCEVConstant>(Sizes.back())->getAPInt().getSExtValue();
1545 Sizes.pop_back();
1546 if (ElementSize != DelinearizedSize)
1547 scop->invalidate(DELINEARIZATION, Inst->getDebugLoc(), Inst->getParent());
1548
1549 addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
1550 true, AccItr->second.DelinearizedSubscripts, Sizes, Val);
1551 return true;
1552 }
1553
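// Illustrative sketch (assumed input): a call like
//
//   memcpy(Dst, Src, n);
//
// is modeled as a MUST_WRITE of n bytes to Dst and a READ of n bytes from Src,
// both as accesses to i8 arrays, provided n is affine; otherwise the length is
// over-approximated (LengthVal is dropped below).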
1554 bool ScopBuilder::buildAccessMemIntrinsic(MemAccInst Inst, ScopStmt *Stmt) {
1555 auto *MemIntr = dyn_cast_or_null<MemIntrinsic>(Inst);
1556
1557 if (MemIntr == nullptr)
1558 return false;
1559
1560 auto *L = LI.getLoopFor(Inst->getParent());
1561 auto *LengthVal = SE.getSCEVAtScope(MemIntr->getLength(), L);
1562 assert(LengthVal);
1563
1564 // Check if the length val is actually affine or if we overapproximate it
1565 InvariantLoadsSetTy AccessILS;
1566 const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
1567
1568 Loop *SurroundingLoop = Stmt->getSurroundingLoop();
1569 bool LengthIsAffine = isAffineExpr(&scop->getRegion(), SurroundingLoop,
1570 LengthVal, SE, &AccessILS);
1571 for (LoadInst *LInst : AccessILS)
1572 if (!ScopRIL.count(LInst))
1573 LengthIsAffine = false;
1574 if (!LengthIsAffine)
1575 LengthVal = nullptr;
1576
1577 auto *DestPtrVal = MemIntr->getDest();
1578 assert(DestPtrVal);
1579
1580 auto *DestAccFunc = SE.getSCEVAtScope(DestPtrVal, L);
1581 assert(DestAccFunc);
1582 // Ignore accesses to "NULL".
1583 // TODO: We could use this to optimize the region further, e.g., intersect
1584 // the context with
1585 // isl_set_complement(isl_set_params(getDomain()))
1586 // as we know it would be undefined to execute this instruction anyway.
1587 if (DestAccFunc->isZero())
1588 return true;
1589
1590 if (auto *U = dyn_cast<SCEVUnknown>(DestAccFunc)) {
1591 if (isa<ConstantPointerNull>(U->getValue()))
1592 return true;
1593 }
1594
1595 auto *DestPtrSCEV = dyn_cast<SCEVUnknown>(SE.getPointerBase(DestAccFunc));
1596 assert(DestPtrSCEV);
1597 DestAccFunc = SE.getMinusSCEV(DestAccFunc, DestPtrSCEV);
1598 addArrayAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, DestPtrSCEV->getValue(),
1599 IntegerType::getInt8Ty(DestPtrVal->getContext()),
1600 LengthIsAffine, {DestAccFunc, LengthVal}, {nullptr},
1601 Inst.getValueOperand());
1602
1603 auto *MemTrans = dyn_cast<MemTransferInst>(MemIntr);
1604 if (!MemTrans)
1605 return true;
1606
1607 auto *SrcPtrVal = MemTrans->getSource();
1608 assert(SrcPtrVal);
1609
1610 auto *SrcAccFunc = SE.getSCEVAtScope(SrcPtrVal, L);
1611 assert(SrcAccFunc);
1612 // Ignore accesses to "NULL".
1613 // TODO: See above TODO
1614 if (SrcAccFunc->isZero())
1615 return true;
1616
1617 auto *SrcPtrSCEV = dyn_cast<SCEVUnknown>(SE.getPointerBase(SrcAccFunc));
1618 assert(SrcPtrSCEV);
1619 SrcAccFunc = SE.getMinusSCEV(SrcAccFunc, SrcPtrSCEV);
1620 addArrayAccess(Stmt, Inst, MemoryAccess::READ, SrcPtrSCEV->getValue(),
1621 IntegerType::getInt8Ty(SrcPtrVal->getContext()),
1622 LengthIsAffine, {SrcAccFunc, LengthVal}, {nullptr},
1623 Inst.getValueOperand());
1624
1625 return true;
1626 }
1627
1628 bool ScopBuilder::buildAccessCallInst(MemAccInst Inst, ScopStmt *Stmt) {
1629 auto *CI = dyn_cast_or_null<CallInst>(Inst);
1630
1631 if (CI == nullptr)
1632 return false;
1633
1634 if (CI->doesNotAccessMemory() || isIgnoredIntrinsic(CI) || isDebugCall(CI))
1635 return true;
1636
1637 bool ReadOnly = false;
1638 auto *AF = SE.getConstant(IntegerType::getInt64Ty(CI->getContext()), 0);
1639 auto *CalledFunction = CI->getCalledFunction();
1640 switch (AA.getModRefBehavior(CalledFunction)) {
1641 case FMRB_UnknownModRefBehavior:
1642 llvm_unreachable("Unknown mod ref behaviour cannot be represented.");
1643 case FMRB_DoesNotAccessMemory:
1644 return true;
1645 case FMRB_OnlyWritesMemory:
1646 case FMRB_OnlyWritesInaccessibleMem:
1647 case FMRB_OnlyWritesInaccessibleOrArgMem:
1648 case FMRB_OnlyAccessesInaccessibleMem:
1649 case FMRB_OnlyAccessesInaccessibleOrArgMem:
1650 return false;
1651 case FMRB_OnlyReadsMemory:
1652 case FMRB_OnlyReadsInaccessibleMem:
1653 case FMRB_OnlyReadsInaccessibleOrArgMem:
1654 GlobalReads.emplace_back(Stmt, CI);
1655 return true;
1656 case FMRB_OnlyReadsArgumentPointees:
1657 ReadOnly = true;
1658 LLVM_FALLTHROUGH;
1659 case FMRB_OnlyWritesArgumentPointees:
1660 case FMRB_OnlyAccessesArgumentPointees: {
1661 auto AccType = ReadOnly ? MemoryAccess::READ : MemoryAccess::MAY_WRITE;
1662 Loop *L = LI.getLoopFor(Inst->getParent());
1663 for (const auto &Arg : CI->args()) {
1664 if (!Arg->getType()->isPointerTy())
1665 continue;
1666
1667 auto *ArgSCEV = SE.getSCEVAtScope(Arg, L);
1668 if (ArgSCEV->isZero())
1669 continue;
1670
1671 if (auto *U = dyn_cast<SCEVUnknown>(ArgSCEV)) {
1672 if (isa<ConstantPointerNull>(U->getValue()))
1673 return true;
1674 }
1675
1676 auto *ArgBasePtr = cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV));
1677 addArrayAccess(Stmt, Inst, AccType, ArgBasePtr->getValue(),
1678 ArgBasePtr->getType(), false, {AF}, {nullptr}, CI);
1679 }
1680 return true;
1681 }
1682 }
1683
1684 return true;
1685 }
1686
1687 void ScopBuilder::buildAccessSingleDim(MemAccInst Inst, ScopStmt *Stmt) {
1688 Value *Address = Inst.getPointerOperand();
1689 Value *Val = Inst.getValueOperand();
1690 Type *ElementType = Val->getType();
1691 enum MemoryAccess::AccessType AccType =
1692 isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;
1693
1694 const SCEV *AccessFunction =
1695 SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
1696 const SCEVUnknown *BasePointer =
1697 dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
1698
1699 assert(BasePointer && "Could not find base pointer");
1700 AccessFunction = SE.getMinusSCEV(AccessFunction, BasePointer);
1701
1702 // Check if the access depends on a loop contained in a non-affine subregion.
1703 bool isVariantInNonAffineLoop = false;
1704 SetVector<const Loop *> Loops;
1705 findLoops(AccessFunction, Loops);
1706 for (const Loop *L : Loops)
1707 if (Stmt->contains(L)) {
1708 isVariantInNonAffineLoop = true;
1709 break;
1710 }
1711
1712 InvariantLoadsSetTy AccessILS;
1713
1714 Loop *SurroundingLoop = Stmt->getSurroundingLoop();
1715 bool IsAffine = !isVariantInNonAffineLoop &&
1716 isAffineExpr(&scop->getRegion(), SurroundingLoop,
1717 AccessFunction, SE, &AccessILS);
1718
1719 const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
1720 for (LoadInst *LInst : AccessILS)
1721 if (!ScopRIL.count(LInst))
1722 IsAffine = false;
1723
1724 if (!IsAffine && AccType == MemoryAccess::MUST_WRITE)
1725 AccType = MemoryAccess::MAY_WRITE;
1726
1727 addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType,
1728 IsAffine, {AccessFunction}, {nullptr}, Val);
1729 }
1730
1731 void ScopBuilder::buildMemoryAccess(MemAccInst Inst, ScopStmt *Stmt) {
1732 if (buildAccessMemIntrinsic(Inst, Stmt))
1733 return;
1734
1735 if (buildAccessCallInst(Inst, Stmt))
1736 return;
1737
1738 if (buildAccessMultiDimFixed(Inst, Stmt))
1739 return;
1740
1741 if (buildAccessMultiDimParam(Inst, Stmt))
1742 return;
1743
1744 buildAccessSingleDim(Inst, Stmt);
1745 }
1746
1747 void ScopBuilder::buildAccessFunctions() {
1748 for (auto &Stmt : *scop) {
1749 if (Stmt.isBlockStmt()) {
1750 buildAccessFunctions(&Stmt, *Stmt.getBasicBlock());
1751 continue;
1752 }
1753
1754 Region *R = Stmt.getRegion();
1755 for (BasicBlock *BB : R->blocks())
1756 buildAccessFunctions(&Stmt, *BB, R);
1757 }
1758
1759 // Build write accesses for values that are used after the SCoP.
1760 // The instructions defining them might be synthesizable and therefore not
1761 // contained in any statement, hence we iterate over the original instructions
1762 // to identify all escaping values.
1763 for (BasicBlock *BB : scop->getRegion().blocks()) {
1764 for (Instruction &Inst : *BB)
1765 buildEscapingDependences(&Inst);
1766 }
1767 }
1768
1769 bool ScopBuilder::shouldModelInst(Instruction *Inst, Loop *L) {
1770 return !Inst->isTerminator() && !isIgnoredIntrinsic(Inst) &&
1771 !canSynthesize(Inst, *scop, &SE, L);
1772 }
1773
1774 /// Generate a name for a statement.
1775 ///
1776 /// @param BB The basic block the statement will represent.
1777 /// @param BBIdx The index of the @p BB relative to other BBs/regions.
1778 /// @param Count The index of the created statement in @p BB.
1779 /// @param IsMain Whether this is the main statement of @p BB. If true,
1780 /// no suffix will be added.
1781 /// @param IsLast Uses a special indicator for the last statement of a BB.
1782 static std::string makeStmtName(BasicBlock *BB, long BBIdx, int Count,
1783 bool IsMain, bool IsLast = false) {
1784 std::string Suffix;
1785 if (!IsMain) {
1786 if (UseInstructionNames)
1787 Suffix = '_';
1788 if (IsLast)
1789 Suffix += "last";
1790 else if (Count < 26)
1791 Suffix += 'a' + Count;
1792 else
1793 Suffix += std::to_string(Count);
1794 }
1795 return getIslCompatibleName("Stmt", BB, BBIdx, Suffix, UseInstructionNames);
1796 }
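// For example (illustrative, when IR names are used): a block %for.body yields
// "Stmt_for_body" for the main statement, "Stmt_for_body_b" for a second
// statement, and "Stmt_for_body_last" for the epilogue statement.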
1797
1798 /// Generate a name for a statement that represents a non-affine subregion.
1799 ///
1800 /// @param R The region the statement will represent.
1801 /// @param RIdx The index of the @p R relative to other BBs/regions.
1802 static std::string makeStmtName(Region *R, long RIdx) {
1803 return getIslCompatibleName("Stmt", R->getNameStr(), RIdx, "",
1804 UseInstructionNames);
1805 }
1806
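// Illustrative sketch (hypothetical block): with store granularity
// (SplitOnStore), a block such as
//
//   %x = load ...; store %x, ...; %y = load ...; store %y, ...
//
// is split after each store, yielding one statement per load/store pair plus a
// trailing (possibly empty) statement for the remaining instructions.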
1807 void ScopBuilder::buildSequentialBlockStmts(BasicBlock *BB, bool SplitOnStore) {
1808 Loop *SurroundingLoop = LI.getLoopFor(BB);
1809
1810 int Count = 0;
1811 long BBIdx = scop->getNextStmtIdx();
1812 std::vector<Instruction *> Instructions;
1813 for (Instruction &Inst : *BB) {
1814 if (shouldModelInst(&Inst, SurroundingLoop))
1815 Instructions.push_back(&Inst);
1816 if (Inst.getMetadata("polly_split_after") ||
1817 (SplitOnStore && isa<StoreInst>(Inst))) {
1818 std::string Name = makeStmtName(BB, BBIdx, Count, Count == 0);
1819 scop->addScopStmt(BB, Name, SurroundingLoop, Instructions);
1820 Count++;
1821 Instructions.clear();
1822 }
1823 }
1824
1825 std::string Name = makeStmtName(BB, BBIdx, Count, Count == 0);
1826 scop->addScopStmt(BB, Name, SurroundingLoop, Instructions);
1827 }
1828
1829 /// Is @p Inst an ordered instruction?
1830 ///
1831 /// An unordered instruction is an instruction, such that a sequence of
1832 /// unordered instructions can be permuted without changing semantics. Any
1833 /// instruction for which this is not always the case is ordered.
1834 static bool isOrderedInstruction(Instruction *Inst) {
1835 return Inst->mayHaveSideEffects() || Inst->mayReadOrWriteMemory();
1836 }
1837
1838 /// Join instructions into the same statement if one uses the scalar result
1839 /// of the other.
1840 static void joinOperandTree(EquivalenceClasses<Instruction *> &UnionFind,
1841 ArrayRef<Instruction *> ModeledInsts) {
1842 for (Instruction *Inst : ModeledInsts) {
1843 if (isa<PHINode>(Inst))
1844 continue;
1845
1846 for (Use &Op : Inst->operands()) {
1847 Instruction *OpInst = dyn_cast<Instruction>(Op.get());
1848 if (!OpInst)
1849 continue;
1850
1851 // Check if OpInst is in the BB and is a modeled instruction.
1852 auto OpVal = UnionFind.findValue(OpInst);
1853 if (OpVal == UnionFind.end())
1854 continue;
1855
1856 UnionFind.unionSets(Inst, OpInst);
1857 }
1858 }
1859 }
1860
1861 /// Ensure that the order of ordered instructions does not change.
1862 ///
1863 /// If we encounter an ordered instruction enclosed in instructions belonging to
1864 /// a different statement (which might as well contain ordered instructions, but
1865 /// this is not tested here), join them.
1866 static void
1867 joinOrderedInstructions(EquivalenceClasses<Instruction *> &UnionFind,
1868 ArrayRef<Instruction *> ModeledInsts) {
1869 SetVector<Instruction *> SeenLeaders;
1870 for (Instruction *Inst : ModeledInsts) {
1871 if (!isOrderedInstruction(Inst))
1872 continue;
1873
1874 Instruction *Leader = UnionFind.getLeaderValue(Inst);
1875 // Since previous iterations might have merged sets, some items in
1876 // SeenLeaders are not leaders anymore. However, the new leader of
1877 // previously merged instructions must be one of the former leaders of
1878 // these merged instructions.
1879 bool Inserted = SeenLeaders.insert(Leader);
1880 if (Inserted)
1881 continue;
1882
1883 // Merge statements to close holes. Say, we have already seen statements A
1884 // and B, in this order. Then we see an instruction of A again and we would
1885 // see the pattern "A B A". This function joins all statements until the
1886 // only seen occurrence of A.
1887 for (Instruction *Prev : reverse(SeenLeaders)) {
1888 // We are backtracking from the last element until we see Inst's leader
1889 // in SeenLeaders and merge all into one set. Although leaders of
1890 // instructions change during the execution of this loop, it's irrelevant
1891 // as we are just searching for the element that we already confirmed is
1892 // in the list.
1893 if (Prev == Leader)
1894 break;
1895 UnionFind.unionSets(Prev, Leader);
1896 }
1897 }
1898 }
1899
1900 /// If the BasicBlock has an edge from itself, ensure that the PHI WRITEs for
1901 /// the incoming values from this block are executed after the PHI READ.
1902 ///
1903 /// Otherwise it could overwrite the incoming value from before the BB with the
1904 /// value for the next execution. This can happen if the PHI WRITE is added to
1905 /// the statement with the instruction that defines the incoming value (instead
1906 /// of the last statement of the same BB). To ensure that the PHI READ and WRITE
1907 /// are in order, we put both into the statement. PHI WRITEs are always executed
1908 /// after PHI READs when they are in the same statement.
1909 ///
1910 /// TODO: This is an overpessimization. We only have to ensure that the PHI
1911 /// WRITE is not put into a statement containing the PHI itself. That could also
1912 /// be done by
1913 /// - having all (strongly connected) PHIs in a single statement,
1914 /// - unite only the PHIs in the operand tree of the PHI WRITE (because it only
1915 /// has a chance of being lifted before a PHI by being in a statement with a
1916 /// PHI that comes before in the basic block), or
1917 /// - when uniting statements, ensure that no (relevant) PHIs are overtaken.
1918 static void joinOrderedPHIs(EquivalenceClasses<Instruction *> &UnionFind,
1919 ArrayRef<Instruction *> ModeledInsts) {
1920 for (Instruction *Inst : ModeledInsts) {
1921 PHINode *PHI = dyn_cast<PHINode>(Inst);
1922 if (!PHI)
1923 continue;
1924
1925 int Idx = PHI->getBasicBlockIndex(PHI->getParent());
1926 if (Idx < 0)
1927 continue;
1928
1929 Instruction *IncomingVal =
1930 dyn_cast<Instruction>(PHI->getIncomingValue(Idx));
1931 if (!IncomingVal)
1932 continue;
1933
1934 UnionFind.unionSets(PHI, IncomingVal);
1935 }
1936 }
1937
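// Illustrative sketch (hypothetical block): for
//
//   %a = load %A; %s = add %a, 1; store %s, %A;
//   %b = load %B; %t = mul %b, 2; store %t, %B;
//
// the operand-tree and ordered-instruction joins below form two independent
// equivalence classes, so the block is split into two statements, one per
// load/compute/store chain, plus the unconditionally added epilogue.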
1938 void ScopBuilder::buildEqivClassBlockStmts(BasicBlock *BB) {
1939 Loop *L = LI.getLoopFor(BB);
1940
1941 // Extracting out modeled instructions saves us from checking
1942 // shouldModelInst() repeatedly.
1943 SmallVector<Instruction *, 32> ModeledInsts;
1944 EquivalenceClasses<Instruction *> UnionFind;
1945 Instruction *MainInst = nullptr, *MainLeader = nullptr;
1946 for (Instruction &Inst : *BB) {
1947 if (!shouldModelInst(&Inst, L))
1948 continue;
1949 ModeledInsts.push_back(&Inst);
1950 UnionFind.insert(&Inst);
1951
1952 // When a BB is split into multiple statements, the main statement is the
1953 // one containing the 'main' instruction. We select the first instruction
1954 // that is unlikely to be removed (because it has side-effects) as the main
1955 // one. It is used to ensure that at least one statement from the BB has the
1956 // same name as with -polly-stmt-granularity=bb.
1957 if (!MainInst && (isa<StoreInst>(Inst) ||
1958 (isa<CallInst>(Inst) && !isa<IntrinsicInst>(Inst))))
1959 MainInst = &Inst;
1960 }
1961
1962 joinOperandTree(UnionFind, ModeledInsts);
1963 joinOrderedInstructions(UnionFind, ModeledInsts);
1964 joinOrderedPHIs(UnionFind, ModeledInsts);
1965
1966 // The list of instructions per statement (each statement is represented by
1967 // its leader instruction).
1968 MapVector<Instruction *, std::vector<Instruction *>> LeaderToInstList;
1969
1970 // The order of statements must be preserved w.r.t. their ordered
1971 // instructions. Without this explicit scan, we would also use non-ordered
1972 // instructions (whose order is arbitrary) to determine statement order.
1973 for (Instruction *Inst : ModeledInsts) {
1974 if (!isOrderedInstruction(Inst))
1975 continue;
1976
1977 auto LeaderIt = UnionFind.findLeader(Inst);
1978 if (LeaderIt == UnionFind.member_end())
1979 continue;
1980
1981 // Insert element for the leader instruction.
1982 (void)LeaderToInstList[*LeaderIt];
1983 }
1984
1985 // Collect the instructions of all leaders. UnionFind's member iterators are
1986 // unfortunately not in any specific order.
1987 for (Instruction *Inst : ModeledInsts) {
1988 auto LeaderIt = UnionFind.findLeader(Inst);
1989 if (LeaderIt == UnionFind.member_end())
1990 continue;
1991
1992 if (Inst == MainInst)
1993 MainLeader = *LeaderIt;
1994 std::vector<Instruction *> &InstList = LeaderToInstList[*LeaderIt];
1995 InstList.push_back(Inst);
1996 }
1997
1998 // Finally build the statements.
1999 int Count = 0;
2000 long BBIdx = scop->getNextStmtIdx();
2001 for (auto &Instructions : LeaderToInstList) {
2002 std::vector<Instruction *> &InstList = Instructions.second;
2003
2004 // If there is no main instruction, make the first statement the main.
2005 bool IsMain = (MainInst ? MainLeader == Instructions.first : Count == 0);
2006
2007 std::string Name = makeStmtName(BB, BBIdx, Count, IsMain);
2008 scop->addScopStmt(BB, Name, L, std::move(InstList));
2009 Count += 1;
2010 }
2011
2012 // Unconditionally add an epilogue (last statement). It contains no
2013 // instructions, but holds the PHI write accesses for successor basic blocks,
2014 // if the incoming value is not defined in another statement of the same BB.
2015 // The epilogue becomes the main statement only if there is no other
2016 // statement that could become main.
2017 // The epilogue will be removed if no PHIWrite is added to it.
2018 std::string EpilogueName = makeStmtName(BB, BBIdx, Count, Count == 0, true);
2019 scop->addScopStmt(BB, EpilogueName, L, {});
2020 }
2021
2022 void ScopBuilder::buildStmts(Region &SR) {
2023 if (scop->isNonAffineSubRegion(&SR)) {
2024 std::vector<Instruction *> Instructions;
2025 Loop *SurroundingLoop =
2026 getFirstNonBoxedLoopFor(SR.getEntry(), LI, scop->getBoxedLoops());
2027 for (Instruction &Inst : *SR.getEntry())
2028 if (shouldModelInst(&Inst, SurroundingLoop))
2029 Instructions.push_back(&Inst);
2030 long RIdx = scop->getNextStmtIdx();
2031 std::string Name = makeStmtName(&SR, RIdx);
2032 scop->addScopStmt(&SR, Name, SurroundingLoop, Instructions);
2033 return;
2034 }
2035
2036 for (auto I = SR.element_begin(), E = SR.element_end(); I != E; ++I)
2037 if (I->isSubRegion())
2038 buildStmts(*I->getNodeAs<Region>());
2039 else {
2040 BasicBlock *BB = I->getNodeAs<BasicBlock>();
2041 switch (StmtGranularity) {
2042 case GranularityChoice::BasicBlocks:
2043 buildSequentialBlockStmts(BB);
2044 break;
2045 case GranularityChoice::ScalarIndependence:
2046 buildEqivClassBlockStmts(BB);
2047 break;
2048 case GranularityChoice::Stores:
2049 buildSequentialBlockStmts(BB, true);
2050 break;
2051 }
2052 }
2053 }
2054
2055 void ScopBuilder::buildAccessFunctions(ScopStmt *Stmt, BasicBlock &BB,
2056 Region *NonAffineSubRegion) {
2057 assert(
2058 Stmt &&
2059 "The exit BB is the only one that cannot be represented by a statement");
2060 assert(Stmt->represents(&BB));
2061
2062 // We do not build access functions for error blocks, as they may contain
2063 // instructions we cannot model.
2064 if (SD.isErrorBlock(BB, scop->getRegion()))
2065 return;
2066
2067 auto BuildAccessesForInst = [this, Stmt,
2068 NonAffineSubRegion](Instruction *Inst) {
2069 PHINode *PHI = dyn_cast<PHINode>(Inst);
2070 if (PHI)
2071 buildPHIAccesses(Stmt, PHI, NonAffineSubRegion, false);
2072
2073 if (auto MemInst = MemAccInst::dyn_cast(*Inst)) {
2074 assert(Stmt && "Cannot build access function in non-existing statement");
2075 buildMemoryAccess(MemInst, Stmt);
2076 }
2077
2078 // PHI nodes have already been modeled above and terminators that are
2079 // not part of a non-affine subregion are fully modeled and regenerated
2080 // from the polyhedral domains. Hence, they do not need to be modeled as
2081 // explicit data dependences.
2082 if (!PHI)
2083 buildScalarDependences(Stmt, Inst);
2084 };
2085
2086 const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
2087 bool IsEntryBlock = (Stmt->getEntryBlock() == &BB);
2088 if (IsEntryBlock) {
2089 for (Instruction *Inst : Stmt->getInstructions())
2090 BuildAccessesForInst(Inst);
2091 if (Stmt->isRegionStmt())
2092 BuildAccessesForInst(BB.getTerminator());
2093 } else {
2094 for (Instruction &Inst : BB) {
2095 if (isIgnoredIntrinsic(&Inst))
2096 continue;
2097
2098 // Invariant loads already have been processed.
2099 if (isa<LoadInst>(Inst) && RIL.count(cast<LoadInst>(&Inst)))
2100 continue;
2101
2102 BuildAccessesForInst(&Inst);
2103 }
2104 }
2105 }
2106
2107 MemoryAccess *ScopBuilder::addMemoryAccess(
2108 ScopStmt *Stmt, Instruction *Inst, MemoryAccess::AccessType AccType,
2109 Value *BaseAddress, Type *ElementType, bool Affine, Value *AccessValue,
2110 ArrayRef<const SCEV *> Subscripts, ArrayRef<const SCEV *> Sizes,
2111 MemoryKind Kind) {
2112 bool isKnownMustAccess = false;
2113
2114 // Accesses in single-basic block statements are always executed.
2115 if (Stmt->isBlockStmt())
2116 isKnownMustAccess = true;
2117
2118 if (Stmt->isRegionStmt()) {
2119 // Accesses that dominate the exit block of a non-affine region are always
2120 // executed. In non-affine regions there may exist MemoryKind::Values that
2121 // do not dominate the exit. MemoryKind::Values will always dominate the
2122 // exit and MemoryKind::PHIs only if there is at most one PHI_WRITE in the
2123 // non-affine region.
2124 if (Inst && DT.dominates(Inst->getParent(), Stmt->getRegion()->getExit()))
2125 isKnownMustAccess = true;
2126 }
2127
2128 // Non-affine PHI writes do not "happen" at a particular instruction, but
2129 // after exiting the statement. Therefore they are guaranteed to execute and
2130 // overwrite the old value.
2131 if (Kind == MemoryKind::PHI || Kind == MemoryKind::ExitPHI)
2132 isKnownMustAccess = true;
2133
2134 if (!isKnownMustAccess && AccType == MemoryAccess::MUST_WRITE)
2135 AccType = MemoryAccess::MAY_WRITE;
2136
2137 auto *Access = new MemoryAccess(Stmt, Inst, AccType, BaseAddress, ElementType,
2138 Affine, Subscripts, Sizes, AccessValue, Kind);
2139
2140 scop->addAccessFunction(Access);
2141 Stmt->addAccess(Access);
2142 return Access;
2143 }
2144
2145 void ScopBuilder::addArrayAccess(ScopStmt *Stmt, MemAccInst MemAccInst,
2146 MemoryAccess::AccessType AccType,
2147 Value *BaseAddress, Type *ElementType,
2148 bool IsAffine,
2149 ArrayRef<const SCEV *> Subscripts,
2150 ArrayRef<const SCEV *> Sizes,
2151 Value *AccessValue) {
2152 ArrayBasePointers.insert(BaseAddress);
2153 addMemoryAccess(Stmt, MemAccInst, AccType, BaseAddress, ElementType, IsAffine,
2154 AccessValue, Subscripts, Sizes, MemoryKind::Array);
2155 }
2156
2157 /// Check if @p Expr is divisible by @p Size.
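/// For instance (illustrative): 8*i + 4*j is divisible by 4 because every
/// addend is, 8*i + 2 is not, and a product like 6*i is divisible by 3 because
/// one of its factors already is.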
2158 static bool isDivisible(const SCEV *Expr, unsigned Size, ScalarEvolution &SE) {
2159 assert(Size != 0);
2160 if (Size == 1)
2161 return true;
2162
2163 // Only one factor needs to be divisible.
2164 if (auto *MulExpr = dyn_cast<SCEVMulExpr>(Expr)) {
2165 for (auto *FactorExpr : MulExpr->operands())
2166 if (isDivisible(FactorExpr, Size, SE))
2167 return true;
2168 return false;
2169 }
2170
2171 // For other n-ary expressions (Add, AddRec, Max,...) all operands need
2172 // to be divisible.
2173 if (auto *NAryExpr = dyn_cast<SCEVNAryExpr>(Expr)) {
2174 for (auto *OpExpr : NAryExpr->operands())
2175 if (!isDivisible(OpExpr, Size, SE))
2176 return false;
2177 return true;
2178 }
2179
2180 auto *SizeSCEV = SE.getConstant(Expr->getType(), Size);
2181 auto *UDivSCEV = SE.getUDivExpr(Expr, SizeSCEV);
2182 auto *MulSCEV = SE.getMulExpr(UDivSCEV, SizeSCEV);
2183 return MulSCEV == Expr;
2184 }
2185
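// Illustrative sketch (hypothetical shape): if a two-dimensional array of size
// n x m is only ever accessed as A[3*i][j], the access relations are rewritten
// to A[i][j] and the innermost size becomes 3*m, so the linearized offset
// 3*i*m + j is preserved as i*(3*m) + j.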
2186 void ScopBuilder::foldSizeConstantsToRight() {
2187 isl::union_set Accessed = scop->getAccesses().range();
2188
2189 for (auto Array : scop->arrays()) {
2190 if (Array->getNumberOfDimensions() <= 1)
2191 continue;
2192
2193 isl::space Space = Array->getSpace();
2194 Space = Space.align_params(Accessed.get_space());
2195
2196 if (!Accessed.contains(Space))
2197 continue;
2198
2199 isl::set Elements = Accessed.extract_set(Space);
2200 isl::map Transform = isl::map::universe(Array->getSpace().map_from_set());
2201
2202 std::vector<int> Int;
2203 unsigned Dims = unsignedFromIslSize(Elements.tuple_dim());
2204 for (unsigned i = 0; i < Dims; i++) {
2205 isl::set DimOnly = isl::set(Elements).project_out(isl::dim::set, 0, i);
2206 DimOnly = DimOnly.project_out(isl::dim::set, 1, Dims - i - 1);
2207 DimOnly = DimOnly.lower_bound_si(isl::dim::set, 0, 0);
2208
2209 isl::basic_set DimHull = DimOnly.affine_hull();
2210
2211 if (i == Dims - 1) {
2212 Int.push_back(1);
2213 Transform = Transform.equate(isl::dim::in, i, isl::dim::out, i);
2214 continue;
2215 }
2216
2217 if (unsignedFromIslSize(DimHull.dim(isl::dim::div)) == 1) {
2218 isl::aff Diff = DimHull.get_div(0);
2219 isl::val Val = Diff.get_denominator_val();
2220
2221 int ValInt = 1;
2222 if (Val.is_int()) {
2223 auto ValAPInt = APIntFromVal(Val);
2224 if (ValAPInt.isSignedIntN(32))
2225 ValInt = ValAPInt.getSExtValue();
2226 } else {
2227 }
2228
2229 Int.push_back(ValInt);
2230 isl::constraint C = isl::constraint::alloc_equality(
2231 isl::local_space(Transform.get_space()));
2232 C = C.set_coefficient_si(isl::dim::out, i, ValInt);
2233 C = C.set_coefficient_si(isl::dim::in, i, -1);
2234 Transform = Transform.add_constraint(C);
2235 continue;
2236 }
2237
2238 isl::basic_set ZeroSet = isl::basic_set(DimHull);
2239 ZeroSet = ZeroSet.fix_si(isl::dim::set, 0, 0);
2240
2241 int ValInt = 1;
2242 if (ZeroSet.is_equal(DimHull)) {
2243 ValInt = 0;
2244 }
2245
2246 Int.push_back(ValInt);
2247 Transform = Transform.equate(isl::dim::in, i, isl::dim::out, i);
2248 }
2249
2250 isl::set MappedElements = isl::map(Transform).domain();
2251 if (!Elements.is_subset(MappedElements))
2252 continue;
2253
2254 bool CanFold = true;
2255 if (Int[0] <= 1)
2256 CanFold = false;
2257
2258 unsigned NumDims = Array->getNumberOfDimensions();
2259 for (unsigned i = 1; i < NumDims - 1; i++)
2260 if (Int[0] != Int[i] && Int[i])
2261 CanFold = false;
2262
2263 if (!CanFold)
2264 continue;
2265
2266 for (auto &Access : scop->access_functions())
2267 if (Access->getScopArrayInfo() == Array)
2268 Access->setAccessRelation(
2269 Access->getAccessRelation().apply_range(Transform));
2270
2271 std::vector<const SCEV *> Sizes;
2272 for (unsigned i = 0; i < NumDims; i++) {
2273 auto Size = Array->getDimensionSize(i);
2274
2275 if (i == NumDims - 1)
2276 Size = SE.getMulExpr(Size, SE.getConstant(Size->getType(), Int[0]));
2277 Sizes.push_back(Size);
2278 }
2279
2280 Array->updateSizes(Sizes, false /* CheckConsistency */);
2281 }
2282 }
2283
2284 void ScopBuilder::finalizeAccesses() {
2285 updateAccessDimensionality();
2286 foldSizeConstantsToRight();
2287 foldAccessRelations();
2288 assumeNoOutOfBounds();
2289 }
2290
2291 void ScopBuilder::updateAccessDimensionality() {
2292 // Check all array accesses for each base pointer and find a (virtual) element
2293 // size for the base pointer that divides all access functions.
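// Illustrative example (assumed accesses): a 4-byte float array that is also
// read two bytes at a time at byte offset 2*i yields a subscript not divisible
// by 4, so the virtual element size drops to 2 and the array's element type
// becomes i16; updateDimensionality() then expresses all accesses in these
// smaller elements.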
2294 for (ScopStmt &Stmt : *scop)
2295 for (MemoryAccess *Access : Stmt) {
2296 if (!Access->isArrayKind())
2297 continue;
2298 ScopArrayInfo *Array =
2299 const_cast<ScopArrayInfo *>(Access->getScopArrayInfo());
2300
2301 if (Array->getNumberOfDimensions() != 1)
2302 continue;
2303 unsigned DivisibleSize = Array->getElemSizeInBytes();
2304 const SCEV *Subscript = Access->getSubscript(0);
2305 while (!isDivisible(Subscript, DivisibleSize, SE))
2306 DivisibleSize /= 2;
2307 auto *Ty = IntegerType::get(SE.getContext(), DivisibleSize * 8);
2308 Array->updateElementType(Ty);
2309 }
2310
2311 for (auto &Stmt : *scop)
2312 for (auto &Access : Stmt)
2313 Access->updateDimensionality();
2314 }
2315
2316 void ScopBuilder::foldAccessRelations() {
2317 for (auto &Stmt : *scop)
2318 for (auto &Access : Stmt)
2319 Access->foldAccessRelation();
2320 }
2321
2322 void ScopBuilder::assumeNoOutOfBounds() {
2323 if (PollyIgnoreInbounds)
2324 return;
2325 for (auto &Stmt : *scop)
2326 for (auto &Access : Stmt) {
2327 isl::set Outside = Access->assumeNoOutOfBound();
2328 const auto &Loc = Access->getAccessInstruction()
2329 ? Access->getAccessInstruction()->getDebugLoc()
2330 : DebugLoc();
2331 recordAssumption(&RecordedAssumptions, INBOUNDS, Outside, Loc,
2332 AS_ASSUMPTION);
2333 }
2334 }
2335
2336 void ScopBuilder::ensureValueWrite(Instruction *Inst) {
2337 // Find the statement that defines the value of Inst. That statement has to
2338 // write the value to make it available to those statements that read it.
2339 ScopStmt *Stmt = scop->getStmtFor(Inst);
2340
2341 // It is possible that the value is synthesizable within a loop (such that it
2342 // is not part of any statement), but not after the loop (where you need the
2343 // number of loop round-trips to synthesize it). In LCSSA-form a PHI node will
2344 // avoid this. In case the IR has no such PHI, use the last statement (where
2345 // the value is synthesizable) to write the value.
2346 if (!Stmt)
2347 Stmt = scop->getLastStmtFor(Inst->getParent());
2348
2349 // Inst not defined within this SCoP.
2350 if (!Stmt)
2351 return;
2352
2353 // Do not process further if the instruction is already written.
2354 if (Stmt->lookupValueWriteOf(Inst))
2355 return;
2356
2357 addMemoryAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, Inst, Inst->getType(),
2358 true, Inst, ArrayRef<const SCEV *>(),
2359 ArrayRef<const SCEV *>(), MemoryKind::Value);
2360 }
2361
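// Illustrative sketch (hypothetical IR): if %sum is defined in Stmt_A and used
// in Stmt_B, ensureValueWrite() gives Stmt_A a MemoryKind::Value MUST_WRITE of
// %sum and ensureValueRead() gives Stmt_B the matching READ, so the scalar is
// communicated through a zero-dimensional array (here called MemRef_sum).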
2362 void ScopBuilder::ensureValueRead(Value *V, ScopStmt *UserStmt) {
2363 // TODO: Make ScopStmt::ensureValueRead(Value*) offer the same functionality
2364 // to be able to replace this one. Currently, there is a split responsibility.
2365 // In a first step, the MemoryAccess is created, but without the
2366 // AccessRelation. In the second step by ScopStmt::buildAccessRelations(), the
2367 // AccessRelation is created. At least for scalar accesses, there is no new
2368 // information available at ScopStmt::buildAccessRelations(), so we could
2369 // create the AccessRelation right away. This is what
2370 // ScopStmt::ensureValueRead(Value*) does.
2371
2372 auto *Scope = UserStmt->getSurroundingLoop();
2373 auto VUse = VirtualUse::create(scop.get(), UserStmt, Scope, V, false);
2374 switch (VUse.getKind()) {
2375 case VirtualUse::Constant:
2376 case VirtualUse::Block:
2377 case VirtualUse::Synthesizable:
2378 case VirtualUse::Hoisted:
2379 case VirtualUse::Intra:
2380 // Uses of these kinds do not need a MemoryAccess.
2381 break;
2382
2383 case VirtualUse::ReadOnly:
2384 // Add MemoryAccess for invariant values only if requested.
2385 if (!ModelReadOnlyScalars)
2386 break;
2387
2388 LLVM_FALLTHROUGH;
2389 case VirtualUse::Inter:
2390
2391 // Do not create another MemoryAccess for reloading the value if one already
2392 // exists.
2393 if (UserStmt->lookupValueReadOf(V))
2394 break;
2395
2396 addMemoryAccess(UserStmt, nullptr, MemoryAccess::READ, V, V->getType(),
2397 true, V, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
2398 MemoryKind::Value);
2399
2400 // Inter-statement uses need to write the value in their defining statement.
2401 if (VUse.isInter())
2402 ensureValueWrite(cast<Instruction>(V));
2403 break;
2404 }
2405 }
2406
2407 void ScopBuilder::ensurePHIWrite(PHINode *PHI, ScopStmt *IncomingStmt,
2408 BasicBlock *IncomingBlock,
2409 Value *IncomingValue, bool IsExitBlock) {
2410 // As the incoming block might turn out to be an error statement, ensure we
2411 // will create an exit PHI SAI object. It is needed during code generation
2412 // and would be created later anyway.
2413 if (IsExitBlock)
2414 scop->getOrCreateScopArrayInfo(PHI, PHI->getType(), {},
2415 MemoryKind::ExitPHI);
2416
2417 // This is possible if PHI is in the SCoP's entry block. The incoming blocks
2418 // from outside the SCoP's region have no statement representation.
2419 if (!IncomingStmt)
2420 return;
2421
2422 // Make sure the incoming value is available in the incoming block. This must
2423 // be done before the check for multiple PHI writes because each of the
2424 // multiple exiting edges from a subregion can be the effective written value of the
2425 // subregion. As such, all of them must be made available in the subregion
2426 // statement.
2427 ensureValueRead(IncomingValue, IncomingStmt);
2428
2429 // Do not add more than one MemoryAccess per PHINode and ScopStmt.
2430 if (MemoryAccess *Acc = IncomingStmt->lookupPHIWriteOf(PHI)) {
2431 assert(Acc->getAccessInstruction() == PHI);
2432 Acc->addIncoming(IncomingBlock, IncomingValue);
2433 return;
2434 }
2435
2436 MemoryAccess *Acc = addMemoryAccess(
2437 IncomingStmt, PHI, MemoryAccess::MUST_WRITE, PHI, PHI->getType(), true,
2438 PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
2439 IsExitBlock ? MemoryKind::ExitPHI : MemoryKind::PHI);
2440 assert(Acc);
2441 Acc->addIncoming(IncomingBlock, IncomingValue);
2442 }
2443
2444 void ScopBuilder::addPHIReadAccess(ScopStmt *PHIStmt, PHINode *PHI) {
2445 addMemoryAccess(PHIStmt, PHI, MemoryAccess::READ, PHI, PHI->getType(), true,
2446 PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
2447 MemoryKind::PHI);
2448 }
2449
2450 void ScopBuilder::buildDomain(ScopStmt &Stmt) {
2451 isl::id Id = isl::id::alloc(scop->getIslCtx(), Stmt.getBaseName(), &Stmt);
2452
2453 Stmt.Domain = scop->getDomainConditions(&Stmt);
2454 Stmt.Domain = Stmt.Domain.set_tuple_id(Id);
2455 }
2456
2457 void ScopBuilder::collectSurroundingLoops(ScopStmt &Stmt) {
2458 isl::set Domain = Stmt.getDomain();
2459 BasicBlock *BB = Stmt.getEntryBlock();
2460
2461 Loop *L = LI.getLoopFor(BB);
2462
2463 while (L && Stmt.isRegionStmt() && Stmt.getRegion()->contains(L))
2464 L = L->getParentLoop();
2465
2466 SmallVector<llvm::Loop *, 8> Loops;
2467
2468 while (L && Stmt.getParent()->getRegion().contains(L)) {
2469 Loops.push_back(L);
2470 L = L->getParentLoop();
2471 }
2472
2473 Stmt.NestLoops.insert(Stmt.NestLoops.begin(), Loops.rbegin(), Loops.rend());
2474 }
2475
2476 /// Return the reduction type for a given binary operator.
2477 static MemoryAccess::ReductionType getReductionType(const BinaryOperator *BinOp,
2478 const Instruction *Load) {
2479 if (!BinOp)
2480 return MemoryAccess::RT_NONE;
2481 switch (BinOp->getOpcode()) {
2482 case Instruction::FAdd:
2483 if (!BinOp->isFast())
2484 return MemoryAccess::RT_NONE;
2485 LLVM_FALLTHROUGH;
2486 case Instruction::Add:
2487 return MemoryAccess::RT_ADD;
2488 case Instruction::Or:
2489 return MemoryAccess::RT_BOR;
2490 case Instruction::Xor:
2491 return MemoryAccess::RT_BXOR;
2492 case Instruction::And:
2493 return MemoryAccess::RT_BAND;
2494 case Instruction::FMul:
2495 if (!BinOp->isFast())
2496 return MemoryAccess::RT_NONE;
2497 LLVM_FALLTHROUGH;
2498 case Instruction::Mul:
2499 if (DisableMultiplicativeReductions)
2500 return MemoryAccess::RT_NONE;
2501 return MemoryAccess::RT_MUL;
2502 default:
2503 return MemoryAccess::RT_NONE;
2504 }
2505 }
2506
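// Illustrative sketch (made-up statement): in a statement containing only
//
//   %v = load %A[i]; %s = fadd fast %v, %b; store %s, %A[i]
//
// the load/store pair on A[i] is a candidate, no other access overlaps with
// A[i], and both accesses are marked as an RT_ADD reduction below.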
2507 void ScopBuilder::checkForReductions(ScopStmt &Stmt) {
2508 SmallVector<MemoryAccess *, 2> Loads;
2509 SmallVector<std::pair<MemoryAccess *, MemoryAccess *>, 4> Candidates;
2510
2511 // First collect candidate load-store reduction chains by iterating over all
2512 // stores and collecting possible reduction loads.
2513 for (MemoryAccess *StoreMA : Stmt) {
2514 if (StoreMA->isRead())
2515 continue;
2516
2517 Loads.clear();
2518 collectCandidateReductionLoads(StoreMA, Loads);
2519 for (MemoryAccess *LoadMA : Loads)
2520 Candidates.push_back(std::make_pair(LoadMA, StoreMA));
2521 }
2522
2523 // Then check each possible candidate pair.
2524 for (const auto &CandidatePair : Candidates) {
2525 bool Valid = true;
2526 isl::map LoadAccs = CandidatePair.first->getAccessRelation();
2527 isl::map StoreAccs = CandidatePair.second->getAccessRelation();
2528
2529 // Skip those with obviously unequal base addresses.
2530 if (!LoadAccs.has_equal_space(StoreAccs)) {
2531 continue;
2532 }
2533
2534 // Then check the remaining candidates for overlap with other memory accesses.
2535 isl::map AllAccsRel = LoadAccs.unite(StoreAccs);
2536 AllAccsRel = AllAccsRel.intersect_domain(Stmt.getDomain());
2537 isl::set AllAccs = AllAccsRel.range();
2538
2539 for (MemoryAccess *MA : Stmt) {
2540 if (MA == CandidatePair.first || MA == CandidatePair.second)
2541 continue;
2542
2543 isl::map AccRel =
2544 MA->getAccessRelation().intersect_domain(Stmt.getDomain());
2545 isl::set Accs = AccRel.range();
2546
2547 if (AllAccs.has_equal_space(Accs)) {
2548 isl::set OverlapAccs = Accs.intersect(AllAccs);
2549 Valid = Valid && OverlapAccs.is_empty();
2550 }
2551 }
2552
2553 if (!Valid)
2554 continue;
2555
2556 const LoadInst *Load =
2557 dyn_cast<const LoadInst>(CandidatePair.first->getAccessInstruction());
2558 MemoryAccess::ReductionType RT =
2559 getReductionType(dyn_cast<BinaryOperator>(Load->user_back()), Load);
2560
2561 // If no overlapping access was found we mark the load and store as
2562 // reduction like.
2563 CandidatePair.first->markAsReductionLike(RT);
2564 CandidatePair.second->markAsReductionLike(RT);
2565 }
2566 }
2567
2568 void ScopBuilder::verifyInvariantLoads() {
2569 auto &RIL = scop->getRequiredInvariantLoads();
2570 for (LoadInst *LI : RIL) {
2571 assert(LI && scop->contains(LI));
2572 // If there exists a statement in the scop which has a memory access for
2573 // @p LI, then mark this scop as infeasible for optimization.
2574 for (ScopStmt &Stmt : *scop)
2575 if (Stmt.getArrayAccessOrNULLFor(LI)) {
2576 scop->invalidate(INVARIANTLOAD, LI->getDebugLoc(), LI->getParent());
2577 return;
2578 }
2579 }
2580 }
2581
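// Illustrative sketch: a load such as 'n = S->size' whose location is never
// written inside the SCoP gets an empty (but valid) non-hoistable context
// below, is removed from its statement and re-added as an invariant
// (pre-loaded) access, so 'n' can then be used like a parameter.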
2582 void ScopBuilder::hoistInvariantLoads() {
2583 if (!PollyInvariantLoadHoisting)
2584 return;
2585
2586 isl::union_map Writes = scop->getWrites();
2587 for (ScopStmt &Stmt : *scop) {
2588 InvariantAccessesTy InvariantAccesses;
2589
2590 for (MemoryAccess *Access : Stmt) {
2591 isl::set NHCtx = getNonHoistableCtx(Access, Writes);
2592 if (!NHCtx.is_null())
2593 InvariantAccesses.push_back({Access, NHCtx});
2594 }
2595
2596 // Transfer the memory access from the statement to the SCoP.
2597 for (auto InvMA : InvariantAccesses)
2598 Stmt.removeMemoryAccess(InvMA.MA);
2599 addInvariantLoads(Stmt, InvariantAccesses);
2600 }
2601 }
2602
2603 /// Check if an access range is too complex.
2604 ///
2605 /// An access range is too complex, if it contains either many disjuncts or
2606 /// very complex expressions. As a simple heuristic, we assume a set to
2607 /// be too complex if the sum of existentially quantified dimensions and
2608 /// set dimensions is larger than a threshold. This reliably detects both
2609 /// sets with many disjuncts as well as sets with many divisions as they
2610 /// arise in h264.
2611 ///
2612 /// @param AccessRange The range to check for complexity.
2613 ///
2614 /// @returns True if the access range is too complex.
2615 static bool isAccessRangeTooComplex(isl::set AccessRange) {
2616 unsigned NumTotalDims = 0;
2617
2618 for (isl::basic_set BSet : AccessRange.get_basic_set_list()) {
2619 NumTotalDims += unsignedFromIslSize(BSet.dim(isl::dim::div));
2620 NumTotalDims += unsignedFromIslSize(BSet.dim(isl::dim::set));
2621 }
2622
2623 if (NumTotalDims > MaxDimensionsInAccessRange)
2624 return true;
2625
2626 return false;
2627 }
2628
2629 bool ScopBuilder::hasNonHoistableBasePtrInScop(MemoryAccess *MA,
2630 isl::union_map Writes) {
2631 if (auto *BasePtrMA = scop->lookupBasePtrAccess(MA)) {
2632 return getNonHoistableCtx(BasePtrMA, Writes).is_null();
2633 }
2634
2635 Value *BaseAddr = MA->getOriginalBaseAddr();
2636 if (auto *BasePtrInst = dyn_cast<Instruction>(BaseAddr))
2637 if (!isa<LoadInst>(BasePtrInst))
2638 return scop->contains(BasePtrInst);
2639
2640 return false;
2641 }
2642
2643 void ScopBuilder::addUserContext() {
2644 if (UserContextStr.empty())
2645 return;
2646
2647 isl::set UserContext = isl::set(scop->getIslCtx(), UserContextStr.c_str());
2648 isl::space Space = scop->getParamSpace();
2649 isl::size SpaceParams = Space.dim(isl::dim::param);
2650 if (unsignedFromIslSize(SpaceParams) !=
2651 unsignedFromIslSize(UserContext.dim(isl::dim::param))) {
2652 std::string SpaceStr = stringFromIslObj(Space, "null");
2653 errs() << "Error: the context provided in -polly-context has not the same "
2654 << "number of dimensions than the computed context. Due to this "
2655 << "mismatch, the -polly-context option is ignored. Please provide "
2656 << "the context in the parameter space: " << SpaceStr << ".\n";
2657 return;
2658 }
2659
2660 for (auto i : rangeIslSize(0, SpaceParams)) {
2661 std::string NameContext =
2662 scop->getContext().get_dim_name(isl::dim::param, i);
2663 std::string NameUserContext = UserContext.get_dim_name(isl::dim::param, i);
2664
2665 if (NameContext != NameUserContext) {
2666 std::string SpaceStr = stringFromIslObj(Space, "null");
2667 errs() << "Error: the name of dimension " << i
2668 << " provided in -polly-context "
2669 << "is '" << NameUserContext << "', but the name in the computed "
2670 << "context is '" << NameContext
2671 << "'. Due to this name mismatch, "
2672 << "the -polly-context option is ignored. Please provide "
2673 << "the context in the parameter space: " << SpaceStr << ".\n";
2674 return;
2675 }
2676
2677 UserContext = UserContext.set_dim_id(isl::dim::param, i,
2678 Space.get_dim_id(isl::dim::param, i));
2679 }
2680 isl::set newContext = scop->getContext().intersect(UserContext);
2681 scop->setContext(newContext);
2682 }
2683
2684 isl::set ScopBuilder::getNonHoistableCtx(MemoryAccess *Access,
2685 isl::union_map Writes) {
2686 // TODO: Loads that are not loop carried, hence are in a statement with
2687 // zero iterators, are by construction invariant, though we
2688 // currently "hoist" them anyway. This is necessary because we allow
2689 // them to be treated as parameters (e.g., in conditions) and our code
2690 // generation would otherwise use the old value.
2691
2692 auto &Stmt = *Access->getStatement();
2693 BasicBlock *BB = Stmt.getEntryBlock();
2694
2695 if (Access->isScalarKind() || Access->isWrite() || !Access->isAffine() ||
2696 Access->isMemoryIntrinsic())
2697 return {};
2698
2699 // Skip accesses that have an invariant base pointer which is defined but
2700 // not loaded inside the SCoP. This can happen, e.g., if a readnone call
2701 // returns a pointer that is used as a base address. However, as we want
2702 // to hoist indirect pointers, we allow the base pointer to be defined in
2703 // the region if it is also a memory access. Each ScopArrayInfo object
2704 // that has a base pointer origin has a base pointer that is loaded and
2705 // that is invariant, thus it will be hoisted too. However, if there is
2706 // no base pointer origin we check that the base pointer is defined
2707 // outside the region.
2708 auto *LI = cast<LoadInst>(Access->getAccessInstruction());
2709 if (hasNonHoistableBasePtrInScop(Access, Writes))
2710 return {};
2711
2712 isl::map AccessRelation = Access->getAccessRelation();
2713 assert(!AccessRelation.is_empty());
2714
2715 if (AccessRelation.involves_dims(isl::dim::in, 0, Stmt.getNumIterators()))
2716 return {};
2717
2718 AccessRelation = AccessRelation.intersect_domain(Stmt.getDomain());
2719 isl::set SafeToLoad;
2720
2721 auto &DL = scop->getFunction().getParent()->getDataLayout();
2722 if (isSafeToLoadUnconditionally(LI->getPointerOperand(), LI->getType(),
2723 LI->getAlign(), DL)) {
2724 SafeToLoad = isl::set::universe(AccessRelation.get_space().range());
2725 } else if (BB != LI->getParent()) {
2726 // Skip accesses in non-affine subregions as they might not be executed
2727 // under the same condition as the entry of the non-affine subregion.
2728 return {};
2729 } else {
2730 SafeToLoad = AccessRelation.range();
2731 }
2732
2733 if (isAccessRangeTooComplex(AccessRelation.range()))
2734 return {};
2735
2736 isl::union_map Written = Writes.intersect_range(SafeToLoad);
2737 isl::set WrittenCtx = Written.params();
2738 bool IsWritten = !WrittenCtx.is_empty();
2739
2740 if (!IsWritten)
2741 return WrittenCtx;
2742
2743 WrittenCtx = WrittenCtx.remove_divs();
2744 bool TooComplex =
2745 unsignedFromIslSize(WrittenCtx.n_basic_set()) >= MaxDisjunctsInDomain;
2746 if (TooComplex || !isRequiredInvariantLoad(LI))
2747 return {};
2748
2749 scop->addAssumption(INVARIANTLOAD, WrittenCtx, LI->getDebugLoc(),
2750 AS_RESTRICTION, LI->getParent());
2751 return WrittenCtx;
2752 }
2753
2754 static bool isAParameter(llvm::Value *maybeParam, const Function &F) {
2755 for (const llvm::Argument &Arg : F.args())
2756 if (&Arg == maybeParam)
2757 return true;
2758
2759 return false;
2760 }
2761
2762 bool ScopBuilder::canAlwaysBeHoisted(MemoryAccess *MA,
2763 bool StmtInvalidCtxIsEmpty,
2764 bool MAInvalidCtxIsEmpty,
2765 bool NonHoistableCtxIsEmpty) {
2766 LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction());
2767 const DataLayout &DL = LInst->getParent()->getModule()->getDataLayout();
2768 if (PollyAllowDereferenceOfAllFunctionParams &&
2769 isAParameter(LInst->getPointerOperand(), scop->getFunction()))
2770 return true;
2771
2772 // TODO: We can provide more information for better but more expensive
2773 // results.
2774 if (!isDereferenceableAndAlignedPointer(
2775 LInst->getPointerOperand(), LInst->getType(), LInst->getAlign(), DL))
2776 return false;
2777
2778 // If the location might be overwritten we do not hoist it unconditionally.
2779 //
2780 // TODO: This is probably too conservative.
2781 if (!NonHoistableCtxIsEmpty)
2782 return false;
2783
2784 // If a dereferenceable load is in a statement that is modeled precisely we
2785 // can hoist it.
2786 if (StmtInvalidCtxIsEmpty && MAInvalidCtxIsEmpty)
2787 return true;
2788
2789 // Even if the statement is not modeled precisely we can hoist the load if it
2790 // does not involve any parameters that might have been specialized by the
2791 // statement domain.
2792 for (const SCEV *Subscript : MA->subscripts())
2793 if (!isa<SCEVConstant>(Subscript))
2794 return false;
2795 return true;
2796 }
2797
2798 void ScopBuilder::addInvariantLoads(ScopStmt &Stmt,
2799 InvariantAccessesTy &InvMAs) {
2800 if (InvMAs.empty())
2801 return;
2802
2803 isl::set StmtInvalidCtx = Stmt.getInvalidContext();
2804 bool StmtInvalidCtxIsEmpty = StmtInvalidCtx.is_empty();
2805
2806 // Get the context under which the statement is executed but remove the error
2807 // context under which this statement is reached.
2808 isl::set DomainCtx = Stmt.getDomain().params();
2809 DomainCtx = DomainCtx.subtract(StmtInvalidCtx);
2810
2811 if (unsignedFromIslSize(DomainCtx.n_basic_set()) >= MaxDisjunctsInDomain) {
2812 auto *AccInst = InvMAs.front().MA->getAccessInstruction();
2813 scop->invalidate(COMPLEXITY, AccInst->getDebugLoc(), AccInst->getParent());
2814 return;
2815 }
2816
2817 // Project out all parameters that relate to loads in the statement. Otherwise
2818 // we could have cyclic dependences on the constraints under which the
2819 // hoisted loads are executed and we could not determine an order in which to
2820 // pre-load them. This happens because not only lower bounds are part of the
2821 // domain but also upper bounds.
2822 for (auto &InvMA : InvMAs) {
2823 auto *MA = InvMA.MA;
2824 Instruction *AccInst = MA->getAccessInstruction();
2825 if (SE.isSCEVable(AccInst->getType())) {
2826 SetVector<Value *> Values;
2827 for (const SCEV *Parameter : scop->parameters()) {
2828 Values.clear();
2829 findValues(Parameter, SE, Values);
2830 if (!Values.count(AccInst))
2831 continue;
2832
2833 isl::id ParamId = scop->getIdForParam(Parameter);
2834 if (!ParamId.is_null()) {
2835 int Dim = DomainCtx.find_dim_by_id(isl::dim::param, ParamId);
2836 if (Dim >= 0)
2837 DomainCtx = DomainCtx.eliminate(isl::dim::param, Dim, 1);
2838 }
2839 }
2840 }
2841 }
2842
2843 for (auto &InvMA : InvMAs) {
2844 auto *MA = InvMA.MA;
2845 isl::set NHCtx = InvMA.NonHoistableCtx;
2846
2847 // Check for another invariant access that accesses the same location as
2848 // MA and if found consolidate them. Otherwise create a new equivalence
2849 // class at the end of InvariantEquivClasses.
2850 LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction());
2851 Type *Ty = LInst->getType();
2852 const SCEV *PointerSCEV = SE.getSCEV(LInst->getPointerOperand());
2853
2854 isl::set MAInvalidCtx = MA->getInvalidContext();
2855 bool NonHoistableCtxIsEmpty = NHCtx.is_empty();
2856 bool MAInvalidCtxIsEmpty = MAInvalidCtx.is_empty();
2857
2858 isl::set MACtx;
2859 // Check if we know that this pointer can be speculatively accessed.
2860 if (canAlwaysBeHoisted(MA, StmtInvalidCtxIsEmpty, MAInvalidCtxIsEmpty,
2861 NonHoistableCtxIsEmpty)) {
2862 MACtx = isl::set::universe(DomainCtx.get_space());
2863 } else {
2864 MACtx = DomainCtx;
2865 MACtx = MACtx.subtract(MAInvalidCtx.unite(NHCtx));
2866 MACtx = MACtx.gist_params(scop->getContext());
2867 }
2868
2869 bool Consolidated = false;
2870 for (auto &IAClass : scop->invariantEquivClasses()) {
2871 if (PointerSCEV != IAClass.IdentifyingPointer || Ty != IAClass.AccessType)
2872 continue;
2873
      // If the pointer and the type are equal, check whether the access
      // function w.r.t. the domain is equal too. It can happen that the domain
      // fixes parameter values and these can differ for distinct parts of the
      // SCoP. If this happens we cannot consolidate the loads but need to
      // create a new invariant load equivalence class.
      auto &MAs = IAClass.InvariantAccesses;
      if (!MAs.empty()) {
        auto *LastMA = MAs.front();

        isl::set AR = MA->getAccessRelation().range();
        isl::set LastAR = LastMA->getAccessRelation().range();
        bool SameAR = AR.is_equal(LastAR);

        if (!SameAR)
          continue;
      }

      // Add MA to the list of accesses that are in this class.
      MAs.push_front(MA);

      Consolidated = true;

      // Unify the execution context of the class and this statement.
      isl::set IAClassDomainCtx = IAClass.ExecutionContext;
      if (!IAClassDomainCtx.is_null())
        IAClassDomainCtx = IAClassDomainCtx.unite(MACtx).coalesce();
      else
        IAClassDomainCtx = MACtx;
      IAClass.ExecutionContext = IAClassDomainCtx;
      break;
    }

    if (Consolidated)
      continue;

    MACtx = MACtx.coalesce();

    // If we did not consolidate MA, thus did not find an equivalence class
    // for it, we create a new one.
    scop->addInvariantEquivClass(
        InvariantEquivClassTy{PointerSCEV, MemoryAccessList{MA}, MACtx, Ty});
  }
}

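/// Collect loads that feed the value stored by @p StoreMA through a single
/// commutative and associative binary operator. Such loads are candidates for
/// reduction detection and are appended to @p Loads.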
void ScopBuilder::collectCandidateReductionLoads(
    MemoryAccess *StoreMA, SmallVectorImpl<MemoryAccess *> &Loads) {
  ScopStmt *Stmt = StoreMA->getStatement();

  auto *Store = dyn_cast<StoreInst>(StoreMA->getAccessInstruction());
  if (!Store)
    return;

  // Skip if the stored value is not produced by a single binary operator.
  auto *BinOp = dyn_cast<BinaryOperator>(Store->getValueOperand());
  if (!BinOp)
    return;

  // Skip if the binary operator has multiple uses.
  if (BinOp->getNumUses() != 1)
    return;

  // Skip if the opcode of the binary operator is not commutative/associative.
  if (!BinOp->isCommutative() || !BinOp->isAssociative())
    return;

  // Skip if the binary operator is not in the same basic block as the store.
  if (BinOp->getParent() != Store->getParent())
    return;

  // Skip if it is a multiplicative reduction and we disabled them.
  if (DisableMultiplicativeReductions &&
      (BinOp->getOpcode() == Instruction::Mul ||
       BinOp->getOpcode() == Instruction::FMul))
    return;

  // Check the binary operator operands for a candidate load.
  auto *PossibleLoad0 = dyn_cast<LoadInst>(BinOp->getOperand(0));
  auto *PossibleLoad1 = dyn_cast<LoadInst>(BinOp->getOperand(1));
  if (!PossibleLoad0 && !PossibleLoad1)
    return;

  // A load is only a candidate if it cannot escape (thus has only this use).
  if (PossibleLoad0 && PossibleLoad0->getNumUses() == 1)
    if (PossibleLoad0->getParent() == Store->getParent())
      Loads.push_back(&Stmt->getArrayAccessFor(PossibleLoad0));
  if (PossibleLoad1 && PossibleLoad1->getNumUses() == 1)
    if (PossibleLoad1->getParent() == Store->getParent())
      Loads.push_back(&Stmt->getArrayAccessFor(PossibleLoad1));
}

/// Find the canonical ScopArrayInfo object for a set of invariant,
/// load-hoisted loads. The canonical array is the one that corresponds to the
/// first load in the list of accesses that is used as base pointer of a
/// scop array.
static const ScopArrayInfo *findCanonicalArray(Scop &S,
                                               MemoryAccessList &Accesses) {
  for (MemoryAccess *Access : Accesses) {
    const ScopArrayInfo *CanonicalArray = S.getScopArrayInfoOrNull(
        Access->getAccessInstruction(), MemoryKind::Array);
    if (CanonicalArray)
      return CanonicalArray;
  }
  return nullptr;
}

/// Check if @p Array serves as base array in an invariant load.
static bool isUsedForIndirectHoistedLoad(Scop &S, const ScopArrayInfo *Array) {
  for (InvariantEquivClassTy &EqClass2 : S.getInvariantAccesses())
    for (MemoryAccess *Access2 : EqClass2.InvariantAccesses)
      if (Access2->getScopArrayInfo() == Array)
        return true;
  return false;
}

/// Replace the base pointer arrays in all memory accesses referencing @p Old
/// with a reference to @p New.
static void replaceBasePtrArrays(Scop &S, const ScopArrayInfo *Old,
                                 const ScopArrayInfo *New) {
  for (ScopStmt &Stmt : S)
    for (MemoryAccess *Access : Stmt) {
      if (Access->getLatestScopArrayInfo() != Old)
        continue;

      isl::id Id = New->getBasePtrId();
      isl::map Map = Access->getAccessRelation();
      Map = Map.set_tuple_id(isl::dim::out, Id);
      Access->setAccessRelation(Map);
    }
}

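/// For each class of invariant loads that are used as base pointers, choose a
/// canonical ScopArrayInfo and redirect all accesses that use a compatible
/// array to it, so that accesses through different but identical base-pointer
/// loads share a single array.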
void ScopBuilder::canonicalizeDynamicBasePtrs() {
  for (InvariantEquivClassTy &EqClass : scop->InvariantEquivClasses) {
    MemoryAccessList &BasePtrAccesses = EqClass.InvariantAccesses;

    const ScopArrayInfo *CanonicalBasePtrSAI =
        findCanonicalArray(*scop, BasePtrAccesses);

    if (!CanonicalBasePtrSAI)
      continue;

    for (MemoryAccess *BasePtrAccess : BasePtrAccesses) {
      const ScopArrayInfo *BasePtrSAI = scop->getScopArrayInfoOrNull(
          BasePtrAccess->getAccessInstruction(), MemoryKind::Array);
      if (!BasePtrSAI || BasePtrSAI == CanonicalBasePtrSAI ||
          !BasePtrSAI->isCompatibleWith(CanonicalBasePtrSAI))
        continue;

      // We currently do not canonicalize arrays for which some accesses are
      // hoisted as invariant loads. If we did, we would also need to update
      // the access functions of the invariant loads. However, as this is not
      // a very common situation, we leave this for now to avoid further
      // increases in complexity.
      if (isUsedForIndirectHoistedLoad(*scop, BasePtrSAI))
        continue;

      replaceBasePtrArrays(*scop, BasePtrSAI, CanonicalBasePtrSAI);
    }
  }
}

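/// Build the access relations for all memory accesses of @p Stmt and register
/// the accessed arrays with the SCoP.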
void ScopBuilder::buildAccessRelations(ScopStmt &Stmt) {
  for (MemoryAccess *Access : Stmt.MemAccs) {
    Type *ElementType = Access->getElementType();

    MemoryKind Ty;
    if (Access->isPHIKind())
      Ty = MemoryKind::PHI;
    else if (Access->isExitPHIKind())
      Ty = MemoryKind::ExitPHI;
    else if (Access->isValueKind())
      Ty = MemoryKind::Value;
    else
      Ty = MemoryKind::Array;

    // Create isl::pw_aff for SCEVs which describe sizes. Collect all
    // assumptions which are taken. isl::pw_aff objects are cached internally
    // and they are used later by scop.
    for (const SCEV *Size : Access->Sizes) {
      if (!Size)
        continue;
      scop->getPwAff(Size, nullptr, false, &RecordedAssumptions);
    }
    auto *SAI = scop->getOrCreateScopArrayInfo(Access->getOriginalBaseAddr(),
                                               ElementType, Access->Sizes, Ty);

    // Create isl::pw_aff for SCEVs which describe subscripts. Collect all
    // assumptions which are taken. isl::pw_aff objects are cached internally
    // and they are used later by scop.
    for (const SCEV *Subscript : Access->subscripts()) {
      if (!Access->isAffine() || !Subscript)
        continue;
      scop->getPwAff(Subscript, Stmt.getEntryBlock(), false,
                     &RecordedAssumptions);
    }
    Access->buildAccessRelation(SAI);
    scop->addAccessData(Access);
  }
}

/// Add the minimal/maximal access in @p Set to @p MinMaxAccesses.
///
/// @return True if more accesses should be added, false if we reached the
///         maximal number of run-time checks to be generated.
static bool buildMinMaxAccess(isl::set Set,
                              Scop::MinMaxVectorTy &MinMaxAccesses, Scop &S) {
  isl::pw_multi_aff MinPMA, MaxPMA;
  isl::pw_aff LastDimAff;
  isl::aff OneAff;
  unsigned Pos;

  Set = Set.remove_divs();
  polly::simplify(Set);

  if (unsignedFromIslSize(Set.n_basic_set()) > RunTimeChecksMaxAccessDisjuncts)
    Set = Set.simple_hull();

  // Restrict the number of parameters involved in the access as the lexmin/
  // lexmax computation will take too long if this number is high.
  //
  // Experiments with a simple test case using an i7 4800MQ:
  //
  //  #Parameters involved | Time (in sec)
  //            6          |     0.01
  //            7          |     0.04
  //            8          |     0.12
  //            9          |     0.40
  //           10          |     1.54
  //           11          |     6.78
  //           12          |    30.38
  //
  if (isl_set_n_param(Set.get()) >
      static_cast<isl_size>(RunTimeChecksMaxParameters)) {
    unsigned InvolvedParams = 0;
    for (unsigned u = 0, e = isl_set_n_param(Set.get()); u < e; u++)
      if (Set.involves_dims(isl::dim::param, u, 1))
        InvolvedParams++;

    if (InvolvedParams > RunTimeChecksMaxParameters)
      return false;
  }

  MinPMA = Set.lexmin_pw_multi_aff();
  MaxPMA = Set.lexmax_pw_multi_aff();

  MinPMA = MinPMA.coalesce();
  MaxPMA = MaxPMA.coalesce();

  if (MaxPMA.is_null())
    return false;

  unsigned MaxOutputSize = unsignedFromIslSize(MaxPMA.dim(isl::dim::out));

  // Adjust the last dimension of the maximal access by one as we want to
  // enclose the accessed memory region by MinPMA and MaxPMA. The pointer
  // we test during code generation might now point after the end of the
  // allocated array but we will never dereference it anyway.
  assert(MaxOutputSize >= 1 && "Assumed at least one output dimension");

  Pos = MaxOutputSize - 1;
  LastDimAff = MaxPMA.at(Pos);
  OneAff = isl::aff(isl::local_space(LastDimAff.get_domain_space()));
  OneAff = OneAff.add_constant_si(1);
  LastDimAff = LastDimAff.add(OneAff);
  MaxPMA = MaxPMA.set_pw_aff(Pos, LastDimAff);

  if (MinPMA.is_null() || MaxPMA.is_null())
    return false;

  MinMaxAccesses.push_back(std::make_pair(MinPMA, MaxPMA));

  return true;
}

/// Wrapper function to calculate minimal/maximal accesses to each array.
bool ScopBuilder::calculateMinMaxAccess(AliasGroupTy AliasGroup,
                                        Scop::MinMaxVectorTy &MinMaxAccesses) {
  MinMaxAccesses.reserve(AliasGroup.size());

  isl::union_set Domains = scop->getDomains();
  isl::union_map Accesses = isl::union_map::empty(scop->getIslCtx());

  for (MemoryAccess *MA : AliasGroup)
    Accesses = Accesses.unite(MA->getAccessRelation());

  Accesses = Accesses.intersect_domain(Domains);
  isl::union_set Locations = Accesses.range();

  bool LimitReached = false;
  for (isl::set Set : Locations.get_set_list()) {
    LimitReached |= !buildMinMaxAccess(Set, MinMaxAccesses, *scop);
    if (LimitReached)
      break;
  }

  return !LimitReached;
}

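/// Return the domain of the statement @p MA belongs to with all set dimensions
/// projected out, i.e., only the parameter constraints remain.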
static isl::set getAccessDomain(MemoryAccess *MA) {
  isl::set Domain = MA->getStatement()->getDomain();
  Domain = Domain.project_out(isl::dim::set, 0,
                              unsignedFromIslSize(Domain.tuple_dim()));
  return Domain.reset_tuple_id();
}

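/// Build the run-time alias checks for this SCoP, if enabled. Returns false
/// and invalidates the SCoP if the checks could not be constructed.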
bool ScopBuilder::buildAliasChecks() {
  if (!PollyUseRuntimeAliasChecks)
    return true;

  if (buildAliasGroups()) {
    // Aliasing assumptions do not go through addAssumption but we still want
    // to collect statistics so we do it here explicitly.
    if (scop->getAliasGroups().size())
      Scop::incrementNumberOfAliasingAssumptions(1);
    return true;
  }

  // If a problem occurs while building the alias groups we need to delete
  // this SCoP and pretend it wasn't valid in the first place. To this end
  // we make the assumed context infeasible.
  scop->invalidate(ALIASING, DebugLoc());

  LLVM_DEBUG(dbgs() << "\n\nNOTE: Run time checks for " << scop->getNameStr()
                    << " could not be created. This SCoP has been dismissed.");
  return false;
}

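/// Group the array accesses of the SCoP into may-alias sets and return the
/// resulting alias groups together with the set of arrays that are written.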
std::tuple<ScopBuilder::AliasGroupVectorTy, DenseSet<const ScopArrayInfo *>>
ScopBuilder::buildAliasGroupsForAccesses() {
  AliasSetTracker AST(AA);

  DenseMap<Value *, MemoryAccess *> PtrToAcc;
  DenseSet<const ScopArrayInfo *> HasWriteAccess;
  for (ScopStmt &Stmt : *scop) {

    isl::set StmtDomain = Stmt.getDomain();
    bool StmtDomainEmpty = StmtDomain.is_empty();

    // Statements with an empty domain will never be executed.
    if (StmtDomainEmpty)
      continue;

    for (MemoryAccess *MA : Stmt) {
      if (MA->isScalarKind())
        continue;
      if (!MA->isRead())
        HasWriteAccess.insert(MA->getScopArrayInfo());
      MemAccInst Acc(MA->getAccessInstruction());
      if (MA->isRead() && isa<MemTransferInst>(Acc))
        PtrToAcc[cast<MemTransferInst>(Acc)->getRawSource()] = MA;
      else
        PtrToAcc[Acc.getPointerOperand()] = MA;
      AST.add(Acc);
    }
  }

  AliasGroupVectorTy AliasGroups;
  for (AliasSet &AS : AST) {
    if (AS.isMustAlias() || AS.isForwardingAliasSet())
      continue;
    AliasGroupTy AG;
    for (auto &PR : AS)
      AG.push_back(PtrToAcc[PR.getValue()]);
    if (AG.size() < 2)
      continue;
    AliasGroups.push_back(std::move(AG));
  }

  return std::make_tuple(AliasGroups, HasWriteAccess);
}

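/// Build the alias groups for this SCoP and compute the run-time checks for
/// each of them. Returns false if the SCoP has to be dismissed.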
bool ScopBuilder::buildAliasGroups() {
  // To create sound alias checks we perform the following steps:
  //   o) We partition each group into read only and non read only accesses.
  //   o) For each group with more than one base pointer we then compute
  //      minimal and maximal accesses to each array of a group in read only
  //      and non read only partitions separately.
  AliasGroupVectorTy AliasGroups;
  DenseSet<const ScopArrayInfo *> HasWriteAccess;

  std::tie(AliasGroups, HasWriteAccess) = buildAliasGroupsForAccesses();

  splitAliasGroupsByDomain(AliasGroups);

  for (AliasGroupTy &AG : AliasGroups) {
    if (!scop->hasFeasibleRuntimeContext())
      return false;

    {
      IslMaxOperationsGuard MaxOpGuard(scop->getIslCtx().get(), OptComputeOut);
      bool Valid = buildAliasGroup(AG, HasWriteAccess);
      if (!Valid)
        return false;
    }
    if (isl_ctx_last_error(scop->getIslCtx().get()) == isl_error_quota) {
      scop->invalidate(COMPLEXITY, DebugLoc());
      return false;
    }
  }

  return true;
}

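/// Compute the minimal/maximal access ranges for one alias group and register
/// them as a run-time alias check. Returns false if no sound check can be
/// generated and the SCoP has to be dismissed.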
bool ScopBuilder::buildAliasGroup(
    AliasGroupTy &AliasGroup, DenseSet<const ScopArrayInfo *> HasWriteAccess) {
  AliasGroupTy ReadOnlyAccesses;
  AliasGroupTy ReadWriteAccesses;
  SmallPtrSet<const ScopArrayInfo *, 4> ReadWriteArrays;
  SmallPtrSet<const ScopArrayInfo *, 4> ReadOnlyArrays;

  if (AliasGroup.size() < 2)
    return true;

  for (MemoryAccess *Access : AliasGroup) {
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "PossibleAlias",
                                        Access->getAccessInstruction())
             << "Possibly aliasing pointer, use restrict keyword.");
    const ScopArrayInfo *Array = Access->getScopArrayInfo();
    if (HasWriteAccess.count(Array)) {
      ReadWriteArrays.insert(Array);
      ReadWriteAccesses.push_back(Access);
    } else {
      ReadOnlyArrays.insert(Array);
      ReadOnlyAccesses.push_back(Access);
    }
  }

  // If there are no read-only pointers, and less than two read-write pointers,
  // no alias check is needed.
  if (ReadOnlyAccesses.empty() && ReadWriteArrays.size() <= 1)
    return true;

  // If there is no read-write pointer, no alias check is needed.
  if (ReadWriteArrays.empty())
    return true;

  // For non-affine accesses, no alias check can be generated as we cannot
  // compute a sufficiently tight lower and upper bound: bail out.
  for (MemoryAccess *MA : AliasGroup) {
    if (!MA->isAffine()) {
      scop->invalidate(ALIASING, MA->getAccessInstruction()->getDebugLoc(),
                       MA->getAccessInstruction()->getParent());
      return false;
    }
  }

  // Ensure that for all memory accesses for which we generate alias checks,
  // their base pointers are available.
  for (MemoryAccess *MA : AliasGroup) {
    if (MemoryAccess *BasePtrMA = scop->lookupBasePtrAccess(MA))
      scop->addRequiredInvariantLoad(
          cast<LoadInst>(BasePtrMA->getAccessInstruction()));
  }

  // scop->getAliasGroups().emplace_back();
  // Scop::MinMaxVectorPairTy &pair = scop->getAliasGroups().back();
  Scop::MinMaxVectorTy MinMaxAccessesReadWrite;
  Scop::MinMaxVectorTy MinMaxAccessesReadOnly;

  bool Valid;

  Valid = calculateMinMaxAccess(ReadWriteAccesses, MinMaxAccessesReadWrite);

  if (!Valid)
    return false;

  // Bail out if the number of values we need to compare is too large.
  // This is important as the number of comparisons grows quadratically with
  // the number of values we need to compare.
  if (MinMaxAccessesReadWrite.size() + ReadOnlyArrays.size() >
      RunTimeChecksMaxArraysPerGroup)
    return false;

  Valid = calculateMinMaxAccess(ReadOnlyAccesses, MinMaxAccessesReadOnly);

  scop->addAliasGroup(MinMaxAccessesReadWrite, MinMaxAccessesReadOnly);
  if (!Valid)
    return false;

  return true;
}

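/// Split each alias group such that accesses whose statement domains are
/// disjoint end up in separate groups; such accesses are never executed for
/// the same parameter values and therefore need no mutual alias check.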
void ScopBuilder::splitAliasGroupsByDomain(AliasGroupVectorTy &AliasGroups) {
  for (unsigned u = 0; u < AliasGroups.size(); u++) {
    AliasGroupTy NewAG;
    AliasGroupTy &AG = AliasGroups[u];
    AliasGroupTy::iterator AGI = AG.begin();
    isl::set AGDomain = getAccessDomain(*AGI);
    while (AGI != AG.end()) {
      MemoryAccess *MA = *AGI;
      isl::set MADomain = getAccessDomain(MA);
      if (AGDomain.is_disjoint(MADomain)) {
        NewAG.push_back(MA);
        AGI = AG.erase(AGI);
      } else {
        AGDomain = AGDomain.unite(MADomain);
        AGI++;
      }
    }
    if (NewAG.size() > 1)
      AliasGroups.push_back(std::move(NewAG));
  }
}

#ifndef NDEBUG
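/// Check that the "physical" kind of use of operand @p Op (derived from the
/// IR) agrees with its "virtual" kind of use (derived from the statement's
/// MemoryAccesses).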
static void verifyUse(Scop *S, Use &Op, LoopInfo &LI) {
  auto PhysUse = VirtualUse::create(S, Op, &LI, false);
  auto VirtUse = VirtualUse::create(S, Op, &LI, true);
  assert(PhysUse.getKind() == VirtUse.getKind());
}

/// Check the consistency of every statement's MemoryAccesses.
///
/// The check is carried out by expecting the "physical" kind of use (derived
/// from the BasicBlock the instruction resides in) to be the same as the
/// "virtual" kind of use (derived from a statement's MemoryAccess).
///
/// The "physical" uses are taken by ensureValueRead to determine whether to
/// create MemoryAccesses. When done, the kind of scalar access should be the
/// same no matter which way it was derived.
///
/// The MemoryAccesses might be changed by later SCoP-modifying passes and
/// hence can intentionally influence the kind of uses (no longer corresponding
/// to the "physical" ones, hence called "virtual"). The code generator
/// therefore has to pick up the virtual uses. At this point, however, no such
/// pass has run yet, such that virtual and physical uses are still equivalent.
static void verifyUses(Scop *S, LoopInfo &LI, DominatorTree &DT) {
  for (auto *BB : S->getRegion().blocks()) {
    for (auto &Inst : *BB) {
      auto *Stmt = S->getStmtFor(&Inst);
      if (!Stmt)
        continue;

      if (isIgnoredIntrinsic(&Inst))
        continue;

      // Branch conditions are encoded in the statement domains.
      if (Inst.isTerminator() && Stmt->isBlockStmt())
        continue;

      // Verify all uses.
      for (auto &Op : Inst.operands())
        verifyUse(S, Op, LI);

      // Stores do not produce values used by other statements.
      if (isa<StoreInst>(Inst))
        continue;

      // For every value defined in the block, also check that a use of that
      // value in the same statement would not be an inter-statement use. It
      // can still be synthesizable or load-hoisted, but these kinds of
      // instructions are not directly copied in code-generation.
      auto VirtDef =
          VirtualUse::create(S, Stmt, Stmt->getSurroundingLoop(), &Inst, true);
      assert(VirtDef.getKind() == VirtualUse::Synthesizable ||
             VirtDef.getKind() == VirtualUse::Intra ||
             VirtDef.getKind() == VirtualUse::Hoisted);
    }
  }

  if (S->hasSingleExitEdge())
    return;

  // PHINodes in the SCoP region's exit block are also uses to be checked.
  if (!S->getRegion().isTopLevelRegion()) {
    for (auto &Inst : *S->getRegion().getExit()) {
      if (!isa<PHINode>(Inst))
        break;

      for (auto &Op : Inst.operands())
        verifyUse(S, Op, LI);
    }
  }
}
#endif

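/// Create the polyhedral representation of region @p R: statements, domains,
/// access relations, schedule, and run-time checks.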
void ScopBuilder::buildScop(Region &R, AssumptionCache &AC) {
  scop.reset(new Scop(R, SE, LI, DT, *SD.getDetectionContext(&R), ORE,
                      SD.getNextID()));

  buildStmts(R);

  // Create all invariant load instructions first. These are categorized as
  // 'synthesizable' and therefore are not part of any ScopStmt, but they still
  // need to be created somewhere.
  const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
  for (BasicBlock *BB : scop->getRegion().blocks()) {
    if (SD.isErrorBlock(*BB, scop->getRegion()))
      continue;

    for (Instruction &Inst : *BB) {
      LoadInst *Load = dyn_cast<LoadInst>(&Inst);
      if (!Load)
        continue;

      if (!RIL.count(Load))
        continue;

      // Invariant loads require a MemoryAccess to be created in some
      // statement. It is not important to which statement the MemoryAccess is
      // added because it will later be removed from the ScopStmt again. We
      // choose the first statement of the basic block the LoadInst is in.
      ArrayRef<ScopStmt *> List = scop->getStmtListFor(BB);
      assert(!List.empty());
      ScopStmt *RILStmt = List.front();
      buildMemoryAccess(Load, RILStmt);
    }
  }
  buildAccessFunctions();

  // In case the region does not have an exiting block we will later (during
  // code generation) split the exit block. This will move potential PHI nodes
  // from the current exit block into the new region exiting block. Hence, PHI
  // nodes that are currently not part of the region will become part of it.
  // To handle these PHI nodes later, we model their operands as scalar
  // accesses now. Note that we do not model anything in the exit block if we
  // have an exiting block in the region, as there will not be any splitting.
  if (!R.isTopLevelRegion() && !scop->hasSingleExitEdge()) {
    for (Instruction &Inst : *R.getExit()) {
      PHINode *PHI = dyn_cast<PHINode>(&Inst);
      if (!PHI)
        break;

      buildPHIAccesses(nullptr, PHI, nullptr, true);
    }
  }

  // Create memory accesses for global reads since all arrays are now known.
  auto *AF = SE.getConstant(IntegerType::getInt64Ty(SE.getContext()), 0);
  for (auto GlobalReadPair : GlobalReads) {
    ScopStmt *GlobalReadStmt = GlobalReadPair.first;
    Instruction *GlobalRead = GlobalReadPair.second;
    for (auto *BP : ArrayBasePointers)
      addArrayAccess(GlobalReadStmt, MemAccInst(GlobalRead), MemoryAccess::READ,
                     BP, BP->getType(), false, {AF}, {nullptr}, GlobalRead);
  }

  buildInvariantEquivalenceClasses();

  /// A map from basic blocks to their invalid domains.
  DenseMap<BasicBlock *, isl::set> InvalidDomainMap;

  if (!buildDomains(&R, InvalidDomainMap)) {
    LLVM_DEBUG(
        dbgs() << "Bailing-out because buildDomains encountered problems\n");
    return;
  }

  addUserAssumptions(AC, InvalidDomainMap);

  // Initialize the invalid domain.
  for (ScopStmt &Stmt : scop->Stmts)
    if (Stmt.isBlockStmt())
      Stmt.setInvalidDomain(InvalidDomainMap[Stmt.getEntryBlock()]);
    else
      Stmt.setInvalidDomain(InvalidDomainMap[getRegionNodeBasicBlock(
          Stmt.getRegion()->getNode())]);

  // Remove empty statements.
  // Exit early in case there are no executable statements left in this scop.
  scop->removeStmtNotInDomainMap();
  scop->simplifySCoP(false);
  if (scop->isEmpty()) {
    LLVM_DEBUG(dbgs() << "Bailing-out because SCoP is empty\n");
    return;
  }

  // The ScopStmts now have enough information to initialize themselves.
  for (ScopStmt &Stmt : *scop) {
    collectSurroundingLoops(Stmt);

    buildDomain(Stmt);
    buildAccessRelations(Stmt);

    if (DetectReductions)
      checkForReductions(Stmt);
  }

  // Check early for a feasible runtime context.
  if (!scop->hasFeasibleRuntimeContext()) {
    LLVM_DEBUG(dbgs() << "Bailing-out because of unfeasible context (early)\n");
    return;
  }

  // Check early for profitability. Afterwards it cannot change anymore,
  // only the runtime context could become infeasible.
  if (!scop->isProfitable(UnprofitableScalarAccs)) {
    scop->invalidate(PROFITABLE, DebugLoc());
    LLVM_DEBUG(
        dbgs() << "Bailing-out because SCoP is not considered profitable\n");
    return;
  }

  buildSchedule();

  finalizeAccesses();

  scop->realignParams();
  addUserContext();

  // After the context has been fully constructed, i.e., all our knowledge
  // about the parameters is in there, we add all recorded assumptions to the
  // assumed/invalid context.
  addRecordedAssumptions();

  scop->simplifyContexts();
  if (!buildAliasChecks()) {
    LLVM_DEBUG(dbgs() << "Bailing-out because could not build alias checks\n");
    return;
  }

  hoistInvariantLoads();
  canonicalizeDynamicBasePtrs();
  verifyInvariantLoads();
  scop->simplifySCoP(true);

  // Check late for a feasible runtime context because profitability did not
  // change.
  if (!scop->hasFeasibleRuntimeContext()) {
    LLVM_DEBUG(dbgs() << "Bailing-out because of unfeasible context (late)\n");
    return;
  }

#ifndef NDEBUG
  verifyUses(scop.get(), LI, DT);
#endif
}

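/// Build the Scop for region @p R, emit the corresponding optimization
/// remarks, and discard the result if no feasible runtime context remains.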
ScopBuilder::ScopBuilder(Region *R, AssumptionCache &AC, AAResults &AA,
                         const DataLayout &DL, DominatorTree &DT, LoopInfo &LI,
                         ScopDetection &SD, ScalarEvolution &SE,
                         OptimizationRemarkEmitter &ORE)
    : AA(AA), DL(DL), DT(DT), LI(LI), SD(SD), SE(SE), ORE(ORE) {
  DebugLoc Beg, End;
  auto P = getBBPairForRegion(R);
  getDebugLocations(P, Beg, End);

  std::string Msg = "SCoP begins here.";
  ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEntry", Beg, P.first)
           << Msg);

  buildScop(*R, AC);

  LLVM_DEBUG(dbgs() << *scop);

  if (!scop->hasFeasibleRuntimeContext()) {
    InfeasibleScops++;
    Msg = "SCoP ends here but was dismissed.";
    LLVM_DEBUG(dbgs() << "SCoP detected but dismissed\n");
    RecordedAssumptions.clear();
    scop.reset();
  } else {
    Msg = "SCoP ends here.";
    ++ScopFound;
    if (scop->getMaxLoopDepth() > 0)
      ++RichScopFound;
  }

  if (R->isTopLevelRegion())
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.first)
             << Msg);
  else
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.second)
             << Msg);
}