//===- ScopBuilder.cpp ----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Create a polyhedral description for a static control flow region.
//
// The pass creates a polyhedral description of the Scops detected by the SCoP
// detection derived from their LLVM-IR code.
//
//===----------------------------------------------------------------------===//

#include "polly/ScopBuilder.h"
#include "polly/Options.h"
#include "polly/ScopDetection.h"
#include "polly/ScopInfo.h"
#include "polly/Support/GICHelper.h"
#include "polly/Support/ISLTools.h"
#include "polly/Support/SCEVValidator.h"
#include "polly/Support/ScopHelper.h"
#include "polly/Support/VirtualInstruction.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/Delinearization.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/RegionIterator.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>

using namespace llvm;
using namespace polly;

#define DEBUG_TYPE "polly-scops"

STATISTIC(ScopFound, "Number of valid Scops");
STATISTIC(RichScopFound, "Number of Scops containing a loop");
STATISTIC(InfeasibleScops,
          "Number of SCoPs with statically infeasible context.");

bool polly::ModelReadOnlyScalars;

// The maximal number of dimensions we allow during invariant load construction.
// More complex access ranges will result in very high compile time and are also
// unlikely to result in good code. This value is very high and should only
// trigger for corner cases (e.g., the "dct_luma" function in h264, SPEC2006).
static unsigned const MaxDimensionsInAccessRange = 9;

static cl::opt<bool, true> XModelReadOnlyScalars(
    "polly-analyze-read-only-scalars",
    cl::desc("Model read-only scalar values in the scop description"),
    cl::location(ModelReadOnlyScalars), cl::Hidden, cl::ZeroOrMore,
    cl::init(true), cl::cat(PollyCategory));

static cl::opt<int>
    OptComputeOut("polly-analysis-computeout",
                  cl::desc("Bound the scop analysis by a maximal amount of "
                           "computational steps (0 means no bound)"),
                  cl::Hidden, cl::init(800000), cl::ZeroOrMore,
                  cl::cat(PollyCategory));

static cl::opt<bool> PollyAllowDereferenceOfAllFunctionParams(
    "polly-allow-dereference-of-all-function-parameters",
    cl::desc(
        "Treat all parameters to functions that are pointers as dereferenceable."
        " This is useful for invariant load hoisting, since we can generate"
        " fewer runtime checks. This is only valid if all pointers to functions"
        " are always initialized, so that Polly can choose to hoist"
        " their loads. "),
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));

static cl::opt<bool>
    PollyIgnoreInbounds("polly-ignore-inbounds",
                        cl::desc("Do not take inbounds assumptions at all"),
                        cl::Hidden, cl::init(false), cl::cat(PollyCategory));

static cl::opt<unsigned> RunTimeChecksMaxArraysPerGroup(
    "polly-rtc-max-arrays-per-group",
    cl::desc("The maximal number of arrays to compare in each alias group."),
    cl::Hidden, cl::ZeroOrMore, cl::init(20), cl::cat(PollyCategory));

static cl::opt<unsigned> RunTimeChecksMaxAccessDisjuncts(
    "polly-rtc-max-array-disjuncts",
    cl::desc("The maximal number of disjuncts allowed in memory accesses to "
             "build RTCs."),
    cl::Hidden, cl::ZeroOrMore, cl::init(8), cl::cat(PollyCategory));

static cl::opt<unsigned> RunTimeChecksMaxParameters(
    "polly-rtc-max-parameters",
    cl::desc("The maximal number of parameters allowed in RTCs."), cl::Hidden,
    cl::ZeroOrMore, cl::init(8), cl::cat(PollyCategory));

static cl::opt<bool> UnprofitableScalarAccs(
    "polly-unprofitable-scalar-accs",
    cl::desc("Count statements with scalar accesses as not optimizable"),
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));

static cl::opt<std::string> UserContextStr(
    "polly-context", cl::value_desc("isl parameter set"),
    cl::desc("Provide additional constraints on the context parameters"),
    cl::init(""), cl::cat(PollyCategory));

static cl::opt<bool> DetectReductions("polly-detect-reductions",
                                      cl::desc("Detect and exploit reductions"),
                                      cl::Hidden, cl::ZeroOrMore,
                                      cl::init(true), cl::cat(PollyCategory));

// Multiplicative reductions can be disabled separately as these kinds of
// operations can overflow easily. Additive reductions and bit operations are,
// in contrast, fairly stable.
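// For example, a multiplicative reduction such as 'prod *= A[i]' can grow
// exponentially with the number of iterations, whereas 'sum += A[i]' or
// 'flags |= A[i]' grow at most linearly in magnitude (illustrative example).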
static cl::opt<bool> DisableMultiplicativeReductions(
    "polly-disable-multiplicative-reductions",
    cl::desc("Disable multiplicative reductions"), cl::Hidden, cl::ZeroOrMore,
    cl::init(false), cl::cat(PollyCategory));

enum class GranularityChoice { BasicBlocks, ScalarIndependence, Stores };

static cl::opt<GranularityChoice> StmtGranularity(
    "polly-stmt-granularity",
    cl::desc(
        "Algorithm to use for splitting basic blocks into multiple statements"),
    cl::values(clEnumValN(GranularityChoice::BasicBlocks, "bb",
                          "One statement per basic block"),
               clEnumValN(GranularityChoice::ScalarIndependence, "scalar-indep",
                          "Scalar independence heuristic"),
               clEnumValN(GranularityChoice::Stores, "store",
                          "Store-level granularity")),
    cl::init(GranularityChoice::ScalarIndependence), cl::cat(PollyCategory));

/// Helper to treat non-affine regions and basic blocks the same.
///
///{

/// Return the block that is the representing block for @p RN.
static inline BasicBlock *getRegionNodeBasicBlock(RegionNode *RN) {
  return RN->isSubRegion() ? RN->getNodeAs<Region>()->getEntry()
                           : RN->getNodeAs<BasicBlock>();
}

/// Return the @p idx'th block that is executed after @p RN.
static inline BasicBlock *
getRegionNodeSuccessor(RegionNode *RN, Instruction *TI, unsigned idx) {
  if (RN->isSubRegion()) {
    assert(idx == 0);
    return RN->getNodeAs<Region>()->getExit();
  }
  return TI->getSuccessor(idx);
}

static bool containsErrorBlock(RegionNode *RN, const Region &R,
                               ScopDetection *SD) {
  if (!RN->isSubRegion())
    return SD->isErrorBlock(*RN->getNodeAs<BasicBlock>(), R);
  for (BasicBlock *BB : RN->getNodeAs<Region>()->blocks())
    if (SD->isErrorBlock(*BB, R))
      return true;
  return false;
}

///}

/// Create a map to map from a given iteration to a subsequent iteration.
///
/// This map maps from SetSpace -> SetSpace where the dimension @p Dim
/// is incremented by one and all other dimensions are equal, e.g.,
///             [i0, i1, i2, i3] -> [i0, i1, i2 + 1, i3]
///
/// if @p Dim is 2 and @p SetSpace has 4 dimensions.
static isl::map createNextIterationMap(isl::space SetSpace, unsigned Dim) {
  isl::space MapSpace = SetSpace.map_from_set();
  isl::map NextIterationMap = isl::map::universe(MapSpace);
  for (unsigned u : rangeIslSize(0, NextIterationMap.domain_tuple_dim()))
    if (u != Dim)
      NextIterationMap =
          NextIterationMap.equate(isl::dim::in, u, isl::dim::out, u);
  isl::constraint C =
      isl::constraint::alloc_equality(isl::local_space(MapSpace));
  C = C.set_constant_si(1);
  C = C.set_coefficient_si(isl::dim::in, Dim, 1);
  C = C.set_coefficient_si(isl::dim::out, Dim, -1);
  NextIterationMap = NextIterationMap.add_constraint(C);
  return NextIterationMap;
}

/// Add @p BSet to set @p BoundedParts if @p BSet is bounded.
static isl::set collectBoundedParts(isl::set S) {
  isl::set BoundedParts = isl::set::empty(S.get_space());
  for (isl::basic_set BSet : S.get_basic_set_list())
    if (BSet.is_bounded())
      BoundedParts = BoundedParts.unite(isl::set(BSet));
  return BoundedParts;
}

/// Compute the (un)bounded parts of @p S w.r.t. dimension @p Dim.
///
/// @returns A separation of @p S into first an unbounded then a bounded subset,
///          both with regards to the dimension @p Dim.
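///
/// Illustrative example (not from the original sources): for
///   S   = { [i] : i >= 0 } united with { [i] : 0 <= i <= N }
///   Dim = 0
/// the bounded part is { [i] : 0 <= i <= N }, since only this basic set is
/// bounded for every value of the parameter N; the unbounded part is the
/// remainder of S.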
static std::pair<isl::set, isl::set> partitionSetParts(isl::set S,
                                                       unsigned Dim) {
  for (unsigned u : rangeIslSize(0, S.tuple_dim()))
    S = S.lower_bound_si(isl::dim::set, u, 0);

  unsigned NumDimsS = unsignedFromIslSize(S.tuple_dim());
  isl::set OnlyDimS = S;

  // Remove dimensions that are greater than Dim as they are not interesting.
  assert(NumDimsS >= Dim + 1);
  OnlyDimS = OnlyDimS.project_out(isl::dim::set, Dim + 1, NumDimsS - Dim - 1);

  // Create artificial parametric upper bounds for dimensions smaller than Dim
  // as we are not interested in them.
  OnlyDimS = OnlyDimS.insert_dims(isl::dim::param, 0, Dim);

  for (unsigned u = 0; u < Dim; u++) {
    isl::constraint C = isl::constraint::alloc_inequality(
        isl::local_space(OnlyDimS.get_space()));
    C = C.set_coefficient_si(isl::dim::param, u, 1);
    C = C.set_coefficient_si(isl::dim::set, u, -1);
    OnlyDimS = OnlyDimS.add_constraint(C);
  }

  // Collect all bounded parts of OnlyDimS.
  isl::set BoundedParts = collectBoundedParts(OnlyDimS);

  // Create the dimensions greater than Dim again.
  BoundedParts =
      BoundedParts.insert_dims(isl::dim::set, Dim + 1, NumDimsS - Dim - 1);

  // Remove the artificial upper bound parameters again.
  BoundedParts = BoundedParts.remove_dims(isl::dim::param, 0, Dim);

  isl::set UnboundedParts = S.subtract(BoundedParts);
  return std::make_pair(UnboundedParts, BoundedParts);
}

/// Create the conditions under which @p L @p Pred @p R is true.
static isl::set buildConditionSet(ICmpInst::Predicate Pred, isl::pw_aff L,
                                  isl::pw_aff R) {
  switch (Pred) {
  case ICmpInst::ICMP_EQ:
    return L.eq_set(R);
  case ICmpInst::ICMP_NE:
    return L.ne_set(R);
  case ICmpInst::ICMP_SLT:
    return L.lt_set(R);
  case ICmpInst::ICMP_SLE:
    return L.le_set(R);
  case ICmpInst::ICMP_SGT:
    return L.gt_set(R);
  case ICmpInst::ICMP_SGE:
    return L.ge_set(R);
  case ICmpInst::ICMP_ULT:
    return L.lt_set(R);
  case ICmpInst::ICMP_UGT:
    return L.gt_set(R);
  case ICmpInst::ICMP_ULE:
    return L.le_set(R);
  case ICmpInst::ICMP_UGE:
    return L.ge_set(R);
  default:
    llvm_unreachable("Non integer predicate not supported");
  }
}

isl::set ScopBuilder::adjustDomainDimensions(isl::set Dom, Loop *OldL,
                                             Loop *NewL) {
  // If the loops are the same there is nothing to do.
  if (NewL == OldL)
    return Dom;

  int OldDepth = scop->getRelativeLoopDepth(OldL);
  int NewDepth = scop->getRelativeLoopDepth(NewL);
  // If both loops are non-affine loops there is nothing to do.
  if (OldDepth == -1 && NewDepth == -1)
    return Dom;

  // Distinguish three cases:
  //   1) The depth is the same but the loops are not.
  //      => One loop was left, one was entered.
  //   2) The depth increased from OldL to NewL.
  //      => One loop was entered, none was left.
  //   3) The depth decreased from OldL to NewL.
  //      => Loops were left; the difference of the depths defines how many.
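  // Illustrative example for case 3): when moving from a statement at
  // relative loop depth 2 to one at depth 0, the two innermost dimensions of
  // @p Dom are projected out below.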
  if (OldDepth == NewDepth) {
    assert(OldL->getParentLoop() == NewL->getParentLoop());
    Dom = Dom.project_out(isl::dim::set, NewDepth, 1);
    Dom = Dom.add_dims(isl::dim::set, 1);
  } else if (OldDepth < NewDepth) {
    assert(OldDepth + 1 == NewDepth);
    auto &R = scop->getRegion();
    (void)R;
    assert(NewL->getParentLoop() == OldL ||
           ((!OldL || !R.contains(OldL)) && R.contains(NewL)));
    Dom = Dom.add_dims(isl::dim::set, 1);
  } else {
    assert(OldDepth > NewDepth);
    unsigned Diff = OldDepth - NewDepth;
    unsigned NumDim = unsignedFromIslSize(Dom.tuple_dim());
    assert(NumDim >= Diff);
    Dom = Dom.project_out(isl::dim::set, NumDim - Diff, Diff);
  }

  return Dom;
}

/// Compute the isl representation for the SCEV @p E in this BB.
///
/// @param BB               The BB for which isl representation is to be
///                         computed.
/// @param InvalidDomainMap A map of BB to their invalid domains.
/// @param E                The SCEV that should be translated.
/// @param NonNegative      Flag to indicate that @p E has to be non-negative.
///
/// Note that this function will also adjust the invalid context accordingly.

__isl_give isl_pw_aff *
ScopBuilder::getPwAff(BasicBlock *BB,
                      DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
                      const SCEV *E, bool NonNegative) {
  PWACtx PWAC = scop->getPwAff(E, BB, NonNegative, &RecordedAssumptions);
  InvalidDomainMap[BB] = InvalidDomainMap[BB].unite(PWAC.second);
  return PWAC.first.release();
}

/// Build condition sets for unsigned ICmpInst(s).
/// Special handling is required for unsigned operands to ensure that if
/// the MSB (aka the sign bit) is set for an operand in an unsigned ICmpInst
/// it should wrap around.
///
/// @param IsStrictUpperBound holds information on the predicate relation
///        between TestVal and UpperBound, i.e.,
///        TestVal < UpperBound  OR  TestVal <= UpperBound
__isl_give isl_set *ScopBuilder::buildUnsignedConditionSets(
    BasicBlock *BB, Value *Condition, __isl_keep isl_set *Domain,
    const SCEV *SCEV_TestVal, const SCEV *SCEV_UpperBound,
    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
    bool IsStrictUpperBound) {
  // Do not take NonNeg assumption on TestVal
  // as it might have MSB (Sign bit) set.
  isl_pw_aff *TestVal = getPwAff(BB, InvalidDomainMap, SCEV_TestVal, false);
  // Take NonNeg assumption on UpperBound.
  isl_pw_aff *UpperBound =
      getPwAff(BB, InvalidDomainMap, SCEV_UpperBound, true);

  // 0 <= TestVal
  isl_set *First =
      isl_pw_aff_le_set(isl_pw_aff_zero_on_domain(isl_local_space_from_space(
                            isl_pw_aff_get_domain_space(TestVal))),
                        isl_pw_aff_copy(TestVal));

  isl_set *Second;
  if (IsStrictUpperBound)
    // TestVal < UpperBound
    Second = isl_pw_aff_lt_set(TestVal, UpperBound);
  else
    // TestVal <= UpperBound
    Second = isl_pw_aff_le_set(TestVal, UpperBound);

  isl_set *ConsequenceCondSet = isl_set_intersect(First, Second);
  return ConsequenceCondSet;
}

bool ScopBuilder::buildConditionSets(
    BasicBlock *BB, SwitchInst *SI, Loop *L, __isl_keep isl_set *Domain,
    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
    SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
  Value *Condition = getConditionFromTerminator(SI);
  assert(Condition && "No condition for switch");

  isl_pw_aff *LHS, *RHS;
  LHS = getPwAff(BB, InvalidDomainMap, SE.getSCEVAtScope(Condition, L));

  unsigned NumSuccessors = SI->getNumSuccessors();
  ConditionSets.resize(NumSuccessors);
  for (auto &Case : SI->cases()) {
    unsigned Idx = Case.getSuccessorIndex();
    ConstantInt *CaseValue = Case.getCaseValue();

    RHS = getPwAff(BB, InvalidDomainMap, SE.getSCEV(CaseValue));
    isl_set *CaseConditionSet =
        buildConditionSet(ICmpInst::ICMP_EQ, isl::manage_copy(LHS),
                          isl::manage(RHS))
            .release();
    ConditionSets[Idx] = isl_set_coalesce(
        isl_set_intersect(CaseConditionSet, isl_set_copy(Domain)));
  }

  assert(ConditionSets[0] == nullptr && "Default condition set was set");
  isl_set *ConditionSetUnion = isl_set_copy(ConditionSets[1]);
  for (unsigned u = 2; u < NumSuccessors; u++)
    ConditionSetUnion =
        isl_set_union(ConditionSetUnion, isl_set_copy(ConditionSets[u]));
  ConditionSets[0] = isl_set_subtract(isl_set_copy(Domain), ConditionSetUnion);

  isl_pw_aff_free(LHS);

  return true;
}

bool ScopBuilder::buildConditionSets(
    BasicBlock *BB, Value *Condition, Instruction *TI, Loop *L,
    __isl_keep isl_set *Domain,
    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
    SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
  isl_set *ConsequenceCondSet = nullptr;

  if (auto Load = dyn_cast<LoadInst>(Condition)) {
    const SCEV *LHSSCEV = SE.getSCEVAtScope(Load, L);
    const SCEV *RHSSCEV = SE.getZero(LHSSCEV->getType());
    bool NonNeg = false;
    isl_pw_aff *LHS = getPwAff(BB, InvalidDomainMap, LHSSCEV, NonNeg);
    isl_pw_aff *RHS = getPwAff(BB, InvalidDomainMap, RHSSCEV, NonNeg);
    ConsequenceCondSet = buildConditionSet(ICmpInst::ICMP_SLE, isl::manage(LHS),
                                           isl::manage(RHS))
                             .release();
  } else if (auto *PHI = dyn_cast<PHINode>(Condition)) {
    auto *Unique = dyn_cast<ConstantInt>(
        getUniqueNonErrorValue(PHI, &scop->getRegion(), &SD));
    assert(Unique &&
           "A PHINode condition should only be accepted by ScopDetection if "
           "getUniqueNonErrorValue returns non-NULL");

    if (Unique->isZero())
      ConsequenceCondSet = isl_set_empty(isl_set_get_space(Domain));
    else
      ConsequenceCondSet = isl_set_universe(isl_set_get_space(Domain));
  } else if (auto *CCond = dyn_cast<ConstantInt>(Condition)) {
    if (CCond->isZero())
      ConsequenceCondSet = isl_set_empty(isl_set_get_space(Domain));
    else
      ConsequenceCondSet = isl_set_universe(isl_set_get_space(Domain));
  } else if (BinaryOperator *BinOp =
                 dyn_cast<BinaryOperator>(Condition)) {
    auto Opcode = BinOp->getOpcode();
    assert(Opcode == Instruction::And || Opcode == Instruction::Or);

    bool Valid = buildConditionSets(BB, BinOp->getOperand(0), TI, L, Domain,
                                    InvalidDomainMap, ConditionSets) &&
                 buildConditionSets(BB, BinOp->getOperand(1), TI, L, Domain,
                                    InvalidDomainMap, ConditionSets);
    if (!Valid) {
      while (!ConditionSets.empty())
        isl_set_free(ConditionSets.pop_back_val());
      return false;
    }

    isl_set_free(ConditionSets.pop_back_val());
    isl_set *ConsCondPart0 = ConditionSets.pop_back_val();
    isl_set_free(ConditionSets.pop_back_val());
    isl_set *ConsCondPart1 = ConditionSets.pop_back_val();

    if (Opcode == Instruction::And)
      ConsequenceCondSet = isl_set_intersect(ConsCondPart0, ConsCondPart1);
    else
      ConsequenceCondSet = isl_set_union(ConsCondPart0, ConsCondPart1);
  } else {
    auto *ICond = dyn_cast<ICmpInst>(Condition);
    assert(ICond &&
           "Condition of exiting branch was neither constant nor ICmp!");

    Region &R = scop->getRegion();

    isl_pw_aff *LHS, *RHS;
    // For unsigned comparisons we assumed the sign bit of neither operand
    // to be set. The comparison is equal to a signed comparison under this
    // assumption.
    bool NonNeg = ICond->isUnsigned();
    const SCEV *LeftOperand = SE.getSCEVAtScope(ICond->getOperand(0), L),
               *RightOperand = SE.getSCEVAtScope(ICond->getOperand(1), L);

    LeftOperand = tryForwardThroughPHI(LeftOperand, R, SE, &SD);
    RightOperand = tryForwardThroughPHI(RightOperand, R, SE, &SD);

    switch (ICond->getPredicate()) {
    case ICmpInst::ICMP_ULT:
      ConsequenceCondSet =
          buildUnsignedConditionSets(BB, Condition, Domain, LeftOperand,
                                     RightOperand, InvalidDomainMap, true);
      break;
    case ICmpInst::ICMP_ULE:
      ConsequenceCondSet =
          buildUnsignedConditionSets(BB, Condition, Domain, LeftOperand,
                                     RightOperand, InvalidDomainMap, false);
      break;
    case ICmpInst::ICMP_UGT:
      ConsequenceCondSet =
          buildUnsignedConditionSets(BB, Condition, Domain, RightOperand,
                                     LeftOperand, InvalidDomainMap, true);
      break;
    case ICmpInst::ICMP_UGE:
      ConsequenceCondSet =
          buildUnsignedConditionSets(BB, Condition, Domain, RightOperand,
                                     LeftOperand, InvalidDomainMap, false);
      break;
    default:
      LHS = getPwAff(BB, InvalidDomainMap, LeftOperand, NonNeg);
      RHS = getPwAff(BB, InvalidDomainMap, RightOperand, NonNeg);
      ConsequenceCondSet = buildConditionSet(ICond->getPredicate(),
                                             isl::manage(LHS), isl::manage(RHS))
                               .release();
      break;
    }
  }

  // If no terminator was given we are only looking for parameter constraints
  // under which @p Condition is true/false.
  if (!TI)
    ConsequenceCondSet = isl_set_params(ConsequenceCondSet);
  assert(ConsequenceCondSet);
  ConsequenceCondSet = isl_set_coalesce(
      isl_set_intersect(ConsequenceCondSet, isl_set_copy(Domain)));

  isl_set *AlternativeCondSet = nullptr;
  bool TooComplex =
      isl_set_n_basic_set(ConsequenceCondSet) >= (int)MaxDisjunctsInDomain;

  if (!TooComplex) {
    AlternativeCondSet = isl_set_subtract(isl_set_copy(Domain),
                                          isl_set_copy(ConsequenceCondSet));
    TooComplex =
        isl_set_n_basic_set(AlternativeCondSet) >= (int)MaxDisjunctsInDomain;
  }

  if (TooComplex) {
    scop->invalidate(COMPLEXITY, TI ? TI->getDebugLoc() : DebugLoc(),
                     TI ? TI->getParent() : nullptr /* BasicBlock */);
    isl_set_free(AlternativeCondSet);
    isl_set_free(ConsequenceCondSet);
    return false;
  }

  ConditionSets.push_back(ConsequenceCondSet);
  ConditionSets.push_back(isl_set_coalesce(AlternativeCondSet));

  return true;
}

bool ScopBuilder::buildConditionSets(
    BasicBlock *BB, Instruction *TI, Loop *L, __isl_keep isl_set *Domain,
    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
    SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
  if (SwitchInst *SI = dyn_cast<SwitchInst>(TI))
    return buildConditionSets(BB, SI, L, Domain, InvalidDomainMap,
                              ConditionSets);

  assert(isa<BranchInst>(TI) && "Terminator was neither branch nor switch.");

  if (TI->getNumSuccessors() == 1) {
    ConditionSets.push_back(isl_set_copy(Domain));
    return true;
  }

  Value *Condition = getConditionFromTerminator(TI);
  assert(Condition && "No condition for Terminator");

  return buildConditionSets(BB, Condition, TI, L, Domain, InvalidDomainMap,
                            ConditionSets);
}

bool ScopBuilder::propagateDomainConstraints(
    Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  // Iterate over the region R and propagate the domain constraints from the
  // predecessors to the current node. In contrast to the
  // buildDomainsWithBranchConstraints function, this one will pull the domain
  // information from the predecessors instead of pushing it to the successors.
  // Additionally, we assume the domains to be already present in the domain
  // map here. However, we iterate again in reverse post order so we know all
  // predecessors have been visited before a block or non-affine subregion is
  // visited.

  ReversePostOrderTraversal<Region *> RTraversal(R);
  for (auto *RN : RTraversal) {
    // Recurse for affine subregions but go on for basic blocks and non-affine
    // subregions.
    if (RN->isSubRegion()) {
      Region *SubRegion = RN->getNodeAs<Region>();
      if (!scop->isNonAffineSubRegion(SubRegion)) {
        if (!propagateDomainConstraints(SubRegion, InvalidDomainMap))
          return false;
        continue;
      }
    }

    BasicBlock *BB = getRegionNodeBasicBlock(RN);
    isl::set &Domain = scop->getOrInitEmptyDomain(BB);
    assert(!Domain.is_null());

    // Under the union of all predecessor conditions we can reach this block.
    isl::set PredDom = getPredecessorDomainConstraints(BB, Domain);
    Domain = Domain.intersect(PredDom).coalesce();
    Domain = Domain.align_params(scop->getParamSpace());

    Loop *BBLoop = getRegionNodeLoop(RN, LI);
    if (BBLoop && BBLoop->getHeader() == BB && scop->contains(BBLoop))
      if (!addLoopBoundsToHeaderDomain(BBLoop, InvalidDomainMap))
        return false;
  }

  return true;
}

void ScopBuilder::propagateDomainConstraintsToRegionExit(
    BasicBlock *BB, Loop *BBLoop,
    SmallPtrSetImpl<BasicBlock *> &FinishedExitBlocks,
    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  // Check if the block @p BB is the entry of a region. If so we propagate its
  // domain to the exit block of the region. Otherwise we are done.
  auto *RI = scop->getRegion().getRegionInfo();
  auto *BBReg = RI ? RI->getRegionFor(BB) : nullptr;
  auto *ExitBB = BBReg ? BBReg->getExit() : nullptr;
  if (!BBReg || BBReg->getEntry() != BB || !scop->contains(ExitBB))
    return;

  // Do not propagate the domain if there is a loop backedge inside the region
  // that would prevent the exit block from being executed.
  auto *L = BBLoop;
  while (L && scop->contains(L)) {
    SmallVector<BasicBlock *, 4> LatchBBs;
    BBLoop->getLoopLatches(LatchBBs);
    for (auto *LatchBB : LatchBBs)
      if (BB != LatchBB && BBReg->contains(LatchBB))
        return;
    L = L->getParentLoop();
  }

  isl::set Domain = scop->getOrInitEmptyDomain(BB);
  assert(!Domain.is_null() && "Cannot propagate a nullptr");

  Loop *ExitBBLoop = getFirstNonBoxedLoopFor(ExitBB, LI, scop->getBoxedLoops());

  // Since the dimensions of @p BB and @p ExitBB might be different we have to
  // adjust the domain before we can propagate it.
  isl::set AdjustedDomain = adjustDomainDimensions(Domain, BBLoop, ExitBBLoop);
  isl::set &ExitDomain = scop->getOrInitEmptyDomain(ExitBB);

  // If the exit domain is not yet created we set it, otherwise we "add" the
  // current domain.
  ExitDomain =
      !ExitDomain.is_null() ? AdjustedDomain.unite(ExitDomain) : AdjustedDomain;

  // Initialize the invalid domain.
  InvalidDomainMap[ExitBB] = ExitDomain.empty(ExitDomain.get_space());

  FinishedExitBlocks.insert(ExitBB);
}

isl::set ScopBuilder::getPredecessorDomainConstraints(BasicBlock *BB,
                                                      isl::set Domain) {
  // If @p BB is the ScopEntry we are done.
  if (scop->getRegion().getEntry() == BB)
    return isl::set::universe(Domain.get_space());

  // The region info of this function.
  auto &RI = *scop->getRegion().getRegionInfo();

  Loop *BBLoop = getFirstNonBoxedLoopFor(BB, LI, scop->getBoxedLoops());

  // A domain to collect all predecessor domains, thus all conditions under
  // which the block is executed. To this end we start with the empty domain.
  isl::set PredDom = isl::set::empty(Domain.get_space());

  // Set of regions of which the entry block domain has been propagated to BB.
  // All predecessors inside any of the regions can be skipped.
  SmallSet<Region *, 8> PropagatedRegions;

  for (auto *PredBB : predecessors(BB)) {
    // Skip backedges.
    if (DT.dominates(BB, PredBB))
      continue;

    // If the predecessor is in a region we used for propagation we can skip it.
    auto PredBBInRegion = [PredBB](Region *PR) { return PR->contains(PredBB); };
    if (std::any_of(PropagatedRegions.begin(), PropagatedRegions.end(),
                    PredBBInRegion)) {
      continue;
    }

    // Check if there is a valid region we can use for propagation, thus look
    // for a region that contains the predecessor and has @p BB as exit block.
    // FIXME: This was a side-effect-free (and possibly infinite) loop when
    // committed and seems not to be needed.
    auto *PredR = RI.getRegionFor(PredBB);
    while (PredR->getExit() != BB && !PredR->contains(BB))
      PredR = PredR->getParent();

    // If a valid region for propagation was found use the entry of that region
    // for propagation, otherwise the PredBB directly.
    if (PredR->getExit() == BB) {
      PredBB = PredR->getEntry();
      PropagatedRegions.insert(PredR);
    }

    isl::set PredBBDom = scop->getDomainConditions(PredBB);
    Loop *PredBBLoop =
        getFirstNonBoxedLoopFor(PredBB, LI, scop->getBoxedLoops());
    PredBBDom = adjustDomainDimensions(PredBBDom, PredBBLoop, BBLoop);
    PredDom = PredDom.unite(PredBBDom);
  }

  return PredDom;
}

bool ScopBuilder::addLoopBoundsToHeaderDomain(
    Loop *L, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  int LoopDepth = scop->getRelativeLoopDepth(L);
  assert(LoopDepth >= 0 && "Loop in region should have at least depth one");

  BasicBlock *HeaderBB = L->getHeader();
  assert(scop->isDomainDefined(HeaderBB));
  isl::set &HeaderBBDom = scop->getOrInitEmptyDomain(HeaderBB);

  isl::map NextIterationMap =
      createNextIterationMap(HeaderBBDom.get_space(), LoopDepth);

  isl::set UnionBackedgeCondition = HeaderBBDom.empty(HeaderBBDom.get_space());

  SmallVector<BasicBlock *, 4> LatchBlocks;
  L->getLoopLatches(LatchBlocks);

  for (BasicBlock *LatchBB : LatchBlocks) {
    // If the latch is only reachable via error statements we skip it.
    if (!scop->isDomainDefined(LatchBB))
      continue;

    isl::set LatchBBDom = scop->getDomainConditions(LatchBB);

    isl::set BackedgeCondition;

    Instruction *TI = LatchBB->getTerminator();
    BranchInst *BI = dyn_cast<BranchInst>(TI);
    assert(BI && "Only branch instructions allowed in loop latches");

    if (BI->isUnconditional())
      BackedgeCondition = LatchBBDom;
    else {
      SmallVector<isl_set *, 8> ConditionSets;
      int idx = BI->getSuccessor(0) != HeaderBB;
      if (!buildConditionSets(LatchBB, TI, L, LatchBBDom.get(),
                              InvalidDomainMap, ConditionSets))
        return false;

      // Free the non-backedge condition set as we do not need it.
      isl_set_free(ConditionSets[1 - idx]);

      BackedgeCondition = isl::manage(ConditionSets[idx]);
    }

    int LatchLoopDepth = scop->getRelativeLoopDepth(LI.getLoopFor(LatchBB));
    assert(LatchLoopDepth >= LoopDepth);
    BackedgeCondition = BackedgeCondition.project_out(
        isl::dim::set, LoopDepth + 1, LatchLoopDepth - LoopDepth);
    UnionBackedgeCondition = UnionBackedgeCondition.unite(BackedgeCondition);
  }

  isl::map ForwardMap = ForwardMap.lex_le(HeaderBBDom.get_space());
  for (int i = 0; i < LoopDepth; i++)
    ForwardMap = ForwardMap.equate(isl::dim::in, i, isl::dim::out, i);

  isl::set UnionBackedgeConditionComplement =
      UnionBackedgeCondition.complement();
  UnionBackedgeConditionComplement =
      UnionBackedgeConditionComplement.lower_bound_si(isl::dim::set, LoopDepth,
                                                      0);
  UnionBackedgeConditionComplement =
      UnionBackedgeConditionComplement.apply(ForwardMap);
  HeaderBBDom = HeaderBBDom.subtract(UnionBackedgeConditionComplement);
  HeaderBBDom = HeaderBBDom.apply(NextIterationMap);

  auto Parts = partitionSetParts(HeaderBBDom, LoopDepth);
  HeaderBBDom = Parts.second;

  // Check if there is a <nsw> tagged AddRec for this loop and if so do not
  // require a runtime check. The assumption is already implied by the <nsw>
  // tag.
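  // For instance, a canonical loop 'for (long i = 0; i < N; ++i)' typically
  // yields an AddRec of the form {0,+,1}<nsw> for its induction variable, so
  // the assumption recorded below does not need a runtime check (illustrative
  // example).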
  bool RequiresRTC = !scop->hasNSWAddRecForLoop(L);

  isl::set UnboundedCtx = Parts.first.params();
  recordAssumption(&RecordedAssumptions, INFINITELOOP, UnboundedCtx,
                   HeaderBB->getTerminator()->getDebugLoc(), AS_RESTRICTION,
                   nullptr, RequiresRTC);
  return true;
}

void ScopBuilder::buildInvariantEquivalenceClasses() {
  DenseMap<std::pair<const SCEV *, Type *>, LoadInst *> EquivClasses;

  const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
  for (LoadInst *LInst : RIL) {
    const SCEV *PointerSCEV = SE.getSCEV(LInst->getPointerOperand());

    Type *Ty = LInst->getType();
    LoadInst *&ClassRep = EquivClasses[std::make_pair(PointerSCEV, Ty)];
    if (ClassRep) {
      scop->addInvariantLoadMapping(LInst, ClassRep);
      continue;
    }

    ClassRep = LInst;
    scop->addInvariantEquivClass(
        InvariantEquivClassTy{PointerSCEV, MemoryAccessList(), {}, Ty});
  }
}

bool ScopBuilder::buildDomains(
    Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  bool IsOnlyNonAffineRegion = scop->isNonAffineSubRegion(R);
  auto *EntryBB = R->getEntry();
  auto *L = IsOnlyNonAffineRegion ? nullptr : LI.getLoopFor(EntryBB);
  int LD = scop->getRelativeLoopDepth(L);
  auto *S =
      isl_set_universe(isl_space_set_alloc(scop->getIslCtx().get(), 0, LD + 1));

  InvalidDomainMap[EntryBB] = isl::manage(isl_set_empty(isl_set_get_space(S)));
  isl::set Domain = isl::manage(S);
  scop->setDomain(EntryBB, Domain);

  if (IsOnlyNonAffineRegion)
    return !containsErrorBlock(R->getNode(), *R, &SD);

  if (!buildDomainsWithBranchConstraints(R, InvalidDomainMap))
    return false;

  if (!propagateDomainConstraints(R, InvalidDomainMap))
    return false;

  // Error blocks and blocks dominated by them have been assumed to never be
  // executed. Representing them in the Scop does not add any value. In fact,
  // it is likely to cause issues during construction of the ScopStmts. The
  // contents of error blocks have not been verified to be expressible and
  // will cause problems when building up a ScopStmt for them.
  // Furthermore, basic blocks dominated by error blocks may reference
  // instructions in the error block which, if the error block is not modeled,
  // can themselves not be constructed properly. To this end we will replace
  // the domains of error blocks and those only reachable via error blocks
  // with an empty set. Additionally, we will record for each block under which
  // parameter combination it would be reached via an error block in its
  // InvalidDomain. This information is needed during load hoisting.
  if (!propagateInvalidStmtDomains(R, InvalidDomainMap))
    return false;

  return true;
}

bool ScopBuilder::buildDomainsWithBranchConstraints(
    Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  // To create the domain for each block in R we iterate over all blocks and
  // subregions in R and propagate the conditions under which the current region
  // element is executed. To this end we iterate in reverse post order over R as
  // it ensures that we first visit all predecessors of a region node (either a
  // basic block or a subregion) before we visit the region node itself.
  // Initially, only the domain for the SCoP region entry block is set and from
  // there we propagate the current domain to all successors; however, we add
  // the condition that the successor is actually executed next.
  // As we are only interested in non-loop carried constraints here we can
  // simply skip loop back edges.

  SmallPtrSet<BasicBlock *, 8> FinishedExitBlocks;
  ReversePostOrderTraversal<Region *> RTraversal(R);
  for (auto *RN : RTraversal) {
    // Recurse for affine subregions but go on for basic blocks and non-affine
    // subregions.
    if (RN->isSubRegion()) {
      Region *SubRegion = RN->getNodeAs<Region>();
      if (!scop->isNonAffineSubRegion(SubRegion)) {
        if (!buildDomainsWithBranchConstraints(SubRegion, InvalidDomainMap))
          return false;
        continue;
      }
    }

    if (containsErrorBlock(RN, scop->getRegion(), &SD))
      scop->notifyErrorBlock();

    BasicBlock *BB = getRegionNodeBasicBlock(RN);
    Instruction *TI = BB->getTerminator();

    if (isa<UnreachableInst>(TI))
      continue;

    if (!scop->isDomainDefined(BB))
      continue;
    isl::set Domain = scop->getDomainConditions(BB);

    scop->updateMaxLoopDepth(unsignedFromIslSize(Domain.tuple_dim()));

    auto *BBLoop = getRegionNodeLoop(RN, LI);
    // Propagate the domain from BB directly to blocks that have a superset
    // domain, at the moment only region exit nodes of regions that start in BB.
    propagateDomainConstraintsToRegionExit(BB, BBLoop, FinishedExitBlocks,
                                           InvalidDomainMap);

    // If all successors of BB have been assigned a domain through the
    // propagation above we do not need to build condition sets but can just
    // skip this block. However, it is important to note that this is a local
    // property with regards to the region @p R. To this end FinishedExitBlocks
    // is a local variable.
    auto IsFinishedRegionExit = [&FinishedExitBlocks](BasicBlock *SuccBB) {
      return FinishedExitBlocks.count(SuccBB);
    };
    if (std::all_of(succ_begin(BB), succ_end(BB), IsFinishedRegionExit))
      continue;

    // Build the condition sets for the successor nodes of the current region
    // node. If it is a non-affine subregion we will always execute the single
    // exit node, hence the single entry node domain is the condition set. For
    // basic blocks we use the helper function buildConditionSets.
    SmallVector<isl_set *, 8> ConditionSets;
    if (RN->isSubRegion())
      ConditionSets.push_back(Domain.copy());
    else if (!buildConditionSets(BB, TI, BBLoop, Domain.get(), InvalidDomainMap,
                                 ConditionSets))
      return false;

    // Now iterate over the successors and set their initial domain based on
    // their condition set. We skip back edges here and have to be careful when
    // we leave a loop not to keep constraints over a dimension that doesn't
    // exist anymore.
    assert(RN->isSubRegion() || TI->getNumSuccessors() == ConditionSets.size());
    for (unsigned u = 0, e = ConditionSets.size(); u < e; u++) {
      isl::set CondSet = isl::manage(ConditionSets[u]);
      BasicBlock *SuccBB = getRegionNodeSuccessor(RN, TI, u);

      // Skip blocks outside the region.
      if (!scop->contains(SuccBB))
        continue;

      // If we propagate the domain of some block to "SuccBB" we do not have to
      // adjust the domain.
      if (FinishedExitBlocks.count(SuccBB))
        continue;

      // Skip back edges.
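      // (An edge whose target dominates its source is a loop back edge, which
      // is why dominance is used as the test here.)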
      if (DT.dominates(SuccBB, BB))
        continue;

      Loop *SuccBBLoop =
          getFirstNonBoxedLoopFor(SuccBB, LI, scop->getBoxedLoops());

      CondSet = adjustDomainDimensions(CondSet, BBLoop, SuccBBLoop);

      // Set the domain for the successor or merge it with an existing domain in
      // case there are multiple paths (without loop back edges) to the
      // successor block.
      isl::set &SuccDomain = scop->getOrInitEmptyDomain(SuccBB);

      if (!SuccDomain.is_null()) {
        SuccDomain = SuccDomain.unite(CondSet).coalesce();
      } else {
        // Initialize the invalid domain.
        InvalidDomainMap[SuccBB] = CondSet.empty(CondSet.get_space());
        SuccDomain = CondSet;
      }

      SuccDomain = SuccDomain.detect_equalities();

      // Check if the maximal number of domain disjunctions was reached.
      // In case this happens we will clean up and bail.
      if (unsignedFromIslSize(SuccDomain.n_basic_set()) < MaxDisjunctsInDomain)
        continue;

      scop->invalidate(COMPLEXITY, DebugLoc());
      while (++u < ConditionSets.size())
        isl_set_free(ConditionSets[u]);
      return false;
    }
  }

  return true;
}

bool ScopBuilder::propagateInvalidStmtDomains(
    Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  ReversePostOrderTraversal<Region *> RTraversal(R);
  for (auto *RN : RTraversal) {

    // Recurse for affine subregions but go on for basic blocks and non-affine
    // subregions.
    if (RN->isSubRegion()) {
      Region *SubRegion = RN->getNodeAs<Region>();
      if (!scop->isNonAffineSubRegion(SubRegion)) {
        propagateInvalidStmtDomains(SubRegion, InvalidDomainMap);
        continue;
      }
    }

    bool ContainsErrorBlock = containsErrorBlock(RN, scop->getRegion(), &SD);
    BasicBlock *BB = getRegionNodeBasicBlock(RN);
    isl::set &Domain = scop->getOrInitEmptyDomain(BB);
    assert(!Domain.is_null() && "Cannot propagate a nullptr");

    isl::set InvalidDomain = InvalidDomainMap[BB];

    bool IsInvalidBlock = ContainsErrorBlock || Domain.is_subset(InvalidDomain);

    if (!IsInvalidBlock) {
      InvalidDomain = InvalidDomain.intersect(Domain);
    } else {
      InvalidDomain = Domain;
      isl::set DomPar = Domain.params();
      recordAssumption(&RecordedAssumptions, ERRORBLOCK, DomPar,
                       BB->getTerminator()->getDebugLoc(), AS_RESTRICTION);
      Domain = isl::set::empty(Domain.get_space());
    }

    if (InvalidDomain.is_empty()) {
      InvalidDomainMap[BB] = InvalidDomain;
      continue;
    }

    auto *BBLoop = getRegionNodeLoop(RN, LI);
    auto *TI = BB->getTerminator();
    unsigned NumSuccs = RN->isSubRegion() ? 1 : TI->getNumSuccessors();
    for (unsigned u = 0; u < NumSuccs; u++) {
      auto *SuccBB = getRegionNodeSuccessor(RN, TI, u);

      // Skip successors outside the SCoP.
      if (!scop->contains(SuccBB))
        continue;

      // Skip backedges.
      if (DT.dominates(SuccBB, BB))
        continue;

      Loop *SuccBBLoop =
          getFirstNonBoxedLoopFor(SuccBB, LI, scop->getBoxedLoops());

      auto AdjustedInvalidDomain =
          adjustDomainDimensions(InvalidDomain, BBLoop, SuccBBLoop);

      isl::set SuccInvalidDomain = InvalidDomainMap[SuccBB];
      SuccInvalidDomain = SuccInvalidDomain.unite(AdjustedInvalidDomain);
      SuccInvalidDomain = SuccInvalidDomain.coalesce();

      InvalidDomainMap[SuccBB] = SuccInvalidDomain;

      // Check if the maximal number of domain disjunctions was reached.
      // In case this happens we will bail.
      if (unsignedFromIslSize(SuccInvalidDomain.n_basic_set()) <
          MaxDisjunctsInDomain)
        continue;

      InvalidDomainMap.erase(BB);
      scop->invalidate(COMPLEXITY, TI->getDebugLoc(), TI->getParent());
      return false;
    }

    InvalidDomainMap[BB] = InvalidDomain;
  }

  return true;
}

void ScopBuilder::buildPHIAccesses(ScopStmt *PHIStmt, PHINode *PHI,
                                   Region *NonAffineSubRegion,
                                   bool IsExitBlock) {
  // PHI nodes that are in the exit block of the region, hence if IsExitBlock is
  // true, are not modeled as ordinary PHI nodes as they are not part of the
  // region. However, we model the operands in the predecessor blocks that are
  // part of the region as regular scalar accesses.

  // If we can synthesize a PHI we can skip it, however only if it is in
  // the region. If it is not it can only be in the exit block of the region.
  // In this case we model the operands but not the PHI itself.
  auto *Scope = LI.getLoopFor(PHI->getParent());
  if (!IsExitBlock && canSynthesize(PHI, *scop, &SE, Scope))
    return;

  // PHI nodes are modeled as if they had been demoted prior to the SCoP
  // detection. Hence, the PHI is a load of a new memory location in which the
  // incoming value was written at the end of the incoming basic block.
  bool OnlyNonAffineSubRegionOperands = true;
  for (unsigned u = 0; u < PHI->getNumIncomingValues(); u++) {
    Value *Op = PHI->getIncomingValue(u);
    BasicBlock *OpBB = PHI->getIncomingBlock(u);
    ScopStmt *OpStmt = scop->getIncomingStmtFor(PHI->getOperandUse(u));

    // Do not build PHI dependences inside a non-affine subregion, but make
    // sure that the necessary scalar values are still made available.
    if (NonAffineSubRegion && NonAffineSubRegion->contains(OpBB)) {
      auto *OpInst = dyn_cast<Instruction>(Op);
      if (!OpInst || !NonAffineSubRegion->contains(OpInst))
        ensureValueRead(Op, OpStmt);
      continue;
    }

    OnlyNonAffineSubRegionOperands = false;
    ensurePHIWrite(PHI, OpStmt, OpBB, Op, IsExitBlock);
  }

  if (!OnlyNonAffineSubRegionOperands && !IsExitBlock) {
    addPHIReadAccess(PHIStmt, PHI);
  }
}

void ScopBuilder::buildScalarDependences(ScopStmt *UserStmt,
                                         Instruction *Inst) {
  assert(!isa<PHINode>(Inst));

  // Pull-in required operands.
  for (Use &Op : Inst->operands())
    ensureValueRead(Op.get(), UserStmt);
}

// Create a sequence of two schedules. Either argument may be null and is
// interpreted as the empty schedule. Can also return null if both schedules are
// empty.
static isl::schedule combineInSequence(isl::schedule Prev, isl::schedule Succ) {
  if (Prev.is_null())
    return Succ;
  if (Succ.is_null())
    return Prev;

  return Prev.sequence(Succ);
}

// Create an isl_multi_union_aff that defines an identity mapping from the
// elements of USet to their N-th dimension.
//
// # Example:
//
//   Domain: { A[i,j]; B[i,j,k] }
//   N: 1
//
// Resulting Mapping: { A[i,j] -> [(j)]; B[i,j,k] -> [(j)] }
//
// @param USet   A union set describing the elements for which to generate a
//               mapping.
// @param N      The dimension to map to.
// @returns      A mapping from USet to its N-th dimension.
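//
// Note: buildSchedule() below uses the result of this function as the partial
// schedule inserted for the loop whose body has just been fully processed.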
static isl::multi_union_pw_aff mapToDimension(isl::union_set USet, unsigned N) {
  assert(!USet.is_null());
  assert(!USet.is_empty());

  auto Result = isl::union_pw_multi_aff::empty(USet.get_space());

  for (isl::set S : USet.get_set_list()) {
    unsigned Dim = unsignedFromIslSize(S.tuple_dim());
    assert(Dim >= N);
    auto PMA = isl::pw_multi_aff::project_out_map(S.get_space(), isl::dim::set,
                                                  N, Dim - N);
    if (N > 1)
      PMA = PMA.drop_dims(isl::dim::out, 0, N - 1);

    Result = Result.add_pw_multi_aff(PMA);
  }

  return isl::multi_union_pw_aff(isl::union_pw_multi_aff(Result));
}

void ScopBuilder::buildSchedule() {
  Loop *L = getLoopSurroundingScop(*scop, LI);
  LoopStackTy LoopStack({LoopStackElementTy(L, {}, 0)});
  buildSchedule(scop->getRegion().getNode(), LoopStack);
  assert(LoopStack.size() == 1 && LoopStack.back().L == L);
  scop->setScheduleTree(LoopStack[0].Schedule);
}

/// To generate a schedule for the elements in a Region we traverse the Region
/// in reverse-post-order and add the contained RegionNodes in traversal order
/// to the schedule of the loop that is currently at the top of the LoopStack.
/// For loop-free codes, this results in a correct sequential ordering.
///
/// Example:
///           bb1(0)
///          /     \.
///      bb2(1)   bb3(2)
///         \    /  \.
///          bb4(3)  bb5(4)
///             \   /
///              bb6(5)
///
/// Including loops requires additional processing. Whenever a loop header is
/// encountered, the corresponding loop is added to the @p LoopStack. Starting
/// from an empty schedule, we first process all RegionNodes that are within
/// this loop and complete the sequential schedule at this loop-level before
/// processing any other nodes. To implement this
/// loop-nodes-first-processing, the reverse post-order traversal is
/// insufficient. Hence, we additionally check if the traversal yields
/// sub-regions or blocks that are outside the last loop on the @p LoopStack.
/// These region nodes are then queued and only traversed after all nodes
/// within the current loop have been processed.
void ScopBuilder::buildSchedule(Region *R, LoopStackTy &LoopStack) {
  Loop *OuterScopLoop = getLoopSurroundingScop(*scop, LI);

  ReversePostOrderTraversal<Region *> RTraversal(R);
  std::deque<RegionNode *> WorkList(RTraversal.begin(), RTraversal.end());
  std::deque<RegionNode *> DelayList;
  bool LastRNWaiting = false;

  // Iterate over the region @p R in reverse post-order but queue
  // sub-regions/blocks iff they are not part of the last encountered but not
  // completely traversed loop. The variable LastRNWaiting is a flag to indicate
  // that we queued the last sub-region/block from the reverse post-order
  // iterator. If it is set we have to explore the next sub-region/block from
  // the iterator (if any) to guarantee progress. If it is not set we first try
  // the next queued sub-region/blocks.
  while (!WorkList.empty() || !DelayList.empty()) {
    RegionNode *RN;

    if ((LastRNWaiting && !WorkList.empty()) || DelayList.empty()) {
      RN = WorkList.front();
      WorkList.pop_front();
      LastRNWaiting = false;
    } else {
      RN = DelayList.front();
      DelayList.pop_front();
    }

    Loop *L = getRegionNodeLoop(RN, LI);
    if (!scop->contains(L))
      L = OuterScopLoop;

    Loop *LastLoop = LoopStack.back().L;
    if (LastLoop != L) {
      if (LastLoop && !LastLoop->contains(L)) {
        LastRNWaiting = true;
        DelayList.push_back(RN);
        continue;
      }
      LoopStack.push_back({L, {}, 0});
    }
    buildSchedule(RN, LoopStack);
  }
}

void ScopBuilder::buildSchedule(RegionNode *RN, LoopStackTy &LoopStack) {
  if (RN->isSubRegion()) {
    auto *LocalRegion = RN->getNodeAs<Region>();
    if (!scop->isNonAffineSubRegion(LocalRegion)) {
      buildSchedule(LocalRegion, LoopStack);
      return;
    }
  }

  assert(LoopStack.rbegin() != LoopStack.rend());
  auto LoopData = LoopStack.rbegin();
  LoopData->NumBlocksProcessed += getNumBlocksInRegionNode(RN);

  for (auto *Stmt : scop->getStmtListFor(RN)) {
    isl::union_set UDomain{Stmt->getDomain()};
    auto StmtSchedule = isl::schedule::from_domain(UDomain);
    LoopData->Schedule = combineInSequence(LoopData->Schedule, StmtSchedule);
  }

  // Check if we just processed the last node in this loop. If we did, finalize
  // the loop by:
  //
  //   - adding new schedule dimensions
  //   - folding the resulting schedule into the parent loop schedule
  //   - dropping the loop schedule from the LoopStack.
  //
  // Then continue to check surrounding loops, which might also have been
  // completed by this node.
  size_t Dimension = LoopStack.size();
  while (LoopData->L &&
         LoopData->NumBlocksProcessed == getNumBlocksInLoop(LoopData->L)) {
    isl::schedule Schedule = LoopData->Schedule;
    auto NumBlocksProcessed = LoopData->NumBlocksProcessed;

    assert(std::next(LoopData) != LoopStack.rend());
    Loop *L = LoopData->L;
    ++LoopData;
    --Dimension;

    if (!Schedule.is_null()) {
      isl::union_set Domain = Schedule.get_domain();
      isl::multi_union_pw_aff MUPA = mapToDimension(Domain, Dimension);
      Schedule = Schedule.insert_partial_schedule(MUPA);

      if (hasDisableAllTransformsHint(L)) {
        /// If any of the loops has a disable_nonforced heuristic, mark the
        /// entire SCoP as such. The ISL rescheduler can only reschedule the
        /// SCoP in its entirety.
        /// TODO: ScopDetection could avoid including such loops or wrap them
        /// as boxed loops. It still needs to pass through loops with
        /// user-defined metadata.
        scop->markDisableHeuristics();
      }

      // It is easier to insert the marks here than to do it retroactively.
      isl::id IslLoopId = createIslLoopAttr(scop->getIslCtx(), L);
      if (!IslLoopId.is_null())
        Schedule =
            Schedule.get_root().child(0).insert_mark(IslLoopId).get_schedule();

      LoopData->Schedule = combineInSequence(LoopData->Schedule, Schedule);
    }

    LoopData->NumBlocksProcessed += NumBlocksProcessed;
  }
  // Now pop all loops processed above from the LoopStack.
  LoopStack.erase(LoopStack.begin() + Dimension, LoopStack.end());
}

void ScopBuilder::buildEscapingDependences(Instruction *Inst) {
  // Check for uses of this instruction outside the scop. Because we do not
  // iterate over such instructions and therefore did not "ensure" the
  // existence of a write, we must determine whether such a use exists here.
  if (scop->isEscaping(Inst))
    ensureValueWrite(Inst);
}

void ScopBuilder::addRecordedAssumptions() {
  for (auto &AS : llvm::reverse(RecordedAssumptions)) {

    if (!AS.BB) {
      scop->addAssumption(AS.Kind, AS.Set, AS.Loc, AS.Sign,
                          nullptr /* BasicBlock */, AS.RequiresRTC);
      continue;
    }

    // If the domain was deleted the assumptions are void.
    isl_set *Dom = scop->getDomainConditions(AS.BB).release();
    if (!Dom)
      continue;

    // If a basic block was given use its domain to simplify the assumption.
    // In case of restrictions we know they only have to hold on the domain,
    // thus we can intersect them with the domain of the block. However, for
    // assumptions the domain has to imply them, thus:
    //                     _              _____
    //   Dom => S   <==>   A v B   <==>   A - B
    //
    // To avoid the complement we will register A - B as a restriction, not an
    // assumption.
    isl_set *S = AS.Set.copy();
    if (AS.Sign == AS_RESTRICTION)
      S = isl_set_params(isl_set_intersect(S, Dom));
    else /* (AS.Sign == AS_ASSUMPTION) */
      S = isl_set_params(isl_set_subtract(Dom, S));

    scop->addAssumption(AS.Kind, isl::manage(S), AS.Loc, AS_RESTRICTION, AS.BB,
                        AS.RequiresRTC);
  }
}

void ScopBuilder::addUserAssumptions(
    AssumptionCache &AC, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  for (auto &Assumption : AC.assumptions()) {
    auto *CI = dyn_cast_or_null<CallInst>(Assumption);
    if (!CI || CI->arg_size() != 1)
      continue;

    bool InScop = scop->contains(CI);
    if (!InScop && !scop->isDominatedBy(DT, CI->getParent()))
      continue;

    auto *L = LI.getLoopFor(CI->getParent());
    auto *Val = CI->getArgOperand(0);
    ParameterSetTy DetectedParams;
    auto &R = scop->getRegion();
    if (!isAffineConstraint(Val, &R, L, SE, DetectedParams)) {
      ORE.emit(
          OptimizationRemarkAnalysis(DEBUG_TYPE, "IgnoreUserAssumption", CI)
          << "Non-affine user assumption ignored.");
      continue;
    }

    // Collect all newly introduced parameters.
    ParameterSetTy NewParams;
    for (auto *Param : DetectedParams) {
      Param = extractConstantFactor(Param, SE).second;
      Param = scop->getRepresentingInvariantLoadSCEV(Param);
      if (scop->isParam(Param))
        continue;
      NewParams.insert(Param);
    }

    SmallVector<isl_set *, 2> ConditionSets;
    auto *TI = InScop ? CI->getParent()->getTerminator() : nullptr;
    BasicBlock *BB = InScop ? CI->getParent() : R.getEntry();
    auto *Dom = InScop ? isl_set_copy(scop->getDomainConditions(BB).get())
                       : isl_set_copy(scop->getContext().get());
    assert(Dom && "Cannot propagate a nullptr.");
    bool Valid = buildConditionSets(BB, Val, TI, L, Dom, InvalidDomainMap,
                                    ConditionSets);
    isl_set_free(Dom);

    if (!Valid)
      continue;

    isl_set *AssumptionCtx = nullptr;
    if (InScop) {
      AssumptionCtx = isl_set_complement(isl_set_params(ConditionSets[1]));
      isl_set_free(ConditionSets[0]);
    } else {
      AssumptionCtx = isl_set_complement(ConditionSets[1]);
      AssumptionCtx = isl_set_intersect(AssumptionCtx, ConditionSets[0]);
    }

    // Project out newly introduced parameters as they are not otherwise useful.
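    // (Note: isl_set_project_out shifts the indices of the remaining
    // parameters, which is why 'u' is decremented again after a removal in
    // the loop below.)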
    if (!NewParams.empty()) {
      for (isl_size u = 0; u < isl_set_n_param(AssumptionCtx); u++) {
        auto *Id = isl_set_get_dim_id(AssumptionCtx, isl_dim_param, u);
        auto *Param = static_cast<const SCEV *>(isl_id_get_user(Id));
        isl_id_free(Id);

        if (!NewParams.count(Param))
          continue;

        AssumptionCtx =
            isl_set_project_out(AssumptionCtx, isl_dim_param, u--, 1);
      }
    }
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "UserAssumption", CI)
             << "Use user assumption: "
             << stringFromIslObj(AssumptionCtx, "null"));
    isl::set newContext =
        scop->getContext().intersect(isl::manage(AssumptionCtx));
    scop->setContext(newContext);
  }
}

bool ScopBuilder::buildAccessMultiDimFixed(MemAccInst Inst, ScopStmt *Stmt) {
  Value *Val = Inst.getValueOperand();
  Type *ElementType = Val->getType();
  Value *Address = Inst.getPointerOperand();
  const SCEV *AccessFunction =
      SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent()));
  const SCEVUnknown *BasePointer =
      dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));
  enum MemoryAccess::AccessType AccType =
      isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;

  if (auto *BitCast = dyn_cast<BitCastInst>(Address)) {
    auto *Src = BitCast->getOperand(0);
    auto *SrcTy = Src->getType();
    auto *DstTy = BitCast->getType();
    // Do not try to delinearize non-sized (opaque) pointers.
    if ((SrcTy->isPointerTy() && !SrcTy->getPointerElementType()->isSized()) ||
        (DstTy->isPointerTy() && !DstTy->getPointerElementType()->isSized())) {
      return false;
    }
    if (SrcTy->isPointerTy() && DstTy->isPointerTy() &&
        DL.getTypeAllocSize(SrcTy->getPointerElementType()) ==
            DL.getTypeAllocSize(DstTy->getPointerElementType()))
      Address = Src;
  }

  auto *GEP = dyn_cast<GetElementPtrInst>(Address);
  if (!GEP)
    return false;

  SmallVector<const SCEV *, 4> Subscripts;
  SmallVector<int, 4> Sizes;
  getIndexExpressionsFromGEP(SE, GEP, Subscripts, Sizes);
  auto *BasePtr = GEP->getOperand(0);

  if (auto *BasePtrCast = dyn_cast<BitCastInst>(BasePtr))
    BasePtr = BasePtrCast->getOperand(0);

  // Check for identical base pointers to ensure that we do not miss index
  // offsets that have been added before this GEP is applied.
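  // (Illustrative example: for 'float *P = &A[Off]; ... P[i] = x;' the GEP
  // base pointer is P while the SCEV pointer base is A, so the access is
  // rejected here instead of being misinterpreted as starting at A.)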
1483 if (BasePtr != BasePointer->getValue()) 1484 return false; 1485 1486 std::vector<const SCEV *> SizesSCEV; 1487 1488 const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads(); 1489 1490 Loop *SurroundingLoop = Stmt->getSurroundingLoop(); 1491 for (auto *Subscript : Subscripts) { 1492 InvariantLoadsSetTy AccessILS; 1493 if (!isAffineExpr(&scop->getRegion(), SurroundingLoop, Subscript, SE, 1494 &AccessILS)) 1495 return false; 1496 1497 for (LoadInst *LInst : AccessILS) 1498 if (!ScopRIL.count(LInst)) 1499 return false; 1500 } 1501 1502 if (Sizes.empty()) 1503 return false; 1504 1505 SizesSCEV.push_back(nullptr); 1506 1507 for (auto V : Sizes) 1508 SizesSCEV.push_back(SE.getSCEV( 1509 ConstantInt::get(IntegerType::getInt64Ty(BasePtr->getContext()), V))); 1510 1511 addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType, 1512 true, Subscripts, SizesSCEV, Val); 1513 return true; 1514 } 1515 1516 bool ScopBuilder::buildAccessMultiDimParam(MemAccInst Inst, ScopStmt *Stmt) { 1517 if (!PollyDelinearize) 1518 return false; 1519 1520 Value *Address = Inst.getPointerOperand(); 1521 Value *Val = Inst.getValueOperand(); 1522 Type *ElementType = Val->getType(); 1523 unsigned ElementSize = DL.getTypeAllocSize(ElementType); 1524 enum MemoryAccess::AccessType AccType = 1525 isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE; 1526 1527 const SCEV *AccessFunction = 1528 SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent())); 1529 const SCEVUnknown *BasePointer = 1530 dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction)); 1531 1532 assert(BasePointer && "Could not find base pointer"); 1533 1534 auto &InsnToMemAcc = scop->getInsnToMemAccMap(); 1535 auto AccItr = InsnToMemAcc.find(Inst); 1536 if (AccItr == InsnToMemAcc.end()) 1537 return false; 1538 1539 std::vector<const SCEV *> Sizes = {nullptr}; 1540 1541 Sizes.insert(Sizes.end(), AccItr->second.Shape->DelinearizedSizes.begin(), 1542 AccItr->second.Shape->DelinearizedSizes.end()); 1543 1544 // In case only the element size is contained in the 'Sizes' array, the 1545 // access does not access a real multi-dimensional array. Hence, we allow 1546 // the normal single-dimensional access construction to handle this. 1547 if (Sizes.size() == 1) 1548 return false; 1549 1550 // Remove the element size. This information is already provided by the 1551 // ElementSize parameter. In case the element size of this access and the 1552 // element size used for delinearization differs the delinearization is 1553 // incorrect. Hence, we invalidate the scop. 1554 // 1555 // TODO: Handle delinearization with differing element sizes. 
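  // Sketch of the expected contents (hypothetical access, assuming the usual
  // delinearization result): for
  //   double A[n][m];  ... = A[i][j];
  // the delinearized sizes are { m, 8 }, so after prepending the nullptr for
  // the unbounded outermost dimension, 'Sizes' is { nullptr, m, 8 }. The
  // trailing 8 is the element size in bytes that is popped below; if it does
  // not match the element size of this access (e.g., a 4-byte float load from
  // the same buffer), the recovered shape is unreliable and the SCoP is
  // invalidated.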
1556 auto DelinearizedSize = 1557 cast<SCEVConstant>(Sizes.back())->getAPInt().getSExtValue(); 1558 Sizes.pop_back(); 1559 if (ElementSize != DelinearizedSize) 1560 scop->invalidate(DELINEARIZATION, Inst->getDebugLoc(), Inst->getParent()); 1561 1562 addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType, 1563 true, AccItr->second.DelinearizedSubscripts, Sizes, Val); 1564 return true; 1565 } 1566 1567 bool ScopBuilder::buildAccessMemIntrinsic(MemAccInst Inst, ScopStmt *Stmt) { 1568 auto *MemIntr = dyn_cast_or_null<MemIntrinsic>(Inst); 1569 1570 if (MemIntr == nullptr) 1571 return false; 1572 1573 auto *L = LI.getLoopFor(Inst->getParent()); 1574 auto *LengthVal = SE.getSCEVAtScope(MemIntr->getLength(), L); 1575 assert(LengthVal); 1576 1577 // Check if the length val is actually affine or if we overapproximate it 1578 InvariantLoadsSetTy AccessILS; 1579 const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads(); 1580 1581 Loop *SurroundingLoop = Stmt->getSurroundingLoop(); 1582 bool LengthIsAffine = isAffineExpr(&scop->getRegion(), SurroundingLoop, 1583 LengthVal, SE, &AccessILS); 1584 for (LoadInst *LInst : AccessILS) 1585 if (!ScopRIL.count(LInst)) 1586 LengthIsAffine = false; 1587 if (!LengthIsAffine) 1588 LengthVal = nullptr; 1589 1590 auto *DestPtrVal = MemIntr->getDest(); 1591 assert(DestPtrVal); 1592 1593 auto *DestAccFunc = SE.getSCEVAtScope(DestPtrVal, L); 1594 assert(DestAccFunc); 1595 // Ignore accesses to "NULL". 1596 // TODO: We could use this to optimize the region further, e.g., intersect 1597 // the context with 1598 // isl_set_complement(isl_set_params(getDomain())) 1599 // as we know it would be undefined to execute this instruction anyway. 1600 if (DestAccFunc->isZero()) 1601 return true; 1602 1603 if (auto *U = dyn_cast<SCEVUnknown>(DestAccFunc)) { 1604 if (isa<ConstantPointerNull>(U->getValue())) 1605 return true; 1606 } 1607 1608 auto *DestPtrSCEV = dyn_cast<SCEVUnknown>(SE.getPointerBase(DestAccFunc)); 1609 assert(DestPtrSCEV); 1610 DestAccFunc = SE.getMinusSCEV(DestAccFunc, DestPtrSCEV); 1611 addArrayAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, DestPtrSCEV->getValue(), 1612 IntegerType::getInt8Ty(DestPtrVal->getContext()), 1613 LengthIsAffine, {DestAccFunc, LengthVal}, {nullptr}, 1614 Inst.getValueOperand()); 1615 1616 auto *MemTrans = dyn_cast<MemTransferInst>(MemIntr); 1617 if (!MemTrans) 1618 return true; 1619 1620 auto *SrcPtrVal = MemTrans->getSource(); 1621 assert(SrcPtrVal); 1622 1623 auto *SrcAccFunc = SE.getSCEVAtScope(SrcPtrVal, L); 1624 assert(SrcAccFunc); 1625 // Ignore accesses to "NULL". 
1626 // TODO: See above TODO 1627 if (SrcAccFunc->isZero()) 1628 return true; 1629 1630 auto *SrcPtrSCEV = dyn_cast<SCEVUnknown>(SE.getPointerBase(SrcAccFunc)); 1631 assert(SrcPtrSCEV); 1632 SrcAccFunc = SE.getMinusSCEV(SrcAccFunc, SrcPtrSCEV); 1633 addArrayAccess(Stmt, Inst, MemoryAccess::READ, SrcPtrSCEV->getValue(), 1634 IntegerType::getInt8Ty(SrcPtrVal->getContext()), 1635 LengthIsAffine, {SrcAccFunc, LengthVal}, {nullptr}, 1636 Inst.getValueOperand()); 1637 1638 return true; 1639 } 1640 1641 bool ScopBuilder::buildAccessCallInst(MemAccInst Inst, ScopStmt *Stmt) { 1642 auto *CI = dyn_cast_or_null<CallInst>(Inst); 1643 1644 if (CI == nullptr) 1645 return false; 1646 1647 if (CI->doesNotAccessMemory() || isIgnoredIntrinsic(CI) || isDebugCall(CI)) 1648 return true; 1649 1650 bool ReadOnly = false; 1651 auto *AF = SE.getConstant(IntegerType::getInt64Ty(CI->getContext()), 0); 1652 auto *CalledFunction = CI->getCalledFunction(); 1653 switch (AA.getModRefBehavior(CalledFunction)) { 1654 case FMRB_UnknownModRefBehavior: 1655 llvm_unreachable("Unknown mod ref behaviour cannot be represented."); 1656 case FMRB_DoesNotAccessMemory: 1657 return true; 1658 case FMRB_OnlyWritesMemory: 1659 case FMRB_OnlyWritesInaccessibleMem: 1660 case FMRB_OnlyWritesInaccessibleOrArgMem: 1661 case FMRB_OnlyAccessesInaccessibleMem: 1662 case FMRB_OnlyAccessesInaccessibleOrArgMem: 1663 return false; 1664 case FMRB_OnlyReadsMemory: 1665 case FMRB_OnlyReadsInaccessibleMem: 1666 case FMRB_OnlyReadsInaccessibleOrArgMem: 1667 GlobalReads.emplace_back(Stmt, CI); 1668 return true; 1669 case FMRB_OnlyReadsArgumentPointees: 1670 ReadOnly = true; 1671 LLVM_FALLTHROUGH; 1672 case FMRB_OnlyWritesArgumentPointees: 1673 case FMRB_OnlyAccessesArgumentPointees: { 1674 auto AccType = ReadOnly ? MemoryAccess::READ : MemoryAccess::MAY_WRITE; 1675 Loop *L = LI.getLoopFor(Inst->getParent()); 1676 for (const auto &Arg : CI->args()) { 1677 if (!Arg->getType()->isPointerTy()) 1678 continue; 1679 1680 auto *ArgSCEV = SE.getSCEVAtScope(Arg, L); 1681 if (ArgSCEV->isZero()) 1682 continue; 1683 1684 if (auto *U = dyn_cast<SCEVUnknown>(ArgSCEV)) { 1685 if (isa<ConstantPointerNull>(U->getValue())) 1686 return true; 1687 } 1688 1689 auto *ArgBasePtr = cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV)); 1690 addArrayAccess(Stmt, Inst, AccType, ArgBasePtr->getValue(), 1691 ArgBasePtr->getType(), false, {AF}, {nullptr}, CI); 1692 } 1693 return true; 1694 } 1695 } 1696 1697 return true; 1698 } 1699 1700 void ScopBuilder::buildAccessSingleDim(MemAccInst Inst, ScopStmt *Stmt) { 1701 Value *Address = Inst.getPointerOperand(); 1702 Value *Val = Inst.getValueOperand(); 1703 Type *ElementType = Val->getType(); 1704 enum MemoryAccess::AccessType AccType = 1705 isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE; 1706 1707 const SCEV *AccessFunction = 1708 SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent())); 1709 const SCEVUnknown *BasePointer = 1710 dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction)); 1711 1712 assert(BasePointer && "Could not find base pointer"); 1713 AccessFunction = SE.getMinusSCEV(AccessFunction, BasePointer); 1714 1715 // Check if the access depends on a loop contained in a non-affine subregion. 
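  // Hypothetical example: if this statement models a non-affine subregion
  // that itself contains a loop with induction variable 'k', an access such
  // as A[k] cannot be expressed as an affine function of the statement's
  // surrounding iterators, because 'k' takes several values within a single
  // execution of the statement. Such accesses are kept, but only as
  // non-affine (and, for stores, MAY_WRITE) over-approximations below.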
1716 bool isVariantInNonAffineLoop = false; 1717 SetVector<const Loop *> Loops; 1718 findLoops(AccessFunction, Loops); 1719 for (const Loop *L : Loops) 1720 if (Stmt->contains(L)) { 1721 isVariantInNonAffineLoop = true; 1722 break; 1723 } 1724 1725 InvariantLoadsSetTy AccessILS; 1726 1727 Loop *SurroundingLoop = Stmt->getSurroundingLoop(); 1728 bool IsAffine = !isVariantInNonAffineLoop && 1729 isAffineExpr(&scop->getRegion(), SurroundingLoop, 1730 AccessFunction, SE, &AccessILS); 1731 1732 const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads(); 1733 for (LoadInst *LInst : AccessILS) 1734 if (!ScopRIL.count(LInst)) 1735 IsAffine = false; 1736 1737 if (!IsAffine && AccType == MemoryAccess::MUST_WRITE) 1738 AccType = MemoryAccess::MAY_WRITE; 1739 1740 addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType, 1741 IsAffine, {AccessFunction}, {nullptr}, Val); 1742 } 1743 1744 void ScopBuilder::buildMemoryAccess(MemAccInst Inst, ScopStmt *Stmt) { 1745 if (buildAccessMemIntrinsic(Inst, Stmt)) 1746 return; 1747 1748 if (buildAccessCallInst(Inst, Stmt)) 1749 return; 1750 1751 if (buildAccessMultiDimFixed(Inst, Stmt)) 1752 return; 1753 1754 if (buildAccessMultiDimParam(Inst, Stmt)) 1755 return; 1756 1757 buildAccessSingleDim(Inst, Stmt); 1758 } 1759 1760 void ScopBuilder::buildAccessFunctions() { 1761 for (auto &Stmt : *scop) { 1762 if (Stmt.isBlockStmt()) { 1763 buildAccessFunctions(&Stmt, *Stmt.getBasicBlock()); 1764 continue; 1765 } 1766 1767 Region *R = Stmt.getRegion(); 1768 for (BasicBlock *BB : R->blocks()) 1769 buildAccessFunctions(&Stmt, *BB, R); 1770 } 1771 1772 // Build write accesses for values that are used after the SCoP. 1773 // The instructions defining them might be synthesizable and therefore not 1774 // contained in any statement, hence we iterate over the original instructions 1775 // to identify all escaping values. 1776 for (BasicBlock *BB : scop->getRegion().blocks()) { 1777 for (Instruction &Inst : *BB) 1778 buildEscapingDependences(&Inst); 1779 } 1780 } 1781 1782 bool ScopBuilder::shouldModelInst(Instruction *Inst, Loop *L) { 1783 return !Inst->isTerminator() && !isIgnoredIntrinsic(Inst) && 1784 !canSynthesize(Inst, *scop, &SE, L); 1785 } 1786 1787 /// Generate a name for a statement. 1788 /// 1789 /// @param BB The basic block the statement will represent. 1790 /// @param BBIdx The index of the @p BB relative to other BBs/regions. 1791 /// @param Count The index of the created statement in @p BB. 1792 /// @param IsMain Whether this is the main of all statement for @p BB. If true, 1793 /// no suffix will be added. 1794 /// @param IsLast Uses a special indicator for the last statement of a BB. 1795 static std::string makeStmtName(BasicBlock *BB, long BBIdx, int Count, 1796 bool IsMain, bool IsLast = false) { 1797 std::string Suffix; 1798 if (!IsMain) { 1799 if (UseInstructionNames) 1800 Suffix = '_'; 1801 if (IsLast) 1802 Suffix += "last"; 1803 else if (Count < 26) 1804 Suffix += 'a' + Count; 1805 else 1806 Suffix += std::to_string(Count); 1807 } 1808 return getIslCompatibleName("Stmt", BB, BBIdx, Suffix, UseInstructionNames); 1809 } 1810 1811 /// Generate a name for a statement that represents a non-affine subregion. 1812 /// 1813 /// @param R The region the statement will represent. 1814 /// @param RIdx The index of the @p R relative to other BBs/regions. 
1815 static std::string makeStmtName(Region *R, long RIdx) { 1816 return getIslCompatibleName("Stmt", R->getNameStr(), RIdx, "", 1817 UseInstructionNames); 1818 } 1819 1820 void ScopBuilder::buildSequentialBlockStmts(BasicBlock *BB, bool SplitOnStore) { 1821 Loop *SurroundingLoop = LI.getLoopFor(BB); 1822 1823 int Count = 0; 1824 long BBIdx = scop->getNextStmtIdx(); 1825 std::vector<Instruction *> Instructions; 1826 for (Instruction &Inst : *BB) { 1827 if (shouldModelInst(&Inst, SurroundingLoop)) 1828 Instructions.push_back(&Inst); 1829 if (Inst.getMetadata("polly_split_after") || 1830 (SplitOnStore && isa<StoreInst>(Inst))) { 1831 std::string Name = makeStmtName(BB, BBIdx, Count, Count == 0); 1832 scop->addScopStmt(BB, Name, SurroundingLoop, Instructions); 1833 Count++; 1834 Instructions.clear(); 1835 } 1836 } 1837 1838 std::string Name = makeStmtName(BB, BBIdx, Count, Count == 0); 1839 scop->addScopStmt(BB, Name, SurroundingLoop, Instructions); 1840 } 1841 1842 /// Is @p Inst an ordered instruction? 1843 /// 1844 /// An unordered instruction is an instruction, such that a sequence of 1845 /// unordered instructions can be permuted without changing semantics. Any 1846 /// instruction for which this is not always the case is ordered. 1847 static bool isOrderedInstruction(Instruction *Inst) { 1848 return Inst->mayHaveSideEffects() || Inst->mayReadOrWriteMemory(); 1849 } 1850 1851 /// Join instructions to the same statement if one uses the scalar result of the 1852 /// other. 1853 static void joinOperandTree(EquivalenceClasses<Instruction *> &UnionFind, 1854 ArrayRef<Instruction *> ModeledInsts) { 1855 for (Instruction *Inst : ModeledInsts) { 1856 if (isa<PHINode>(Inst)) 1857 continue; 1858 1859 for (Use &Op : Inst->operands()) { 1860 Instruction *OpInst = dyn_cast<Instruction>(Op.get()); 1861 if (!OpInst) 1862 continue; 1863 1864 // Check if OpInst is in the BB and is a modeled instruction. 1865 auto OpVal = UnionFind.findValue(OpInst); 1866 if (OpVal == UnionFind.end()) 1867 continue; 1868 1869 UnionFind.unionSets(Inst, OpInst); 1870 } 1871 } 1872 } 1873 1874 /// Ensure that the order of ordered instructions does not change. 1875 /// 1876 /// If we encounter an ordered instruction enclosed in instructions belonging to 1877 /// a different statement (which might as well contain ordered instructions, but 1878 /// this is not tested here), join them. 1879 static void 1880 joinOrderedInstructions(EquivalenceClasses<Instruction *> &UnionFind, 1881 ArrayRef<Instruction *> ModeledInsts) { 1882 SetVector<Instruction *> SeenLeaders; 1883 for (Instruction *Inst : ModeledInsts) { 1884 if (!isOrderedInstruction(Inst)) 1885 continue; 1886 1887 Instruction *Leader = UnionFind.getLeaderValue(Inst); 1888 // Since previous iterations might have merged sets, some items in 1889 // SeenLeaders are not leaders anymore. However, The new leader of 1890 // previously merged instructions must be one of the former leaders of 1891 // these merged instructions. 1892 bool Inserted = SeenLeaders.insert(Leader); 1893 if (Inserted) 1894 continue; 1895 1896 // Merge statements to close holes. Say, we have already seen statements A 1897 // and B, in this order. Then we see an instruction of A again and we would 1898 // see the pattern "A B A". This function joins all statements until the 1899 // only seen occurrence of A. 1900 for (Instruction *Prev : reverse(SeenLeaders)) { 1901 // We are backtracking from the last element until we see Inst's leader 1902 // in SeenLeaders and merge all into one set. 
Although leaders of 1903 // instructions change during the execution of this loop, it's irrelevant 1904 // as we are just searching for the element that we already confirmed is 1905 // in the list. 1906 if (Prev == Leader) 1907 break; 1908 UnionFind.unionSets(Prev, Leader); 1909 } 1910 } 1911 } 1912 1913 /// If the BasicBlock has an edge from itself, ensure that the PHI WRITEs for 1914 /// the incoming values from this block are executed after the PHI READ. 1915 /// 1916 /// Otherwise it could overwrite the incoming value from before the BB with the 1917 /// value for the next execution. This can happen if the PHI WRITE is added to 1918 /// the statement with the instruction that defines the incoming value (instead 1919 /// of the last statement of the same BB). To ensure that the PHI READ and WRITE 1920 /// are in order, we put both into the statement. PHI WRITEs are always executed 1921 /// after PHI READs when they are in the same statement. 1922 /// 1923 /// TODO: This is an overpessimization. We only have to ensure that the PHI 1924 /// WRITE is not put into a statement containing the PHI itself. That could also 1925 /// be done by 1926 /// - having all (strongly connected) PHIs in a single statement, 1927 /// - unite only the PHIs in the operand tree of the PHI WRITE (because it only 1928 /// has a chance of being lifted before a PHI by being in a statement with a 1929 /// PHI that comes before in the basic block), or 1930 /// - when uniting statements, ensure that no (relevant) PHIs are overtaken. 1931 static void joinOrderedPHIs(EquivalenceClasses<Instruction *> &UnionFind, 1932 ArrayRef<Instruction *> ModeledInsts) { 1933 for (Instruction *Inst : ModeledInsts) { 1934 PHINode *PHI = dyn_cast<PHINode>(Inst); 1935 if (!PHI) 1936 continue; 1937 1938 int Idx = PHI->getBasicBlockIndex(PHI->getParent()); 1939 if (Idx < 0) 1940 continue; 1941 1942 Instruction *IncomingVal = 1943 dyn_cast<Instruction>(PHI->getIncomingValue(Idx)); 1944 if (!IncomingVal) 1945 continue; 1946 1947 UnionFind.unionSets(PHI, IncomingVal); 1948 } 1949 } 1950 1951 void ScopBuilder::buildEqivClassBlockStmts(BasicBlock *BB) { 1952 Loop *L = LI.getLoopFor(BB); 1953 1954 // Extracting out modeled instructions saves us from checking 1955 // shouldModelInst() repeatedly. 1956 SmallVector<Instruction *, 32> ModeledInsts; 1957 EquivalenceClasses<Instruction *> UnionFind; 1958 Instruction *MainInst = nullptr, *MainLeader = nullptr; 1959 for (Instruction &Inst : *BB) { 1960 if (!shouldModelInst(&Inst, L)) 1961 continue; 1962 ModeledInsts.push_back(&Inst); 1963 UnionFind.insert(&Inst); 1964 1965 // When a BB is split into multiple statements, the main statement is the 1966 // one containing the 'main' instruction. We select the first instruction 1967 // that is unlikely to be removed (because it has side-effects) as the main 1968 // one. It is used to ensure that at least one statement from the bb has the 1969 // same name as with -polly-stmt-granularity=bb. 1970 if (!MainInst && (isa<StoreInst>(Inst) || 1971 (isa<CallInst>(Inst) && !isa<IntrinsicInst>(Inst)))) 1972 MainInst = &Inst; 1973 } 1974 1975 joinOperandTree(UnionFind, ModeledInsts); 1976 joinOrderedInstructions(UnionFind, ModeledInsts); 1977 joinOrderedPHIs(UnionFind, ModeledInsts); 1978 1979 // The list of instructions for statement (statement represented by the leader 1980 // instruction). 1981 MapVector<Instruction *, std::vector<Instruction *>> LeaderToInstList; 1982 1983 // The order of statements must be preserved w.r.t. their ordered 1984 // instructions. 
Without this explicit scan, we would also use non-ordered 1985 // instructions (whose order is arbitrary) to determine statement order. 1986 for (Instruction *Inst : ModeledInsts) { 1987 if (!isOrderedInstruction(Inst)) 1988 continue; 1989 1990 auto LeaderIt = UnionFind.findLeader(Inst); 1991 if (LeaderIt == UnionFind.member_end()) 1992 continue; 1993 1994 // Insert element for the leader instruction. 1995 (void)LeaderToInstList[*LeaderIt]; 1996 } 1997 1998 // Collect the instructions of all leaders. UnionFind's member iterator 1999 // unfortunately are not in any specific order. 2000 for (Instruction *Inst : ModeledInsts) { 2001 auto LeaderIt = UnionFind.findLeader(Inst); 2002 if (LeaderIt == UnionFind.member_end()) 2003 continue; 2004 2005 if (Inst == MainInst) 2006 MainLeader = *LeaderIt; 2007 std::vector<Instruction *> &InstList = LeaderToInstList[*LeaderIt]; 2008 InstList.push_back(Inst); 2009 } 2010 2011 // Finally build the statements. 2012 int Count = 0; 2013 long BBIdx = scop->getNextStmtIdx(); 2014 for (auto &Instructions : LeaderToInstList) { 2015 std::vector<Instruction *> &InstList = Instructions.second; 2016 2017 // If there is no main instruction, make the first statement the main. 2018 bool IsMain = (MainInst ? MainLeader == Instructions.first : Count == 0); 2019 2020 std::string Name = makeStmtName(BB, BBIdx, Count, IsMain); 2021 scop->addScopStmt(BB, Name, L, std::move(InstList)); 2022 Count += 1; 2023 } 2024 2025 // Unconditionally add an epilogue (last statement). It contains no 2026 // instructions, but holds the PHI write accesses for successor basic blocks, 2027 // if the incoming value is not defined in another statement if the same BB. 2028 // The epilogue becomes the main statement only if there is no other 2029 // statement that could become main. 2030 // The epilogue will be removed if no PHIWrite is added to it. 2031 std::string EpilogueName = makeStmtName(BB, BBIdx, Count, Count == 0, true); 2032 scop->addScopStmt(BB, EpilogueName, L, {}); 2033 } 2034 2035 void ScopBuilder::buildStmts(Region &SR) { 2036 if (scop->isNonAffineSubRegion(&SR)) { 2037 std::vector<Instruction *> Instructions; 2038 Loop *SurroundingLoop = 2039 getFirstNonBoxedLoopFor(SR.getEntry(), LI, scop->getBoxedLoops()); 2040 for (Instruction &Inst : *SR.getEntry()) 2041 if (shouldModelInst(&Inst, SurroundingLoop)) 2042 Instructions.push_back(&Inst); 2043 long RIdx = scop->getNextStmtIdx(); 2044 std::string Name = makeStmtName(&SR, RIdx); 2045 scop->addScopStmt(&SR, Name, SurroundingLoop, Instructions); 2046 return; 2047 } 2048 2049 for (auto I = SR.element_begin(), E = SR.element_end(); I != E; ++I) 2050 if (I->isSubRegion()) 2051 buildStmts(*I->getNodeAs<Region>()); 2052 else { 2053 BasicBlock *BB = I->getNodeAs<BasicBlock>(); 2054 switch (StmtGranularity) { 2055 case GranularityChoice::BasicBlocks: 2056 buildSequentialBlockStmts(BB); 2057 break; 2058 case GranularityChoice::ScalarIndependence: 2059 buildEqivClassBlockStmts(BB); 2060 break; 2061 case GranularityChoice::Stores: 2062 buildSequentialBlockStmts(BB, true); 2063 break; 2064 } 2065 } 2066 } 2067 2068 void ScopBuilder::buildAccessFunctions(ScopStmt *Stmt, BasicBlock &BB, 2069 Region *NonAffineSubRegion) { 2070 assert( 2071 Stmt && 2072 "The exit BB is the only one that cannot be represented by a statement"); 2073 assert(Stmt->represents(&BB)); 2074 2075 // We do not build access functions for error blocks, as they may contain 2076 // instructions we can not model. 
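  // A typical error block (hypothetical source, for illustration) would be
  // the cold branch of
  //   if (n < 0) { fprintf(stderr, "..."); abort(); }
  // inside the region: it is assumed not to be executed, so rather than
  // modeling the calls it contains we skip its accesses here; the assumption
  // that it is not executed is handled separately when the statement domains
  // are built.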
2077 if (SD.isErrorBlock(BB, scop->getRegion())) 2078 return; 2079 2080 auto BuildAccessesForInst = [this, Stmt, 2081 NonAffineSubRegion](Instruction *Inst) { 2082 PHINode *PHI = dyn_cast<PHINode>(Inst); 2083 if (PHI) 2084 buildPHIAccesses(Stmt, PHI, NonAffineSubRegion, false); 2085 2086 if (auto MemInst = MemAccInst::dyn_cast(*Inst)) { 2087 assert(Stmt && "Cannot build access function in non-existing statement"); 2088 buildMemoryAccess(MemInst, Stmt); 2089 } 2090 2091 // PHI nodes have already been modeled above and terminators that are 2092 // not part of a non-affine subregion are fully modeled and regenerated 2093 // from the polyhedral domains. Hence, they do not need to be modeled as 2094 // explicit data dependences. 2095 if (!PHI) 2096 buildScalarDependences(Stmt, Inst); 2097 }; 2098 2099 const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads(); 2100 bool IsEntryBlock = (Stmt->getEntryBlock() == &BB); 2101 if (IsEntryBlock) { 2102 for (Instruction *Inst : Stmt->getInstructions()) 2103 BuildAccessesForInst(Inst); 2104 if (Stmt->isRegionStmt()) 2105 BuildAccessesForInst(BB.getTerminator()); 2106 } else { 2107 for (Instruction &Inst : BB) { 2108 if (isIgnoredIntrinsic(&Inst)) 2109 continue; 2110 2111 // Invariant loads already have been processed. 2112 if (isa<LoadInst>(Inst) && RIL.count(cast<LoadInst>(&Inst))) 2113 continue; 2114 2115 BuildAccessesForInst(&Inst); 2116 } 2117 } 2118 } 2119 2120 MemoryAccess *ScopBuilder::addMemoryAccess( 2121 ScopStmt *Stmt, Instruction *Inst, MemoryAccess::AccessType AccType, 2122 Value *BaseAddress, Type *ElementType, bool Affine, Value *AccessValue, 2123 ArrayRef<const SCEV *> Subscripts, ArrayRef<const SCEV *> Sizes, 2124 MemoryKind Kind) { 2125 bool isKnownMustAccess = false; 2126 2127 // Accesses in single-basic block statements are always executed. 2128 if (Stmt->isBlockStmt()) 2129 isKnownMustAccess = true; 2130 2131 if (Stmt->isRegionStmt()) { 2132 // Accesses that dominate the exit block of a non-affine region are always 2133 // executed. In non-affine regions there may exist MemoryKind::Values that 2134 // do not dominate the exit. MemoryKind::Values will always dominate the 2135 // exit and MemoryKind::PHIs only if there is at most one PHI_WRITE in the 2136 // non-affine region. 2137 if (Inst && DT.dominates(Inst->getParent(), Stmt->getRegion()->getExit())) 2138 isKnownMustAccess = true; 2139 } 2140 2141 // Non-affine PHI writes do not "happen" at a particular instruction, but 2142 // after exiting the statement. Therefore they are guaranteed to execute and 2143 // overwrite the old value. 
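  // Illustrative consequence (hypothetical region statement): even if the
  // incoming value of a PHI is computed in a conditionally executed block
  // inside the region, the PHI write itself materializes when the statement
  // is left and is therefore unconditional. That is why such writes are
  // treated as known-must accesses below, preventing the demotion from
  // MUST_WRITE to MAY_WRITE.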
2144 if (Kind == MemoryKind::PHI || Kind == MemoryKind::ExitPHI) 2145 isKnownMustAccess = true; 2146 2147 if (!isKnownMustAccess && AccType == MemoryAccess::MUST_WRITE) 2148 AccType = MemoryAccess::MAY_WRITE; 2149 2150 auto *Access = new MemoryAccess(Stmt, Inst, AccType, BaseAddress, ElementType, 2151 Affine, Subscripts, Sizes, AccessValue, Kind); 2152 2153 scop->addAccessFunction(Access); 2154 Stmt->addAccess(Access); 2155 return Access; 2156 } 2157 2158 void ScopBuilder::addArrayAccess(ScopStmt *Stmt, MemAccInst MemAccInst, 2159 MemoryAccess::AccessType AccType, 2160 Value *BaseAddress, Type *ElementType, 2161 bool IsAffine, 2162 ArrayRef<const SCEV *> Subscripts, 2163 ArrayRef<const SCEV *> Sizes, 2164 Value *AccessValue) { 2165 ArrayBasePointers.insert(BaseAddress); 2166 addMemoryAccess(Stmt, MemAccInst, AccType, BaseAddress, ElementType, IsAffine, 2167 AccessValue, Subscripts, Sizes, MemoryKind::Array); 2168 } 2169 2170 /// Check if @p Expr is divisible by @p Size. 2171 static bool isDivisible(const SCEV *Expr, unsigned Size, ScalarEvolution &SE) { 2172 assert(Size != 0); 2173 if (Size == 1) 2174 return true; 2175 2176 // Only one factor needs to be divisible. 2177 if (auto *MulExpr = dyn_cast<SCEVMulExpr>(Expr)) { 2178 for (auto *FactorExpr : MulExpr->operands()) 2179 if (isDivisible(FactorExpr, Size, SE)) 2180 return true; 2181 return false; 2182 } 2183 2184 // For other n-ary expressions (Add, AddRec, Max,...) all operands need 2185 // to be divisible. 2186 if (auto *NAryExpr = dyn_cast<SCEVNAryExpr>(Expr)) { 2187 for (auto *OpExpr : NAryExpr->operands()) 2188 if (!isDivisible(OpExpr, Size, SE)) 2189 return false; 2190 return true; 2191 } 2192 2193 auto *SizeSCEV = SE.getConstant(Expr->getType(), Size); 2194 auto *UDivSCEV = SE.getUDivExpr(Expr, SizeSCEV); 2195 auto *MulSCEV = SE.getMulExpr(UDivSCEV, SizeSCEV); 2196 return MulSCEV == Expr; 2197 } 2198 2199 void ScopBuilder::foldSizeConstantsToRight() { 2200 isl::union_set Accessed = scop->getAccesses().range(); 2201 2202 for (auto Array : scop->arrays()) { 2203 if (Array->getNumberOfDimensions() <= 1) 2204 continue; 2205 2206 isl::space Space = Array->getSpace(); 2207 Space = Space.align_params(Accessed.get_space()); 2208 2209 if (!Accessed.contains(Space)) 2210 continue; 2211 2212 isl::set Elements = Accessed.extract_set(Space); 2213 isl::map Transform = isl::map::universe(Array->getSpace().map_from_set()); 2214 2215 std::vector<int> Int; 2216 unsigned Dims = unsignedFromIslSize(Elements.tuple_dim()); 2217 for (unsigned i = 0; i < Dims; i++) { 2218 isl::set DimOnly = isl::set(Elements).project_out(isl::dim::set, 0, i); 2219 DimOnly = DimOnly.project_out(isl::dim::set, 1, Dims - i - 1); 2220 DimOnly = DimOnly.lower_bound_si(isl::dim::set, 0, 0); 2221 2222 isl::basic_set DimHull = DimOnly.affine_hull(); 2223 2224 if (i == Dims - 1) { 2225 Int.push_back(1); 2226 Transform = Transform.equate(isl::dim::in, i, isl::dim::out, i); 2227 continue; 2228 } 2229 2230 if (unsignedFromIslSize(DimHull.dim(isl::dim::div)) == 1) { 2231 isl::aff Diff = DimHull.get_div(0); 2232 isl::val Val = Diff.get_denominator_val(); 2233 2234 int ValInt = 1; 2235 if (Val.is_int()) { 2236 auto ValAPInt = APIntFromVal(Val); 2237 if (ValAPInt.isSignedIntN(32)) 2238 ValInt = ValAPInt.getSExtValue(); 2239 } else { 2240 } 2241 2242 Int.push_back(ValInt); 2243 isl::constraint C = isl::constraint::alloc_equality( 2244 isl::local_space(Transform.get_space())); 2245 C = C.set_coefficient_si(isl::dim::out, i, ValInt); 2246 C = C.set_coefficient_si(isl::dim::in, i, -1); 2247 
Transform = Transform.add_constraint(C); 2248 continue; 2249 } 2250 2251 isl::basic_set ZeroSet = isl::basic_set(DimHull); 2252 ZeroSet = ZeroSet.fix_si(isl::dim::set, 0, 0); 2253 2254 int ValInt = 1; 2255 if (ZeroSet.is_equal(DimHull)) { 2256 ValInt = 0; 2257 } 2258 2259 Int.push_back(ValInt); 2260 Transform = Transform.equate(isl::dim::in, i, isl::dim::out, i); 2261 } 2262 2263 isl::set MappedElements = isl::map(Transform).domain(); 2264 if (!Elements.is_subset(MappedElements)) 2265 continue; 2266 2267 bool CanFold = true; 2268 if (Int[0] <= 1) 2269 CanFold = false; 2270 2271 unsigned NumDims = Array->getNumberOfDimensions(); 2272 for (unsigned i = 1; i < NumDims - 1; i++) 2273 if (Int[0] != Int[i] && Int[i]) 2274 CanFold = false; 2275 2276 if (!CanFold) 2277 continue; 2278 2279 for (auto &Access : scop->access_functions()) 2280 if (Access->getScopArrayInfo() == Array) 2281 Access->setAccessRelation( 2282 Access->getAccessRelation().apply_range(Transform)); 2283 2284 std::vector<const SCEV *> Sizes; 2285 for (unsigned i = 0; i < NumDims; i++) { 2286 auto Size = Array->getDimensionSize(i); 2287 2288 if (i == NumDims - 1) 2289 Size = SE.getMulExpr(Size, SE.getConstant(Size->getType(), Int[0])); 2290 Sizes.push_back(Size); 2291 } 2292 2293 Array->updateSizes(Sizes, false /* CheckConsistency */); 2294 } 2295 } 2296 2297 void ScopBuilder::finalizeAccesses() { 2298 updateAccessDimensionality(); 2299 foldSizeConstantsToRight(); 2300 foldAccessRelations(); 2301 assumeNoOutOfBounds(); 2302 } 2303 2304 void ScopBuilder::updateAccessDimensionality() { 2305 // Check all array accesses for each base pointer and find a (virtual) element 2306 // size for the base pointer that divides all access functions. 2307 for (ScopStmt &Stmt : *scop) 2308 for (MemoryAccess *Access : Stmt) { 2309 if (!Access->isArrayKind()) 2310 continue; 2311 ScopArrayInfo *Array = 2312 const_cast<ScopArrayInfo *>(Access->getScopArrayInfo()); 2313 2314 if (Array->getNumberOfDimensions() != 1) 2315 continue; 2316 unsigned DivisibleSize = Array->getElemSizeInBytes(); 2317 const SCEV *Subscript = Access->getSubscript(0); 2318 while (!isDivisible(Subscript, DivisibleSize, SE)) 2319 DivisibleSize /= 2; 2320 auto *Ty = IntegerType::get(SE.getContext(), DivisibleSize * 8); 2321 Array->updateElementType(Ty); 2322 } 2323 2324 for (auto &Stmt : *scop) 2325 for (auto &Access : Stmt) 2326 Access->updateDimensionality(); 2327 } 2328 2329 void ScopBuilder::foldAccessRelations() { 2330 for (auto &Stmt : *scop) 2331 for (auto &Access : Stmt) 2332 Access->foldAccessRelation(); 2333 } 2334 2335 void ScopBuilder::assumeNoOutOfBounds() { 2336 if (PollyIgnoreInbounds) 2337 return; 2338 for (auto &Stmt : *scop) 2339 for (auto &Access : Stmt) { 2340 isl::set Outside = Access->assumeNoOutOfBound(); 2341 const auto &Loc = Access->getAccessInstruction() 2342 ? Access->getAccessInstruction()->getDebugLoc() 2343 : DebugLoc(); 2344 recordAssumption(&RecordedAssumptions, INBOUNDS, Outside, Loc, 2345 AS_ASSUMPTION); 2346 } 2347 } 2348 2349 void ScopBuilder::ensureValueWrite(Instruction *Inst) { 2350 // Find the statement that defines the value of Inst. That statement has to 2351 // write the value to make it available to those statements that read it. 2352 ScopStmt *Stmt = scop->getStmtFor(Inst); 2353 2354 // It is possible that the value is synthesizable within a loop (such that it 2355 // is not part of any statement), but not after the loop (where you need the 2356 // number of loop round-trips to synthesize it). 
In LCSSA-form a PHI node will 2357 // avoid this. In case the IR has no such PHI, use the last statement (where 2358 // the value is synthesizable) to write the value. 2359 if (!Stmt) 2360 Stmt = scop->getLastStmtFor(Inst->getParent()); 2361 2362 // Inst not defined within this SCoP. 2363 if (!Stmt) 2364 return; 2365 2366 // Do not process further if the instruction is already written. 2367 if (Stmt->lookupValueWriteOf(Inst)) 2368 return; 2369 2370 addMemoryAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, Inst, Inst->getType(), 2371 true, Inst, ArrayRef<const SCEV *>(), 2372 ArrayRef<const SCEV *>(), MemoryKind::Value); 2373 } 2374 2375 void ScopBuilder::ensureValueRead(Value *V, ScopStmt *UserStmt) { 2376 // TODO: Make ScopStmt::ensureValueRead(Value*) offer the same functionality 2377 // to be able to replace this one. Currently, there is a split responsibility. 2378 // In a first step, the MemoryAccess is created, but without the 2379 // AccessRelation. In the second step by ScopStmt::buildAccessRelations(), the 2380 // AccessRelation is created. At least for scalar accesses, there is no new 2381 // information available at ScopStmt::buildAccessRelations(), so we could 2382 // create the AccessRelation right away. This is what 2383 // ScopStmt::ensureValueRead(Value*) does. 2384 2385 auto *Scope = UserStmt->getSurroundingLoop(); 2386 auto VUse = VirtualUse::create(scop.get(), UserStmt, Scope, V, false); 2387 switch (VUse.getKind()) { 2388 case VirtualUse::Constant: 2389 case VirtualUse::Block: 2390 case VirtualUse::Synthesizable: 2391 case VirtualUse::Hoisted: 2392 case VirtualUse::Intra: 2393 // Uses of these kinds do not need a MemoryAccess. 2394 break; 2395 2396 case VirtualUse::ReadOnly: 2397 // Add MemoryAccess for invariant values only if requested. 2398 if (!ModelReadOnlyScalars) 2399 break; 2400 2401 LLVM_FALLTHROUGH; 2402 case VirtualUse::Inter: 2403 2404 // Do not create another MemoryAccess for reloading the value if one already 2405 // exists. 2406 if (UserStmt->lookupValueReadOf(V)) 2407 break; 2408 2409 addMemoryAccess(UserStmt, nullptr, MemoryAccess::READ, V, V->getType(), 2410 true, V, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(), 2411 MemoryKind::Value); 2412 2413 // Inter-statement uses need to write the value in their defining statement. 2414 if (VUse.isInter()) 2415 ensureValueWrite(cast<Instruction>(V)); 2416 break; 2417 } 2418 } 2419 2420 void ScopBuilder::ensurePHIWrite(PHINode *PHI, ScopStmt *IncomingStmt, 2421 BasicBlock *IncomingBlock, 2422 Value *IncomingValue, bool IsExitBlock) { 2423 // As the incoming block might turn out to be an error statement ensure we 2424 // will create an exit PHI SAI object. It is needed during code generation 2425 // and would be created later anyway. 2426 if (IsExitBlock) 2427 scop->getOrCreateScopArrayInfo(PHI, PHI->getType(), {}, 2428 MemoryKind::ExitPHI); 2429 2430 // This is possible if PHI is in the SCoP's entry block. The incoming blocks 2431 // from outside the SCoP's region have no statement representation. 2432 if (!IncomingStmt) 2433 return; 2434 2435 // Take care for the incoming value being available in the incoming block. 2436 // This must be done before the check for multiple PHI writes because multiple 2437 // exiting edges from subregion each can be the effective written value of the 2438 // subregion. As such, all of them must be made available in the subregion 2439 // statement. 2440 ensureValueRead(IncomingValue, IncomingStmt); 2441 2442 // Do not add more than one MemoryAccess per PHINode and ScopStmt. 
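  // Hypothetical situation in which this matters: a region statement with two
  // exiting edges that both feed the same PHI in the successor block. The
  // first call creates the PHI write; the second call only registers its
  // (block, value) pair on the already existing access, so each (PHI,
  // statement) pair keeps exactly one write access.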
2443 if (MemoryAccess *Acc = IncomingStmt->lookupPHIWriteOf(PHI)) { 2444 assert(Acc->getAccessInstruction() == PHI); 2445 Acc->addIncoming(IncomingBlock, IncomingValue); 2446 return; 2447 } 2448 2449 MemoryAccess *Acc = addMemoryAccess( 2450 IncomingStmt, PHI, MemoryAccess::MUST_WRITE, PHI, PHI->getType(), true, 2451 PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(), 2452 IsExitBlock ? MemoryKind::ExitPHI : MemoryKind::PHI); 2453 assert(Acc); 2454 Acc->addIncoming(IncomingBlock, IncomingValue); 2455 } 2456 2457 void ScopBuilder::addPHIReadAccess(ScopStmt *PHIStmt, PHINode *PHI) { 2458 addMemoryAccess(PHIStmt, PHI, MemoryAccess::READ, PHI, PHI->getType(), true, 2459 PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(), 2460 MemoryKind::PHI); 2461 } 2462 2463 void ScopBuilder::buildDomain(ScopStmt &Stmt) { 2464 isl::id Id = isl::id::alloc(scop->getIslCtx(), Stmt.getBaseName(), &Stmt); 2465 2466 Stmt.Domain = scop->getDomainConditions(&Stmt); 2467 Stmt.Domain = Stmt.Domain.set_tuple_id(Id); 2468 } 2469 2470 void ScopBuilder::collectSurroundingLoops(ScopStmt &Stmt) { 2471 isl::set Domain = Stmt.getDomain(); 2472 BasicBlock *BB = Stmt.getEntryBlock(); 2473 2474 Loop *L = LI.getLoopFor(BB); 2475 2476 while (L && Stmt.isRegionStmt() && Stmt.getRegion()->contains(L)) 2477 L = L->getParentLoop(); 2478 2479 SmallVector<llvm::Loop *, 8> Loops; 2480 2481 while (L && Stmt.getParent()->getRegion().contains(L)) { 2482 Loops.push_back(L); 2483 L = L->getParentLoop(); 2484 } 2485 2486 Stmt.NestLoops.insert(Stmt.NestLoops.begin(), Loops.rbegin(), Loops.rend()); 2487 } 2488 2489 /// Return the reduction type for a given binary operator. 2490 static MemoryAccess::ReductionType getReductionType(const BinaryOperator *BinOp, 2491 const Instruction *Load) { 2492 if (!BinOp) 2493 return MemoryAccess::RT_NONE; 2494 switch (BinOp->getOpcode()) { 2495 case Instruction::FAdd: 2496 if (!BinOp->isFast()) 2497 return MemoryAccess::RT_NONE; 2498 LLVM_FALLTHROUGH; 2499 case Instruction::Add: 2500 return MemoryAccess::RT_ADD; 2501 case Instruction::Or: 2502 return MemoryAccess::RT_BOR; 2503 case Instruction::Xor: 2504 return MemoryAccess::RT_BXOR; 2505 case Instruction::And: 2506 return MemoryAccess::RT_BAND; 2507 case Instruction::FMul: 2508 if (!BinOp->isFast()) 2509 return MemoryAccess::RT_NONE; 2510 LLVM_FALLTHROUGH; 2511 case Instruction::Mul: 2512 if (DisableMultiplicativeReductions) 2513 return MemoryAccess::RT_NONE; 2514 return MemoryAccess::RT_MUL; 2515 default: 2516 return MemoryAccess::RT_NONE; 2517 } 2518 } 2519 2520 void ScopBuilder::checkForReductions(ScopStmt &Stmt) { 2521 SmallVector<MemoryAccess *, 2> Loads; 2522 SmallVector<std::pair<MemoryAccess *, MemoryAccess *>, 4> Candidates; 2523 2524 // First collect candidate load-store reduction chains by iterating over all 2525 // stores and collecting possible reduction loads. 2526 for (MemoryAccess *StoreMA : Stmt) { 2527 if (StoreMA->isRead()) 2528 continue; 2529 2530 Loads.clear(); 2531 collectCandidateReductionLoads(StoreMA, Loads); 2532 for (MemoryAccess *LoadMA : Loads) 2533 Candidates.push_back(std::make_pair(LoadMA, StoreMA)); 2534 } 2535 2536 // Then check each possible candidate pair. 2537 for (const auto &CandidatePair : Candidates) { 2538 bool Valid = true; 2539 isl::map LoadAccs = CandidatePair.first->getAccessRelation(); 2540 isl::map StoreAccs = CandidatePair.second->getAccessRelation(); 2541 2542 // Skip those with obviously unequal base addresses. 
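    // For illustration (hypothetical loop): in
    //   for (i = 0; i < N; i++) sum[0] += A[i];
    // both the load of sum[0] and the store to sum[0] map into the array
    // space of 'sum', so their spaces compare equal and the pair survives
    // this filter, whereas the candidate pairing the load of A[i] with the
    // store to sum[0] is discarded here before the more expensive overlap
    // check.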
    if (!LoadAccs.has_equal_space(StoreAccs)) {
      continue;
    }

    // Then check whether any other memory access of the statement overlaps
    // with the locations touched by the candidate pair.
    isl::map AllAccsRel = LoadAccs.unite(StoreAccs);
    AllAccsRel = AllAccsRel.intersect_domain(Stmt.getDomain());
    isl::set AllAccs = AllAccsRel.range();

    for (MemoryAccess *MA : Stmt) {
      if (MA == CandidatePair.first || MA == CandidatePair.second)
        continue;

      isl::map AccRel =
          MA->getAccessRelation().intersect_domain(Stmt.getDomain());
      isl::set Accs = AccRel.range();

      if (AllAccs.has_equal_space(Accs)) {
        isl::set OverlapAccs = Accs.intersect(AllAccs);
        Valid = Valid && OverlapAccs.is_empty();
      }
    }

    if (!Valid)
      continue;

    const LoadInst *Load =
        dyn_cast<const LoadInst>(CandidatePair.first->getAccessInstruction());
    MemoryAccess::ReductionType RT =
        getReductionType(dyn_cast<BinaryOperator>(Load->user_back()), Load);

    // If no overlapping access was found, we mark the load and store as
    // reduction-like.
    CandidatePair.first->markAsReductionLike(RT);
    CandidatePair.second->markAsReductionLike(RT);
  }
}

void ScopBuilder::verifyInvariantLoads() {
  auto &RIL = scop->getRequiredInvariantLoads();
  for (LoadInst *LI : RIL) {
    assert(LI && scop->contains(LI));
    // If there exists a statement in the scop which has a memory access for
    // @p LI, then mark this scop as infeasible for optimization.
    for (ScopStmt &Stmt : *scop)
      if (Stmt.getArrayAccessOrNULLFor(LI)) {
        scop->invalidate(INVARIANTLOAD, LI->getDebugLoc(), LI->getParent());
        return;
      }
  }
}

void ScopBuilder::hoistInvariantLoads() {
  if (!PollyInvariantLoadHoisting)
    return;

  isl::union_map Writes = scop->getWrites();
  for (ScopStmt &Stmt : *scop) {
    InvariantAccessesTy InvariantAccesses;

    for (MemoryAccess *Access : Stmt) {
      isl::set NHCtx = getNonHoistableCtx(Access, Writes);
      if (!NHCtx.is_null())
        InvariantAccesses.push_back({Access, NHCtx});
    }

    // Transfer the memory accesses from the statement to the SCoP.
    for (auto InvMA : InvariantAccesses)
      Stmt.removeMemoryAccess(InvMA.MA);
    addInvariantLoads(Stmt, InvariantAccesses);
  }
}

/// Check if an access range is too complex.
///
/// An access range is too complex if it contains either many disjuncts or
/// very complex expressions. As a simple heuristic, we assume a set to be
/// too complex if the sum of existentially quantified dimensions and set
/// dimensions is larger than a threshold. This reliably detects both sets
/// with many disjuncts as well as sets with many divisions as they arise in
/// h264.
///
/// @param AccessRange The range to check for complexity.
///
/// @returns True if the access range is too complex.
2628 static bool isAccessRangeTooComplex(isl::set AccessRange) { 2629 unsigned NumTotalDims = 0; 2630 2631 for (isl::basic_set BSet : AccessRange.get_basic_set_list()) { 2632 NumTotalDims += unsignedFromIslSize(BSet.dim(isl::dim::div)); 2633 NumTotalDims += unsignedFromIslSize(BSet.dim(isl::dim::set)); 2634 } 2635 2636 if (NumTotalDims > MaxDimensionsInAccessRange) 2637 return true; 2638 2639 return false; 2640 } 2641 2642 bool ScopBuilder::hasNonHoistableBasePtrInScop(MemoryAccess *MA, 2643 isl::union_map Writes) { 2644 if (auto *BasePtrMA = scop->lookupBasePtrAccess(MA)) { 2645 return getNonHoistableCtx(BasePtrMA, Writes).is_null(); 2646 } 2647 2648 Value *BaseAddr = MA->getOriginalBaseAddr(); 2649 if (auto *BasePtrInst = dyn_cast<Instruction>(BaseAddr)) 2650 if (!isa<LoadInst>(BasePtrInst)) 2651 return scop->contains(BasePtrInst); 2652 2653 return false; 2654 } 2655 2656 void ScopBuilder::addUserContext() { 2657 if (UserContextStr.empty()) 2658 return; 2659 2660 isl::set UserContext = isl::set(scop->getIslCtx(), UserContextStr.c_str()); 2661 isl::space Space = scop->getParamSpace(); 2662 isl::size SpaceParams = Space.dim(isl::dim::param); 2663 if (unsignedFromIslSize(SpaceParams) != 2664 unsignedFromIslSize(UserContext.dim(isl::dim::param))) { 2665 std::string SpaceStr = stringFromIslObj(Space, "null"); 2666 errs() << "Error: the context provided in -polly-context has not the same " 2667 << "number of dimensions than the computed context. Due to this " 2668 << "mismatch, the -polly-context option is ignored. Please provide " 2669 << "the context in the parameter space: " << SpaceStr << ".\n"; 2670 return; 2671 } 2672 2673 for (auto i : rangeIslSize(0, SpaceParams)) { 2674 std::string NameContext = 2675 scop->getContext().get_dim_name(isl::dim::param, i); 2676 std::string NameUserContext = UserContext.get_dim_name(isl::dim::param, i); 2677 2678 if (NameContext != NameUserContext) { 2679 std::string SpaceStr = stringFromIslObj(Space, "null"); 2680 errs() << "Error: the name of dimension " << i 2681 << " provided in -polly-context " 2682 << "is '" << NameUserContext << "', but the name in the computed " 2683 << "context is '" << NameContext 2684 << "'. Due to this name mismatch, " 2685 << "the -polly-context option is ignored. Please provide " 2686 << "the context in the parameter space: " << SpaceStr << ".\n"; 2687 return; 2688 } 2689 2690 UserContext = UserContext.set_dim_id(isl::dim::param, i, 2691 Space.get_dim_id(isl::dim::param, i)); 2692 } 2693 isl::set newContext = scop->getContext().intersect(UserContext); 2694 scop->setContext(newContext); 2695 } 2696 2697 isl::set ScopBuilder::getNonHoistableCtx(MemoryAccess *Access, 2698 isl::union_map Writes) { 2699 // TODO: Loads that are not loop carried, hence are in a statement with 2700 // zero iterators, are by construction invariant, though we 2701 // currently "hoist" them anyway. This is necessary because we allow 2702 // them to be treated as parameters (e.g., in conditions) and our code 2703 // generation would otherwise use the old value. 2704 2705 auto &Stmt = *Access->getStatement(); 2706 BasicBlock *BB = Stmt.getEntryBlock(); 2707 2708 if (Access->isScalarKind() || Access->isWrite() || !Access->isAffine() || 2709 Access->isMemoryIntrinsic()) 2710 return {}; 2711 2712 // Skip accesses that have an invariant base pointer which is defined but 2713 // not loaded inside the SCoP. This can happened e.g., if a readnone call 2714 // returns a pointer that is used as a base address. 
However, as we want 2715 // to hoist indirect pointers, we allow the base pointer to be defined in 2716 // the region if it is also a memory access. Each ScopArrayInfo object 2717 // that has a base pointer origin has a base pointer that is loaded and 2718 // that it is invariant, thus it will be hoisted too. However, if there is 2719 // no base pointer origin we check that the base pointer is defined 2720 // outside the region. 2721 auto *LI = cast<LoadInst>(Access->getAccessInstruction()); 2722 if (hasNonHoistableBasePtrInScop(Access, Writes)) 2723 return {}; 2724 2725 isl::map AccessRelation = Access->getAccessRelation(); 2726 assert(!AccessRelation.is_empty()); 2727 2728 if (AccessRelation.involves_dims(isl::dim::in, 0, Stmt.getNumIterators())) 2729 return {}; 2730 2731 AccessRelation = AccessRelation.intersect_domain(Stmt.getDomain()); 2732 isl::set SafeToLoad; 2733 2734 auto &DL = scop->getFunction().getParent()->getDataLayout(); 2735 if (isSafeToLoadUnconditionally(LI->getPointerOperand(), LI->getType(), 2736 LI->getAlign(), DL)) { 2737 SafeToLoad = isl::set::universe(AccessRelation.get_space().range()); 2738 } else if (BB != LI->getParent()) { 2739 // Skip accesses in non-affine subregions as they might not be executed 2740 // under the same condition as the entry of the non-affine subregion. 2741 return {}; 2742 } else { 2743 SafeToLoad = AccessRelation.range(); 2744 } 2745 2746 if (isAccessRangeTooComplex(AccessRelation.range())) 2747 return {}; 2748 2749 isl::union_map Written = Writes.intersect_range(SafeToLoad); 2750 isl::set WrittenCtx = Written.params(); 2751 bool IsWritten = !WrittenCtx.is_empty(); 2752 2753 if (!IsWritten) 2754 return WrittenCtx; 2755 2756 WrittenCtx = WrittenCtx.remove_divs(); 2757 bool TooComplex = 2758 unsignedFromIslSize(WrittenCtx.n_basic_set()) >= MaxDisjunctsInDomain; 2759 if (TooComplex || !isRequiredInvariantLoad(LI)) 2760 return {}; 2761 2762 scop->addAssumption(INVARIANTLOAD, WrittenCtx, LI->getDebugLoc(), 2763 AS_RESTRICTION, LI->getParent()); 2764 return WrittenCtx; 2765 } 2766 2767 static bool isAParameter(llvm::Value *maybeParam, const Function &F) { 2768 for (const llvm::Argument &Arg : F.args()) 2769 if (&Arg == maybeParam) 2770 return true; 2771 2772 return false; 2773 } 2774 2775 bool ScopBuilder::canAlwaysBeHoisted(MemoryAccess *MA, 2776 bool StmtInvalidCtxIsEmpty, 2777 bool MAInvalidCtxIsEmpty, 2778 bool NonHoistableCtxIsEmpty) { 2779 LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction()); 2780 const DataLayout &DL = LInst->getParent()->getModule()->getDataLayout(); 2781 if (PollyAllowDereferenceOfAllFunctionParams && 2782 isAParameter(LInst->getPointerOperand(), scop->getFunction())) 2783 return true; 2784 2785 // TODO: We can provide more information for better but more expensive 2786 // results. 2787 if (!isDereferenceableAndAlignedPointer( 2788 LInst->getPointerOperand(), LInst->getType(), LInst->getAlign(), DL)) 2789 return false; 2790 2791 // If the location might be overwritten we do not hoist it unconditionally. 2792 // 2793 // TODO: This is probably too conservative. 2794 if (!NonHoistableCtxIsEmpty) 2795 return false; 2796 2797 // If a dereferenceable load is in a statement that is modeled precisely we 2798 // can hoist it. 2799 if (StmtInvalidCtxIsEmpty && MAInvalidCtxIsEmpty) 2800 return true; 2801 2802 // Even if the statement is not modeled precisely we can hoist the load if it 2803 // does not involve any parameters that might have been specialized by the 2804 // statement domain. 
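  // Sketch of the distinction (hypothetical accesses): a dereferenceable load
  // of Ptr[0] only uses constant subscripts and can be hoisted even from an
  // imprecisely modeled statement, whereas Ptr[n] depends on a parameter the
  // statement's domain may have specialized, so the loop below conservatively
  // refuses the unconditional hoist in that case.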
2805 for (const SCEV *Subscript : MA->subscripts()) 2806 if (!isa<SCEVConstant>(Subscript)) 2807 return false; 2808 return true; 2809 } 2810 2811 void ScopBuilder::addInvariantLoads(ScopStmt &Stmt, 2812 InvariantAccessesTy &InvMAs) { 2813 if (InvMAs.empty()) 2814 return; 2815 2816 isl::set StmtInvalidCtx = Stmt.getInvalidContext(); 2817 bool StmtInvalidCtxIsEmpty = StmtInvalidCtx.is_empty(); 2818 2819 // Get the context under which the statement is executed but remove the error 2820 // context under which this statement is reached. 2821 isl::set DomainCtx = Stmt.getDomain().params(); 2822 DomainCtx = DomainCtx.subtract(StmtInvalidCtx); 2823 2824 if (unsignedFromIslSize(DomainCtx.n_basic_set()) >= MaxDisjunctsInDomain) { 2825 auto *AccInst = InvMAs.front().MA->getAccessInstruction(); 2826 scop->invalidate(COMPLEXITY, AccInst->getDebugLoc(), AccInst->getParent()); 2827 return; 2828 } 2829 2830 // Project out all parameters that relate to loads in the statement. Otherwise 2831 // we could have cyclic dependences on the constraints under which the 2832 // hoisted loads are executed and we could not determine an order in which to 2833 // pre-load them. This happens because not only lower bounds are part of the 2834 // domain but also upper bounds. 2835 for (auto &InvMA : InvMAs) { 2836 auto *MA = InvMA.MA; 2837 Instruction *AccInst = MA->getAccessInstruction(); 2838 if (SE.isSCEVable(AccInst->getType())) { 2839 SetVector<Value *> Values; 2840 for (const SCEV *Parameter : scop->parameters()) { 2841 Values.clear(); 2842 findValues(Parameter, SE, Values); 2843 if (!Values.count(AccInst)) 2844 continue; 2845 2846 isl::id ParamId = scop->getIdForParam(Parameter); 2847 if (!ParamId.is_null()) { 2848 int Dim = DomainCtx.find_dim_by_id(isl::dim::param, ParamId); 2849 if (Dim >= 0) 2850 DomainCtx = DomainCtx.eliminate(isl::dim::param, Dim, 1); 2851 } 2852 } 2853 } 2854 } 2855 2856 for (auto &InvMA : InvMAs) { 2857 auto *MA = InvMA.MA; 2858 isl::set NHCtx = InvMA.NonHoistableCtx; 2859 2860 // Check for another invariant access that accesses the same location as 2861 // MA and if found consolidate them. Otherwise create a new equivalence 2862 // class at the end of InvariantEquivClasses. 2863 LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction()); 2864 Type *Ty = LInst->getType(); 2865 const SCEV *PointerSCEV = SE.getSCEV(LInst->getPointerOperand()); 2866 2867 isl::set MAInvalidCtx = MA->getInvalidContext(); 2868 bool NonHoistableCtxIsEmpty = NHCtx.is_empty(); 2869 bool MAInvalidCtxIsEmpty = MAInvalidCtx.is_empty(); 2870 2871 isl::set MACtx; 2872 // Check if we know that this pointer can be speculatively accessed. 2873 if (canAlwaysBeHoisted(MA, StmtInvalidCtxIsEmpty, MAInvalidCtxIsEmpty, 2874 NonHoistableCtxIsEmpty)) { 2875 MACtx = isl::set::universe(DomainCtx.get_space()); 2876 } else { 2877 MACtx = DomainCtx; 2878 MACtx = MACtx.subtract(MAInvalidCtx.unite(NHCtx)); 2879 MACtx = MACtx.gist_params(scop->getContext()); 2880 } 2881 2882 bool Consolidated = false; 2883 for (auto &IAClass : scop->invariantEquivClasses()) { 2884 if (PointerSCEV != IAClass.IdentifyingPointer || Ty != IAClass.AccessType) 2885 continue; 2886 2887 // If the pointer and the type is equal check if the access function wrt. 2888 // to the domain is equal too. It can happen that the domain fixes 2889 // parameter values and these can be different for distinct part of the 2890 // SCoP. If this happens we cannot consolidate the loads but need to 2891 // create a new invariant load equivalence class. 
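      // Hypothetical scenario the range comparison below guards against: the
      // same pointer (and type) is loaded in two statements whose domains
      // specialize a parameter differently, e.g., one part of the SCoP only
      // executes with n == 0 and another only with n == 1. The accessed
      // ranges then need not agree, and consolidating such loads would
      // preload the wrong location for one of the parts, so a fresh
      // equivalence class is created instead.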
2892 auto &MAs = IAClass.InvariantAccesses; 2893 if (!MAs.empty()) { 2894 auto *LastMA = MAs.front(); 2895 2896 isl::set AR = MA->getAccessRelation().range(); 2897 isl::set LastAR = LastMA->getAccessRelation().range(); 2898 bool SameAR = AR.is_equal(LastAR); 2899 2900 if (!SameAR) 2901 continue; 2902 } 2903 2904 // Add MA to the list of accesses that are in this class. 2905 MAs.push_front(MA); 2906 2907 Consolidated = true; 2908 2909 // Unify the execution context of the class and this statement. 2910 isl::set IAClassDomainCtx = IAClass.ExecutionContext; 2911 if (!IAClassDomainCtx.is_null()) 2912 IAClassDomainCtx = IAClassDomainCtx.unite(MACtx).coalesce(); 2913 else 2914 IAClassDomainCtx = MACtx; 2915 IAClass.ExecutionContext = IAClassDomainCtx; 2916 break; 2917 } 2918 2919 if (Consolidated) 2920 continue; 2921 2922 MACtx = MACtx.coalesce(); 2923 2924 // If we did not consolidate MA, thus did not find an equivalence class 2925 // for it, we create a new one. 2926 scop->addInvariantEquivClass( 2927 InvariantEquivClassTy{PointerSCEV, MemoryAccessList{MA}, MACtx, Ty}); 2928 } 2929 } 2930 2931 void ScopBuilder::collectCandidateReductionLoads( 2932 MemoryAccess *StoreMA, SmallVectorImpl<MemoryAccess *> &Loads) { 2933 ScopStmt *Stmt = StoreMA->getStatement(); 2934 2935 auto *Store = dyn_cast<StoreInst>(StoreMA->getAccessInstruction()); 2936 if (!Store) 2937 return; 2938 2939 // Skip if there is not one binary operator between the load and the store 2940 auto *BinOp = dyn_cast<BinaryOperator>(Store->getValueOperand()); 2941 if (!BinOp) 2942 return; 2943 2944 // Skip if the binary operators has multiple uses 2945 if (BinOp->getNumUses() != 1) 2946 return; 2947 2948 // Skip if the opcode of the binary operator is not commutative/associative 2949 if (!BinOp->isCommutative() || !BinOp->isAssociative()) 2950 return; 2951 2952 // Skip if the binary operator is outside the current SCoP 2953 if (BinOp->getParent() != Store->getParent()) 2954 return; 2955 2956 // Skip if it is a multiplicative reduction and we disabled them 2957 if (DisableMultiplicativeReductions && 2958 (BinOp->getOpcode() == Instruction::Mul || 2959 BinOp->getOpcode() == Instruction::FMul)) 2960 return; 2961 2962 // Check the binary operator operands for a candidate load 2963 auto *PossibleLoad0 = dyn_cast<LoadInst>(BinOp->getOperand(0)); 2964 auto *PossibleLoad1 = dyn_cast<LoadInst>(BinOp->getOperand(1)); 2965 if (!PossibleLoad0 && !PossibleLoad1) 2966 return; 2967 2968 // A load is only a candidate if it cannot escape (thus has only this use) 2969 if (PossibleLoad0 && PossibleLoad0->getNumUses() == 1) 2970 if (PossibleLoad0->getParent() == Store->getParent()) 2971 Loads.push_back(&Stmt->getArrayAccessFor(PossibleLoad0)); 2972 if (PossibleLoad1 && PossibleLoad1->getNumUses() == 1) 2973 if (PossibleLoad1->getParent() == Store->getParent()) 2974 Loads.push_back(&Stmt->getArrayAccessFor(PossibleLoad1)); 2975 } 2976 2977 /// Find the canonical scop array info object for a set of invariant load 2978 /// hoisted loads. The canonical array is the one that corresponds to the 2979 /// first load in the list of accesses which is used as base pointer of a 2980 /// scop array. 
2981 static const ScopArrayInfo *findCanonicalArray(Scop &S, 2982 MemoryAccessList &Accesses) { 2983 for (MemoryAccess *Access : Accesses) { 2984 const ScopArrayInfo *CanonicalArray = S.getScopArrayInfoOrNull( 2985 Access->getAccessInstruction(), MemoryKind::Array); 2986 if (CanonicalArray) 2987 return CanonicalArray; 2988 } 2989 return nullptr; 2990 } 2991 2992 /// Check if @p Array severs as base array in an invariant load. 2993 static bool isUsedForIndirectHoistedLoad(Scop &S, const ScopArrayInfo *Array) { 2994 for (InvariantEquivClassTy &EqClass2 : S.getInvariantAccesses()) 2995 for (MemoryAccess *Access2 : EqClass2.InvariantAccesses) 2996 if (Access2->getScopArrayInfo() == Array) 2997 return true; 2998 return false; 2999 } 3000 3001 /// Replace the base pointer arrays in all memory accesses referencing @p Old, 3002 /// with a reference to @p New. 3003 static void replaceBasePtrArrays(Scop &S, const ScopArrayInfo *Old, 3004 const ScopArrayInfo *New) { 3005 for (ScopStmt &Stmt : S) 3006 for (MemoryAccess *Access : Stmt) { 3007 if (Access->getLatestScopArrayInfo() != Old) 3008 continue; 3009 3010 isl::id Id = New->getBasePtrId(); 3011 isl::map Map = Access->getAccessRelation(); 3012 Map = Map.set_tuple_id(isl::dim::out, Id); 3013 Access->setAccessRelation(Map); 3014 } 3015 } 3016 3017 void ScopBuilder::canonicalizeDynamicBasePtrs() { 3018 for (InvariantEquivClassTy &EqClass : scop->InvariantEquivClasses) { 3019 MemoryAccessList &BasePtrAccesses = EqClass.InvariantAccesses; 3020 3021 const ScopArrayInfo *CanonicalBasePtrSAI = 3022 findCanonicalArray(*scop, BasePtrAccesses); 3023 3024 if (!CanonicalBasePtrSAI) 3025 continue; 3026 3027 for (MemoryAccess *BasePtrAccess : BasePtrAccesses) { 3028 const ScopArrayInfo *BasePtrSAI = scop->getScopArrayInfoOrNull( 3029 BasePtrAccess->getAccessInstruction(), MemoryKind::Array); 3030 if (!BasePtrSAI || BasePtrSAI == CanonicalBasePtrSAI || 3031 !BasePtrSAI->isCompatibleWith(CanonicalBasePtrSAI)) 3032 continue; 3033 3034 // we currently do not canonicalize arrays where some accesses are 3035 // hoisted as invariant loads. If we would, we need to update the access 3036 // function of the invariant loads as well. However, as this is not a 3037 // very common situation, we leave this for now to avoid further 3038 // complexity increases. 3039 if (isUsedForIndirectHoistedLoad(*scop, BasePtrSAI)) 3040 continue; 3041 3042 replaceBasePtrArrays(*scop, BasePtrSAI, CanonicalBasePtrSAI); 3043 } 3044 } 3045 } 3046 3047 void ScopBuilder::buildAccessRelations(ScopStmt &Stmt) { 3048 for (MemoryAccess *Access : Stmt.MemAccs) { 3049 Type *ElementType = Access->getElementType(); 3050 3051 MemoryKind Ty; 3052 if (Access->isPHIKind()) 3053 Ty = MemoryKind::PHI; 3054 else if (Access->isExitPHIKind()) 3055 Ty = MemoryKind::ExitPHI; 3056 else if (Access->isValueKind()) 3057 Ty = MemoryKind::Value; 3058 else 3059 Ty = MemoryKind::Array; 3060 3061 // Create isl::pw_aff for SCEVs which describe sizes. Collect all 3062 // assumptions which are taken. isl::pw_aff objects are cached internally 3063 // and they are used later by scop. 3064 for (const SCEV *Size : Access->Sizes) { 3065 if (!Size) 3066 continue; 3067 scop->getPwAff(Size, nullptr, false, &RecordedAssumptions); 3068 } 3069 auto *SAI = scop->getOrCreateScopArrayInfo(Access->getOriginalBaseAddr(), 3070 ElementType, Access->Sizes, Ty); 3071 3072 // Create isl::pw_aff for SCEVs which describe subscripts. Collect all 3073 // assumptions which are taken. 
isl::pw_aff objects are cached internally 3074 // and they are used later by scop. 3075 for (const SCEV *Subscript : Access->subscripts()) { 3076 if (!Access->isAffine() || !Subscript) 3077 continue; 3078 scop->getPwAff(Subscript, Stmt.getEntryBlock(), false, 3079 &RecordedAssumptions); 3080 } 3081 Access->buildAccessRelation(SAI); 3082 scop->addAccessData(Access); 3083 } 3084 } 3085 3086 /// Add the minimal/maximal access in @p Set to @p User. 3087 /// 3088 /// @return True if more accesses should be added, false if we reached the 3089 /// maximal number of run-time checks to be generated. 3090 static bool buildMinMaxAccess(isl::set Set, 3091 Scop::MinMaxVectorTy &MinMaxAccesses, Scop &S) { 3092 isl::pw_multi_aff MinPMA, MaxPMA; 3093 isl::pw_aff LastDimAff; 3094 isl::aff OneAff; 3095 unsigned Pos; 3096 3097 Set = Set.remove_divs(); 3098 polly::simplify(Set); 3099 3100 if (unsignedFromIslSize(Set.n_basic_set()) > RunTimeChecksMaxAccessDisjuncts) 3101 Set = Set.simple_hull(); 3102 3103 // Restrict the number of parameters involved in the access as the lexmin/ 3104 // lexmax computation will take too long if this number is high. 3105 // 3106 // Experiments with a simple test case using an i7 4800MQ: 3107 // 3108 // #Parameters involved | Time (in sec) 3109 // 6 | 0.01 3110 // 7 | 0.04 3111 // 8 | 0.12 3112 // 9 | 0.40 3113 // 10 | 1.54 3114 // 11 | 6.78 3115 // 12 | 30.38 3116 // 3117 if (isl_set_n_param(Set.get()) > 3118 static_cast<isl_size>(RunTimeChecksMaxParameters)) { 3119 unsigned InvolvedParams = 0; 3120 for (unsigned u = 0, e = isl_set_n_param(Set.get()); u < e; u++) 3121 if (Set.involves_dims(isl::dim::param, u, 1)) 3122 InvolvedParams++; 3123 3124 if (InvolvedParams > RunTimeChecksMaxParameters) 3125 return false; 3126 } 3127 3128 MinPMA = Set.lexmin_pw_multi_aff(); 3129 MaxPMA = Set.lexmax_pw_multi_aff(); 3130 3131 MinPMA = MinPMA.coalesce(); 3132 MaxPMA = MaxPMA.coalesce(); 3133 3134 if (MaxPMA.is_null()) 3135 return false; 3136 3137 unsigned MaxOutputSize = unsignedFromIslSize(MaxPMA.dim(isl::dim::out)); 3138 3139 // Adjust the last dimension of the maximal access by one as we want to 3140 // enclose the accessed memory region by MinPMA and MaxPMA. The pointer 3141 // we test during code generation might now point after the end of the 3142 // allocated array but we will never dereference it anyway. 3143 assert(MaxOutputSize >= 1 && "Assumed at least one output dimension"); 3144 3145 Pos = MaxOutputSize - 1; 3146 LastDimAff = MaxPMA.at(Pos); 3147 OneAff = isl::aff(isl::local_space(LastDimAff.get_domain_space())); 3148 OneAff = OneAff.add_constant_si(1); 3149 LastDimAff = LastDimAff.add(OneAff); 3150 MaxPMA = MaxPMA.set_pw_aff(Pos, LastDimAff); 3151 3152 if (MinPMA.is_null() || MaxPMA.is_null()) 3153 return false; 3154 3155 MinMaxAccesses.push_back(std::make_pair(MinPMA, MaxPMA)); 3156 3157 return true; 3158 } 3159 3160 /// Wrapper function to calculate minimal/maximal accesses to each array. 
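///
/// As an illustration (simplified, not a verbatim description of the emitted
/// code): for an affine access A[i] with domain 0 <= i < n, the minimal access
/// is &A[0] and the maximal access is &A[n], i.e. one element past the last
/// touched location (see buildMinMaxAccess above). Two arrays A and B of an
/// alias group then provably do not overlap at run time if
/// max(A) <= min(B) || max(B) <= min(A), which is the kind of comparison the
/// generated run-time check performs for each relevant pair of arrays.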
3161 bool ScopBuilder::calculateMinMaxAccess(AliasGroupTy AliasGroup, 3162 Scop::MinMaxVectorTy &MinMaxAccesses) { 3163 MinMaxAccesses.reserve(AliasGroup.size()); 3164 3165 isl::union_set Domains = scop->getDomains(); 3166 isl::union_map Accesses = isl::union_map::empty(scop->getIslCtx()); 3167 3168 for (MemoryAccess *MA : AliasGroup) 3169 Accesses = Accesses.unite(MA->getAccessRelation()); 3170 3171 Accesses = Accesses.intersect_domain(Domains); 3172 isl::union_set Locations = Accesses.range(); 3173 3174 bool LimitReached = false; 3175 for (isl::set Set : Locations.get_set_list()) { 3176 LimitReached |= !buildMinMaxAccess(Set, MinMaxAccesses, *scop); 3177 if (LimitReached) 3178 break; 3179 } 3180 3181 return !LimitReached; 3182 } 3183 3184 static isl::set getAccessDomain(MemoryAccess *MA) { 3185 isl::set Domain = MA->getStatement()->getDomain(); 3186 Domain = Domain.project_out(isl::dim::set, 0, 3187 unsignedFromIslSize(Domain.tuple_dim())); 3188 return Domain.reset_tuple_id(); 3189 } 3190 3191 bool ScopBuilder::buildAliasChecks() { 3192 if (!PollyUseRuntimeAliasChecks) 3193 return true; 3194 3195 if (buildAliasGroups()) { 3196 // Aliasing assumptions do not go through addAssumption but we still want to 3197 // collect statistics so we do it here explicitly. 3198 if (scop->getAliasGroups().size()) 3199 Scop::incrementNumberOfAliasingAssumptions(1); 3200 return true; 3201 } 3202 3203 // If a problem occurs while building the alias groups we need to delete 3204 // this SCoP and pretend it wasn't valid in the first place. To this end 3205 // we make the assumed context infeasible. 3206 scop->invalidate(ALIASING, DebugLoc()); 3207 3208 LLVM_DEBUG(dbgs() << "\n\nNOTE: Run time checks for " << scop->getNameStr() 3209 << " could not be created. This SCoP has been dismissed."); 3210 return false; 3211 } 3212 3213 std::tuple<ScopBuilder::AliasGroupVectorTy, DenseSet<const ScopArrayInfo *>> 3214 ScopBuilder::buildAliasGroupsForAccesses() { 3215 AliasSetTracker AST(AA); 3216 3217 DenseMap<Value *, MemoryAccess *> PtrToAcc; 3218 DenseSet<const ScopArrayInfo *> HasWriteAccess; 3219 for (ScopStmt &Stmt : *scop) { 3220 3221 isl::set StmtDomain = Stmt.getDomain(); 3222 bool StmtDomainEmpty = StmtDomain.is_empty(); 3223 3224 // Statements with an empty domain will never be executed. 3225 if (StmtDomainEmpty) 3226 continue; 3227 3228 for (MemoryAccess *MA : Stmt) { 3229 if (MA->isScalarKind()) 3230 continue; 3231 if (!MA->isRead()) 3232 HasWriteAccess.insert(MA->getScopArrayInfo()); 3233 MemAccInst Acc(MA->getAccessInstruction()); 3234 if (MA->isRead() && isa<MemTransferInst>(Acc)) 3235 PtrToAcc[cast<MemTransferInst>(Acc)->getRawSource()] = MA; 3236 else 3237 PtrToAcc[Acc.getPointerOperand()] = MA; 3238 AST.add(Acc); 3239 } 3240 } 3241 3242 AliasGroupVectorTy AliasGroups; 3243 for (AliasSet &AS : AST) { 3244 if (AS.isMustAlias() || AS.isForwardingAliasSet()) 3245 continue; 3246 AliasGroupTy AG; 3247 for (auto &PR : AS) 3248 AG.push_back(PtrToAcc[PR.getValue()]); 3249 if (AG.size() < 2) 3250 continue; 3251 AliasGroups.push_back(std::move(AG)); 3252 } 3253 3254 return std::make_tuple(AliasGroups, HasWriteAccess); 3255 } 3256 3257 bool ScopBuilder::buildAliasGroups() { 3258 // To create sound alias checks we perform the following steps: 3259 // o) We partition each group into read only and non read only accesses. 3260 // o) For each group with more than one base pointer we then compute minimal 3261 // and maximal accesses to each array of a group in read only and non 3262 // read only partitions separately. 
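  // As an illustrative example (not taken from a test case): if a group
  // contains accesses to arrays {A, B, C} and only A is ever written, the
  // group is split into the read-write partition {A} and the read-only
  // partition {B, C}. The run-time checks then only have to compare A against
  // B and A against C; no check between the read-only arrays B and C is
  // needed.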
3263 AliasGroupVectorTy AliasGroups; 3264 DenseSet<const ScopArrayInfo *> HasWriteAccess; 3265 3266 std::tie(AliasGroups, HasWriteAccess) = buildAliasGroupsForAccesses(); 3267 3268 splitAliasGroupsByDomain(AliasGroups); 3269 3270 for (AliasGroupTy &AG : AliasGroups) { 3271 if (!scop->hasFeasibleRuntimeContext()) 3272 return false; 3273 3274 { 3275 IslMaxOperationsGuard MaxOpGuard(scop->getIslCtx().get(), OptComputeOut); 3276 bool Valid = buildAliasGroup(AG, HasWriteAccess); 3277 if (!Valid) 3278 return false; 3279 } 3280 if (isl_ctx_last_error(scop->getIslCtx().get()) == isl_error_quota) { 3281 scop->invalidate(COMPLEXITY, DebugLoc()); 3282 return false; 3283 } 3284 } 3285 3286 return true; 3287 } 3288 3289 bool ScopBuilder::buildAliasGroup( 3290 AliasGroupTy &AliasGroup, DenseSet<const ScopArrayInfo *> HasWriteAccess) { 3291 AliasGroupTy ReadOnlyAccesses; 3292 AliasGroupTy ReadWriteAccesses; 3293 SmallPtrSet<const ScopArrayInfo *, 4> ReadWriteArrays; 3294 SmallPtrSet<const ScopArrayInfo *, 4> ReadOnlyArrays; 3295 3296 if (AliasGroup.size() < 2) 3297 return true; 3298 3299 for (MemoryAccess *Access : AliasGroup) { 3300 ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "PossibleAlias", 3301 Access->getAccessInstruction()) 3302 << "Possibly aliasing pointer, use restrict keyword."); 3303 const ScopArrayInfo *Array = Access->getScopArrayInfo(); 3304 if (HasWriteAccess.count(Array)) { 3305 ReadWriteArrays.insert(Array); 3306 ReadWriteAccesses.push_back(Access); 3307 } else { 3308 ReadOnlyArrays.insert(Array); 3309 ReadOnlyAccesses.push_back(Access); 3310 } 3311 } 3312 3313 // If there are no read-only pointers, and less than two read-write pointers, 3314 // no alias check is needed. 3315 if (ReadOnlyAccesses.empty() && ReadWriteArrays.size() <= 1) 3316 return true; 3317 3318 // If there is no read-write pointer, no alias check is needed. 3319 if (ReadWriteArrays.empty()) 3320 return true; 3321 3322 // For non-affine accesses, no alias check can be generated as we cannot 3323 // compute a sufficiently tight lower and upper bound: bail out. 3324 for (MemoryAccess *MA : AliasGroup) { 3325 if (!MA->isAffine()) { 3326 scop->invalidate(ALIASING, MA->getAccessInstruction()->getDebugLoc(), 3327 MA->getAccessInstruction()->getParent()); 3328 return false; 3329 } 3330 } 3331 3332 // Ensure that for all memory accesses for which we generate alias checks, 3333 // their base pointers are available. 3334 for (MemoryAccess *MA : AliasGroup) { 3335 if (MemoryAccess *BasePtrMA = scop->lookupBasePtrAccess(MA)) 3336 scop->addRequiredInvariantLoad( 3337 cast<LoadInst>(BasePtrMA->getAccessInstruction())); 3338 } 3339 3340 // scop->getAliasGroups().emplace_back(); 3341 // Scop::MinMaxVectorPairTy &pair = scop->getAliasGroups().back(); 3342 Scop::MinMaxVectorTy MinMaxAccessesReadWrite; 3343 Scop::MinMaxVectorTy MinMaxAccessesReadOnly; 3344 3345 bool Valid; 3346 3347 Valid = calculateMinMaxAccess(ReadWriteAccesses, MinMaxAccessesReadWrite); 3348 3349 if (!Valid) 3350 return false; 3351 3352 // Bail out if the number of values we need to compare is too large. 3353 // This is important as the number of comparisons grows quadratically with 3354 // the number of values we need to compare. 
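  // As a rough estimate, with the default limit of 20 arrays per group
  // (polly-rtc-max-arrays-per-group), up to 20 * 19 / 2 = 190 pairwise
  // comparisons could already be required.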
3355   if (MinMaxAccessesReadWrite.size() + ReadOnlyArrays.size() >
3356       RunTimeChecksMaxArraysPerGroup)
3357     return false;
3358
3359   Valid = calculateMinMaxAccess(ReadOnlyAccesses, MinMaxAccessesReadOnly);
3360
3361   scop->addAliasGroup(MinMaxAccessesReadWrite, MinMaxAccessesReadOnly);
3362   if (!Valid)
3363     return false;
3364
3365   return true;
3366 }
3367
3368 void ScopBuilder::splitAliasGroupsByDomain(AliasGroupVectorTy &AliasGroups) {
3369   for (unsigned u = 0; u < AliasGroups.size(); u++) {
3370     AliasGroupTy NewAG;
3371     AliasGroupTy &AG = AliasGroups[u];
3372     AliasGroupTy::iterator AGI = AG.begin();
3373     isl::set AGDomain = getAccessDomain(*AGI);
3374     while (AGI != AG.end()) {
3375       MemoryAccess *MA = *AGI;
3376       isl::set MADomain = getAccessDomain(MA);
3377       if (AGDomain.is_disjoint(MADomain)) {
3378         NewAG.push_back(MA);
3379         AGI = AG.erase(AGI);
3380       } else {
3381         AGDomain = AGDomain.unite(MADomain);
3382         AGI++;
3383       }
3384     }
3385     if (NewAG.size() > 1)
3386       AliasGroups.push_back(std::move(NewAG));
3387   }
3388 }
3389
3390 #ifndef NDEBUG
3391 static void verifyUse(Scop *S, Use &Op, LoopInfo &LI) {
3392   auto PhysUse = VirtualUse::create(S, Op, &LI, false);
3393   auto VirtUse = VirtualUse::create(S, Op, &LI, true);
3394   assert(PhysUse.getKind() == VirtUse.getKind());
3395 }
3396
3397 /// Check the consistency of every statement's MemoryAccesses.
3398 ///
3399 /// The check is carried out by expecting the "physical" kind of use (derived
3400 /// from the BasicBlock the instruction resides in) to be the same as the
3401 /// "virtual" kind of use (derived from a statement's MemoryAccess).
3402 ///
3403 /// The "physical" uses are taken by ensureValueRead to determine whether to
3404 /// create MemoryAccesses. When done, the kind of scalar access should be the
3405 /// same no matter which way it was derived.
3406 ///
3407 /// The MemoryAccesses might be changed by later SCoP-modifying passes and
3408 /// hence can intentionally influence the kind of uses (no longer matching the
3409 /// "physical" ones, hence called "virtual"). The CodeGenerator therefore has
3410 /// to pick up the virtual uses. At this point, however, no such pass has run
3411 /// yet, such that virtual and physical uses are still equivalent.
3412 static void verifyUses(Scop *S, LoopInfo &LI, DominatorTree &DT) {
3413   for (auto *BB : S->getRegion().blocks()) {
3414     for (auto &Inst : *BB) {
3415       auto *Stmt = S->getStmtFor(&Inst);
3416       if (!Stmt)
3417         continue;
3418
3419       if (isIgnoredIntrinsic(&Inst))
3420         continue;
3421
3422       // Branch conditions are encoded in the statement domains.
3423       if (Inst.isTerminator() && Stmt->isBlockStmt())
3424         continue;
3425
3426       // Verify all uses.
3427       for (auto &Op : Inst.operands())
3428         verifyUse(S, Op, LI);
3429
3430       // Stores do not produce values used by other statements.
3431       if (isa<StoreInst>(Inst))
3432         continue;
3433
3434       // For every value defined in the block, also check that a use of that
3435       // value in the same statement would not be an inter-statement use. It
3436       // can still be synthesizable or load-hoisted, but these kinds of
3437       // instructions are not directly copied during code generation.
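      // As a sketch (hypothetical IR, not taken from a test case): for
      //   %sum = fadd float %a, %b
      //   store float %sum, float* %ptr
      // within one statement, the definition %sum and its only use in the
      // store stay inside that statement (an intra-statement use), so no
      // scalar MemoryAccess has to be created for %sum.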
3438       auto VirtDef =
3439           VirtualUse::create(S, Stmt, Stmt->getSurroundingLoop(), &Inst, true);
3440       assert(VirtDef.getKind() == VirtualUse::Synthesizable ||
3441              VirtDef.getKind() == VirtualUse::Intra ||
3442              VirtDef.getKind() == VirtualUse::Hoisted);
3443     }
3444   }
3445
3446   if (S->hasSingleExitEdge())
3447     return;
3448
3449   // PHINodes in the SCoP region's exit block are also uses to be checked.
3450   if (!S->getRegion().isTopLevelRegion()) {
3451     for (auto &Inst : *S->getRegion().getExit()) {
3452       if (!isa<PHINode>(Inst))
3453         break;
3454
3455       for (auto &Op : Inst.operands())
3456         verifyUse(S, Op, LI);
3457     }
3458   }
3459 }
3460 #endif
3461
3462 void ScopBuilder::buildScop(Region &R, AssumptionCache &AC) {
3463   scop.reset(new Scop(R, SE, LI, DT, *SD.getDetectionContext(&R), ORE,
3464                       SD.getNextID()));
3465
3466   buildStmts(R);
3467
3468   // Create all invariant load instructions first. These are categorized as
3469   // 'synthesizable' and are therefore not part of any ScopStmt, but they
3470   // still need to be created somewhere.
3471   const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
3472   for (BasicBlock *BB : scop->getRegion().blocks()) {
3473     if (SD.isErrorBlock(*BB, scop->getRegion()))
3474       continue;
3475
3476     for (Instruction &Inst : *BB) {
3477       LoadInst *Load = dyn_cast<LoadInst>(&Inst);
3478       if (!Load)
3479         continue;
3480
3481       if (!RIL.count(Load))
3482         continue;
3483
3484       // Invariant loads require a MemoryAccess to be created in some
3485       // statement. It is not important to which statement the MemoryAccess is
3486       // added because it will later be removed from the ScopStmt again. We
3487       // choose the first statement of the basic block the LoadInst is in.
3488       ArrayRef<ScopStmt *> List = scop->getStmtListFor(BB);
3489       assert(!List.empty());
3490       ScopStmt *RILStmt = List.front();
3491       buildMemoryAccess(Load, RILStmt);
3492     }
3493   }
3494   buildAccessFunctions();
3495
3496   // In case the region does not have an exiting block we will later (during
3497   // code generation) split the exit block. This will move potential PHI nodes
3498   // from the current exit block into the new region exiting block. Hence, PHI
3499   // nodes that are currently not part of the region will become part of it.
3500   // To handle these PHI nodes later we will now model their operands as scalar
3501   // accesses. Note that we do not model anything in the exit block if we have
3502   // an exiting block in the region, as there will not be any splitting later.
3503   if (!R.isTopLevelRegion() && !scop->hasSingleExitEdge()) {
3504     for (Instruction &Inst : *R.getExit()) {
3505       PHINode *PHI = dyn_cast<PHINode>(&Inst);
3506       if (!PHI)
3507         break;
3508
3509       buildPHIAccesses(nullptr, PHI, nullptr, true);
3510     }
3511   }
3512
3513   // Create memory accesses for global reads since all arrays are now known.
3514   auto *AF = SE.getConstant(IntegerType::getInt64Ty(SE.getContext()), 0);
3515   for (auto GlobalReadPair : GlobalReads) {
3516     ScopStmt *GlobalReadStmt = GlobalReadPair.first;
3517     Instruction *GlobalRead = GlobalReadPair.second;
3518     for (auto *BP : ArrayBasePointers)
3519       addArrayAccess(GlobalReadStmt, MemAccInst(GlobalRead), MemoryAccess::READ,
3520                      BP, BP->getType(), false, {AF}, {nullptr}, GlobalRead);
3521   }
3522
3523   buildInvariantEquivalenceClasses();
3524
3525   /// A map from basic blocks to their invalid domains.
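  /// Roughly speaking, the invalid domain of a basic block collects the
  /// parameter and iteration values for which executing the block would
  /// violate an assumption taken while building its domain. These sets are
  /// used below to initialize the invalid domains of the statements.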
3526 DenseMap<BasicBlock *, isl::set> InvalidDomainMap; 3527 3528 if (!buildDomains(&R, InvalidDomainMap)) { 3529 LLVM_DEBUG( 3530 dbgs() << "Bailing-out because buildDomains encountered problems\n"); 3531 return; 3532 } 3533 3534 addUserAssumptions(AC, InvalidDomainMap); 3535 3536 // Initialize the invalid domain. 3537 for (ScopStmt &Stmt : scop->Stmts) 3538 if (Stmt.isBlockStmt()) 3539 Stmt.setInvalidDomain(InvalidDomainMap[Stmt.getEntryBlock()]); 3540 else 3541 Stmt.setInvalidDomain(InvalidDomainMap[getRegionNodeBasicBlock( 3542 Stmt.getRegion()->getNode())]); 3543 3544 // Remove empty statements. 3545 // Exit early in case there are no executable statements left in this scop. 3546 scop->removeStmtNotInDomainMap(); 3547 scop->simplifySCoP(false); 3548 if (scop->isEmpty()) { 3549 LLVM_DEBUG(dbgs() << "Bailing-out because SCoP is empty\n"); 3550 return; 3551 } 3552 3553 // The ScopStmts now have enough information to initialize themselves. 3554 for (ScopStmt &Stmt : *scop) { 3555 collectSurroundingLoops(Stmt); 3556 3557 buildDomain(Stmt); 3558 buildAccessRelations(Stmt); 3559 3560 if (DetectReductions) 3561 checkForReductions(Stmt); 3562 } 3563 3564 // Check early for a feasible runtime context. 3565 if (!scop->hasFeasibleRuntimeContext()) { 3566 LLVM_DEBUG(dbgs() << "Bailing-out because of unfeasible context (early)\n"); 3567 return; 3568 } 3569 3570 // Check early for profitability. Afterwards it cannot change anymore, 3571 // only the runtime context could become infeasible. 3572 if (!scop->isProfitable(UnprofitableScalarAccs)) { 3573 scop->invalidate(PROFITABLE, DebugLoc()); 3574 LLVM_DEBUG( 3575 dbgs() << "Bailing-out because SCoP is not considered profitable\n"); 3576 return; 3577 } 3578 3579 buildSchedule(); 3580 3581 finalizeAccesses(); 3582 3583 scop->realignParams(); 3584 addUserContext(); 3585 3586 // After the context was fully constructed, thus all our knowledge about 3587 // the parameters is in there, we add all recorded assumptions to the 3588 // assumed/invalid context. 3589 addRecordedAssumptions(); 3590 3591 scop->simplifyContexts(); 3592 if (!buildAliasChecks()) { 3593 LLVM_DEBUG(dbgs() << "Bailing-out because could not build alias checks\n"); 3594 return; 3595 } 3596 3597 hoistInvariantLoads(); 3598 canonicalizeDynamicBasePtrs(); 3599 verifyInvariantLoads(); 3600 scop->simplifySCoP(true); 3601 3602 // Check late for a feasible runtime context because profitability did not 3603 // change. 
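  // The alias checks, invariant load hoisting, and recorded assumptions added
  // above may each have tightened the assumed context, so it can have become
  // infeasible even though the early check succeeded.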
3604 if (!scop->hasFeasibleRuntimeContext()) { 3605 LLVM_DEBUG(dbgs() << "Bailing-out because of unfeasible context (late)\n"); 3606 return; 3607 } 3608 3609 #ifndef NDEBUG 3610 verifyUses(scop.get(), LI, DT); 3611 #endif 3612 } 3613 3614 ScopBuilder::ScopBuilder(Region *R, AssumptionCache &AC, AliasAnalysis &AA, 3615 const DataLayout &DL, DominatorTree &DT, LoopInfo &LI, 3616 ScopDetection &SD, ScalarEvolution &SE, 3617 OptimizationRemarkEmitter &ORE) 3618 : AA(AA), DL(DL), DT(DT), LI(LI), SD(SD), SE(SE), ORE(ORE) { 3619 DebugLoc Beg, End; 3620 auto P = getBBPairForRegion(R); 3621 getDebugLocations(P, Beg, End); 3622 3623 std::string Msg = "SCoP begins here."; 3624 ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEntry", Beg, P.first) 3625 << Msg); 3626 3627 buildScop(*R, AC); 3628 3629 LLVM_DEBUG(dbgs() << *scop); 3630 3631 if (!scop->hasFeasibleRuntimeContext()) { 3632 InfeasibleScops++; 3633 Msg = "SCoP ends here but was dismissed."; 3634 LLVM_DEBUG(dbgs() << "SCoP detected but dismissed\n"); 3635 RecordedAssumptions.clear(); 3636 scop.reset(); 3637 } else { 3638 Msg = "SCoP ends here."; 3639 ++ScopFound; 3640 if (scop->getMaxLoopDepth() > 0) 3641 ++RichScopFound; 3642 } 3643 3644 if (R->isTopLevelRegion()) 3645 ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.first) 3646 << Msg); 3647 else 3648 ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.second) 3649 << Msg); 3650 } 3651