1 //===--------- ScopInfo.cpp ----------------------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Create a polyhedral description for a static control flow region. 11 // 12 // The pass creates a polyhedral description of the Scops detected by the Scop 13 // detection derived from their LLVM-IR code. 14 // 15 // This representation is shared among several tools in the polyhedral 16 // community, which are e.g. Cloog, Pluto, Loopo, Graphite. 17 // 18 //===----------------------------------------------------------------------===// 19 20 #include "polly/ScopInfo.h" 21 #include "polly/LinkAllPasses.h" 22 #include "polly/Options.h" 23 #include "polly/ScopBuilder.h" 24 #include "polly/Support/GICHelper.h" 25 #include "polly/Support/SCEVValidator.h" 26 #include "polly/Support/ScopHelper.h" 27 #include "llvm/ADT/DepthFirstIterator.h" 28 #include "llvm/ADT/MapVector.h" 29 #include "llvm/ADT/PostOrderIterator.h" 30 #include "llvm/ADT/STLExtras.h" 31 #include "llvm/ADT/SetVector.h" 32 #include "llvm/ADT/SmallSet.h" 33 #include "llvm/ADT/Statistic.h" 34 #include "llvm/ADT/StringExtras.h" 35 #include "llvm/Analysis/AliasAnalysis.h" 36 #include "llvm/Analysis/Loads.h" 37 #include "llvm/Analysis/LoopInfo.h" 38 #include "llvm/Analysis/LoopIterator.h" 39 #include "llvm/Analysis/RegionIterator.h" 40 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 41 #include "llvm/IR/DiagnosticInfo.h" 42 #include "llvm/Support/Debug.h" 43 #include "isl/aff.h" 44 #include "isl/constraint.h" 45 #include "isl/local_space.h" 46 #include "isl/map.h" 47 #include "isl/options.h" 48 #include "isl/printer.h" 49 #include "isl/schedule.h" 50 #include "isl/schedule_node.h" 51 #include "isl/set.h" 52 #include "isl/union_map.h" 53 #include "isl/union_set.h" 54 
#include "isl/val.h"
#include <sstream>
#include <string>
#include <vector>

using namespace llvm;
using namespace polly;

#define DEBUG_TYPE "polly-scops"

// Statistics counters for the different kinds of assumptions Polly records
// while building a Scop (printed with -stats).
STATISTIC(AssumptionsAliasing, "Number of aliasing assumptions taken.");
STATISTIC(AssumptionsInbounds, "Number of inbounds assumptions taken.");
STATISTIC(AssumptionsWrapping, "Number of wrapping assumptions taken.");
STATISTIC(AssumptionsUnsigned, "Number of unsigned assumptions taken.");
STATISTIC(AssumptionsComplexity, "Number of too complex SCoPs.");
STATISTIC(AssumptionsUnprofitable, "Number of unprofitable SCoPs.");
STATISTIC(AssumptionsErrorBlock, "Number of error block assumptions taken.");
STATISTIC(AssumptionsInfiniteLoop, "Number of bounded loop assumptions taken.");
STATISTIC(AssumptionsInvariantLoad,
          "Number of invariant loads assumptions taken.");
STATISTIC(AssumptionsDelinearization,
          "Number of delinearization assumptions taken.");

// The maximal number of basic sets we allow during domain construction to
// be created. More complex scops will result in very high compile time and
// are also unlikely to result in good code.
static int const MaxDisjunctionsInDomain = 20;

static cl::opt<bool> PollyRemarksMinimal(
    "polly-remarks-minimal",
    cl::desc("Do not emit remarks about assumptions that are known"),
    cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::cat(PollyCategory));

// Multiplicative reductions can be disabled separately as these kinds of
// operations can overflow easily. Additive reductions and bit operations
// are in contrast pretty stable.
static cl::opt<bool> DisableMultiplicativeReductions(
    "polly-disable-multiplicative-reductions",
    cl::desc("Disable multiplicative reductions"), cl::Hidden, cl::ZeroOrMore,
    cl::init(false), cl::cat(PollyCategory));

static cl::opt<unsigned> RunTimeChecksMaxParameters(
    "polly-rtc-max-parameters",
    cl::desc("The maximal number of parameters allowed in RTCs."), cl::Hidden,
    cl::ZeroOrMore, cl::init(8), cl::cat(PollyCategory));

static cl::opt<unsigned> RunTimeChecksMaxArraysPerGroup(
    "polly-rtc-max-arrays-per-group",
    cl::desc("The maximal number of arrays to compare in each alias group."),
    cl::Hidden, cl::ZeroOrMore, cl::init(20), cl::cat(PollyCategory));

static cl::opt<std::string> UserContextStr(
    "polly-context", cl::value_desc("isl parameter set"),
    cl::desc("Provide additional constraints on the context parameters"),
    cl::init(""), cl::cat(PollyCategory));

static cl::opt<bool> DetectReductions("polly-detect-reductions",
                                      cl::desc("Detect and exploit reductions"),
                                      cl::Hidden, cl::ZeroOrMore,
                                      cl::init(true), cl::cat(PollyCategory));

static cl::opt<bool>
    IslOnErrorAbort("polly-on-isl-error-abort",
                    cl::desc("Abort if an isl error is encountered"),
                    cl::init(true), cl::cat(PollyCategory));

static cl::opt<bool> UnprofitableScalarAccs(
    "polly-unprofitable-scalar-accs",
    cl::desc("Count statements with scalar accesses as not optimizable"),
    cl::Hidden, cl::init(true), cl::cat(PollyCategory));

//===----------------------------------------------------------------------===//

// Create a sequence of two schedules. Either argument may be null and is
// interpreted as the empty schedule. Can also return null if both schedules are
// empty.
static __isl_give isl_schedule *
combineInSequence(__isl_take isl_schedule *Prev,
                  __isl_take isl_schedule *Succ) {
  // A null schedule is the neutral element of sequencing.
  if (!Prev)
    return Succ;
  if (!Succ)
    return Prev;

  return isl_schedule_sequence(Prev, Succ);
}

/// Bound dimension @p dim of @p S by the signed range @p Range.
///
/// For a sign-wrapped range [LB, UB) with LB > UB the admissible values are
/// the *union* of {x >= LB} and {x <= UB - 1}; otherwise the signed min/max
/// bounds are *intersected*.
static __isl_give isl_set *addRangeBoundsToSet(__isl_take isl_set *S,
                                               const ConstantRange &Range,
                                               int dim,
                                               enum isl_dim_type type) {
  isl_val *V;
  isl_ctx *ctx = isl_set_get_ctx(S);

  bool useLowerUpperBound = Range.isSignWrappedSet() && !Range.isFullSet();
  const auto LB = useLowerUpperBound ? Range.getLower() : Range.getSignedMin();
  V = isl_valFromAPInt(ctx, LB, true);
  isl_set *SLB = isl_set_lower_bound_val(isl_set_copy(S), type, dim, V);

  const auto UB = useLowerUpperBound ? Range.getUpper() : Range.getSignedMax();
  V = isl_valFromAPInt(ctx, UB, true);
  // getUpper() is exclusive while getSignedMax() is inclusive, so subtract
  // one in the wrapped case to obtain an inclusive upper bound.
  if (useLowerUpperBound)
    V = isl_val_sub_ui(V, 1);
  isl_set *SUB = isl_set_upper_bound_val(S, type, dim, V);

  if (useLowerUpperBound)
    return isl_set_union(SLB, SUB);
  else
    return isl_set_intersect(SLB, SUB);
}

/// If @p BasePtr is a load executed inside the SCoP @p S, return the
/// ScopArrayInfo of the array that load reads from; nullptr otherwise.
/// Used to link indirect (pointer-chasing) accesses to their origin array.
static const ScopArrayInfo *identifyBasePtrOriginSAI(Scop *S, Value *BasePtr) {
  LoadInst *BasePtrLI = dyn_cast<LoadInst>(BasePtr);
  if (!BasePtrLI)
    return nullptr;

  if (!S->contains(BasePtrLI))
    return nullptr;

  ScalarEvolution &SE = *S->getSE();

  auto *OriginBaseSCEV =
      SE.getPointerBase(SE.getSCEV(BasePtrLI->getPointerOperand()));
  if (!OriginBaseSCEV)
    return nullptr;

  auto *OriginBaseSCEVUnknown = dyn_cast<SCEVUnknown>(OriginBaseSCEV);
  if (!OriginBaseSCEVUnknown)
    return nullptr;

  return S->getScopArrayInfo(OriginBaseSCEVUnknown->getValue(),
                             ScopArrayInfo::MK_Array);
}

ScopArrayInfo::ScopArrayInfo(Value *BasePtr, Type *ElementType, isl_ctx *Ctx,
                             ArrayRef<const SCEV *> Sizes, enum MemoryKind Kind,
                             const DataLayout &DL, Scop *S,
                             const char *BaseName)
    : BasePtr(BasePtr), ElementType(ElementType), Kind(Kind), DL(DL), S(*S) {
  // Derive an isl-compatible name; PHI arrays get a "__phi" suffix.
  std::string BasePtrName =
      BaseName ? BaseName : getIslCompatibleName("MemRef_", BasePtr,
                                                 Kind == MK_PHI ? "__phi" : "");
  Id = isl_id_alloc(Ctx, BasePtrName.c_str(), this);

  updateSizes(Sizes);

  // Only genuine arrays with a base pointer can have an origin array.
  if (!BasePtr || Kind != MK_Array) {
    BasePtrOriginSAI = nullptr;
    return;
  }

  BasePtrOriginSAI = identifyBasePtrOriginSAI(S, BasePtr);
  if (BasePtrOriginSAI)
    const_cast<ScopArrayInfo *>(BasePtrOriginSAI)->addDerivedSAI(this);
}

__isl_give isl_space *ScopArrayInfo::getSpace() const {
  // Build a set space with one dimension per array dimension, tagged with
  // this array's id.
  auto *Space =
      isl_space_set_alloc(isl_id_get_ctx(Id), 0, getNumberOfDimensions());
  Space = isl_space_set_tuple_id(Space, isl_dim_set, isl_id_copy(Id));
  return Space;
}

bool ScopArrayInfo::isReadOnly() {
  // The array is read-only iff no write in the Scop targets its space.
  isl_union_set *WriteSet = isl_union_map_range(S.getWrites());
  isl_space *Space = getSpace();
  WriteSet = isl_union_set_intersect(
      WriteSet, isl_union_set_from_set(isl_set_universe(Space)));

  bool IsReadOnly = isl_union_set_is_empty(WriteSet);
  isl_union_set_free(WriteSet);

  return IsReadOnly;
}

void ScopArrayInfo::updateElementType(Type *NewElementType) {
  if (NewElementType == ElementType)
    return;

  auto OldElementSize = DL.getTypeAllocSizeInBits(ElementType);
  auto NewElementSize = DL.getTypeAllocSizeInBits(NewElementType);

  if (NewElementSize == OldElementSize || NewElementSize == 0)
    return;

  // NOTE(review): for non-zero sizes, NewElementSize % OldElementSize == 0
  // implies NewElementSize >= OldElementSize, so this condition can never
  // hold and the GCD fallback below is always taken — confirm whether the
  // intended condition was OldElementSize % NewElementSize == 0.
  if (NewElementSize % OldElementSize == 0 && NewElementSize < OldElementSize) {
    ElementType = NewElementType;
  } else {
    // Fall back to an integer type of the common divisor size, so every
    // access to this array can be expressed in multiples of it.
    auto GCD = GreatestCommonDivisor64(NewElementSize, OldElementSize);
    ElementType = IntegerType::get(ElementType->getContext(), GCD);
  }
}

bool ScopArrayInfo::updateSizes(ArrayRef<const SCEV *> NewSizes,
                                bool CheckConsistency) {
  // Dimensions are aligned at the innermost (rightmost) end; extra outer
  // dimensions on either side are not compared.
  int SharedDims = std::min(NewSizes.size(), DimensionSizes.size());
  int ExtraDimsNew = NewSizes.size() - SharedDims;
  int ExtraDimsOld = DimensionSizes.size() - SharedDims;

  if (CheckConsistency) {
    // Reject updates whose known dimension sizes contradict the stored ones.
    for (int i = 0; i < SharedDims; i++) {
      auto *NewSize = NewSizes[i + ExtraDimsNew];
      auto *KnownSize = DimensionSizes[i + ExtraDimsOld];
      if (NewSize && KnownSize && NewSize != KnownSize)
        return false;
    }

    // Nothing to do if we already know at least as many dimensions.
    if (DimensionSizes.size() >= NewSizes.size())
      return true;
  }

  DimensionSizes.clear();
  DimensionSizes.insert(DimensionSizes.begin(), NewSizes.begin(),
                        NewSizes.end());
  // Recompute the piecewise-affine form of each size; a null SCEV (unknown
  // outermost size) is mirrored by a null pw_aff.
  for (isl_pw_aff *Size : DimensionSizesPw)
    isl_pw_aff_free(Size);
  DimensionSizesPw.clear();
  for (const SCEV *Expr : DimensionSizes) {
    if (!Expr) {
      DimensionSizesPw.push_back(nullptr);
      continue;
    }
    isl_pw_aff *Size = S.getPwAffOnly(Expr);
    DimensionSizesPw.push_back(Size);
  }
  return true;
}

ScopArrayInfo::~ScopArrayInfo() {
  isl_id_free(Id);
  for (isl_pw_aff *Size : DimensionSizesPw)
    isl_pw_aff_free(Size);
}

std::string ScopArrayInfo::getName() const { return isl_id_get_name(Id); }

int ScopArrayInfo::getElemSizeInBytes() const {
  return DL.getTypeAllocSize(ElementType);
}

__isl_give isl_id *ScopArrayInfo::getBasePtrId() const {
  return isl_id_copy(Id);
}

void ScopArrayInfo::dump() const { print(errs()); }

void ScopArrayInfo::print(raw_ostream &OS, bool SizeAsPwAff) const {
  OS.indent(8) << *getElementType() << " " << getName();
  unsigned u = 0;
  // An unknown outermost dimension size is printed as [*].
  if (getNumberOfDimensions() > 0 && !getDimensionSize(0)) {
    OS << "[*]";
    u++;
  }
  for (; u < getNumberOfDimensions(); u++) {
    OS << "[";

    if (SizeAsPwAff) {
      auto *Size = getDimensionSizePw(u);
      OS << " " << Size << " ";
      isl_pw_aff_free(Size);
    } else {
      OS << *getDimensionSize(u);
    }

    OS << "]";
  }

  OS << ";";

  if (BasePtrOriginSAI)
    OS << " [BasePtrOrigin: " << BasePtrOriginSAI->getName() << "]";

  OS << " // Element size " << getElemSizeInBytes() << "\n";
}

const ScopArrayInfo *
ScopArrayInfo::getFromAccessFunction(__isl_keep isl_pw_multi_aff *PMA) {
  isl_id *Id = isl_pw_multi_aff_get_tuple_id(PMA, isl_dim_out);
  assert(Id && "Output dimension didn't have an ID");
  return getFromId(Id);
}

const ScopArrayInfo *ScopArrayInfo::getFromId(__isl_take isl_id *Id) {
  // The SAI pointer is stashed in the id's user pointer at construction.
  void *User = isl_id_get_user(Id);
  const ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(User);
  isl_id_free(Id);
  return SAI;
}

// Rewrite the access relation so that every constant-sized array dimension
// stays within [0, size): a subscript becomes (index % size) and the
// overflow (index / size) is carried into the next-outer dimension.
void MemoryAccess::wrapConstantDimensions() {
  auto *SAI = getScopArrayInfo();
  auto *ArraySpace = SAI->getSpace();
  auto *Ctx = isl_space_get_ctx(ArraySpace);
  unsigned DimsArray = SAI->getNumberOfDimensions();

  // Start from the identity map on the array space and successively replace
  // dimension pairs with their mod/div combination.
  auto *DivModAff = isl_multi_aff_identity(isl_space_map_from_domain_and_range(
      isl_space_copy(ArraySpace), isl_space_copy(ArraySpace)));
  auto *LArraySpace = isl_local_space_from_space(ArraySpace);

  // Begin with last dimension, to iteratively carry into higher dimensions.
  for (int i = DimsArray - 1; i > 0; i--) {
    auto *DimSize = SAI->getDimensionSize(i);
    auto *DimSizeCst = dyn_cast<SCEVConstant>(DimSize);

    // This transformation is not applicable to dimensions with dynamic size.
    if (!DimSizeCst)
      continue;

    auto *DimSizeVal = isl_valFromAPInt(Ctx, DimSizeCst->getAPInt(), false);
    auto *Var = isl_aff_var_on_domain(isl_local_space_copy(LArraySpace),
                                      isl_dim_set, i);
    auto *PrevVar = isl_aff_var_on_domain(isl_local_space_copy(LArraySpace),
                                          isl_dim_set, i - 1);

    // Compute: index % size
    // Modulo must apply in the divide of the previous iteration, if any.
    auto *Modulo = isl_aff_copy(Var);
    Modulo = isl_aff_mod_val(Modulo, isl_val_copy(DimSizeVal));
    Modulo = isl_aff_pullback_multi_aff(Modulo, isl_multi_aff_copy(DivModAff));

    // Compute: floor(index / size)
    auto *Divide = Var;
    Divide = isl_aff_div(
        Divide,
        isl_aff_val_on_domain(isl_local_space_copy(LArraySpace), DimSizeVal));
    Divide = isl_aff_floor(Divide);
    Divide = isl_aff_add(Divide, PrevVar);
    Divide = isl_aff_pullback_multi_aff(Divide, isl_multi_aff_copy(DivModAff));

    // Apply Modulo and Divide.
    DivModAff = isl_multi_aff_set_aff(DivModAff, i, Modulo);
    DivModAff = isl_multi_aff_set_aff(DivModAff, i - 1, Divide);
  }

  // Apply all modulo/divides on the accesses.
  AccessRelation =
      isl_map_apply_range(AccessRelation, isl_map_from_multi_aff(DivModAff));
  AccessRelation = isl_map_detect_equalities(AccessRelation);
  isl_local_space_free(LArraySpace);
}

// Adjust the access relation's range to the (possibly higher-dimensional)
// space of the underlying ScopArrayInfo, padding missing outer dimensions
// with zero and widening accesses whose loaded type is larger than the
// array's canonical element type.
void MemoryAccess::updateDimensionality() {
  auto *SAI = getScopArrayInfo();
  auto *ArraySpace = SAI->getSpace();
  auto *AccessSpace = isl_space_range(isl_map_get_space(AccessRelation));
  auto *Ctx = isl_space_get_ctx(AccessSpace);

  auto DimsArray = isl_space_dim(ArraySpace, isl_dim_set);
  auto DimsAccess = isl_space_dim(AccessSpace, isl_dim_set);
  auto DimsMissing = DimsArray - DimsAccess;

  auto *BB = getStatement()->getEntryBlock();
  auto &DL = BB->getModule()->getDataLayout();
  unsigned ArrayElemSize = SAI->getElemSizeInBytes();
  unsigned ElemBytes = DL.getTypeAllocSize(getElementType());

  // Map the access space into the array space: missing outer dimensions are
  // fixed to 0, the remaining ones are equated pairwise.
  auto *Map = isl_map_from_domain_and_range(
      isl_set_universe(AccessSpace),
      isl_set_universe(isl_space_copy(ArraySpace)));

  for (unsigned i = 0; i < DimsMissing; i++)
    Map = isl_map_fix_si(Map, isl_dim_out, i, 0);

  for (unsigned i = DimsMissing; i < DimsArray; i++)
    Map = isl_map_equate(Map, isl_dim_in, i - DimsMissing, isl_dim_out, i);

  AccessRelation = isl_map_apply_range(AccessRelation, Map);

  // For the non delinearized arrays, divide the access function of the last
  // subscript by the size of the elements in the array.
  //
  // A stride one array access in C expressed as A[i] is expressed in
  // LLVM-IR as something like A[i * elementsize]. This hides the fact that
  // two subsequent values of 'i' index two values that are stored next to
  // each other in memory. By this division we make this characteristic
  // obvious again. If the base pointer was accessed with offsets not divisible
  // by the accesses element size, we will have chosen a smaller ArrayElemSize
  // that divides the offsets of all accesses to this base pointer.
  if (DimsAccess == 1) {
    isl_val *V = isl_val_int_from_si(Ctx, ArrayElemSize);
    AccessRelation = isl_map_floordiv_val(AccessRelation, V);
  }

  // We currently do this only if we added at least one dimension, which means
  // some dimension's indices have not been specified, an indicator that some
  // index values have been added together.
  // TODO: Investigate general usefulness; Effect on unit tests is to make index
  // expressions more complicated.
  if (DimsMissing)
    wrapConstantDimensions();

  if (!isAffine())
    computeBoundsOnAccessRelation(ArrayElemSize);

  // Introduce multi-element accesses in case the type loaded by this memory
  // access is larger than the canonical element type of the array.
  //
  // An access ((float *)A)[i] to an array char *A is modeled as
  // {[i] -> A[o] : 4 i <= o <= 4 i + 3}
  if (ElemBytes > ArrayElemSize) {
    assert(ElemBytes % ArrayElemSize == 0 &&
           "Loaded element size should be multiple of canonical element size");
    auto *Map = isl_map_from_domain_and_range(
        isl_set_universe(isl_space_copy(ArraySpace)),
        isl_set_universe(isl_space_copy(ArraySpace)));
    for (unsigned i = 0; i < DimsArray - 1; i++)
      Map = isl_map_equate(Map, isl_dim_in, i, isl_dim_out, i);

    isl_constraint *C;
    isl_local_space *LS;

    LS = isl_local_space_from_space(isl_map_get_space(Map));
    int Num = ElemBytes / getScopArrayInfo()->getElemSizeInBytes();

    // Constrain the innermost output dimension o to i <= o <= i + Num - 1.
    C = isl_constraint_alloc_inequality(isl_local_space_copy(LS));
    C = isl_constraint_set_constant_val(C, isl_val_int_from_si(Ctx, Num - 1));
    C = isl_constraint_set_coefficient_si(C, isl_dim_in, DimsArray - 1, 1);
    C = isl_constraint_set_coefficient_si(C, isl_dim_out, DimsArray - 1, -1);
    Map = isl_map_add_constraint(Map, C);

    C = isl_constraint_alloc_inequality(LS);
    C = isl_constraint_set_coefficient_si(C, isl_dim_in, DimsArray - 1, -1);
    C = isl_constraint_set_coefficient_si(C, isl_dim_out, DimsArray - 1, 1);
    C = isl_constraint_set_constant_val(C, isl_val_int_from_si(Ctx, 0));
    Map = isl_map_add_constraint(Map, C);
    AccessRelation = isl_map_apply_range(AccessRelation, Map);
  }

  isl_space_free(ArraySpace);
}

/// Return the printable operator symbol for reduction type @p RT.
/// Must not be called with RT_NONE.
const std::string
MemoryAccess::getReductionOperatorStr(MemoryAccess::ReductionType RT) {
  switch (RT) {
  case MemoryAccess::RT_NONE:
    llvm_unreachable("Requested a reduction operator string for a memory "
                     "access which isn't a reduction");
  case MemoryAccess::RT_ADD:
    return "+";
  case MemoryAccess::RT_MUL:
    return "*";
  case MemoryAccess::RT_BOR:
    return "|";
  case MemoryAccess::RT_BXOR:
    return "^";
  case MemoryAccess::RT_BAND:
    return "&";
  }
  llvm_unreachable("Unknown reduction type");
  return "";
}

/// Return the reduction type for a given binary operator.
static MemoryAccess::ReductionType getReductionType(const BinaryOperator *BinOp,
                                                    const Instruction *Load) {
  if (!BinOp)
    return MemoryAccess::RT_NONE;
  switch (BinOp->getOpcode()) {
  case Instruction::FAdd:
    // Floating-point reductions require reassociation (unsafe algebra).
    if (!BinOp->hasUnsafeAlgebra())
      return MemoryAccess::RT_NONE;
    // Fall through
  case Instruction::Add:
    return MemoryAccess::RT_ADD;
  case Instruction::Or:
    return MemoryAccess::RT_BOR;
  case Instruction::Xor:
    return MemoryAccess::RT_BXOR;
  case Instruction::And:
    return MemoryAccess::RT_BAND;
  case Instruction::FMul:
    if (!BinOp->hasUnsafeAlgebra())
      return MemoryAccess::RT_NONE;
    // Fall through
  case Instruction::Mul:
    if (DisableMultiplicativeReductions)
      return MemoryAccess::RT_NONE;
    return MemoryAccess::RT_MUL;
  default:
    return MemoryAccess::RT_NONE;
  }
}

MemoryAccess::~MemoryAccess() {
  isl_id_free(Id);
  isl_set_free(InvalidDomain);
  isl_map_free(AccessRelation);
  isl_map_free(NewAccessRelation);
}

const ScopArrayInfo *MemoryAccess::getOriginalScopArrayInfo() const {
  // The SAI is stored in the user pointer of the array id.
  isl_id *ArrayId = getArrayId();
  void *User = isl_id_get_user(ArrayId);
  const ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(User);
  isl_id_free(ArrayId);
  return SAI;
}

const ScopArrayInfo *MemoryAccess::getLatestScopArrayInfo() const {
  isl_id *ArrayId = getLatestArrayId();
  void *User = isl_id_get_user(ArrayId);
  const ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(User);
  isl_id_free(ArrayId);
  return SAI;
}

__isl_give isl_id *MemoryAccess::getOriginalArrayId() const {
  return isl_map_get_tuple_id(AccessRelation, isl_dim_out);
}

__isl_give isl_id *MemoryAccess::getLatestArrayId() const {
  // Prefer the array targeted by a new (JSON-imported/updated) access
  // relation, if one was set.
  if (!hasNewAccessRelation())
    return getOriginalArrayId();
  return isl_map_get_tuple_id(NewAccessRelation, isl_dim_out);
}

__isl_give isl_map *MemoryAccess::getAddressFunction() const {
  return isl_map_lexmin(getAccessRelation());
}

__isl_give isl_pw_multi_aff *MemoryAccess::applyScheduleToAccessRelation(
    __isl_take isl_union_map *USchedule) const {
  isl_map *Schedule, *ScheduledAccRel;
  isl_union_set *UDomain;

  // Restrict the schedule to this statement's domain before composing it
  // with the access (address) function.
  UDomain = isl_union_set_from_set(getStatement()->getDomain());
  USchedule = isl_union_map_intersect_domain(USchedule, UDomain);
  Schedule = isl_map_from_union_map(USchedule);
  ScheduledAccRel = isl_map_apply_domain(getAddressFunction(), Schedule);
  return isl_pw_multi_aff_from_map(ScheduledAccRel);
}

__isl_give isl_map *MemoryAccess::getOriginalAccessRelation() const {
  return isl_map_copy(AccessRelation);
}

std::string MemoryAccess::getOriginalAccessRelationStr() const {
  return stringFromIslObj(AccessRelation);
}

__isl_give isl_space *MemoryAccess::getOriginalAccessRelationSpace() const {
  return isl_map_get_space(AccessRelation);
}

__isl_give isl_map *MemoryAccess::getNewAccessRelation() const {
  return isl_map_copy(NewAccessRelation);
}

std::string MemoryAccess::getNewAccessRelationStr() const {
  return stringFromIslObj(NewAccessRelation);
}

// Build a universe map from the statement's domain to a 1-dimensional
// anonymous range, used to overapproximate non-affine accesses.
__isl_give isl_basic_map *
MemoryAccess::createBasicAccessMap(ScopStmt *Statement) {
  isl_space *Space = isl_space_set_alloc(Statement->getIslCtx(), 0, 1);
  Space = isl_space_align_params(Space, Statement->getDomainSpace());

  return isl_basic_map_from_domain_and_range(
      isl_basic_set_universe(Statement->getDomainSpace()),
      isl_basic_set_universe(Space));
}

// Formalize no out-of-bound access assumption
//
// When delinearizing array accesses we optimistically assume that the
// delinearized accesses do not access out of bound locations (the subscript
// expression of each array evaluates for each statement instance that is
// executed to a value that is larger than zero and strictly smaller than the
// size of the corresponding dimension). The only exception is the outermost
// dimension for which we do not need to assume any upper bound. At this point
// we formalize this assumption to ensure that at code generation time the
// relevant run-time checks can be generated.
//
// To find the set of constraints necessary to avoid out of bound accesses, we
// first build the set of data locations that are not within array bounds. We
// then apply the reverse access relation to obtain the set of iterations that
// may contain invalid accesses and reduce this set of iterations to the ones
// that are actually executed by intersecting them with the domain of the
// statement. If we now project out all loop dimensions, we obtain a set of
// parameters that may cause statement instances to be executed that may
// possibly yield out of bound memory accesses. The complement of these
// constraints is the set of constraints that needs to be assumed to ensure such
// statement instances are never executed.
637 void MemoryAccess::assumeNoOutOfBound() { 638 auto *SAI = getScopArrayInfo(); 639 isl_space *Space = isl_space_range(getOriginalAccessRelationSpace()); 640 isl_set *Outside = isl_set_empty(isl_space_copy(Space)); 641 for (int i = 1, Size = isl_space_dim(Space, isl_dim_set); i < Size; ++i) { 642 isl_local_space *LS = isl_local_space_from_space(isl_space_copy(Space)); 643 isl_pw_aff *Var = 644 isl_pw_aff_var_on_domain(isl_local_space_copy(LS), isl_dim_set, i); 645 isl_pw_aff *Zero = isl_pw_aff_zero_on_domain(LS); 646 647 isl_set *DimOutside; 648 649 DimOutside = isl_pw_aff_lt_set(isl_pw_aff_copy(Var), Zero); 650 isl_pw_aff *SizeE = SAI->getDimensionSizePw(i); 651 SizeE = isl_pw_aff_add_dims(SizeE, isl_dim_in, 652 isl_space_dim(Space, isl_dim_set)); 653 SizeE = isl_pw_aff_set_tuple_id(SizeE, isl_dim_in, 654 isl_space_get_tuple_id(Space, isl_dim_set)); 655 656 DimOutside = isl_set_union(DimOutside, isl_pw_aff_le_set(SizeE, Var)); 657 658 Outside = isl_set_union(Outside, DimOutside); 659 } 660 661 Outside = isl_set_apply(Outside, isl_map_reverse(getAccessRelation())); 662 Outside = isl_set_intersect(Outside, Statement->getDomain()); 663 Outside = isl_set_params(Outside); 664 665 // Remove divs to avoid the construction of overly complicated assumptions. 666 // Doing so increases the set of parameter combinations that are assumed to 667 // not appear. This is always save, but may make the resulting run-time check 668 // bail out more often than strictly necessary. 669 Outside = isl_set_remove_divs(Outside); 670 Outside = isl_set_complement(Outside); 671 const auto &Loc = getAccessInstruction() 672 ? 
getAccessInstruction()->getDebugLoc() 673 : DebugLoc(); 674 Statement->getParent()->recordAssumption(INBOUNDS, Outside, Loc, 675 AS_ASSUMPTION); 676 isl_space_free(Space); 677 } 678 679 void MemoryAccess::buildMemIntrinsicAccessRelation() { 680 assert(isMemoryIntrinsic()); 681 assert(Subscripts.size() == 2 && Sizes.size() == 1); 682 683 auto *SubscriptPWA = getPwAff(Subscripts[0]); 684 auto *SubscriptMap = isl_map_from_pw_aff(SubscriptPWA); 685 686 isl_map *LengthMap; 687 if (Subscripts[1] == nullptr) { 688 LengthMap = isl_map_universe(isl_map_get_space(SubscriptMap)); 689 } else { 690 auto *LengthPWA = getPwAff(Subscripts[1]); 691 LengthMap = isl_map_from_pw_aff(LengthPWA); 692 auto *RangeSpace = isl_space_range(isl_map_get_space(LengthMap)); 693 LengthMap = isl_map_apply_range(LengthMap, isl_map_lex_gt(RangeSpace)); 694 } 695 LengthMap = isl_map_lower_bound_si(LengthMap, isl_dim_out, 0, 0); 696 LengthMap = isl_map_align_params(LengthMap, isl_map_get_space(SubscriptMap)); 697 SubscriptMap = 698 isl_map_align_params(SubscriptMap, isl_map_get_space(LengthMap)); 699 LengthMap = isl_map_sum(LengthMap, SubscriptMap); 700 AccessRelation = isl_map_set_tuple_id(LengthMap, isl_dim_in, 701 getStatement()->getDomainId()); 702 } 703 704 void MemoryAccess::computeBoundsOnAccessRelation(unsigned ElementSize) { 705 ScalarEvolution *SE = Statement->getParent()->getSE(); 706 707 auto MAI = MemAccInst(getAccessInstruction()); 708 if (isa<MemIntrinsic>(MAI)) 709 return; 710 711 Value *Ptr = MAI.getPointerOperand(); 712 if (!Ptr || !SE->isSCEVable(Ptr->getType())) 713 return; 714 715 auto *PtrSCEV = SE->getSCEV(Ptr); 716 if (isa<SCEVCouldNotCompute>(PtrSCEV)) 717 return; 718 719 auto *BasePtrSCEV = SE->getPointerBase(PtrSCEV); 720 if (BasePtrSCEV && !isa<SCEVCouldNotCompute>(BasePtrSCEV)) 721 PtrSCEV = SE->getMinusSCEV(PtrSCEV, BasePtrSCEV); 722 723 const ConstantRange &Range = SE->getSignedRange(PtrSCEV); 724 if (Range.isFullSet()) 725 return; 726 727 bool isWrapping = 
Range.isSignWrappedSet(); 728 unsigned BW = Range.getBitWidth(); 729 const auto One = APInt(BW, 1); 730 const auto LB = isWrapping ? Range.getLower() : Range.getSignedMin(); 731 const auto UB = isWrapping ? (Range.getUpper() - One) : Range.getSignedMax(); 732 733 auto Min = LB.sdiv(APInt(BW, ElementSize)); 734 auto Max = UB.sdiv(APInt(BW, ElementSize)) + One; 735 736 isl_set *AccessRange = isl_map_range(isl_map_copy(AccessRelation)); 737 AccessRange = 738 addRangeBoundsToSet(AccessRange, ConstantRange(Min, Max), 0, isl_dim_set); 739 AccessRelation = isl_map_intersect_range(AccessRelation, AccessRange); 740 } 741 742 void MemoryAccess::foldAccessRelation() { 743 if (Sizes.size() < 2 || isa<SCEVConstant>(Sizes[1])) 744 return; 745 746 int Size = Subscripts.size(); 747 748 for (int i = Size - 2; i >= 0; --i) { 749 isl_space *Space; 750 isl_map *MapOne, *MapTwo; 751 isl_pw_aff *DimSize = getPwAff(Sizes[i + 1]); 752 753 isl_space *SpaceSize = isl_pw_aff_get_space(DimSize); 754 isl_pw_aff_free(DimSize); 755 isl_id *ParamId = isl_space_get_dim_id(SpaceSize, isl_dim_param, 0); 756 757 Space = isl_map_get_space(AccessRelation); 758 Space = isl_space_map_from_set(isl_space_range(Space)); 759 Space = isl_space_align_params(Space, SpaceSize); 760 761 int ParamLocation = isl_space_find_dim_by_id(Space, isl_dim_param, ParamId); 762 isl_id_free(ParamId); 763 764 MapOne = isl_map_universe(isl_space_copy(Space)); 765 for (int j = 0; j < Size; ++j) 766 MapOne = isl_map_equate(MapOne, isl_dim_in, j, isl_dim_out, j); 767 MapOne = isl_map_lower_bound_si(MapOne, isl_dim_in, i + 1, 0); 768 769 MapTwo = isl_map_universe(isl_space_copy(Space)); 770 for (int j = 0; j < Size; ++j) 771 if (j < i || j > i + 1) 772 MapTwo = isl_map_equate(MapTwo, isl_dim_in, j, isl_dim_out, j); 773 774 isl_local_space *LS = isl_local_space_from_space(Space); 775 isl_constraint *C; 776 C = isl_equality_alloc(isl_local_space_copy(LS)); 777 C = isl_constraint_set_constant_si(C, -1); 778 C = 
isl_constraint_set_coefficient_si(C, isl_dim_in, i, 1); 779 C = isl_constraint_set_coefficient_si(C, isl_dim_out, i, -1); 780 MapTwo = isl_map_add_constraint(MapTwo, C); 781 C = isl_equality_alloc(LS); 782 C = isl_constraint_set_coefficient_si(C, isl_dim_in, i + 1, 1); 783 C = isl_constraint_set_coefficient_si(C, isl_dim_out, i + 1, -1); 784 C = isl_constraint_set_coefficient_si(C, isl_dim_param, ParamLocation, 1); 785 MapTwo = isl_map_add_constraint(MapTwo, C); 786 MapTwo = isl_map_upper_bound_si(MapTwo, isl_dim_in, i + 1, -1); 787 788 MapOne = isl_map_union(MapOne, MapTwo); 789 AccessRelation = isl_map_apply_range(AccessRelation, MapOne); 790 } 791 792 isl_id *BaseAddrId = getScopArrayInfo()->getBasePtrId(); 793 auto Space = Statement->getDomainSpace(); 794 AccessRelation = isl_map_set_tuple_id( 795 AccessRelation, isl_dim_in, isl_space_get_tuple_id(Space, isl_dim_set)); 796 AccessRelation = 797 isl_map_set_tuple_id(AccessRelation, isl_dim_out, BaseAddrId); 798 AccessRelation = isl_map_gist_domain(AccessRelation, Statement->getDomain()); 799 isl_space_free(Space); 800 } 801 802 /// Check if @p Expr is divisible by @p Size. 803 static bool isDivisible(const SCEV *Expr, unsigned Size, ScalarEvolution &SE) { 804 assert(Size != 0); 805 if (Size == 1) 806 return true; 807 808 // Only one factor needs to be divisible. 809 if (auto *MulExpr = dyn_cast<SCEVMulExpr>(Expr)) { 810 for (auto *FactorExpr : MulExpr->operands()) 811 if (isDivisible(FactorExpr, Size, SE)) 812 return true; 813 return false; 814 } 815 816 // For other n-ary expressions (Add, AddRec, Max,...) all operands need 817 // to be divisble. 
818 if (auto *NAryExpr = dyn_cast<SCEVNAryExpr>(Expr)) { 819 for (auto *OpExpr : NAryExpr->operands()) 820 if (!isDivisible(OpExpr, Size, SE)) 821 return false; 822 return true; 823 } 824 825 auto *SizeSCEV = SE.getConstant(Expr->getType(), Size); 826 auto *UDivSCEV = SE.getUDivExpr(Expr, SizeSCEV); 827 auto *MulSCEV = SE.getMulExpr(UDivSCEV, SizeSCEV); 828 return MulSCEV == Expr; 829 } 830 831 void MemoryAccess::buildAccessRelation(const ScopArrayInfo *SAI) { 832 assert(!AccessRelation && "AccessReltation already built"); 833 834 // Initialize the invalid domain which describes all iterations for which the 835 // access relation is not modeled correctly. 836 auto *StmtInvalidDomain = getStatement()->getInvalidDomain(); 837 InvalidDomain = isl_set_empty(isl_set_get_space(StmtInvalidDomain)); 838 isl_set_free(StmtInvalidDomain); 839 840 isl_ctx *Ctx = isl_id_get_ctx(Id); 841 isl_id *BaseAddrId = SAI->getBasePtrId(); 842 843 if (getAccessInstruction() && isa<MemIntrinsic>(getAccessInstruction())) { 844 buildMemIntrinsicAccessRelation(); 845 AccessRelation = 846 isl_map_set_tuple_id(AccessRelation, isl_dim_out, BaseAddrId); 847 return; 848 } 849 850 if (!isAffine()) { 851 // We overapproximate non-affine accesses with a possible access to the 852 // whole array. For read accesses it does not make a difference, if an 853 // access must or may happen. However, for write accesses it is important to 854 // differentiate between writes that must happen and writes that may happen. 
    // createBasicAccessMap presumably produces the whole-array
    // overapproximation described above; its definition is outside this
    // chunk — TODO(review): confirm.
    if (!AccessRelation)
      AccessRelation = isl_map_from_basic_map(createBasicAccessMap(Statement));

    AccessRelation =
        isl_map_set_tuple_id(AccessRelation, isl_dim_out, BaseAddrId);
    return;
  }

  // Affine case: start from a universe map over the statement's iterators and
  // append one output dimension per subscript expression.
  isl_space *Space = isl_space_alloc(Ctx, 0, Statement->getNumIterators(), 0);
  AccessRelation = isl_map_universe(Space);

  for (int i = 0, Size = Subscripts.size(); i < Size; ++i) {
    isl_pw_aff *Affine = getPwAff(Subscripts[i]);
    isl_map *SubscriptMap = isl_map_from_pw_aff(Affine);
    AccessRelation = isl_map_flat_range_product(AccessRelation, SubscriptMap);
  }

  // Name the input tuple after the statement and the output tuple after the
  // accessed array, then simplify wrt. the statement's domain.
  Space = Statement->getDomainSpace();
  AccessRelation = isl_map_set_tuple_id(
      AccessRelation, isl_dim_in, isl_space_get_tuple_id(Space, isl_dim_set));
  AccessRelation =
      isl_map_set_tuple_id(AccessRelation, isl_dim_out, BaseAddrId);

  AccessRelation = isl_map_gist_domain(AccessRelation, Statement->getDomain());
  isl_space_free(Space);
}

/// Create a MemoryAccess for an instruction-based access.
///
/// The isl id encodes the statement name, the access type and the current
/// number of accesses in the statement (Stmt->size()); the access relation
/// itself is built later by buildAccessRelation().
MemoryAccess::MemoryAccess(ScopStmt *Stmt, Instruction *AccessInst,
                           AccessType AccType, Value *BaseAddress,
                           Type *ElementType, bool Affine,
                           ArrayRef<const SCEV *> Subscripts,
                           ArrayRef<const SCEV *> Sizes, Value *AccessValue,
                           ScopArrayInfo::MemoryKind Kind, StringRef BaseName)
    : Kind(Kind), AccType(AccType), RedType(RT_NONE), Statement(Stmt),
      InvalidDomain(nullptr), BaseAddr(BaseAddress), BaseName(BaseName),
      ElementType(ElementType), Sizes(Sizes.begin(), Sizes.end()),
      AccessInstruction(AccessInst), AccessValue(AccessValue), IsAffine(Affine),
      Subscripts(Subscripts.begin(), Subscripts.end()), AccessRelation(nullptr),
      NewAccessRelation(nullptr) {
  // Index into TypeStrings with the AccessType enumerator value.
  static const std::string TypeStrings[] = {"", "_Read", "_Write", "_MayWrite"};
  const std::string Access = TypeStrings[AccType] + utostr(Stmt->size()) + "_";

  std::string IdName =
      getIslCompatibleName(Stmt->getBaseName(), Access, BaseName);
  Id = isl_id_alloc(Stmt->getParent()->getIslCtx(),
                    IdName.c_str(), this);
}

/// Create a MemoryAccess defined directly by the relation @p AccRel.
///
/// Array metadata (sizes, element type, base pointer, name) is derived from
/// the ScopArrayInfo attached to the output tuple id of @p AccRel.
MemoryAccess::MemoryAccess(ScopStmt *Stmt, AccessType AccType,
                           __isl_take isl_map *AccRel)
    : Kind(ScopArrayInfo::MemoryKind::MK_Array), AccType(AccType),
      RedType(RT_NONE), Statement(Stmt), InvalidDomain(nullptr),
      AccessInstruction(nullptr), IsAffine(true), AccessRelation(nullptr),
      NewAccessRelation(AccRel) {
  auto *ArrayInfoId = isl_map_get_tuple_id(NewAccessRelation, isl_dim_out);
  auto *SAI = ScopArrayInfo::getFromId(ArrayInfoId);
  // The size of the outermost dimension is unknown (nullptr), the inner
  // dimension sizes are copied from the array info.
  Sizes.push_back(nullptr);
  for (unsigned i = 1; i < SAI->getNumberOfDimensions(); i++)
    Sizes.push_back(SAI->getDimensionSize(i));
  ElementType = SAI->getElementType();
  BaseAddr = SAI->getBasePtr();
  BaseName = SAI->getName();
  // Index into TypeStrings with the AccessType enumerator value.
  static const std::string TypeStrings[] = {"", "_Read", "_Write", "_MayWrite"};
  const std::string Access = TypeStrings[AccType] + utostr(Stmt->size()) + "_";

  std::string IdName =
      getIslCompatibleName(Stmt->getBaseName(), Access, BaseName);
  Id = isl_id_alloc(Stmt->getParent()->getIslCtx(), IdName.c_str(), this);
}

/// Simplify the invalid domain and the access relation with respect to the
/// SCoP's (parameter) context.
void MemoryAccess::realignParams() {
  auto *Ctx = Statement->getParent()->getContext();
  InvalidDomain = isl_set_gist_params(InvalidDomain, isl_set_copy(Ctx));
  AccessRelation = isl_map_gist_params(AccessRelation, Ctx);
}

const std::string MemoryAccess::getReductionOperatorStr() const {
  return MemoryAccess::getReductionOperatorStr(getReductionType());
}

__isl_give isl_id *MemoryAccess::getId() const { return isl_id_copy(Id); }

/// Print a reduction type; RT_NONE is printed as "NONE".
raw_ostream &polly::operator<<(raw_ostream &OS,
                               MemoryAccess::ReductionType RT) {
  if (RT == MemoryAccess::RT_NONE)
    OS << "NONE";
  else
    OS << MemoryAccess::getReductionOperatorStr(RT);
  return OS;
}

/// Print this access in human-readable form to @p OS.
void MemoryAccess::print(raw_ostream &OS) const {
  switch (AccType) {
  case READ:
    OS.indent(12) << "ReadAccess :=\t";
    break;
  case MUST_WRITE:
    OS.indent(12) <<
        "MustWriteAccess :=\t";
    break;
  case MAY_WRITE:
    OS.indent(12) << "MayWriteAccess :=\t";
    break;
  }
  OS << "[Reduction Type: " << getReductionType() << "] ";
  OS << "[Scalar: " << isScalarKind() << "]\n";
  OS.indent(16) << getOriginalAccessRelationStr() << ";\n";
  if (hasNewAccessRelation())
    OS.indent(11) << "new: " << getNewAccessRelationStr() << ";\n";
}

void MemoryAccess::dump() const { print(errs()); }

/// Compute the piecewise affine function for @p E in the context of this
/// access's statement and accumulate the conditions under which the
/// translation is invalid (restricted to the statement's domain) into
/// InvalidDomain.
__isl_give isl_pw_aff *MemoryAccess::getPwAff(const SCEV *E) {
  auto *Stmt = getStatement();
  PWACtx PWAC = Stmt->getParent()->getPwAff(E, Stmt->getEntryBlock());
  isl_set *StmtDom = isl_set_reset_tuple_id(getStatement()->getDomain());
  isl_set *NewInvalidDom = isl_set_intersect(StmtDom, PWAC.second);
  InvalidDomain = isl_set_union(InvalidDomain, NewInvalidDom);
  return PWAC.first;
}

// Create a map in the size of the provided set domain, that maps from the
// one element of the provided set domain to another element of the provided
// set domain.
// The mapping is limited to all points that are equal in all but the last
// dimension and for which the last dimension of the input is strictly smaller
// than the last dimension of the output.
//
// getEqualAndLarger(set[i0, i1, ..., iX]):
//
// set[i0, i1, ..., iX] -> set[o0, o1, ..., oX]
//   : i0 = o0, i1 = o1, ..., i(X-1) = o(X-1), iX < oX
//
static isl_map *getEqualAndLarger(__isl_take isl_space *setDomain) {
  isl_space *Space = isl_space_map_from_set(setDomain);
  isl_map *Map = isl_map_universe(Space);
  unsigned lastDimension = isl_map_dim(Map, isl_dim_in) - 1;

  // Set all but the last dimension to be equal for the input and output
  //
  //   input[i0, i1, ..., iX] -> output[o0, o1, ..., oX]
  //     : i0 = o0, i1 = o1, ..., i(X-1) = o(X-1)
  for (unsigned i = 0; i < lastDimension; ++i)
    Map = isl_map_equate(Map, isl_dim_in, i, isl_dim_out, i);

  // Set the last dimension of the input to be strictly smaller than the
  // last dimension of the output.
  //
  //   input[?,?,?,...,iX] -> output[?,?,?,...,oX] : iX < oX
  Map = isl_map_order_lt(Map, isl_dim_in, lastDimension, isl_dim_out,
                         lastDimension);
  return Map;
}

/// Compute the set of differences between the array elements accessed at a
/// schedule point and at the next (lexicographically smallest larger)
/// schedule point, i.e. the stride of this access under @p Schedule.
__isl_give isl_set *
MemoryAccess::getStride(__isl_take const isl_map *Schedule) const {
  // Schedule is annotated __isl_take; drop the const to consume it below.
  isl_map *S = const_cast<isl_map *>(Schedule);
  isl_map *AccessRelation = getAccessRelation();
  isl_space *Space = isl_space_range(isl_map_get_space(S));
  isl_map *NextScatt = getEqualAndLarger(Space);

  S = isl_map_reverse(S);
  // Keep only the immediately following schedule point for each point.
  NextScatt = isl_map_lexmin(NextScatt);

  // Translate both sides of the "next" relation from schedule points to the
  // array elements accessed there.
  NextScatt = isl_map_apply_range(NextScatt, isl_map_copy(S));
  NextScatt = isl_map_apply_range(NextScatt, isl_map_copy(AccessRelation));
  NextScatt = isl_map_apply_domain(NextScatt, S);
  NextScatt = isl_map_apply_domain(NextScatt, AccessRelation);

  isl_set *Deltas = isl_map_deltas(NextScatt);
  return Deltas;
}

/// Check whether the stride of this access under @p Schedule is exactly
/// @p StrideWidth in the innermost dimension and zero in all others.
bool MemoryAccess::isStrideX(__isl_take const isl_map *Schedule,
                             int StrideWidth) const {
  isl_set *Stride, *StrideX;
  bool IsStrideX;

  Stride = getStride(Schedule);
  StrideX =
      isl_set_universe(isl_set_get_space(Stride));
  // Build the reference set {[0, ..., 0, StrideWidth]} and test whether all
  // observed strides are contained in it.
  for (unsigned i = 0; i < isl_set_dim(StrideX, isl_dim_set) - 1; i++)
    StrideX = isl_set_fix_si(StrideX, isl_dim_set, i, 0);
  StrideX = isl_set_fix_si(StrideX, isl_dim_set,
                           isl_set_dim(StrideX, isl_dim_set) - 1, StrideWidth);
  IsStrideX = isl_set_is_subset(Stride, StrideX);

  isl_set_free(StrideX);
  isl_set_free(Stride);

  return IsStrideX;
}

bool MemoryAccess::isStrideZero(__isl_take const isl_map *Schedule) const {
  return isStrideX(Schedule, 0);
}

bool MemoryAccess::isStrideOne(__isl_take const isl_map *Schedule) const {
  return isStrideX(Schedule, 1);
}

/// Replace the (original) access relation by @p NewAccess.
void MemoryAccess::setAccessRelation(__isl_take isl_map *NewAccess) {
  isl_map_free(AccessRelation);
  AccessRelation = NewAccess;
}

/// Install @p NewAccess as the relation to be used instead of the original
/// one. In debug builds the new relation is verified first.
void MemoryAccess::setNewAccessRelation(__isl_take isl_map *NewAccess) {
  assert(NewAccess);

#ifndef NDEBUG
  // Check domain space compatibility.
  auto *NewSpace = isl_map_get_space(NewAccess);
  auto *NewDomainSpace = isl_space_domain(isl_space_copy(NewSpace));
  auto *OriginalDomainSpace = getStatement()->getDomainSpace();
  assert(isl_space_has_equal_tuples(OriginalDomainSpace, NewDomainSpace));
  isl_space_free(NewDomainSpace);
  isl_space_free(OriginalDomainSpace);

  // Check whether there is an access for every statement instance.
  auto *StmtDomain = getStatement()->getDomain();
  StmtDomain = isl_set_intersect_params(
      StmtDomain, getStatement()->getParent()->getContext());
  auto *NewDomain = isl_map_domain(isl_map_copy(NewAccess));
  assert(isl_set_is_subset(StmtDomain, NewDomain) &&
         "Partial accesses not supported");
  isl_set_free(NewDomain);
  isl_set_free(StmtDomain);

  // Check whether access dimensions correspond to number of dimensions of the
  // accesses array.
  auto *NewAccessSpace = isl_space_range(NewSpace);
  assert(isl_space_has_tuple_id(NewAccessSpace, isl_dim_set) &&
         "Must specify the array that is accessed");
  auto *NewArrayId = isl_space_get_tuple_id(NewAccessSpace, isl_dim_set);
  auto *SAI = static_cast<ScopArrayInfo *>(isl_id_get_user(NewArrayId));
  assert(SAI && "Must set a ScopArrayInfo");
  assert(!SAI->getBasePtrOriginSAI() &&
         "Indirect array not supported by codegen");
  auto Dims = SAI->getNumberOfDimensions();
  assert(isl_space_dim(NewAccessSpace, isl_dim_set) == Dims &&
         "Access dims must match array dims");
  isl_space_free(NewAccessSpace);
  isl_id_free(NewArrayId);
#endif

  isl_map_free(NewAccessRelation);
  NewAccessRelation = NewAccess;
}

//===----------------------------------------------------------------------===//

/// Return the schedule of this statement restricted (and gisted) to its
/// domain.
///
/// Returns a trivial zero schedule if the domain is empty or the schedule
/// does not reference the domain, and nullptr if the SCoP has no schedule
/// at all.
__isl_give isl_map *ScopStmt::getSchedule() const {
  isl_set *Domain = getDomain();
  if (isl_set_is_empty(Domain)) {
    isl_set_free(Domain);
    return isl_map_from_aff(
        isl_aff_zero_on_domain(isl_local_space_from_space(getDomainSpace())));
  }
  auto *Schedule = getParent()->getSchedule();
  if (!Schedule) {
    isl_set_free(Domain);
    return nullptr;
  }
  Schedule = isl_union_map_intersect_domain(
      Schedule, isl_union_set_from_set(isl_set_copy(Domain)));
  if (isl_union_map_is_empty(Schedule)) {
    isl_set_free(Domain);
    isl_union_map_free(Schedule);
    return isl_map_from_aff(
        isl_aff_zero_on_domain(isl_local_space_from_space(getDomainSpace())));
  }
  auto *M = isl_map_from_union_map(Schedule);
  M = isl_map_coalesce(M);
  M = isl_map_gist_domain(M, Domain);
  M = isl_map_coalesce(M);
  return M;
}

/// Compute the pw_aff for @p E in this statement's context and accumulate the
/// invalid conditions into the statement's invalid domain.
__isl_give isl_pw_aff *ScopStmt::getPwAff(const SCEV *E, bool NonNegative) {
  PWACtx PWAC = getParent()->getPwAff(E, getEntryBlock(), NonNegative);
  InvalidDomain = isl_set_union(InvalidDomain,
                                PWAC.second);
  return PWAC.first;
}

/// Replace the domain by the (asserted) subset @p NewDomain.
void ScopStmt::restrictDomain(__isl_take isl_set *NewDomain) {
  assert(isl_set_is_subset(NewDomain, Domain) &&
         "New domain is not a subset of old domain!");
  isl_set_free(Domain);
  Domain = NewDomain;
}

/// Build the access relation of every access in this statement, creating (or
/// reusing) the ScopArrayInfo object the access refers to.
void ScopStmt::buildAccessRelations() {
  Scop &S = *getParent();
  for (MemoryAccess *Access : MemAccs) {
    Type *ElementType = Access->getElementType();

    // Map the access kind onto the corresponding memory kind of the array.
    ScopArrayInfo::MemoryKind Ty;
    if (Access->isPHIKind())
      Ty = ScopArrayInfo::MK_PHI;
    else if (Access->isExitPHIKind())
      Ty = ScopArrayInfo::MK_ExitPHI;
    else if (Access->isValueKind())
      Ty = ScopArrayInfo::MK_Value;
    else
      Ty = ScopArrayInfo::MK_Array;

    auto *SAI = S.getOrCreateScopArrayInfo(Access->getBaseAddr(), ElementType,
                                           Access->Sizes, Ty);
    Access->buildAccessRelation(SAI);
  }
}

/// Register @p Access with this statement and index it for fast lookup:
/// array accesses by their instruction, value writes/reads by the accessed
/// value and PHI writes by the PHI node.
void ScopStmt::addAccess(MemoryAccess *Access) {
  Instruction *AccessInst = Access->getAccessInstruction();

  if (Access->isArrayKind()) {
    MemoryAccessList &MAL = InstructionToAccess[AccessInst];
    MAL.emplace_front(Access);
  } else if (Access->isValueKind() && Access->isWrite()) {
    Instruction *AccessVal = cast<Instruction>(Access->getAccessValue());
    assert(Parent.getStmtFor(AccessVal) == this);
    assert(!ValueWrites.lookup(AccessVal));

    ValueWrites[AccessVal] = Access;
  } else if (Access->isValueKind() && Access->isRead()) {
    Value *AccessVal = Access->getAccessValue();
    assert(!ValueReads.lookup(AccessVal));

    ValueReads[AccessVal] = Access;
  } else if (Access->isAnyPHIKind() && Access->isWrite()) {
    PHINode *PHI = cast<PHINode>(Access->getBaseAddr());
    assert(!PHIWrites.lookup(PHI));

    PHIWrites[PHI] = Access;
  }

  MemAccs.push_back(Access);
}

/// Simplify all accesses and the statement's domains with respect to the
/// SCoP's (parameter) context.
void ScopStmt::realignParams() {
  for (MemoryAccess *MA : *this)
    MA->realignParams();

  auto *Ctx = Parent.getContext();
  InvalidDomain = isl_set_gist_params(InvalidDomain, isl_set_copy(Ctx));
  Domain = isl_set_gist_params(Domain, Ctx);
}

/// Add @p BSet to the set @p User if @p BSet is bounded.
static isl_stat collectBoundedParts(__isl_take isl_basic_set *BSet,
                                    void *User) {
  isl_set **BoundedParts = static_cast<isl_set **>(User);
  if (isl_basic_set_is_bounded(BSet))
    *BoundedParts = isl_set_union(*BoundedParts, isl_set_from_basic_set(BSet));
  else
    isl_basic_set_free(BSet);
  return isl_stat_ok;
}

/// Return the bounded parts of @p S.
static __isl_give isl_set *collectBoundedParts(__isl_take isl_set *S) {
  isl_set *BoundedParts = isl_set_empty(isl_set_get_space(S));
  // Filter basic set by basic set via the callback overload above.
  isl_set_foreach_basic_set(S, collectBoundedParts, &BoundedParts);
  isl_set_free(S);
  return BoundedParts;
}

/// Compute the (un)bounded parts of @p S wrt. to dimension @p Dim.
///
/// @returns A separation of @p S into first an unbounded then a bounded subset,
///          both with regards to the dimension @p Dim.
static std::pair<__isl_give isl_set *, __isl_give isl_set *>
partitionSetParts(__isl_take isl_set *S, unsigned Dim) {

  // Lower-bound all dimensions by zero before the analysis.
  for (unsigned u = 0, e = isl_set_n_dim(S); u < e; u++)
    S = isl_set_lower_bound_si(S, isl_dim_set, u, 0);

  unsigned NumDimsS = isl_set_n_dim(S);
  isl_set *OnlyDimS = isl_set_copy(S);

  // Remove dimensions that are greater than Dim as they are not interesting.
  assert(NumDimsS >= Dim + 1);
  OnlyDimS =
      isl_set_project_out(OnlyDimS, isl_dim_set, Dim + 1, NumDimsS - Dim - 1);

  // Create artificial parametric upper bounds for dimensions smaller than Dim
  // as we are not interested in them.
  OnlyDimS = isl_set_insert_dims(OnlyDimS, isl_dim_param, 0, Dim);
  // For each dimension u < Dim add the bound set[u] <= param[u].
  for (unsigned u = 0; u < Dim; u++) {
    isl_constraint *C = isl_inequality_alloc(
        isl_local_space_from_space(isl_set_get_space(OnlyDimS)));
    C = isl_constraint_set_coefficient_si(C, isl_dim_param, u, 1);
    C = isl_constraint_set_coefficient_si(C, isl_dim_set, u, -1);
    OnlyDimS = isl_set_add_constraint(OnlyDimS, C);
  }

  // Collect all bounded parts of OnlyDimS.
  isl_set *BoundedParts = collectBoundedParts(OnlyDimS);

  // Create the dimensions greater than Dim again.
  BoundedParts = isl_set_insert_dims(BoundedParts, isl_dim_set, Dim + 1,
                                     NumDimsS - Dim - 1);

  // Remove the artificial upper bound parameters again.
  BoundedParts = isl_set_remove_dims(BoundedParts, isl_dim_param, 0, Dim);

  // Everything not bounded is, by definition, the unbounded part.
  isl_set *UnboundedParts = isl_set_subtract(S, isl_set_copy(BoundedParts));
  return std::make_pair(UnboundedParts, BoundedParts);
}

/// Set the dimension Ids from @p From in @p To.
static __isl_give isl_set *setDimensionIds(__isl_keep isl_set *From,
                                           __isl_take isl_set *To) {
  for (unsigned u = 0, e = isl_set_n_dim(From); u < e; u++) {
    isl_id *DimId = isl_set_get_dim_id(From, isl_dim_set, u);
    To = isl_set_set_dim_id(To, isl_dim_set, u, DimId);
  }
  return To;
}

/// Create the conditions under which @p L @p Pred @p R is true.
1274 static __isl_give isl_set *buildConditionSet(ICmpInst::Predicate Pred, 1275 __isl_take isl_pw_aff *L, 1276 __isl_take isl_pw_aff *R) { 1277 switch (Pred) { 1278 case ICmpInst::ICMP_EQ: 1279 return isl_pw_aff_eq_set(L, R); 1280 case ICmpInst::ICMP_NE: 1281 return isl_pw_aff_ne_set(L, R); 1282 case ICmpInst::ICMP_SLT: 1283 return isl_pw_aff_lt_set(L, R); 1284 case ICmpInst::ICMP_SLE: 1285 return isl_pw_aff_le_set(L, R); 1286 case ICmpInst::ICMP_SGT: 1287 return isl_pw_aff_gt_set(L, R); 1288 case ICmpInst::ICMP_SGE: 1289 return isl_pw_aff_ge_set(L, R); 1290 case ICmpInst::ICMP_ULT: 1291 return isl_pw_aff_lt_set(L, R); 1292 case ICmpInst::ICMP_UGT: 1293 return isl_pw_aff_gt_set(L, R); 1294 case ICmpInst::ICMP_ULE: 1295 return isl_pw_aff_le_set(L, R); 1296 case ICmpInst::ICMP_UGE: 1297 return isl_pw_aff_ge_set(L, R); 1298 default: 1299 llvm_unreachable("Non integer predicate not supported"); 1300 } 1301 } 1302 1303 /// Create the conditions under which @p L @p Pred @p R is true. 1304 /// 1305 /// Helper function that will make sure the dimensions of the result have the 1306 /// same isl_id's as the @p Domain. 1307 static __isl_give isl_set *buildConditionSet(ICmpInst::Predicate Pred, 1308 __isl_take isl_pw_aff *L, 1309 __isl_take isl_pw_aff *R, 1310 __isl_keep isl_set *Domain) { 1311 isl_set *ConsequenceCondSet = buildConditionSet(Pred, L, R); 1312 return setDimensionIds(Domain, ConsequenceCondSet); 1313 } 1314 1315 /// Build the conditions sets for the switch @p SI in the @p Domain. 1316 /// 1317 /// This will fill @p ConditionSets with the conditions under which control 1318 /// will be moved from @p SI to its successors. Hence, @p ConditionSets will 1319 /// have as many elements as @p SI has successors. 
1320 static bool 1321 buildConditionSets(ScopStmt &Stmt, SwitchInst *SI, Loop *L, 1322 __isl_keep isl_set *Domain, 1323 SmallVectorImpl<__isl_give isl_set *> &ConditionSets) { 1324 1325 Value *Condition = getConditionFromTerminator(SI); 1326 assert(Condition && "No condition for switch"); 1327 1328 Scop &S = *Stmt.getParent(); 1329 ScalarEvolution &SE = *S.getSE(); 1330 isl_pw_aff *LHS, *RHS; 1331 LHS = Stmt.getPwAff(SE.getSCEVAtScope(Condition, L)); 1332 1333 unsigned NumSuccessors = SI->getNumSuccessors(); 1334 ConditionSets.resize(NumSuccessors); 1335 for (auto &Case : SI->cases()) { 1336 unsigned Idx = Case.getSuccessorIndex(); 1337 ConstantInt *CaseValue = Case.getCaseValue(); 1338 1339 RHS = Stmt.getPwAff(SE.getSCEV(CaseValue)); 1340 isl_set *CaseConditionSet = 1341 buildConditionSet(ICmpInst::ICMP_EQ, isl_pw_aff_copy(LHS), RHS, Domain); 1342 ConditionSets[Idx] = isl_set_coalesce( 1343 isl_set_intersect(CaseConditionSet, isl_set_copy(Domain))); 1344 } 1345 1346 assert(ConditionSets[0] == nullptr && "Default condition set was set"); 1347 isl_set *ConditionSetUnion = isl_set_copy(ConditionSets[1]); 1348 for (unsigned u = 2; u < NumSuccessors; u++) 1349 ConditionSetUnion = 1350 isl_set_union(ConditionSetUnion, isl_set_copy(ConditionSets[u])); 1351 ConditionSets[0] = setDimensionIds( 1352 Domain, isl_set_subtract(isl_set_copy(Domain), ConditionSetUnion)); 1353 1354 isl_pw_aff_free(LHS); 1355 1356 return true; 1357 } 1358 1359 /// Build the conditions sets for the branch condition @p Condition in 1360 /// the @p Domain. 1361 /// 1362 /// This will fill @p ConditionSets with the conditions under which control 1363 /// will be moved from @p TI to its successors. Hence, @p ConditionSets will 1364 /// have as many elements as @p TI has successors. If @p TI is nullptr the 1365 /// context under which @p Condition is true/false will be returned as the 1366 /// new elements of @p ConditionSets. 
static bool
buildConditionSets(ScopStmt &Stmt, Value *Condition, TerminatorInst *TI,
                   Loop *L, __isl_keep isl_set *Domain,
                   SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {

  Scop &S = *Stmt.getParent();
  isl_set *ConsequenceCondSet = nullptr;
  if (auto *CCond = dyn_cast<ConstantInt>(Condition)) {
    // Constant conditions: empty set for "false", the whole space for "true".
    if (CCond->isZero())
      ConsequenceCondSet = isl_set_empty(isl_set_get_space(Domain));
    else
      ConsequenceCondSet = isl_set_universe(isl_set_get_space(Domain));
  } else if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) {
    auto Opcode = BinOp->getOpcode();
    assert(Opcode == Instruction::And || Opcode == Instruction::Or);

    // Recurse into both operands; each successful recursive call pushes two
    // sets (consequence, then alternative) onto ConditionSets.
    bool Valid = buildConditionSets(Stmt, BinOp->getOperand(0), TI, L, Domain,
                                    ConditionSets) &&
                 buildConditionSets(Stmt, BinOp->getOperand(1), TI, L, Domain,
                                    ConditionSets);
    if (!Valid) {
      // Clean up whatever the recursions produced before failing.
      while (!ConditionSets.empty())
        isl_set_free(ConditionSets.pop_back_val());
      return false;
    }

    // Drop the two alternative sets and keep the two consequence sets.
    isl_set_free(ConditionSets.pop_back_val());
    isl_set *ConsCondPart0 = ConditionSets.pop_back_val();
    isl_set_free(ConditionSets.pop_back_val());
    isl_set *ConsCondPart1 = ConditionSets.pop_back_val();

    // Combine the operand conditions according to the logical opcode.
    if (Opcode == Instruction::And)
      ConsequenceCondSet = isl_set_intersect(ConsCondPart0, ConsCondPart1);
    else
      ConsequenceCondSet = isl_set_union(ConsCondPart0, ConsCondPart1);
  } else {
    auto *ICond = dyn_cast<ICmpInst>(Condition);
    assert(ICond &&
           "Condition of exiting branch was neither constant nor ICmp!");

    ScalarEvolution &SE = *S.getSE();
    isl_pw_aff *LHS, *RHS;
    // For unsigned comparisons we assumed the signed bit of neither operand
    // to be set. The comparison is equal to a signed comparison under this
    // assumption.
    bool NonNeg = ICond->isUnsigned();
    LHS = Stmt.getPwAff(SE.getSCEVAtScope(ICond->getOperand(0), L), NonNeg);
    RHS = Stmt.getPwAff(SE.getSCEVAtScope(ICond->getOperand(1), L), NonNeg);
    ConsequenceCondSet =
        buildConditionSet(ICond->getPredicate(), LHS, RHS, Domain);
  }

  // If no terminator was given we are only looking for parameter constraints
  // under which @p Condition is true/false.
  if (!TI)
    ConsequenceCondSet = isl_set_params(ConsequenceCondSet);
  assert(ConsequenceCondSet);
  ConsequenceCondSet = isl_set_coalesce(
      isl_set_intersect(ConsequenceCondSet, isl_set_copy(Domain)));

  // Bail out (and invalidate the SCoP) if either branch's condition set would
  // consist of too many disjuncts; see MaxDisjunctionsInDomain.
  isl_set *AlternativeCondSet = nullptr;
  bool TooComplex =
      isl_set_n_basic_set(ConsequenceCondSet) >= MaxDisjunctionsInDomain;

  if (!TooComplex) {
    AlternativeCondSet = isl_set_subtract(isl_set_copy(Domain),
                                          isl_set_copy(ConsequenceCondSet));
    TooComplex =
        isl_set_n_basic_set(AlternativeCondSet) >= MaxDisjunctionsInDomain;
  }

  if (TooComplex) {
    S.invalidate(COMPLEXITY, TI ? TI->getDebugLoc() : DebugLoc());
    isl_set_free(AlternativeCondSet);
    isl_set_free(ConsequenceCondSet);
    return false;
  }

  ConditionSets.push_back(ConsequenceCondSet);
  ConditionSets.push_back(isl_set_coalesce(AlternativeCondSet));

  return true;
}

/// Build the conditions sets for the terminator @p TI in the @p Domain.
///
/// This will fill @p ConditionSets with the conditions under which control
/// will be moved from @p TI to its successors. Hence, @p ConditionSets will
/// have as many elements as @p TI has successors.
static bool
buildConditionSets(ScopStmt &Stmt, TerminatorInst *TI, Loop *L,
                   __isl_keep isl_set *Domain,
                   SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {

  // Switches are handled by the dedicated overload.
  if (SwitchInst *SI = dyn_cast<SwitchInst>(TI))
    return buildConditionSets(Stmt, SI, L, Domain, ConditionSets);

  assert(isa<BranchInst>(TI) && "Terminator was neither branch nor switch.");

  // An unconditional branch is always taken: its single successor is reached
  // on the whole domain.
  if (TI->getNumSuccessors() == 1) {
    ConditionSets.push_back(isl_set_copy(Domain));
    return true;
  }

  Value *Condition = getConditionFromTerminator(TI);
  assert(Condition && "No condition for Terminator");

  return buildConditionSets(Stmt, Condition, TI, L, Domain, ConditionSets);
}

/// Build this statement's domain from the domain conditions computed by the
/// parent SCoP and tag it with an id naming this statement.
void ScopStmt::buildDomain() {
  isl_id *Id = isl_id_alloc(getIslCtx(), getBaseName(), this);

  Domain = getParent()->getDomainConditions(this);
  Domain = isl_set_set_tuple_id(Domain, Id);
}

/// Collect, per domain dimension, the Loop stored as the user pointer of the
/// dimension's isl id (attached when the domain was constructed — set
/// outside this chunk).
void ScopStmt::collectSurroundingLoops() {
  for (unsigned u = 0, e = isl_set_n_dim(Domain); u < e; u++) {
    isl_id *DimId = isl_set_get_dim_id(Domain, isl_dim_set, u);
    NestLoops.push_back(static_cast<Loop *>(isl_id_get_user(DimId)));
    isl_id_free(DimId);
  }
}

/// Create a statement covering the region @p R.
ScopStmt::ScopStmt(Scop &parent, Region &R)
    : Parent(parent), InvalidDomain(nullptr), Domain(nullptr), BB(nullptr),
      R(&R), Build(nullptr) {

  BaseName = getIslCompatibleName("Stmt_", R.getNameStr(), "");
}

/// Create a statement covering the basic block @p bb.
ScopStmt::ScopStmt(Scop &parent, BasicBlock &bb)
    : Parent(parent), InvalidDomain(nullptr), Domain(nullptr), BB(&bb),
      R(nullptr), Build(nullptr) {

  BaseName = getIslCompatibleName("Stmt_", &bb, "");
}

/// Create a copy statement that reads through @p SourceRel and must-writes
/// through @p TargetRel on the iteration domain @p NewDomain.
ScopStmt::ScopStmt(Scop &parent, __isl_take isl_map *SourceRel,
                   __isl_take isl_map *TargetRel, __isl_take isl_set *NewDomain)
    : Parent(parent), InvalidDomain(nullptr), Domain(NewDomain), BB(nullptr),
      R(nullptr), Build(nullptr) {
  BaseName =
      getIslCompatibleName("CopyStmt_", "",
                           std::to_string(parent.getCopyStmtsNum()));
  auto *Id = isl_id_alloc(getIslCtx(), getBaseName(), this);
  Domain = isl_set_set_tuple_id(Domain, isl_id_copy(Id));
  // A copy statement has exactly two accesses: one must-write through
  // TargetRel and one read through SourceRel, both over this domain.
  TargetRel = isl_map_set_tuple_id(TargetRel, isl_dim_in, Id);
  auto *Access =
      new MemoryAccess(this, MemoryAccess::AccessType::MUST_WRITE, TargetRel);
  parent.addAccessFunction(Access);
  addAccess(Access);
  SourceRel = isl_map_set_tuple_id(SourceRel, isl_dim_in, isl_id_copy(Id));
  Access = new MemoryAccess(this, MemoryAccess::AccessType::READ, SourceRel);
  parent.addAccessFunction(Access);
  addAccess(Access);
}

/// Initialize a block- or region-statement: build its domain, collect the
/// surrounding loops and construct the access relations. Not applicable to
/// copy statements, whose domain is already set in their constructor (the
/// assert below would fire).
void ScopStmt::init(LoopInfo &LI) {
  assert(!Domain && "init must be called only once");

  buildDomain();
  collectSurroundingLoops();
  buildAccessRelations();

  // Reduction detection is optional; see checkForReductions().
  if (DetectReductions)
    checkForReductions();
}

/// Collect loads which might form a reduction chain with @p StoreMA.
///
/// Check if the stored value for @p StoreMA is a binary operator with one or
/// two loads as operands. If the binary operand is commutative & associative,
/// used only once (by @p StoreMA) and its load operands are also used only
/// once, we have found a possible reduction chain. It starts at an operand
/// load and includes the binary operator and @p StoreMA.
///
/// Note: We allow only one use to ensure the load and binary operator cannot
/// escape this block or into any other store except @p StoreMA.
1546 void ScopStmt::collectCandiateReductionLoads( 1547 MemoryAccess *StoreMA, SmallVectorImpl<MemoryAccess *> &Loads) { 1548 auto *Store = dyn_cast<StoreInst>(StoreMA->getAccessInstruction()); 1549 if (!Store) 1550 return; 1551 1552 // Skip if there is not one binary operator between the load and the store 1553 auto *BinOp = dyn_cast<BinaryOperator>(Store->getValueOperand()); 1554 if (!BinOp) 1555 return; 1556 1557 // Skip if the binary operators has multiple uses 1558 if (BinOp->getNumUses() != 1) 1559 return; 1560 1561 // Skip if the opcode of the binary operator is not commutative/associative 1562 if (!BinOp->isCommutative() || !BinOp->isAssociative()) 1563 return; 1564 1565 // Skip if the binary operator is outside the current SCoP 1566 if (BinOp->getParent() != Store->getParent()) 1567 return; 1568 1569 // Skip if it is a multiplicative reduction and we disabled them 1570 if (DisableMultiplicativeReductions && 1571 (BinOp->getOpcode() == Instruction::Mul || 1572 BinOp->getOpcode() == Instruction::FMul)) 1573 return; 1574 1575 // Check the binary operator operands for a candidate load 1576 auto *PossibleLoad0 = dyn_cast<LoadInst>(BinOp->getOperand(0)); 1577 auto *PossibleLoad1 = dyn_cast<LoadInst>(BinOp->getOperand(1)); 1578 if (!PossibleLoad0 && !PossibleLoad1) 1579 return; 1580 1581 // A load is only a candidate if it cannot escape (thus has only this use) 1582 if (PossibleLoad0 && PossibleLoad0->getNumUses() == 1) 1583 if (PossibleLoad0->getParent() == Store->getParent()) 1584 Loads.push_back(&getArrayAccessFor(PossibleLoad0)); 1585 if (PossibleLoad1 && PossibleLoad1->getNumUses() == 1) 1586 if (PossibleLoad1->getParent() == Store->getParent()) 1587 Loads.push_back(&getArrayAccessFor(PossibleLoad1)); 1588 } 1589 1590 /// Check for reductions in this ScopStmt. 1591 /// 1592 /// Iterate over all store memory accesses and check for valid binary reduction 1593 /// like chains. 
/// For all candidates we check if they have the same base address
/// and there are no other accesses which overlap with them. The base address
/// check rules out impossible reductions candidates early. The overlap check,
/// together with the "only one user" check in collectCandiateReductionLoads,
/// guarantees that none of the intermediate results will escape during
/// execution of the loop nest. We basically check here that no other memory
/// access can access the same memory as the potential reduction.
void ScopStmt::checkForReductions() {
  SmallVector<MemoryAccess *, 2> Loads;
  SmallVector<std::pair<MemoryAccess *, MemoryAccess *>, 4> Candidates;

  // First collect candidate load-store reduction chains by iterating over all
  // stores and collecting possible reduction loads.
  for (MemoryAccess *StoreMA : MemAccs) {
    if (StoreMA->isRead())
      continue;

    Loads.clear();
    collectCandiateReductionLoads(StoreMA, Loads);
    for (MemoryAccess *LoadMA : Loads)
      Candidates.push_back(std::make_pair(LoadMA, StoreMA));
  }

  // Then check each possible candidate pair.
  for (const auto &CandidatePair : Candidates) {
    bool Valid = true;
    isl_map *LoadAccs = CandidatePair.first->getAccessRelation();
    isl_map *StoreAccs = CandidatePair.second->getAccessRelation();

    // Skip those with obviously unequal base addresses.
    if (!isl_map_has_equal_space(LoadAccs, StoreAccs)) {
      isl_map_free(LoadAccs);
      isl_map_free(StoreAccs);
      continue;
    }

    // And check if the remaining for overlap with other memory accesses.
    isl_map *AllAccsRel = isl_map_union(LoadAccs, StoreAccs);
    AllAccsRel = isl_map_intersect_domain(AllAccsRel, getDomain());
    isl_set *AllAccs = isl_map_range(AllAccsRel);

    for (MemoryAccess *MA : MemAccs) {
      if (MA == CandidatePair.first || MA == CandidatePair.second)
        continue;

      isl_map *AccRel =
          isl_map_intersect_domain(MA->getAccessRelation(), getDomain());
      isl_set *Accs = isl_map_range(AccRel);

      // Only accesses in the same array space can overlap at all.
      if (isl_set_has_equal_space(AllAccs, Accs)) {
        isl_set *OverlapAccs = isl_set_intersect(Accs, isl_set_copy(AllAccs));
        Valid = Valid && isl_set_is_empty(OverlapAccs);
        isl_set_free(OverlapAccs);
      } else {
        isl_set_free(Accs);
      }
    }

    isl_set_free(AllAccs);
    if (!Valid)
      continue;

    const LoadInst *Load =
        dyn_cast<const LoadInst>(CandidatePair.first->getAccessInstruction());
    MemoryAccess::ReductionType RT =
        getReductionType(dyn_cast<BinaryOperator>(Load->user_back()), Load);

    // If no overlapping access was found we mark the load and store as
    // reduction like.
    CandidatePair.first->markAsReductionLike(RT);
    CandidatePair.second->markAsReductionLike(RT);
  }
}

std::string ScopStmt::getDomainStr() const { return stringFromIslObj(Domain); }

std::string ScopStmt::getScheduleStr() const {
  auto *S = getSchedule();
  // getSchedule() may return nullptr if the SCoP has no schedule.
  if (!S)
    return "";
  auto Str = stringFromIslObj(S);
  isl_map_free(S);
  return Str;
}

/// Replace the invalid domain by @p ID (taking ownership).
void ScopStmt::setInvalidDomain(__isl_take isl_set *ID) {
  isl_set_free(InvalidDomain);
  InvalidDomain = ID;
}

/// Return the block itself for block statements, the region's entry block
/// for region statements.
BasicBlock *ScopStmt::getEntryBlock() const {
  if (isBlockStmt())
    return getBasicBlock();
  return getRegion()->getEntry();
}

unsigned ScopStmt::getNumIterators() const { return NestLoops.size(); }

const char *ScopStmt::getBaseName() const { return BaseName.c_str(); }

Loop *ScopStmt::getLoopForDimension(unsigned Dimension) const {
  return NestLoops[Dimension];
}

isl_ctx *ScopStmt::getIslCtx() const { return Parent.getIslCtx(); }

__isl_give isl_set *ScopStmt::getDomain() const { return isl_set_copy(Domain); }

__isl_give isl_space *ScopStmt::getDomainSpace() const {
  return isl_set_get_space(Domain);
}

__isl_give isl_id *ScopStmt::getDomainId() const {
  return isl_set_get_tuple_id(Domain);
}

ScopStmt::~ScopStmt() {
  isl_set_free(Domain);
  isl_set_free(InvalidDomain);
}

/// Print this statement (name, domain, schedule and all accesses) to @p OS.
void ScopStmt::print(raw_ostream &OS) const {
  OS << "\t" << getBaseName() << "\n";
  OS.indent(12) << "Domain :=\n";

  if (Domain) {
    OS.indent(16) << getDomainStr() << ";\n";
  } else
    OS.indent(16) << "n/a\n";

  OS.indent(12) << "Schedule :=\n";

  if (Domain) {
    OS.indent(16) << getScheduleStr() << ";\n";
  } else
    OS.indent(16) << "n/a\n";

  for (MemoryAccess *Access : MemAccs)
    Access->print(OS);
}

void ScopStmt::dump() const {
print(dbgs()); } 1735 1736 void ScopStmt::removeMemoryAccess(MemoryAccess *MA) { 1737 // Remove the memory accesses from this statement 1738 // together with all scalar accesses that were caused by it. 1739 // MK_Value READs have no access instruction, hence would not be removed by 1740 // this function. However, it is only used for invariant LoadInst accesses, 1741 // its arguments are always affine, hence synthesizable, and therefore there 1742 // are no MK_Value READ accesses to be removed. 1743 auto Predicate = [&](MemoryAccess *Acc) { 1744 return Acc->getAccessInstruction() == MA->getAccessInstruction(); 1745 }; 1746 MemAccs.erase(std::remove_if(MemAccs.begin(), MemAccs.end(), Predicate), 1747 MemAccs.end()); 1748 InstructionToAccess.erase(MA->getAccessInstruction()); 1749 } 1750 1751 //===----------------------------------------------------------------------===// 1752 /// Scop class implement 1753 1754 void Scop::setContext(__isl_take isl_set *NewContext) { 1755 NewContext = isl_set_align_params(NewContext, isl_set_get_space(Context)); 1756 isl_set_free(Context); 1757 Context = NewContext; 1758 } 1759 1760 /// Remap parameter values but keep AddRecs valid wrt. invariant loads. 
struct SCEVSensitiveParameterRewriter
    : public SCEVRewriteVisitor<SCEVSensitiveParameterRewriter> {
  // Maps a Value to its representing invariant-load equivalence-class value.
  ValueToValueMap &VMap;

public:
  SCEVSensitiveParameterRewriter(ValueToValueMap &VMap, ScalarEvolution &SE)
      : SCEVRewriteVisitor(SE), VMap(VMap) {}

  /// Rewrite @p E by replacing SCEVUnknowns according to @p VMap.
  static const SCEV *rewrite(const SCEV *E, ScalarEvolution &SE,
                             ValueToValueMap &VMap) {
    SCEVSensitiveParameterRewriter SSPR(VMap, SE);
    return SSPR.visit(E);
  }

  // Split {Start,+,Step}<L> into Start + {0,+,Step}<L> so the start
  // expression can be rewritten without invalidating the AddRec.
  const SCEV *visitAddRecExpr(const SCEVAddRecExpr *E) {
    auto *Start = visit(E->getStart());
    auto *AddRec = SE.getAddRecExpr(SE.getConstant(E->getType(), 0),
                                    visit(E->getStepRecurrence(SE)),
                                    E->getLoop(), SCEV::FlagAnyWrap);
    return SE.getAddExpr(Start, AddRec);
  }

  const SCEV *visitUnknown(const SCEVUnknown *E) {
    if (auto *NewValue = VMap.lookup(E->getValue()))
      return SE.getUnknown(NewValue);
    return E;
  }
};

const SCEV *Scop::getRepresentingInvariantLoadSCEV(const SCEV *S) {
  return SCEVSensitiveParameterRewriter::rewrite(S, *SE, InvEquivClassVMap);
}

// Create and record an isl_id for @p Parameter, which must already have been
// inserted into Parameters but not yet have an id.
void Scop::createParameterId(const SCEV *Parameter) {
  assert(Parameters.count(Parameter));
  assert(!ParameterIds.count(Parameter));

  std::string ParameterName = "p_" + std::to_string(getNumParams() - 1);

  if (const SCEVUnknown *ValueParameter = dyn_cast<SCEVUnknown>(Parameter)) {
    Value *Val = ValueParameter->getValue();

    // If this parameter references a specific Value and this value has a name
    // we use this name as it is likely to be unique and more useful than just
    // a number.
    if (Val->hasName())
      ParameterName = Val->getName();
    else if (LoadInst *LI = dyn_cast<LoadInst>(Val)) {
      auto *LoadOrigin = LI->getPointerOperand()->stripInBoundsOffsets();
      if (LoadOrigin->hasName()) {
        ParameterName += "_loaded_from_";
        ParameterName +=
            LI->getPointerOperand()->stripInBoundsOffsets()->getName();
      }
    }
  }

  // Sanitize the name so isl accepts it as an identifier.
  ParameterName = getIslCompatibleName("", ParameterName, "");

  // The SCEV itself is attached as the id's user pointer so it can be
  // recovered later (see addUserAssumptions).
  auto *Id = isl_id_alloc(getIslCtx(), ParameterName.c_str(),
                          const_cast<void *>((const void *)Parameter));
  ParameterIds[Parameter] = Id;
}

void Scop::addParams(const ParameterSetTy &NewParameters) {
  for (const SCEV *Parameter : NewParameters) {
    // Normalize the SCEV to get the representing element for an invariant load.
    Parameter = extractConstantFactor(Parameter, *SE).second;
    Parameter = getRepresentingInvariantLoadSCEV(Parameter);

    if (Parameters.insert(Parameter))
      createParameterId(Parameter);
  }
}

__isl_give isl_id *Scop::getIdForParam(const SCEV *Parameter) {
  // Normalize the SCEV to get the representing element for an invariant load.
  Parameter = getRepresentingInvariantLoadSCEV(Parameter);
  return isl_id_copy(ParameterIds.lookup(Parameter));
}

// Intersect @p C with the parameter constraints under which at least one
// statement instance is executed.
__isl_give isl_set *
Scop::addNonEmptyDomainConstraints(__isl_take isl_set *C) const {
  isl_set *DomainContext = isl_union_set_params(getDomains());
  return isl_set_intersect_params(C, DomainContext);
}

bool Scop::isDominatedBy(const DominatorTree &DT, BasicBlock *BB) const {
  return DT.dominates(BB, getEntry());
}

void Scop::addUserAssumptions(DominatorTree &DT, LoopInfo &LI) {
  auto &F = getFunction();

  // TODO: Walk the DominatorTree from getRegion().getExit() to its root in
  // order to not iterate over blocks we skip anyways.
  for (auto &BB : F) {
    bool InScop = contains(&BB);
    // Assumptions are only usable if they are inside the SCoP or dominate
    // its entry (and thus always hold when the SCoP is executed).
    if (!InScop && !isDominatedBy(DT, &BB))
      continue;

    for (auto &Assumption : BB) {
      auto *CI = dyn_cast_or_null<IntrinsicInst>(&Assumption);
      if (!CI || CI->getNumArgOperands() != 1 ||
          CI->getIntrinsicID() != Intrinsic::assume)
        continue;

      auto *L = LI.getLoopFor(CI->getParent());
      auto *Val = CI->getArgOperand(0);
      ParameterSetTy DetectedParams;
      if (!isAffineConstraint(Val, &R, L, *SE, DetectedParams)) {
        emitOptimizationRemarkAnalysis(F.getContext(), DEBUG_TYPE, F,
                                       CI->getDebugLoc(),
                                       "Non-affine user assumption ignored.");
        continue;
      }

      // Collect all newly introduced parameters.
      ParameterSetTy NewParams;
      for (auto *Param : DetectedParams) {
        Param = extractConstantFactor(Param, *SE).second;
        Param = getRepresentingInvariantLoadSCEV(Param);
        if (Parameters.count(Param))
          continue;
        NewParams.insert(Param);
      }

      SmallVector<isl_set *, 2> ConditionSets;
      auto *TI = InScop ? CI->getParent()->getTerminator() : nullptr;
      auto &Stmt = InScop ? *getStmtFor(CI->getParent()) : *Stmts.begin();
      auto *Dom = InScop ? getDomainConditions(&Stmt) : isl_set_copy(Context);
      bool Valid = buildConditionSets(Stmt, Val, TI, L, Dom, ConditionSets);
      isl_set_free(Dom);

      if (!Valid)
        continue;

      // ConditionSets[1] is the set where the assumed condition does NOT
      // hold; its complement is the assumption context.
      isl_set *AssumptionCtx = nullptr;
      if (InScop) {
        AssumptionCtx = isl_set_complement(isl_set_params(ConditionSets[1]));
        isl_set_free(ConditionSets[0]);
      } else {
        AssumptionCtx = isl_set_complement(ConditionSets[1]);
        AssumptionCtx = isl_set_intersect(AssumptionCtx, ConditionSets[0]);
      }

      // Project out newly introduced parameters as they are not otherwise
      // useful.
      if (!NewParams.empty()) {
        for (unsigned u = 0; u < isl_set_n_param(AssumptionCtx); u++) {
          auto *Id = isl_set_get_dim_id(AssumptionCtx, isl_dim_param, u);
          auto *Param = static_cast<const SCEV *>(isl_id_get_user(Id));
          isl_id_free(Id);

          if (!NewParams.count(Param))
            continue;

          // u-- compensates for the dimension removed by the projection.
          AssumptionCtx =
              isl_set_project_out(AssumptionCtx, isl_dim_param, u--, 1);
        }
      }

      emitOptimizationRemarkAnalysis(
          F.getContext(), DEBUG_TYPE, F, CI->getDebugLoc(),
          "Use user assumption: " + stringFromIslObj(AssumptionCtx));
      Context = isl_set_intersect(Context, AssumptionCtx);
    }
  }
}

// Intersect the context with the user-provided -polly-context set, after
// validating that it matches the computed parameter space.
void Scop::addUserContext() {
  if (UserContextStr.empty())
    return;

  isl_set *UserContext =
      isl_set_read_from_str(getIslCtx(), UserContextStr.c_str());
  isl_space *Space = getParamSpace();
  if (isl_space_dim(Space, isl_dim_param) !=
      isl_set_dim(UserContext, isl_dim_param)) {
    auto SpaceStr = isl_space_to_str(Space);
    errs() << "Error: the context provided in -polly-context has not the same "
           << "number of dimensions than the computed context. Due to this "
           << "mismatch, the -polly-context option is ignored. Please provide "
           << "the context in the parameter space: " << SpaceStr << ".\n";
    free(SpaceStr);
    isl_set_free(UserContext);
    isl_space_free(Space);
    return;
  }

  for (unsigned i = 0; i < isl_space_dim(Space, isl_dim_param); i++) {
    auto *NameContext = isl_set_get_dim_name(Context, isl_dim_param, i);
    auto *NameUserContext = isl_set_get_dim_name(UserContext, isl_dim_param, i);

    if (strcmp(NameContext, NameUserContext) != 0) {
      auto SpaceStr = isl_space_to_str(Space);
      errs() << "Error: the name of dimension " << i
             << " provided in -polly-context "
             << "is '" << NameUserContext << "', but the name in the computed "
             << "context is '" << NameContext
             << "'. Due to this name mismatch, "
             << "the -polly-context option is ignored. Please provide "
             << "the context in the parameter space: " << SpaceStr << ".\n";
      free(SpaceStr);
      isl_set_free(UserContext);
      isl_space_free(Space);
      return;
    }

    UserContext =
        isl_set_set_dim_id(UserContext, isl_dim_param, i,
                           isl_space_get_dim_id(Space, isl_dim_param, i));
  }

  Context = isl_set_intersect(Context, UserContext);
  isl_space_free(Space);
}

// Group required invariant loads by (pointer SCEV, loaded type); the first
// load seen becomes the class representative and all later equivalent loads
// are mapped onto it via InvEquivClassVMap.
void Scop::buildInvariantEquivalenceClasses() {
  DenseMap<std::pair<const SCEV *, Type *>, LoadInst *> EquivClasses;

  const InvariantLoadsSetTy &RIL = getRequiredInvariantLoads();
  for (LoadInst *LInst : RIL) {
    const SCEV *PointerSCEV = SE->getSCEV(LInst->getPointerOperand());

    Type *Ty = LInst->getType();
    LoadInst *&ClassRep = EquivClasses[std::make_pair(PointerSCEV, Ty)];
    if (ClassRep) {
      InvEquivClassVMap[LInst] = ClassRep;
      continue;
    }

    ClassRep = LInst;
    InvariantEquivClasses.emplace_back(
        InvariantEquivClassTy{PointerSCEV, MemoryAccessList(), nullptr, Ty});
  }
}

// Initialize the three parameter contexts with a zero-dimensional space.
void Scop::buildContext() {
  isl_space *Space = isl_space_params_alloc(getIslCtx(), 0);
  Context = isl_set_universe(isl_space_copy(Space));
  InvalidContext = isl_set_empty(isl_space_copy(Space));
  AssumedContext = isl_set_universe(Space);
}

// Bound each parameter dimension of the context by the signed range that
// scalar evolution derived for the parameter.
void Scop::addParameterBounds() {
  unsigned PDim = 0;
  for (auto *Parameter : Parameters) {
    ConstantRange SRange = SE->getSignedRange(Parameter);
    Context = addRangeBoundsToSet(Context, SRange, PDim++, isl_dim_param);
  }
}

void Scop::realignParams() {
  // Add all parameters into a common model.
  isl_space *Space = isl_space_params_alloc(getIslCtx(), ParameterIds.size());

  unsigned PDim = 0;
  for (const auto *Parameter : Parameters) {
    isl_id *id = getIdForParam(Parameter);
    Space = isl_space_set_dim_id(Space, isl_dim_param, PDim++, id);
  }

  // Align the parameters of all data structures to the model.
  Context = isl_set_align_params(Context, Space);

  // As all parameters are known add bounds to them.
  addParameterBounds();

  for (ScopStmt &Stmt : *this)
    Stmt.realignParams();

  // Simplify the schedule according to the context too.
  Schedule = isl_schedule_gist_domain_params(Schedule, getContext());
}

/// Simplify @p AssumptionContext with the constraints known from @p S.
static __isl_give isl_set *
simplifyAssumptionContext(__isl_take isl_set *AssumptionContext,
                          const Scop &S) {
  // If we have modeled all blocks in the SCoP that have side effects we can
  // simplify the context with the constraints that are needed for anything to
  // be executed at all. However, if we have error blocks in the SCoP we already
  // assumed some parameter combinations cannot occur and removed them from the
  // domains, thus we cannot use the remaining domain to simplify the
  // assumptions.
  if (!S.hasErrorBlock()) {
    isl_set *DomainParameters = isl_union_set_params(S.getDomains());
    AssumptionContext =
        isl_set_gist_params(AssumptionContext, DomainParameters);
  }

  AssumptionContext = isl_set_gist_params(AssumptionContext, S.getContext());
  return AssumptionContext;
}

void Scop::simplifyContexts() {
  // The parameter constraints of the iteration domains give us a set of
  // constraints that need to hold for all cases where at least a single
  // statement iteration is executed in the whole scop. We now simplify the
  // assumed context under the assumption that such constraints hold and at
  // least a single statement iteration is executed. For cases where no
  // statement instances are executed, the assumptions we have taken about
  // the executed code do not matter and can be changed.
  //
  // WARNING: This only holds if the assumptions we have taken do not reduce
  //          the set of statement instances that are executed. Otherwise we
  //          may run into a case where the iteration domains suggest that
  //          for a certain set of parameter constraints no code is executed,
  //          but in the original program some computation would have been
  //          performed. In such a case, modifying the run-time conditions and
  //          possibly influencing the run-time check may cause certain scops
  //          to not be executed.
  //
  // Example:
  //
  //   When delinearizing the following code:
  //
  //     for (long i = 0; i < 100; i++)
  //       for (long j = 0; j < m; j++)
  //         A[i+p][j] = 1.0;
  //
  //   we assume that the condition m <= 0 or (m >= 1 and p >= 0) holds as
  //   otherwise we would access out of bound data. Now, knowing that code is
  //   only executed for the case m >= 0, it is sufficient to assume p >= 0.
  AssumedContext = simplifyAssumptionContext(AssumedContext, *this);
  InvalidContext = isl_set_align_params(InvalidContext, getParamSpace());
}

/// Add the minimal/maximal access in @p Set to @p User.
static isl_stat buildMinMaxAccess(__isl_take isl_set *Set, void *User) {
  Scop::MinMaxVectorTy *MinMaxAccesses = (Scop::MinMaxVectorTy *)User;
  isl_pw_multi_aff *MinPMA, *MaxPMA;
  isl_pw_aff *LastDimAff;
  isl_aff *OneAff;
  unsigned Pos;

  Set = isl_set_remove_divs(Set);

  // Bail out on overly complex access sets; returning isl_stat_error aborts
  // the enclosing isl_union_set_foreach_set and fails the computation.
  if (isl_set_n_basic_set(Set) >= MaxDisjunctionsInDomain) {
    isl_set_free(Set);
    return isl_stat_error;
  }

  // Restrict the number of parameters involved in the access as the lexmin/
  // lexmax computation will take too long if this number is high.
  //
  // Experiments with a simple test case using an i7 4800MQ:
  //
  //  #Parameters involved | Time (in sec)
  //            6          |     0.01
  //            7          |     0.04
  //            8          |     0.12
  //            9          |     0.40
  //           10          |     1.54
  //           11          |     6.78
  //           12          |    30.38
  //
  if (isl_set_n_param(Set) > RunTimeChecksMaxParameters) {
    unsigned InvolvedParams = 0;
    for (unsigned u = 0, e = isl_set_n_param(Set); u < e; u++)
      if (isl_set_involves_dims(Set, isl_dim_param, u, 1))
        InvolvedParams++;

    if (InvolvedParams > RunTimeChecksMaxParameters) {
      isl_set_free(Set);
      return isl_stat_error;
    }
  }

  MinPMA = isl_set_lexmin_pw_multi_aff(isl_set_copy(Set));
  MaxPMA = isl_set_lexmax_pw_multi_aff(isl_set_copy(Set));

  MinPMA = isl_pw_multi_aff_coalesce(MinPMA);
  MaxPMA = isl_pw_multi_aff_coalesce(MaxPMA);

  // Adjust the last dimension of the maximal access by one as we want to
  // enclose the accessed memory region by MinPMA and MaxPMA. The pointer
  // we test during code generation might now point after the end of the
  // allocated array but we will never dereference it anyway.
  assert(isl_pw_multi_aff_dim(MaxPMA, isl_dim_out) &&
         "Assumed at least one output dimension");
  Pos = isl_pw_multi_aff_dim(MaxPMA, isl_dim_out) - 1;
  LastDimAff = isl_pw_multi_aff_get_pw_aff(MaxPMA, Pos);
  OneAff = isl_aff_zero_on_domain(
      isl_local_space_from_space(isl_pw_aff_get_domain_space(LastDimAff)));
  OneAff = isl_aff_add_constant_si(OneAff, 1);
  LastDimAff = isl_pw_aff_add(LastDimAff, isl_pw_aff_from_aff(OneAff));
  MaxPMA = isl_pw_multi_aff_set_pw_aff(MaxPMA, Pos, LastDimAff);

  MinMaxAccesses->push_back(std::make_pair(MinPMA, MaxPMA));

  isl_set_free(Set);
  return isl_stat_ok;
}

// Return the access domain of @p MA with all set dimensions projected out,
// i.e., only the parameter constraints remain.
static __isl_give isl_set *getAccessDomain(MemoryAccess *MA) {
  isl_set *Domain = MA->getStatement()->getDomain();
  Domain = isl_set_project_out(Domain, isl_dim_set, 0, isl_set_n_dim(Domain));
  return isl_set_reset_tuple_id(Domain);
}

/// Wrapper function to calculate minimal/maximal accesses to each array.
static bool calculateMinMaxAccess(__isl_take isl_union_map *Accesses,
                                  __isl_take isl_union_set *Domains,
                                  Scop::MinMaxVectorTy &MinMaxAccesses) {

  Accesses = isl_union_map_intersect_domain(Accesses, Domains);
  isl_union_set *Locations = isl_union_map_range(Accesses);
  Locations = isl_union_set_coalesce(Locations);
  Locations = isl_union_set_detect_equalities(Locations);
  // buildMinMaxAccess returns isl_stat_error on overly complex sets, which
  // makes the foreach return non-zero and thus Valid false.
  bool Valid = (0 == isl_union_set_foreach_set(Locations, buildMinMaxAccess,
                                               &MinMaxAccesses));
  isl_union_set_free(Locations);
  return Valid;
}

/// Helper to treat non-affine regions and basic blocks the same.
///
///{

/// Return the block that is the representing block for @p RN.
static inline BasicBlock *getRegionNodeBasicBlock(RegionNode *RN) {
  return RN->isSubRegion() ? RN->getNodeAs<Region>()->getEntry()
                           : RN->getNodeAs<BasicBlock>();
}

/// Return the @p idx'th block that is executed after @p RN.
static inline BasicBlock *
getRegionNodeSuccessor(RegionNode *RN, TerminatorInst *TI, unsigned idx) {
  if (RN->isSubRegion()) {
    assert(idx == 0);
    return RN->getNodeAs<Region>()->getExit();
  }
  return TI->getSuccessor(idx);
}

/// Return the smallest loop surrounding @p RN.
static inline Loop *getRegionNodeLoop(RegionNode *RN, LoopInfo &LI) {
  if (!RN->isSubRegion())
    return LI.getLoopFor(RN->getNodeAs<BasicBlock>());

  // For a non-affine subregion, skip loops fully contained in the subregion
  // and report the first loop surrounding the whole subregion.
  Region *NonAffineSubRegion = RN->getNodeAs<Region>();
  Loop *L = LI.getLoopFor(NonAffineSubRegion->getEntry());
  while (L && NonAffineSubRegion->contains(L))
    L = L->getParentLoop();
  return L;
}

static inline unsigned getNumBlocksInRegionNode(RegionNode *RN) {
  if (!RN->isSubRegion())
    return 1;

  Region *R = RN->getNodeAs<Region>();
  return std::distance(R->block_begin(), R->block_end());
}

// Return true if @p RN (a block or a subregion) contains an error block.
static bool containsErrorBlock(RegionNode *RN, const Region &R, LoopInfo &LI,
                               const DominatorTree &DT) {
  if (!RN->isSubRegion())
    return isErrorBlock(*RN->getNodeAs<BasicBlock>(), R, LI, DT);
  for (BasicBlock *BB : RN->getNodeAs<Region>()->blocks())
    if (isErrorBlock(*BB, R, LI, DT))
      return true;
  return false;
}

///}

// Attach loop @p L as the tuple id of dimension @p Dim of @p Domain and give
// the dimension a trivial lower bound so it is referenced in the set.
static inline __isl_give isl_set *addDomainDimId(__isl_take isl_set *Domain,
                                                 unsigned Dim, Loop *L) {
  Domain = isl_set_lower_bound_si(Domain, isl_dim_set, Dim, -1);
  isl_id *DimId =
      isl_id_alloc(isl_set_get_ctx(Domain), nullptr, static_cast<void *>(L));
  return isl_set_set_dim_id(Domain, isl_dim_set, Dim, DimId);
}

__isl_give isl_set *Scop::getDomainConditions(const ScopStmt *Stmt) const {
  return getDomainConditions(Stmt->getEntryBlock());
}

__isl_give isl_set *Scop::getDomainConditions(BasicBlock *BB) const {
  auto DIt = DomainMap.find(BB);
  if (DIt != DomainMap.end())
    return isl_set_copy(DIt->getSecond());

  // If @p BB has no domain of its own, fall back to the domain of the
  // innermost region it is the entry of that has a recorded domain.
  auto &RI = *R.getRegionInfo();
  auto *BBR = RI.getRegionFor(BB);
  while (BBR->getEntry() == BB)
    BBR = BBR->getParent();
  return getDomainConditions(BBR->getEntry());
}

bool Scop::buildDomains(Region *R, DominatorTree &DT, LoopInfo &LI) {

  bool IsOnlyNonAffineRegion = isNonAffineSubRegion(R);
  auto *EntryBB = R->getEntry();
  auto *L = IsOnlyNonAffineRegion ? nullptr : LI.getLoopFor(EntryBB);
  int LD = getRelativeLoopDepth(L);
  auto *S = isl_set_universe(isl_space_set_alloc(getIslCtx(), 0, LD + 1));

  // Tag each dimension of the entry domain with its surrounding loop, from
  // the innermost (LD) outwards.
  while (LD-- >= 0) {
    S = addDomainDimId(S, LD + 1, L);
    L = L->getParentLoop();
  }

  // Initialize the invalid domain.
  auto *EntryStmt = getStmtFor(EntryBB);
  EntryStmt->setInvalidDomain(isl_set_empty(isl_set_get_space(S)));

  DomainMap[EntryBB] = S;

  if (IsOnlyNonAffineRegion)
    return !containsErrorBlock(R->getNode(), *R, LI, DT);

  if (!buildDomainsWithBranchConstraints(R, DT, LI))
    return false;

  if (!propagateDomainConstraints(R, DT, LI))
    return false;

  // Error blocks and blocks dominated by them have been assumed to never be
  // executed. Representing them in the Scop does not add any value. In fact,
  // it is likely to cause issues during construction of the ScopStmts. The
  // contents of error blocks have not been verified to be expressible and
  // will cause problems when building up a ScopStmt for them.
  // Furthermore, basic blocks dominated by error blocks may reference
  // instructions in the error block which, if the error block is not modeled,
  // can themselves not be constructed properly. To this end we will replace
  // the domains of error blocks and those only reachable via error blocks
  // with an empty set. Additionally, we will record for each block under which
  // parameter combination it would be reached via an error block in its
  // InvalidDomain. This information is needed during load hoisting.
  if (!propagateInvalidStmtDomains(R, DT, LI))
    return false;

  return true;
}

// If the loop is nonaffine/boxed, return the first non-boxed surrounding loop
// for Polly. If the loop is affine, return the loop itself. Do not call
// `getSCEVAtScope()` on the result of `getFirstNonBoxedLoopFor()`, as we need
// to analyze the memory accesses of the nonaffine/boxed loops.
static Loop *getFirstNonBoxedLoopFor(BasicBlock *BB, LoopInfo &LI,
                                     const BoxedLoopsSetTy &BoxedLoops) {
  auto *L = LI.getLoopFor(BB);
  while (BoxedLoops.count(L))
    L = L->getParentLoop();
  return L;
}

/// Adjust the dimensions of @p Dom that was constructed for @p OldL
/// to be compatible to domains constructed for loop @p NewL.
///
/// This function assumes @p NewL and @p OldL are equal or there is a CFG
/// edge from @p OldL to @p NewL.
static __isl_give isl_set *adjustDomainDimensions(Scop &S,
                                                  __isl_take isl_set *Dom,
                                                  Loop *OldL, Loop *NewL) {

  // If the loops are the same there is nothing to do.
  if (NewL == OldL)
    return Dom;

  int OldDepth = S.getRelativeLoopDepth(OldL);
  int NewDepth = S.getRelativeLoopDepth(NewL);
  // If both loops are non-affine loops there is nothing to do.
  if (OldDepth == -1 && NewDepth == -1)
    return Dom;

  // Distinguish three cases:
  //   1) The depth is the same but the loops are not.
  //      => One loop was left one was entered.
  //   2) The depth increased from OldL to NewL.
  //      => One loop was entered, none was left.
  //   3) The depth decreased from OldL to NewL.
  //      => Loops were left; the difference of the depths defines how many.
  if (OldDepth == NewDepth) {
    assert(OldL->getParentLoop() == NewL->getParentLoop());
    // Replace the innermost dimension: drop the constraints for the loop
    // that was left and add a fresh dimension for the loop that was entered.
    Dom = isl_set_project_out(Dom, isl_dim_set, NewDepth, 1);
    Dom = isl_set_add_dims(Dom, isl_dim_set, 1);
    Dom = addDomainDimId(Dom, NewDepth, NewL);
  } else if (OldDepth < NewDepth) {
    assert(OldDepth + 1 == NewDepth);
    auto &R = S.getRegion();
    (void)R;
    assert(NewL->getParentLoop() == OldL ||
           ((!OldL || !R.contains(OldL)) && R.contains(NewL)));
    Dom = isl_set_add_dims(Dom, isl_dim_set, 1);
    Dom = addDomainDimId(Dom, NewDepth, NewL);
  } else {
    assert(OldDepth > NewDepth);
    int Diff = OldDepth - NewDepth;
    int NumDim = isl_set_n_dim(Dom);
    assert(NumDim >= Diff);
    Dom = isl_set_project_out(Dom, isl_dim_set, NumDim - Diff, Diff);
  }

  return Dom;
}

bool Scop::propagateInvalidStmtDomains(Region *R, DominatorTree &DT,
                                       LoopInfo &LI) {
  auto &BoxedLoops = getBoxedLoops();

  ReversePostOrderTraversal<Region *> RTraversal(R);
  for (auto *RN : RTraversal) {

    // Recurse for affine subregions but go on for basic blocks and non-affine
    // subregions.
    if (RN->isSubRegion()) {
      Region *SubRegion = RN->getNodeAs<Region>();
      if (!isNonAffineSubRegion(SubRegion)) {
        propagateInvalidStmtDomains(SubRegion, DT, LI);
        continue;
      }
    }

    bool ContainsErrorBlock = containsErrorBlock(RN, getRegion(), LI, DT);
    BasicBlock *BB = getRegionNodeBasicBlock(RN);
    ScopStmt *Stmt = getStmtFor(BB);
    isl_set *&Domain = DomainMap[BB];
    assert(Domain && "Cannot propagate a nullptr");

    auto *InvalidDomain = Stmt->getInvalidDomain();
    bool IsInvalidBlock =
        ContainsErrorBlock || isl_set_is_subset(Domain, InvalidDomain);

    if (!IsInvalidBlock) {
      InvalidDomain = isl_set_intersect(InvalidDomain, isl_set_copy(Domain));
    } else {
      // The whole block is invalid: its domain becomes the invalid domain,
      // an ERRORBLOCK restriction is recorded, and the DomainMap entry is
      // cleared so the block is no longer modeled.
      isl_set_free(InvalidDomain);
      InvalidDomain = Domain;
      isl_set *DomPar = isl_set_params(isl_set_copy(Domain));
      recordAssumption(ERRORBLOCK, DomPar, BB->getTerminator()->getDebugLoc(),
                       AS_RESTRICTION);
      Domain = nullptr;
    }

    if (isl_set_is_empty(InvalidDomain)) {
      Stmt->setInvalidDomain(InvalidDomain);
      continue;
    }

    // Propagate the non-empty invalid domain to all successors inside the
    // SCoP (skipping backedges).
    auto *BBLoop = getRegionNodeLoop(RN, LI);
    auto *TI = BB->getTerminator();
    unsigned NumSuccs = RN->isSubRegion() ? 1 : TI->getNumSuccessors();
    for (unsigned u = 0; u < NumSuccs; u++) {
      auto *SuccBB = getRegionNodeSuccessor(RN, TI, u);
      auto *SuccStmt = getStmtFor(SuccBB);

      // Skip successors outside the SCoP.
      if (!SuccStmt)
        continue;

      // Skip backedges.
      if (DT.dominates(SuccBB, BB))
        continue;

      auto *SuccBBLoop = getFirstNonBoxedLoopFor(SuccBB, LI, BoxedLoops);
      auto *AdjustedInvalidDomain = adjustDomainDimensions(
          *this, isl_set_copy(InvalidDomain), BBLoop, SuccBBLoop);
      auto *SuccInvalidDomain = SuccStmt->getInvalidDomain();
      SuccInvalidDomain =
          isl_set_union(SuccInvalidDomain, AdjustedInvalidDomain);
      SuccInvalidDomain = isl_set_coalesce(SuccInvalidDomain);
      unsigned NumConjucts = isl_set_n_basic_set(SuccInvalidDomain);
      SuccStmt->setInvalidDomain(SuccInvalidDomain);

      // Check if the maximal number of domain disjunctions was reached.
      // In case this happens we will bail.
      if (NumConjucts < MaxDisjunctionsInDomain)
        continue;

      isl_set_free(InvalidDomain);
      invalidate(COMPLEXITY, TI->getDebugLoc());
      return false;
    }

    Stmt->setInvalidDomain(InvalidDomain);
  }

  return true;
}

void Scop::propagateDomainConstraintsToRegionExit(
    BasicBlock *BB, Loop *BBLoop,
    SmallPtrSetImpl<BasicBlock *> &FinishedExitBlocks, LoopInfo &LI) {

  // Check if the block @p BB is the entry of a region. If so we propagate its
  // domain to the exit block of the region. Otherwise we are done.
  auto *RI = R.getRegionInfo();
  auto *BBReg = RI ? RI->getRegionFor(BB) : nullptr;
  auto *ExitBB = BBReg ? BBReg->getExit() : nullptr;
  if (!BBReg || BBReg->getEntry() != BB || !contains(ExitBB))
    return;

  auto &BoxedLoops = getBoxedLoops();
  // Do not propagate the domain if there is a loop backedge inside the region
  // that would prevent the exit block from being executed.
  auto *L = BBLoop;
  while (L && contains(L)) {
    SmallVector<BasicBlock *, 4> LatchBBs;
    BBLoop->getLoopLatches(LatchBBs);
    for (auto *LatchBB : LatchBBs)
      if (BB != LatchBB && BBReg->contains(LatchBB))
        return;
    L = L->getParentLoop();
  }

  auto *Domain = DomainMap[BB];
  assert(Domain && "Cannot propagate a nullptr");

  auto *ExitBBLoop = getFirstNonBoxedLoopFor(ExitBB, LI, BoxedLoops);

  // Since the dimensions of @p BB and @p ExitBB might be different we have to
  // adjust the domain before we can propagate it.
  auto *AdjustedDomain =
      adjustDomainDimensions(*this, isl_set_copy(Domain), BBLoop, ExitBBLoop);
  auto *&ExitDomain = DomainMap[ExitBB];

  // If the exit domain is not yet created we set it otherwise we "add" the
  // current domain.
  ExitDomain =
      ExitDomain ? isl_set_union(AdjustedDomain, ExitDomain) : AdjustedDomain;

  // Initialize the invalid domain.
  auto *ExitStmt = getStmtFor(ExitBB);
  ExitStmt->setInvalidDomain(isl_set_empty(isl_set_get_space(ExitDomain)));

  FinishedExitBlocks.insert(ExitBB);
}

bool Scop::buildDomainsWithBranchConstraints(Region *R, DominatorTree &DT,
                                             LoopInfo &LI) {
  // To create the domain for each block in R we iterate over all blocks and
  // subregions in R and propagate the conditions under which the current region
  // element is executed. To this end we iterate in reverse post order over R as
  // it ensures that we first visit all predecessors of a region node (either a
  // basic block or a subregion) before we visit the region node itself.
  // Initially, only the domain for the SCoP region entry block is set and from
  // there we propagate the current domain to all successors, however we add the
  // condition that the successor is actually executed next.
  // As we are only interested in non-loop carried constraints here we can
  // simply skip loop back edges.

  SmallPtrSet<BasicBlock *, 8> FinishedExitBlocks;
  ReversePostOrderTraversal<Region *> RTraversal(R);
  for (auto *RN : RTraversal) {

    // Recurse for affine subregions but go on for basic blocks and non-affine
    // subregions.
    if (RN->isSubRegion()) {
      Region *SubRegion = RN->getNodeAs<Region>();
      if (!isNonAffineSubRegion(SubRegion)) {
        if (!buildDomainsWithBranchConstraints(SubRegion, DT, LI))
          return false;
        continue;
      }
    }

    if (containsErrorBlock(RN, getRegion(), LI, DT))
      HasErrorBlock = true;

    BasicBlock *BB = getRegionNodeBasicBlock(RN);
    TerminatorInst *TI = BB->getTerminator();

    // Blocks ending in unreachable have no successors to propagate to.
    if (isa<UnreachableInst>(TI))
      continue;

    isl_set *Domain = DomainMap.lookup(BB);
    if (!Domain)
      continue;
    MaxLoopDepth = std::max(MaxLoopDepth, isl_set_n_dim(Domain));

    auto *BBLoop = getRegionNodeLoop(RN, LI);
    // Propagate the domain from BB directly to blocks that have a superset
    // domain, at the moment only region exit nodes of regions that start in BB.
    propagateDomainConstraintsToRegionExit(BB, BBLoop, FinishedExitBlocks, LI);

    // If all successors of BB have been set a domain through the propagation
    // above we do not need to build condition sets but can just skip this
    // block. However, it is important to note that this is a local property
    // with regards to the region @p R. To this end FinishedExitBlocks is a
    // local variable.
    auto IsFinishedRegionExit = [&FinishedExitBlocks](BasicBlock *SuccBB) {
      return FinishedExitBlocks.count(SuccBB);
    };
    if (std::all_of(succ_begin(BB), succ_end(BB), IsFinishedRegionExit))
      continue;

    // Build the condition sets for the successor nodes of the current region
    // node. If it is a non-affine subregion we will always execute the single
    // exit node, hence the single entry node domain is the condition set. For
    // basic blocks we use the helper function buildConditionSets.
    SmallVector<isl_set *, 8> ConditionSets;
    if (RN->isSubRegion())
      ConditionSets.push_back(isl_set_copy(Domain));
    else if (!buildConditionSets(*getStmtFor(BB), TI, BBLoop, Domain,
                                 ConditionSets))
      return false;

    // Now iterate over the successors and set their initial domain based on
    // their condition set. We skip back edges here and have to be careful when
    // we leave a loop not to keep constraints over a dimension that doesn't
    // exist anymore.
    assert(RN->isSubRegion() || TI->getNumSuccessors() == ConditionSets.size());
    for (unsigned u = 0, e = ConditionSets.size(); u < e; u++) {
      isl_set *CondSet = ConditionSets[u];
      BasicBlock *SuccBB = getRegionNodeSuccessor(RN, TI, u);

      auto *SuccStmt = getStmtFor(SuccBB);
      // Skip blocks outside the region.
      if (!SuccStmt) {
        isl_set_free(CondSet);
        continue;
      }

      // If we propagate the domain of some block to "SuccBB" we do not have to
      // adjust the domain.
      if (FinishedExitBlocks.count(SuccBB)) {
        isl_set_free(CondSet);
        continue;
      }

      // Skip back edges.
      if (DT.dominates(SuccBB, BB)) {
        isl_set_free(CondSet);
        continue;
      }

      auto &BoxedLoops = getBoxedLoops();
      auto *SuccBBLoop = getFirstNonBoxedLoopFor(SuccBB, LI, BoxedLoops);
      CondSet = adjustDomainDimensions(*this, CondSet, BBLoop, SuccBBLoop);

      // Set the domain for the successor or merge it with an existing domain in
      // case there are multiple paths (without loop back edges) to the
      // successor block.
2598 isl_set *&SuccDomain = DomainMap[SuccBB]; 2599 2600 if (SuccDomain) { 2601 SuccDomain = isl_set_coalesce(isl_set_union(SuccDomain, CondSet)); 2602 } else { 2603 // Initialize the invalid domain. 2604 SuccStmt->setInvalidDomain(isl_set_empty(isl_set_get_space(CondSet))); 2605 SuccDomain = CondSet; 2606 } 2607 2608 // Check if the maximal number of domain disjunctions was reached. 2609 // In case this happens we will clean up and bail. 2610 if (isl_set_n_basic_set(SuccDomain) < MaxDisjunctionsInDomain) 2611 continue; 2612 2613 invalidate(COMPLEXITY, DebugLoc()); 2614 while (++u < ConditionSets.size()) 2615 isl_set_free(ConditionSets[u]); 2616 return false; 2617 } 2618 } 2619 2620 return true; 2621 } 2622 2623 __isl_give isl_set * 2624 Scop::getPredecessorDomainConstraints(BasicBlock *BB, 2625 __isl_keep isl_set *Domain, 2626 DominatorTree &DT, LoopInfo &LI) { 2627 // If @p BB is the ScopEntry we are done 2628 if (R.getEntry() == BB) 2629 return isl_set_universe(isl_set_get_space(Domain)); 2630 2631 // The set of boxed loops (loops in non-affine subregions) for this SCoP. 2632 auto &BoxedLoops = getBoxedLoops(); 2633 2634 // The region info of this function. 2635 auto &RI = *R.getRegionInfo(); 2636 2637 auto *BBLoop = getFirstNonBoxedLoopFor(BB, LI, BoxedLoops); 2638 2639 // A domain to collect all predecessor domains, thus all conditions under 2640 // which the block is executed. To this end we start with the empty domain. 2641 isl_set *PredDom = isl_set_empty(isl_set_get_space(Domain)); 2642 2643 // Set of regions of which the entry block domain has been propagated to BB. 2644 // all predecessors inside any of the regions can be skipped. 2645 SmallSet<Region *, 8> PropagatedRegions; 2646 2647 for (auto *PredBB : predecessors(BB)) { 2648 // Skip backedges. 2649 if (DT.dominates(BB, PredBB)) 2650 continue; 2651 2652 // If the predecessor is in a region we used for propagation we can skip it. 
2653 auto PredBBInRegion = [PredBB](Region *PR) { return PR->contains(PredBB); }; 2654 if (std::any_of(PropagatedRegions.begin(), PropagatedRegions.end(), 2655 PredBBInRegion)) { 2656 continue; 2657 } 2658 2659 // Check if there is a valid region we can use for propagation, thus look 2660 // for a region that contains the predecessor and has @p BB as exit block. 2661 auto *PredR = RI.getRegionFor(PredBB); 2662 while (PredR->getExit() != BB && !PredR->contains(BB)) 2663 PredR->getParent(); 2664 2665 // If a valid region for propagation was found use the entry of that region 2666 // for propagation, otherwise the PredBB directly. 2667 if (PredR->getExit() == BB) { 2668 PredBB = PredR->getEntry(); 2669 PropagatedRegions.insert(PredR); 2670 } 2671 2672 auto *PredBBDom = getDomainConditions(PredBB); 2673 auto *PredBBLoop = getFirstNonBoxedLoopFor(PredBB, LI, BoxedLoops); 2674 PredBBDom = adjustDomainDimensions(*this, PredBBDom, PredBBLoop, BBLoop); 2675 2676 PredDom = isl_set_union(PredDom, PredBBDom); 2677 } 2678 2679 return PredDom; 2680 } 2681 2682 bool Scop::propagateDomainConstraints(Region *R, DominatorTree &DT, 2683 LoopInfo &LI) { 2684 // Iterate over the region R and propagate the domain constrains from the 2685 // predecessors to the current node. In contrast to the 2686 // buildDomainsWithBranchConstraints function, this one will pull the domain 2687 // information from the predecessors instead of pushing it to the successors. 2688 // Additionally, we assume the domains to be already present in the domain 2689 // map here. However, we iterate again in reverse post order so we know all 2690 // predecessors have been visited before a block or non-affine subregion is 2691 // visited. 2692 2693 ReversePostOrderTraversal<Region *> RTraversal(R); 2694 for (auto *RN : RTraversal) { 2695 2696 // Recurse for affine subregions but go on for basic blocks and non-affine 2697 // subregions. 
    if (RN->isSubRegion()) {
      Region *SubRegion = RN->getNodeAs<Region>();
      if (!isNonAffineSubRegion(SubRegion)) {
        if (!propagateDomainConstraints(SubRegion, DT, LI))
          return false;
        continue;
      }
    }

    BasicBlock *BB = getRegionNodeBasicBlock(RN);
    isl_set *&Domain = DomainMap[BB];
    assert(Domain);

    // Under the union of all predecessor conditions we can reach this block.
    auto *PredDom = getPredecessorDomainConstraints(BB, Domain, DT, LI);
    Domain = isl_set_coalesce(isl_set_intersect(Domain, PredDom));
    Domain = isl_set_align_params(Domain, getParamSpace());

    // For loop headers of loops fully contained in the SCoP also add the
    // loop-bound constraints derived from the back-edge conditions.
    Loop *BBLoop = getRegionNodeLoop(RN, LI);
    if (BBLoop && BBLoop->getHeader() == BB && contains(BBLoop))
      if (!addLoopBoundsToHeaderDomain(BBLoop, LI))
        return false;
  }

  return true;
}

/// Create a map to map from a given iteration to a subsequent iteration.
///
/// This map maps from SetSpace -> SetSpace where the dimensions @p Dim
/// is incremented by one and all other dimensions are equal, e.g.,
///             [i0, i1, i2, i3] -> [i0, i1, i2 + 1, i3]
///
/// if @p Dim is 2 and @p SetSpace has 4 dimensions.
static __isl_give isl_map *
createNextIterationMap(__isl_take isl_space *SetSpace, unsigned Dim) {
  auto *MapSpace = isl_space_map_from_set(SetSpace);
  auto *NextIterationMap = isl_map_universe(isl_space_copy(MapSpace));
  // Equate all dimensions except @p Dim between input and output.
  for (unsigned u = 0; u < isl_map_n_in(NextIterationMap); u++)
    if (u != Dim)
      NextIterationMap =
          isl_map_equate(NextIterationMap, isl_dim_in, u, isl_dim_out, u);
  // Add the equality 1 + in[Dim] - out[Dim] == 0, i.e. out[Dim] = in[Dim] + 1.
  auto *C = isl_constraint_alloc_equality(isl_local_space_from_space(MapSpace));
  C = isl_constraint_set_constant_si(C, 1);
  C = isl_constraint_set_coefficient_si(C, isl_dim_in, Dim, 1);
  C = isl_constraint_set_coefficient_si(C, isl_dim_out, Dim, -1);
  NextIterationMap = isl_map_add_constraint(NextIterationMap, C);
  return NextIterationMap;
}

bool Scop::addLoopBoundsToHeaderDomain(Loop *L, LoopInfo &LI) {
  int LoopDepth = getRelativeLoopDepth(L);
  assert(LoopDepth >= 0 && "Loop in region should have at least depth one");

  BasicBlock *HeaderBB = L->getHeader();
  assert(DomainMap.count(HeaderBB));
  isl_set *&HeaderBBDom = DomainMap[HeaderBB];

  // Map from one header iteration to the next along dimension LoopDepth.
  isl_map *NextIterationMap =
      createNextIterationMap(isl_set_get_space(HeaderBBDom), LoopDepth);

  // Union of the conditions under which any latch takes the back edge;
  // starts empty and is accumulated over all latches below.
  isl_set *UnionBackedgeCondition =
      isl_set_empty(isl_set_get_space(HeaderBBDom));

  SmallVector<llvm::BasicBlock *, 4> LatchBlocks;
  L->getLoopLatches(LatchBlocks);

  for (BasicBlock *LatchBB : LatchBlocks) {

    // If the latch is only reachable via error statements we skip it.
2768 isl_set *LatchBBDom = DomainMap.lookup(LatchBB); 2769 if (!LatchBBDom) 2770 continue; 2771 2772 isl_set *BackedgeCondition = nullptr; 2773 2774 TerminatorInst *TI = LatchBB->getTerminator(); 2775 BranchInst *BI = dyn_cast<BranchInst>(TI); 2776 assert(BI && "Only branch instructions allowed in loop latches"); 2777 2778 if (BI->isUnconditional()) 2779 BackedgeCondition = isl_set_copy(LatchBBDom); 2780 else { 2781 SmallVector<isl_set *, 8> ConditionSets; 2782 int idx = BI->getSuccessor(0) != HeaderBB; 2783 if (!buildConditionSets(*getStmtFor(LatchBB), TI, L, LatchBBDom, 2784 ConditionSets)) { 2785 isl_map_free(NextIterationMap); 2786 isl_set_free(UnionBackedgeCondition); 2787 return false; 2788 } 2789 2790 // Free the non back edge condition set as we do not need it. 2791 isl_set_free(ConditionSets[1 - idx]); 2792 2793 BackedgeCondition = ConditionSets[idx]; 2794 } 2795 2796 int LatchLoopDepth = getRelativeLoopDepth(LI.getLoopFor(LatchBB)); 2797 assert(LatchLoopDepth >= LoopDepth); 2798 BackedgeCondition = 2799 isl_set_project_out(BackedgeCondition, isl_dim_set, LoopDepth + 1, 2800 LatchLoopDepth - LoopDepth); 2801 UnionBackedgeCondition = 2802 isl_set_union(UnionBackedgeCondition, BackedgeCondition); 2803 } 2804 2805 isl_map *ForwardMap = isl_map_lex_le(isl_set_get_space(HeaderBBDom)); 2806 for (int i = 0; i < LoopDepth; i++) 2807 ForwardMap = isl_map_equate(ForwardMap, isl_dim_in, i, isl_dim_out, i); 2808 2809 isl_set *UnionBackedgeConditionComplement = 2810 isl_set_complement(UnionBackedgeCondition); 2811 UnionBackedgeConditionComplement = isl_set_lower_bound_si( 2812 UnionBackedgeConditionComplement, isl_dim_set, LoopDepth, 0); 2813 UnionBackedgeConditionComplement = 2814 isl_set_apply(UnionBackedgeConditionComplement, ForwardMap); 2815 HeaderBBDom = isl_set_subtract(HeaderBBDom, UnionBackedgeConditionComplement); 2816 HeaderBBDom = isl_set_apply(HeaderBBDom, NextIterationMap); 2817 2818 auto Parts = partitionSetParts(HeaderBBDom, LoopDepth); 2819 HeaderBBDom 
= Parts.second; 2820 2821 // Check if there is a <nsw> tagged AddRec for this loop and if so do not add 2822 // the bounded assumptions to the context as they are already implied by the 2823 // <nsw> tag. 2824 if (Affinator.hasNSWAddRecForLoop(L)) { 2825 isl_set_free(Parts.first); 2826 return true; 2827 } 2828 2829 isl_set *UnboundedCtx = isl_set_params(Parts.first); 2830 recordAssumption(INFINITELOOP, UnboundedCtx, 2831 HeaderBB->getTerminator()->getDebugLoc(), AS_RESTRICTION); 2832 return true; 2833 } 2834 2835 MemoryAccess *Scop::lookupBasePtrAccess(MemoryAccess *MA) { 2836 auto *BaseAddr = SE->getSCEV(MA->getBaseAddr()); 2837 auto *PointerBase = dyn_cast<SCEVUnknown>(SE->getPointerBase(BaseAddr)); 2838 if (!PointerBase) 2839 return nullptr; 2840 2841 auto *PointerBaseInst = dyn_cast<Instruction>(PointerBase->getValue()); 2842 if (!PointerBaseInst) 2843 return nullptr; 2844 2845 auto *BasePtrStmt = getStmtFor(PointerBaseInst); 2846 if (!BasePtrStmt) 2847 return nullptr; 2848 2849 return BasePtrStmt->getArrayAccessOrNULLFor(PointerBaseInst); 2850 } 2851 2852 bool Scop::hasNonHoistableBasePtrInScop(MemoryAccess *MA, 2853 __isl_keep isl_union_map *Writes) { 2854 if (auto *BasePtrMA = lookupBasePtrAccess(MA)) { 2855 auto *NHCtx = getNonHoistableCtx(BasePtrMA, Writes); 2856 bool Hoistable = NHCtx != nullptr; 2857 isl_set_free(NHCtx); 2858 return !Hoistable; 2859 } 2860 2861 auto *BaseAddr = SE->getSCEV(MA->getBaseAddr()); 2862 auto *PointerBase = dyn_cast<SCEVUnknown>(SE->getPointerBase(BaseAddr)); 2863 if (auto *BasePtrInst = dyn_cast<Instruction>(PointerBase->getValue())) 2864 if (!isa<LoadInst>(BasePtrInst)) 2865 return contains(BasePtrInst); 2866 2867 return false; 2868 } 2869 2870 bool Scop::buildAliasChecks(AliasAnalysis &AA) { 2871 if (!PollyUseRuntimeAliasChecks) 2872 return true; 2873 2874 if (buildAliasGroups(AA)) { 2875 // Aliasing assumptions do not go through addAssumption but we still want to 2876 // collect statistics so we do it here explicitly. 
    if (MinMaxAliasGroups.size())
      AssumptionsAliasing++;
    return true;
  }

  // If a problem occurs while building the alias groups we need to delete
  // this SCoP and pretend it wasn't valid in the first place. To this end
  // we make the assumed context infeasible.
  invalidate(ALIASING, DebugLoc());

  DEBUG(dbgs() << "\n\nNOTE: Run time checks for " << getNameStr()
               << " could not be created as the number of parameters involved "
                  "is too high. The SCoP will be "
                  "dismissed.\nUse:\n\t--polly-rtc-max-parameters=X\nto adjust "
                  "the maximal number of parameters but be advised that the "
                  "compile time might increase exponentially.\n\n");
  return false;
}

bool Scop::buildAliasGroups(AliasAnalysis &AA) {
  // To create sound alias checks we perform the following steps:
  //   o) Use the alias analysis and an alias set tracker to build alias sets
  //      for all memory accesses inside the SCoP.
  //   o) For each alias set we then map the aliasing pointers back to the
  //      memory accesses we know, thus obtain groups of memory accesses which
  //      might alias.
  //   o) We divide each group based on the domains of the minimal/maximal
  //      accesses. That means two minimal/maximal accesses are only in a group
  //      if their access domains intersect, otherwise they are in different
  //      ones.
  //   o) We partition each group into read only and non read only accesses.
  //   o) For each group with more than one base pointer we then compute minimal
  //      and maximal accesses to each array of a group in read only and non
  //      read only partitions separately.
  using AliasGroupTy = SmallVector<MemoryAccess *, 4>;

  AliasSetTracker AST(AA);

  // Map each pointer value fed to the tracker back to its memory access, and
  // remember which base addresses are written somewhere in the SCoP.
  DenseMap<Value *, MemoryAccess *> PtrToAcc;
  DenseSet<Value *> HasWriteAccess;
  for (ScopStmt &Stmt : *this) {

    // Skip statements with an empty domain as they will never be executed.
    isl_set *StmtDomain = Stmt.getDomain();
    bool StmtDomainEmpty = isl_set_is_empty(StmtDomain);
    isl_set_free(StmtDomain);
    if (StmtDomainEmpty)
      continue;

    for (MemoryAccess *MA : Stmt) {
      if (MA->isScalarKind())
        continue;
      if (!MA->isRead())
        HasWriteAccess.insert(MA->getBaseAddr());
      MemAccInst Acc(MA->getAccessInstruction());
      // For the read side of a memory transfer the source operand is the
      // relevant pointer, not the (destination) pointer operand.
      if (MA->isRead() && isa<MemTransferInst>(Acc))
        PtrToAcc[cast<MemTransferInst>(Acc)->getRawSource()] = MA;
      else
        PtrToAcc[Acc.getPointerOperand()] = MA;
      AST.add(Acc);
    }
  }

  // Translate the tracker's may-alias sets into groups of memory accesses;
  // groups with fewer than two members need no runtime check.
  SmallVector<AliasGroupTy, 4> AliasGroups;
  for (AliasSet &AS : AST) {
    if (AS.isMustAlias() || AS.isForwardingAliasSet())
      continue;
    AliasGroupTy AG;
    for (auto &PR : AS)
      AG.push_back(PtrToAcc[PR.getValue()]);
    if (AG.size() < 2)
      continue;
    AliasGroups.push_back(std::move(AG));
  }

  // Split the alias groups based on their domain.
  // Accesses whose domains are disjoint from the rest of the group are moved
  // into a new group that is appended and processed by a later iteration.
  for (unsigned u = 0; u < AliasGroups.size(); u++) {
    AliasGroupTy NewAG;
    AliasGroupTy &AG = AliasGroups[u];
    AliasGroupTy::iterator AGI = AG.begin();
    isl_set *AGDomain = getAccessDomain(*AGI);
    while (AGI != AG.end()) {
      MemoryAccess *MA = *AGI;
      isl_set *MADomain = getAccessDomain(MA);
      if (isl_set_is_disjoint(AGDomain, MADomain)) {
        NewAG.push_back(MA);
        AGI = AG.erase(AGI);
        isl_set_free(MADomain);
      } else {
        AGDomain = isl_set_union(AGDomain, MADomain);
        AGI++;
      }
    }
    if (NewAG.size() > 1)
      AliasGroups.push_back(std::move(NewAG));
    isl_set_free(AGDomain);
  }

  auto &F = getFunction();
  MapVector<const Value *, SmallSetVector<MemoryAccess *, 8>> ReadOnlyPairs;
  SmallPtrSet<const Value *, 4> NonReadOnlyBaseValues;
  for (AliasGroupTy &AG : AliasGroups) {
    NonReadOnlyBaseValues.clear();
    ReadOnlyPairs.clear();

    if (AG.size() < 2) {
      AG.clear();
      continue;
    }

    // Partition the group: accesses to written base addresses stay in AG,
    // read-only accesses are collected per base address in ReadOnlyPairs.
    for (auto II = AG.begin(); II != AG.end();) {
      emitOptimizationRemarkAnalysis(
          F.getContext(), DEBUG_TYPE, F,
          (*II)->getAccessInstruction()->getDebugLoc(),
          "Possibly aliasing pointer, use restrict keyword.");

      Value *BaseAddr = (*II)->getBaseAddr();
      if (HasWriteAccess.count(BaseAddr)) {
        NonReadOnlyBaseValues.insert(BaseAddr);
        II++;
      } else {
        ReadOnlyPairs[BaseAddr].insert(*II);
        II = AG.erase(II);
      }
    }

    // If we don't have read only pointers check if there are at least two
    // non read only pointers, otherwise clear the alias group.
    if (ReadOnlyPairs.empty() && NonReadOnlyBaseValues.size() <= 1) {
      AG.clear();
      continue;
    }

    // If we don't have non read only pointers clear the alias group.
    if (NonReadOnlyBaseValues.empty()) {
      AG.clear();
      continue;
    }

    // Check if we have non-affine accesses left, if so bail out as we cannot
    // generate a good access range yet.
    for (auto *MA : AG) {
      if (!MA->isAffine()) {
        invalidate(ALIASING, MA->getAccessInstruction()->getDebugLoc());
        return false;
      }
      if (auto *BasePtrMA = lookupBasePtrAccess(MA))
        addRequiredInvariantLoad(
            cast<LoadInst>(BasePtrMA->getAccessInstruction()));
    }
    for (auto &ReadOnlyPair : ReadOnlyPairs)
      for (auto *MA : ReadOnlyPair.second) {
        if (!MA->isAffine()) {
          invalidate(ALIASING, MA->getAccessInstruction()->getDebugLoc());
          return false;
        }
        if (auto *BasePtrMA = lookupBasePtrAccess(MA))
          addRequiredInvariantLoad(
              cast<LoadInst>(BasePtrMA->getAccessInstruction()));
      }

    // Calculate minimal and maximal accesses for non read only accesses.
    MinMaxAliasGroups.emplace_back();
    MinMaxVectorPairTy &pair = MinMaxAliasGroups.back();
    MinMaxVectorTy &MinMaxAccessesNonReadOnly = pair.first;
    MinMaxVectorTy &MinMaxAccessesReadOnly = pair.second;
    MinMaxAccessesNonReadOnly.reserve(AG.size());

    isl_union_map *Accesses = isl_union_map_empty(getParamSpace());

    // AG contains only non read only accesses.
    for (MemoryAccess *MA : AG)
      Accesses = isl_union_map_add_map(Accesses, MA->getAccessRelation());

    bool Valid = calculateMinMaxAccess(Accesses, getDomains(),
                                       MinMaxAccessesNonReadOnly);

    // Bail out if the number of values we need to compare is too large.
    // This is important as the number of comparisons grows quadratically with
    // the number of values we need to compare.
    if (!Valid || (MinMaxAccessesNonReadOnly.size() + ReadOnlyPairs.size() >
                   RunTimeChecksMaxArraysPerGroup))
      return false;

    // Calculate minimal and maximal accesses for read only accesses.
    MinMaxAccessesReadOnly.reserve(ReadOnlyPairs.size());
    Accesses = isl_union_map_empty(getParamSpace());

    for (const auto &ReadOnlyPair : ReadOnlyPairs)
      for (MemoryAccess *MA : ReadOnlyPair.second)
        Accesses = isl_union_map_add_map(Accesses, MA->getAccessRelation());

    Valid =
        calculateMinMaxAccess(Accesses, getDomains(), MinMaxAccessesReadOnly);

    if (!Valid)
      return false;
  }

  return true;
}

/// Get the smallest loop that contains @p S but is not in @p S.
static Loop *getLoopSurroundingScop(Scop &S, LoopInfo &LI) {
  // Start with the smallest loop containing the entry and expand that
  // loop until it contains all blocks in the region. If there is a loop
  // containing all blocks in the region check if it is itself contained
  // and if so take the parent loop as it will be the smallest containing
  // the region but not contained by it.
  Loop *L = LI.getLoopFor(S.getEntry());
  while (L) {
    bool AllContained = true;
    for (auto *BB : S.blocks())
      AllContained &= L->contains(BB);
    if (AllContained)
      break;
    L = L->getParentLoop();
  }

  return L ? (S.contains(L) ? L->getParentLoop() : L) : nullptr;
}

Scop::Scop(Region &R, ScalarEvolution &ScalarEvolution, LoopInfo &LI,
           ScopDetection::DetectionContext &DC)
    : SE(&ScalarEvolution), R(R), IsOptimized(false),
      HasSingleExitEdge(R.getExitingBlock()), HasErrorBlock(false),
      MaxLoopDepth(0), CopyStmtsNum(0), DC(DC),
      IslCtx(isl_ctx_alloc(), isl_ctx_free), Context(nullptr),
      Affinator(this, LI), AssumedContext(nullptr), InvalidContext(nullptr),
      Schedule(nullptr) {
  // Optionally make isl abort on errors to ease debugging.
  if (IslOnErrorAbort)
    isl_options_set_on_error(getIslCtx(), ISL_ON_ERROR_ABORT);
  buildContext();
}

void Scop::foldSizeConstantsToRight() {
  isl_union_set *Accessed = isl_union_map_range(getAccesses());

  for (auto Array : arrays()) {
    if (Array->getNumberOfDimensions() <= 1)
      continue;

    isl_space *Space = Array->getSpace();

    Space = isl_space_align_params(Space, isl_union_set_get_space(Accessed));

    // Skip arrays that are never accessed.
    if (!isl_union_set_contains(Accessed, Space)) {
      isl_space_free(Space);
      continue;
    }

    isl_set *Elements = isl_union_set_extract_set(Accessed, Space);

    isl_map *Transform =
        isl_map_universe(isl_space_map_from_set(Array->getSpace()));

    // Int[i] records the constant factor detected for dimension i: 0 if the
    // dimension is always zero, the div denominator if one exists, 1 otherwise.
    std::vector<int> Int;

    int Dims = isl_set_dim(Elements, isl_dim_set);
    for (int i = 0; i < Dims; i++) {
      // Isolate dimension i of the accessed elements.
      isl_set *DimOnly =
          isl_set_project_out(isl_set_copy(Elements), isl_dim_set, 0, i);
      DimOnly = isl_set_project_out(DimOnly, isl_dim_set, 1, Dims - i - 1);
      DimOnly = isl_set_lower_bound_si(DimOnly, isl_dim_set, 0, 0);

      isl_basic_set *DimHull = isl_set_affine_hull(DimOnly);

      // The innermost dimension is kept as-is.
      if (i == Dims - 1) {
        Int.push_back(1);
        Transform = isl_map_equate(Transform, isl_dim_in, i, isl_dim_out, i);
        isl_basic_set_free(DimHull);
        continue;
      }

      if (isl_basic_set_dim(DimHull, isl_dim_div) == 1) {
        isl_aff *Diff = isl_basic_set_get_div(DimHull, 0);
        isl_val *Val = isl_aff_get_denominator_val(Diff);
        isl_aff_free(Diff);

        int ValInt = 1;

        if (isl_val_is_int(Val))
          ValInt = isl_val_get_num_si(Val);
        isl_val_free(Val);

        Int.push_back(ValInt);

        // Constrain out[i] * ValInt == in[i], i.e. divide dimension i by the
        // detected constant factor.
        isl_constraint *C = isl_constraint_alloc_equality(
            isl_local_space_from_space(isl_map_get_space(Transform)));
        C = isl_constraint_set_coefficient_si(C, isl_dim_out, i, ValInt);
        C = isl_constraint_set_coefficient_si(C, isl_dim_in, i, -1);
        Transform = isl_map_add_constraint(Transform, C);
        isl_basic_set_free(DimHull);
        continue;
      }

      // Check whether dimension i is constantly zero.
      isl_basic_set *ZeroSet = isl_basic_set_copy(DimHull);
      ZeroSet = isl_basic_set_fix_si(ZeroSet, isl_dim_set, 0, 0);

      int ValInt = 1;
      if (isl_basic_set_is_equal(ZeroSet, DimHull)) {
        ValInt = 0;
      }

      Int.push_back(ValInt);
      Transform = isl_map_equate(Transform, isl_dim_in, i, isl_dim_out, i);
      isl_basic_set_free(DimHull);
      isl_basic_set_free(ZeroSet);
    }

    isl_set *MappedElements = isl_map_domain(isl_map_copy(Transform));

    // The transform must cover all accessed elements, otherwise folding would
    // lose accesses.
    if (!isl_set_is_subset(Elements, MappedElements)) {
      isl_set_free(Elements);
      isl_set_free(MappedElements);
      isl_map_free(Transform);
      continue;
    }

    isl_set_free(MappedElements);

    bool CanFold = true;

    if (Int[0] <= 1)
      CanFold = false;

    // All middle dimensions must share the factor of dimension 0 (or be 0).
    unsigned NumDims = Array->getNumberOfDimensions();
    for (unsigned i = 1; i < NumDims - 1; i++)
      if (Int[0] != Int[i] && Int[i])
        CanFold = false;

    if (!CanFold) {
      isl_set_free(Elements);
      isl_map_free(Transform);
      continue;
    }

    // Rewrite every access relation to this array through the fold transform.
    for (auto &Access : AccessFunctions)
      if (Access->getScopArrayInfo() == Array)
        Access->setAccessRelation(isl_map_apply_range(
            Access->getAccessRelation(), isl_map_copy(Transform)));

    isl_map_free(Transform);

    // Move the folded constant factor into the innermost dimension size.
    std::vector<const SCEV *> Sizes;
    for (unsigned i = 0; i < NumDims; i++) {
      auto Size = Array->getDimensionSize(i);

      if (i == NumDims - 1)
        Size = SE->getMulExpr(Size, SE->getConstant(Size->getType(), Int[0]));
      Sizes.push_back(Size);
    }

    Array->updateSizes(Sizes, false /* CheckConsistency */);

    isl_set_free(Elements);
  }
  isl_union_set_free(Accessed);
  return;
}

void Scop::finalizeAccesses() {
  updateAccessDimensionality();
  foldSizeConstantsToRight();
  foldAccessRelations();
  assumeNoOutOfBounds();
}

void Scop::init(AliasAnalysis &AA, DominatorTree &DT, LoopInfo &LI) {
  buildInvariantEquivalenceClasses();

  if (!buildDomains(&R, DT, LI))
    return;

  addUserAssumptions(DT, LI);

  // Remove empty statements.
  // Exit early in case there are no executable statements left in this scop.
  simplifySCoP(false);
  if (Stmts.empty())
    return;

  // The ScopStmts now have enough information to initialize themselves.
  for (ScopStmt &Stmt : Stmts)
    Stmt.init(LI);

  // Check early for a feasible runtime context.
  if (!hasFeasibleRuntimeContext())
    return;

  // Check early for profitability. Afterwards it cannot change anymore,
  // only the runtime context could become infeasible.
  if (!isProfitable()) {
    invalidate(PROFITABLE, DebugLoc());
    return;
  }

  buildSchedule(LI);

  finalizeAccesses();

  realignParams();
  addUserContext();

  // After the context was fully constructed, thus all our knowledge about
  // the parameters is in there, we add all recorded assumptions to the
  // assumed/invalid context.
  addRecordedAssumptions();

  simplifyContexts();
  if (!buildAliasChecks(AA))
    return;

  hoistInvariantLoads();
  verifyInvariantLoads();
  simplifySCoP(true);

  // Check late for a feasible runtime context because profitability did not
  // change.
  if (!hasFeasibleRuntimeContext())
    return;
}

Scop::~Scop() {
  isl_set_free(Context);
  isl_set_free(AssumedContext);
  isl_set_free(InvalidContext);
  isl_schedule_free(Schedule);

  for (auto &It : ParameterIds)
    isl_id_free(It.second);

  for (auto It : DomainMap)
    isl_set_free(It.second);

  for (auto &AS : RecordedAssumptions)
    isl_set_free(AS.Set);

  // Free the alias groups
  for (MinMaxVectorPairTy &MinMaxAccessPair : MinMaxAliasGroups) {
    for (MinMaxAccessTy &MMA : MinMaxAccessPair.first) {
      isl_pw_multi_aff_free(MMA.first);
      isl_pw_multi_aff_free(MMA.second);
    }
    for (MinMaxAccessTy &MMA : MinMaxAccessPair.second) {
      isl_pw_multi_aff_free(MMA.first);
      isl_pw_multi_aff_free(MMA.second);
    }
  }

  for (const auto &IAClass : InvariantEquivClasses)
    isl_set_free(IAClass.ExecutionContext);

  // Explicitly release all Scop objects and the underlying isl objects before
  // we release the isl context.
  Stmts.clear();
  ScopArrayInfoSet.clear();
  ScopArrayInfoMap.clear();
  ScopArrayNameMap.clear();
  AccessFunctions.clear();
}

void Scop::updateAccessDimensionality() {
  // Check all array accesses for each base pointer and find a (virtual) element
  // size for the base pointer that divides all access functions.
  for (auto &Stmt : *this)
    for (auto *Access : Stmt) {
      if (!Access->isArrayKind())
        continue;
      auto &SAI = ScopArrayInfoMap[std::make_pair(Access->getBaseAddr(),
                                                  ScopArrayInfo::MK_Array)];
      if (SAI->getNumberOfDimensions() != 1)
        continue;
      // Halve the candidate element size until it divides the subscript.
      unsigned DivisibleSize = SAI->getElemSizeInBytes();
      auto *Subscript = Access->getSubscript(0);
      while (!isDivisible(Subscript, DivisibleSize, *SE))
        DivisibleSize /= 2;
      auto *Ty = IntegerType::get(SE->getContext(), DivisibleSize * 8);
      SAI->updateElementType(Ty);
    }

  for (auto &Stmt : *this)
    for (auto &Access : Stmt)
      Access->updateDimensionality();
}

void Scop::foldAccessRelations() {
  for (auto &Stmt : *this)
    for (auto &Access : Stmt)
      Access->foldAccessRelation();
}

void Scop::assumeNoOutOfBounds() {
  for (auto &Stmt : *this)
    for (auto &Access : Stmt)
      Access->assumeNoOutOfBound();
}

void Scop::simplifySCoP(bool AfterHoisting) {
  for (auto StmtIt = Stmts.begin(), StmtEnd = Stmts.end(); StmtIt != StmtEnd;) {
    ScopStmt &Stmt = *StmtIt;

    // A statement is removed if it is empty or if its entry block has no
    // domain. Note that operator[] default-inserts a null entry for blocks
    // that were never assigned a domain; a null domain marks removal.
    bool RemoveStmt = Stmt.isEmpty();
    if (!RemoveStmt)
      RemoveStmt = !DomainMap[Stmt.getEntryBlock()];

    // Remove read only statements only after invariant loop hoisting.
    if (!RemoveStmt && AfterHoisting) {
      bool OnlyRead = true;
      for (MemoryAccess *MA : Stmt) {
        if (MA->isRead())
          continue;

        OnlyRead = false;
        break;
      }

      RemoveStmt = OnlyRead;
    }

    if (!RemoveStmt) {
      StmtIt++;
      continue;
    }

    // Remove the statement because it is unnecessary.
    if (Stmt.isRegionStmt())
      for (BasicBlock *BB : Stmt.getRegion()->blocks())
        StmtMap.erase(BB);
    else
      StmtMap.erase(Stmt.getBasicBlock());

    StmtIt = Stmts.erase(StmtIt);
  }
}

InvariantEquivClassTy *Scop::lookupInvariantEquivClass(Value *Val) {
  LoadInst *LInst = dyn_cast<LoadInst>(Val);
  if (!LInst)
    return nullptr;

  // Resolve to the representative load of the equivalence class, if any.
  if (Value *Rep = InvEquivClassVMap.lookup(LInst))
    LInst = cast<LoadInst>(Rep);

  Type *Ty = LInst->getType();
  const SCEV *PointerSCEV = SE->getSCEV(LInst->getPointerOperand());
  for (auto &IAClass : InvariantEquivClasses) {
    if (PointerSCEV != IAClass.IdentifyingPointer || Ty != IAClass.AccessType)
      continue;

    auto &MAs = IAClass.InvariantAccesses;
    for (auto *MA : MAs)
      if (MA->getAccessInstruction() == Val)
        return &IAClass;
  }

  return nullptr;
}

/// Check if @p MA can always be hoisted without execution context.
static bool canAlwaysBeHoisted(MemoryAccess *MA, bool StmtInvalidCtxIsEmpty,
                               bool MAInvalidCtxIsEmpty,
                               bool NonHoistableCtxIsEmpty) {
  LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction());
  const DataLayout &DL = LInst->getParent()->getModule()->getDataLayout();
  // TODO: We can provide more information for better but more expensive
  //       results.
  if (!isDereferenceableAndAlignedPointer(LInst->getPointerOperand(),
                                          LInst->getAlignment(), DL))
    return false;

  // If the location might be overwritten we do not hoist it unconditionally.
  //
  // TODO: This is probably to conservative.
  if (!NonHoistableCtxIsEmpty)
    return false;

  // If a dereferencable load is in a statement that is modeled precisely we can
3456 if (StmtInvalidCtxIsEmpty && MAInvalidCtxIsEmpty) 3457 return true; 3458 3459 // Even if the statement is not modeled precisely we can hoist the load if it 3460 // does not involve any parameters that might have been specialized by the 3461 // statement domain. 3462 for (unsigned u = 0, e = MA->getNumSubscripts(); u < e; u++) 3463 if (!isa<SCEVConstant>(MA->getSubscript(u))) 3464 return false; 3465 return true; 3466 } 3467 3468 void Scop::addInvariantLoads(ScopStmt &Stmt, InvariantAccessesTy &InvMAs) { 3469 3470 if (InvMAs.empty()) 3471 return; 3472 3473 auto *StmtInvalidCtx = Stmt.getInvalidContext(); 3474 bool StmtInvalidCtxIsEmpty = isl_set_is_empty(StmtInvalidCtx); 3475 3476 // Get the context under which the statement is executed but remove the error 3477 // context under which this statement is reached. 3478 isl_set *DomainCtx = isl_set_params(Stmt.getDomain()); 3479 DomainCtx = isl_set_subtract(DomainCtx, StmtInvalidCtx); 3480 3481 if (isl_set_n_basic_set(DomainCtx) >= MaxDisjunctionsInDomain) { 3482 auto *AccInst = InvMAs.front().MA->getAccessInstruction(); 3483 invalidate(COMPLEXITY, AccInst->getDebugLoc()); 3484 isl_set_free(DomainCtx); 3485 for (auto &InvMA : InvMAs) 3486 isl_set_free(InvMA.NonHoistableCtx); 3487 return; 3488 } 3489 3490 // Project out all parameters that relate to loads in the statement. Otherwise 3491 // we could have cyclic dependences on the constraints under which the 3492 // hoisted loads are executed and we could not determine an order in which to 3493 // pre-load them. This happens because not only lower bounds are part of the 3494 // domain but also upper bounds. 
3495 for (auto &InvMA : InvMAs) { 3496 auto *MA = InvMA.MA; 3497 Instruction *AccInst = MA->getAccessInstruction(); 3498 if (SE->isSCEVable(AccInst->getType())) { 3499 SetVector<Value *> Values; 3500 for (const SCEV *Parameter : Parameters) { 3501 Values.clear(); 3502 findValues(Parameter, *SE, Values); 3503 if (!Values.count(AccInst)) 3504 continue; 3505 3506 if (isl_id *ParamId = getIdForParam(Parameter)) { 3507 int Dim = isl_set_find_dim_by_id(DomainCtx, isl_dim_param, ParamId); 3508 DomainCtx = isl_set_eliminate(DomainCtx, isl_dim_param, Dim, 1); 3509 isl_id_free(ParamId); 3510 } 3511 } 3512 } 3513 } 3514 3515 for (auto &InvMA : InvMAs) { 3516 auto *MA = InvMA.MA; 3517 auto *NHCtx = InvMA.NonHoistableCtx; 3518 3519 // Check for another invariant access that accesses the same location as 3520 // MA and if found consolidate them. Otherwise create a new equivalence 3521 // class at the end of InvariantEquivClasses. 3522 LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction()); 3523 Type *Ty = LInst->getType(); 3524 const SCEV *PointerSCEV = SE->getSCEV(LInst->getPointerOperand()); 3525 3526 auto *MAInvalidCtx = MA->getInvalidContext(); 3527 bool NonHoistableCtxIsEmpty = isl_set_is_empty(NHCtx); 3528 bool MAInvalidCtxIsEmpty = isl_set_is_empty(MAInvalidCtx); 3529 3530 isl_set *MACtx; 3531 // Check if we know that this pointer can be speculatively accessed. 
3532 if (canAlwaysBeHoisted(MA, StmtInvalidCtxIsEmpty, MAInvalidCtxIsEmpty, 3533 NonHoistableCtxIsEmpty)) { 3534 MACtx = isl_set_universe(isl_set_get_space(DomainCtx)); 3535 isl_set_free(MAInvalidCtx); 3536 isl_set_free(NHCtx); 3537 } else { 3538 MACtx = isl_set_copy(DomainCtx); 3539 MACtx = isl_set_subtract(MACtx, isl_set_union(MAInvalidCtx, NHCtx)); 3540 MACtx = isl_set_gist_params(MACtx, getContext()); 3541 } 3542 3543 bool Consolidated = false; 3544 for (auto &IAClass : InvariantEquivClasses) { 3545 if (PointerSCEV != IAClass.IdentifyingPointer || Ty != IAClass.AccessType) 3546 continue; 3547 3548 // If the pointer and the type is equal check if the access function wrt. 3549 // to the domain is equal too. It can happen that the domain fixes 3550 // parameter values and these can be different for distinct part of the 3551 // SCoP. If this happens we cannot consolidate the loads but need to 3552 // create a new invariant load equivalence class. 3553 auto &MAs = IAClass.InvariantAccesses; 3554 if (!MAs.empty()) { 3555 auto *LastMA = MAs.front(); 3556 3557 auto *AR = isl_map_range(MA->getAccessRelation()); 3558 auto *LastAR = isl_map_range(LastMA->getAccessRelation()); 3559 bool SameAR = isl_set_is_equal(AR, LastAR); 3560 isl_set_free(AR); 3561 isl_set_free(LastAR); 3562 3563 if (!SameAR) 3564 continue; 3565 } 3566 3567 // Add MA to the list of accesses that are in this class. 3568 MAs.push_front(MA); 3569 3570 Consolidated = true; 3571 3572 // Unify the execution context of the class and this statement. 3573 isl_set *&IAClassDomainCtx = IAClass.ExecutionContext; 3574 if (IAClassDomainCtx) 3575 IAClassDomainCtx = 3576 isl_set_coalesce(isl_set_union(IAClassDomainCtx, MACtx)); 3577 else 3578 IAClassDomainCtx = MACtx; 3579 break; 3580 } 3581 3582 if (Consolidated) 3583 continue; 3584 3585 // If we did not consolidate MA, thus did not find an equivalence class 3586 // for it, we create a new one. 
3587 InvariantEquivClasses.emplace_back( 3588 InvariantEquivClassTy{PointerSCEV, MemoryAccessList{MA}, MACtx, Ty}); 3589 } 3590 3591 isl_set_free(DomainCtx); 3592 } 3593 3594 __isl_give isl_set *Scop::getNonHoistableCtx(MemoryAccess *Access, 3595 __isl_keep isl_union_map *Writes) { 3596 // TODO: Loads that are not loop carried, hence are in a statement with 3597 // zero iterators, are by construction invariant, though we 3598 // currently "hoist" them anyway. This is necessary because we allow 3599 // them to be treated as parameters (e.g., in conditions) and our code 3600 // generation would otherwise use the old value. 3601 3602 auto &Stmt = *Access->getStatement(); 3603 BasicBlock *BB = Stmt.getEntryBlock(); 3604 3605 if (Access->isScalarKind() || Access->isWrite() || !Access->isAffine() || 3606 Access->isMemoryIntrinsic()) 3607 return nullptr; 3608 3609 // Skip accesses that have an invariant base pointer which is defined but 3610 // not loaded inside the SCoP. This can happened e.g., if a readnone call 3611 // returns a pointer that is used as a base address. However, as we want 3612 // to hoist indirect pointers, we allow the base pointer to be defined in 3613 // the region if it is also a memory access. Each ScopArrayInfo object 3614 // that has a base pointer origin has a base pointer that is loaded and 3615 // that it is invariant, thus it will be hoisted too. However, if there is 3616 // no base pointer origin we check that the base pointer is defined 3617 // outside the region. 3618 auto *LI = cast<LoadInst>(Access->getAccessInstruction()); 3619 if (hasNonHoistableBasePtrInScop(Access, Writes)) 3620 return nullptr; 3621 3622 // Skip accesses in non-affine subregions as they might not be executed 3623 // under the same condition as the entry of the non-affine subregion. 
  if (BB != LI->getParent())
    return nullptr;

  isl_map *AccessRelation = Access->getAccessRelation();
  assert(!isl_map_is_empty(AccessRelation));

  // An access that depends on any loop iterator of the statement is, by
  // definition, not invariant.
  if (isl_map_involves_dims(AccessRelation, isl_dim_in, 0,
                            Stmt.getNumIterators())) {
    isl_map_free(AccessRelation);
    return nullptr;
  }

  AccessRelation = isl_map_intersect_domain(AccessRelation, Stmt.getDomain());
  isl_set *AccessRange = isl_map_range(AccessRelation);

  // The non-hoistable context is the set of parameter values under which the
  // accessed location is also written somewhere in the SCoP.
  isl_union_map *Written = isl_union_map_intersect_range(
      isl_union_map_copy(Writes), isl_union_set_from_set(AccessRange));
  auto *WrittenCtx = isl_union_map_params(Written);
  bool IsWritten = !isl_set_is_empty(WrittenCtx);

  if (!IsWritten)
    return WrittenCtx;

  WrittenCtx = isl_set_remove_divs(WrittenCtx);
  bool TooComplex = isl_set_n_basic_set(WrittenCtx) >= MaxDisjunctionsInDomain;
  // A written, possibly-aliasing load is only hoisted if it is required to be
  // invariant (and the context stayed simple enough); then we restrict the
  // runtime context to the write-free case.
  if (TooComplex || !isRequiredInvariantLoad(LI)) {
    isl_set_free(WrittenCtx);
    return nullptr;
  }

  addAssumption(INVARIANTLOAD, isl_set_copy(WrittenCtx), LI->getDebugLoc(),
                AS_RESTRICTION);
  return WrittenCtx;
}

/// Invalidate the SCoP if a load that the region requires to be invariant
/// still has an array access in some statement (i.e., was not hoisted).
void Scop::verifyInvariantLoads() {
  auto &RIL = getRequiredInvariantLoads();
  for (LoadInst *LI : RIL) {
    assert(LI && contains(LI));
    ScopStmt *Stmt = getStmtFor(LI);
    if (Stmt && Stmt->getArrayAccessOrNULLFor(LI)) {
      invalidate(INVARIANTLOAD, LI->getDebugLoc());
      return;
    }
  }
}

/// Hoist all invariant memory loads: collect per-statement hoistable accesses,
/// detach them from their statement and register them with the SCoP.
void Scop::hoistInvariantLoads() {
  if (!PollyInvariantLoadHoisting)
    return;

  isl_union_map *Writes = getWrites();
  for (ScopStmt &Stmt : *this) {
    InvariantAccessesTy InvariantAccesses;

    for (MemoryAccess *Access : Stmt)
      if (auto *NHCtx = getNonHoistableCtx(Access, Writes))
        InvariantAccesses.push_back({Access, NHCtx});

    // Transfer the memory access from the statement to the SCoP.
    for (auto InvMA : InvariantAccesses)
      Stmt.removeMemoryAccess(InvMA.MA);
    addInvariantLoads(Stmt, InvariantAccesses);
  }
  isl_union_map_free(Writes);
}

/// Look up (by base pointer or by name) or create the ScopArrayInfo object
/// for an array; on a lookup hit the element type and sizes are updated and
/// a size mismatch invalidates the SCoP.
const ScopArrayInfo *Scop::getOrCreateScopArrayInfo(
    Value *BasePtr, Type *ElementType, ArrayRef<const SCEV *> Sizes,
    ScopArrayInfo::MemoryKind Kind, const char *BaseName) {
  assert((BasePtr || BaseName) &&
         "BasePtr and BaseName can not be nullptr at the same time.");
  assert(!(BasePtr && BaseName) && "BaseName is redundant.");
  auto &SAI = BasePtr ? ScopArrayInfoMap[std::make_pair(BasePtr, Kind)]
                      : ScopArrayNameMap[BaseName];
  if (!SAI) {
    auto &DL = getFunction().getParent()->getDataLayout();
    SAI.reset(new ScopArrayInfo(BasePtr, ElementType, getIslCtx(), Sizes, Kind,
                                DL, this, BaseName));
    ScopArrayInfoSet.insert(SAI.get());
  } else {
    SAI->updateElementType(ElementType);
    // In case of mismatching array sizes, we bail out by setting the run-time
    // context to false.
    if (!SAI->updateSizes(Sizes))
      invalidate(DELINEARIZATION, DebugLoc());
  }
  return SAI.get();
}

/// Create a name-identified array with constant dimension sizes; a size of 0
/// is translated to an unknown (nullptr) dimension size.
const ScopArrayInfo *
Scop::createScopArrayInfo(Type *ElementType, const std::string &BaseName,
                          const std::vector<unsigned> &Sizes) {
  auto *DimSizeType = Type::getInt64Ty(getSE()->getContext());
  std::vector<const SCEV *> SCEVSizes;

  for (auto size : Sizes)
    if (size)
      SCEVSizes.push_back(getSE()->getConstant(DimSizeType, size, false));
    else
      SCEVSizes.push_back(nullptr);

  auto *SAI =
      getOrCreateScopArrayInfo(nullptr, ElementType, SCEVSizes,
                               ScopArrayInfo::MK_Array, BaseName.c_str());
  return SAI;
}

/// Return the already-registered ScopArrayInfo for @p BasePtr; the entry must
/// exist (asserted).
const ScopArrayInfo *Scop::getScopArrayInfo(Value *BasePtr,
                                            ScopArrayInfo::MemoryKind Kind) {
  auto *SAI = ScopArrayInfoMap[std::make_pair(BasePtr, Kind)].get();
  assert(SAI && "No ScopArrayInfo available for this base pointer");
  return SAI;
}

std::string Scop::getContextStr() const { return stringFromIslObj(Context); }

std::string Scop::getAssumedContextStr() const {
  assert(AssumedContext && "Assumed context not yet built");
  return stringFromIslObj(AssumedContext);
}

std::string Scop::getInvalidContextStr() const {
  return stringFromIslObj(InvalidContext);
}

/// Build "<entry>---<exit>" from the region's entry/exit block names; a
/// missing exit block is printed as "FunctionExit".
std::string Scop::getNameStr() const {
  std::string ExitName, EntryName;
  raw_string_ostream ExitStr(ExitName);
  raw_string_ostream EntryStr(EntryName);

  R.getEntry()->printAsOperand(EntryStr, false);
  EntryStr.str(); // Flush the stream into EntryName.

  if (R.getExit()) {
    R.getExit()->printAsOperand(ExitStr, false);
    ExitStr.str(); // Flush the stream into ExitName.
  } else
    ExitName = "FunctionExit";

  return EntryName + "---" + ExitName;
}

__isl_give isl_set *Scop::getContext() const { return isl_set_copy(Context); }
__isl_give isl_space *Scop::getParamSpace() const {
  return isl_set_get_space(Context);
}

__isl_give isl_set *Scop::getAssumedContext() const {
  assert(AssumedContext && "Assumed context not yet built");
  return isl_set_copy(AssumedContext);
}

/// Heuristically decide whether optimizing this SCoP is worthwhile: count loop
/// iterators of statements that are inside a loop and (optionally) free of
/// scalar write accesses; more than one such iterator is deemed profitable.
bool Scop::isProfitable() const {
  if (PollyProcessUnprofitable)
    return true;

  if (isEmpty())
    return false;

  unsigned OptimizableStmtsOrLoops = 0;
  for (auto &Stmt : *this) {
    if (Stmt.getNumIterators() == 0)
      continue;

    bool ContainsArrayAccs = false;
    bool ContainsScalarAccs = false;
    for (auto *MA : Stmt) {
      if (MA->isRead())
        continue;
      ContainsArrayAccs |= MA->isArrayKind();
      ContainsScalarAccs |= MA->isScalarKind();
    }

    if (!UnprofitableScalarAccs || (ContainsArrayAccs && !ContainsScalarAccs))
      OptimizableStmtsOrLoops += Stmt.getNumIterators();
  }

  return OptimizableStmtsOrLoops > 1;
}

/// Return true if there exists a parameter valuation under which the SCoP may
/// legally execute, i.e., the assumed context (restricted to non-empty
/// domains) is neither empty nor fully contained in the invalid context.
bool Scop::hasFeasibleRuntimeContext() const {
  auto *PositiveContext = getAssumedContext();
  auto *NegativeContext = getInvalidContext();
  PositiveContext = addNonEmptyDomainConstraints(PositiveContext);
  bool IsFeasible = !(isl_set_is_empty(PositiveContext) ||
                      isl_set_is_subset(PositiveContext, NegativeContext));
  isl_set_free(PositiveContext);
  if (!IsFeasible) {
    isl_set_free(NegativeContext);
    return false;
  }

  // Also require that neither the parameter context of the domains nor the
  // SCoP context is subsumed by the invalid context.
  auto *DomainContext = isl_union_set_params(getDomains());
  IsFeasible = !isl_set_is_subset(DomainContext, NegativeContext);
  IsFeasible &= !isl_set_is_subset(Context, NegativeContext);
  isl_set_free(NegativeContext);
  isl_set_free(DomainContext);

  return IsFeasible;
}

/// Human-readable name for an assumption kind, used in remark messages.
static std::string toString(AssumptionKind Kind) {
  switch (Kind) {
  case ALIASING:
    return "No-aliasing";
  case INBOUNDS:
    return "Inbounds";
  case WRAPPING:
    return "No-overflows";
  case UNSIGNED:
    return "Signed-unsigned";
  case COMPLEXITY:
    return "Low complexity";
  case PROFITABLE:
    return "Profitable";
  case ERRORBLOCK:
    return "No-error";
  case INFINITELOOP:
    return "Finite loop";
  case INVARIANTLOAD:
    return "Invariant load";
  case DELINEARIZATION:
    return "Delinearization";
  }
  llvm_unreachable("Unknown AssumptionKind!");
}

/// Return true if @p Set actually tightens the current assumed/invalid
/// context, i.e., taking it would change what we already know.
bool Scop::isEffectiveAssumption(__isl_keep isl_set *Set, AssumptionSign Sign) {
  if (Sign == AS_ASSUMPTION) {
    // An assumption implied by the context or the assumed context is a no-op.
    if (isl_set_is_subset(Context, Set))
      return false;

    if (isl_set_is_subset(AssumedContext, Set))
      return false;
  } else {
    // A restriction disjoint from the context, or already covered by the
    // invalid context, changes nothing.
    if (isl_set_is_disjoint(Set, Context))
      return false;

    if (isl_set_is_subset(Set, InvalidContext))
      return false;
  }
  return true;
}

/// Update assumption statistics and emit an optimization remark for the
/// assumption/restriction @p Set. Returns false if the assumption should be
/// dropped (known or trivial), true if the caller should record it.
bool Scop::trackAssumption(AssumptionKind Kind, __isl_keep isl_set *Set,
                           DebugLoc Loc, AssumptionSign Sign) {
  if (PollyRemarksMinimal && !isEffectiveAssumption(Set, Sign))
    return false;

  // Do never emit trivial assumptions as they only clutter the output.
  if (!PollyRemarksMinimal) {
    isl_set *Univ = nullptr;
    if (Sign == AS_ASSUMPTION)
      Univ = isl_set_universe(isl_set_get_space(Set));

    // Trivial: an empty restriction, or an assumption equal to the universe.
    bool IsTrivial = (Sign == AS_RESTRICTION && isl_set_is_empty(Set)) ||
                     (Sign == AS_ASSUMPTION && isl_set_is_equal(Univ, Set));
    isl_set_free(Univ);

    if (IsTrivial)
      return false;
  }

  switch (Kind) {
  case ALIASING:
    AssumptionsAliasing++;
    break;
  case INBOUNDS:
    AssumptionsInbounds++;
    break;
  case WRAPPING:
    AssumptionsWrapping++;
    break;
  case UNSIGNED:
    AssumptionsUnsigned++;
    break;
  case COMPLEXITY:
    AssumptionsComplexity++;
    break;
  case PROFITABLE:
    AssumptionsUnprofitable++;
    break;
  case ERRORBLOCK:
    AssumptionsErrorBlock++;
    break;
  case INFINITELOOP:
    AssumptionsInfiniteLoop++;
    break;
  case INVARIANTLOAD:
    AssumptionsInvariantLoad++;
    break;
  case DELINEARIZATION:
    AssumptionsDelinearization++;
    break;
  }

  auto &F = getFunction();
  auto Suffix = Sign == AS_ASSUMPTION ? " assumption:\t" : " restriction:\t";
  std::string Msg = toString(Kind) + Suffix + stringFromIslObj(Set);
  emitOptimizationRemarkAnalysis(F.getContext(), DEBUG_TYPE, F, Loc, Msg);
  return true;
}

/// Record the assumption/restriction @p Set in the assumed resp. invalid
/// context. Takes ownership of @p Set. (Body continues past this boundary.)
void Scop::addAssumption(AssumptionKind Kind, __isl_take isl_set *Set,
                         DebugLoc Loc, AssumptionSign Sign) {
  // Simplify the assumptions/restrictions first.
  Set = isl_set_gist_params(Set, getContext());

  if (!trackAssumption(Kind, Set, Loc, Sign)) {
    isl_set_free(Set);
    return;
  }

  // Assumptions narrow the assumed context; restrictions widen the invalid
  // context. Both take ownership of Set.
  if (Sign == AS_ASSUMPTION) {
    AssumedContext = isl_set_intersect(AssumedContext, Set);
    AssumedContext = isl_set_coalesce(AssumedContext);
  } else {
    InvalidContext = isl_set_union(InvalidContext, Set);
    InvalidContext = isl_set_coalesce(InvalidContext);
  }
}

/// Queue an assumption for later processing by addRecordedAssumptions(); @p BB
/// (if given) is the block whose domain will be used to simplify @p Set.
void Scop::recordAssumption(AssumptionKind Kind, __isl_take isl_set *Set,
                            DebugLoc Loc, AssumptionSign Sign, BasicBlock *BB) {
  assert((isl_set_is_params(Set) || BB) &&
         "Assumptions without a basic block must be parameter sets");
  RecordedAssumptions.push_back({Kind, Sign, Set, Loc, BB});
}

/// Flush all queued assumptions into the assumed/invalid contexts.
void Scop::addRecordedAssumptions() {
  while (!RecordedAssumptions.empty()) {
    const Assumption &AS = RecordedAssumptions.pop_back_val();

    if (!AS.BB) {
      addAssumption(AS.Kind, AS.Set, AS.Loc, AS.Sign);
      continue;
    }

    // If the domain was deleted the assumptions are void.
    isl_set *Dom = getDomainConditions(AS.BB);
    if (!Dom) {
      isl_set_free(AS.Set);
      continue;
    }

    // If a basic block was given use its domain to simplify the assumption.
    // In case of restrictions we know they only have to hold on the domain,
    // thus we can intersect them with the domain of the block. However, for
    // assumptions the domain has to imply them, thus:
    //                     _              _____
    //   Dom => S   <==>   A v B   <==>   A - B
    //
    // To avoid the complement we will register A - B as a restriction not an
    // assumption.
    isl_set *S = AS.Set;
    if (AS.Sign == AS_RESTRICTION)
      S = isl_set_params(isl_set_intersect(S, Dom));
    else /* (AS.Sign == AS_ASSUMPTION) */
      S = isl_set_params(isl_set_subtract(Dom, S));

    addAssumption(AS.Kind, S, AS.Loc, AS_RESTRICTION);
  }
}

/// Mark the SCoP as not executable at runtime by assuming the empty set.
void Scop::invalidate(AssumptionKind Kind, DebugLoc Loc) {
  addAssumption(Kind, isl_set_empty(getParamSpace()), Loc, AS_ASSUMPTION);
}

__isl_give isl_set *Scop::getInvalidContext() const {
  return isl_set_copy(InvalidContext);
}

/// Print context, assumed context, invalid context and the parameter list.
void Scop::printContext(raw_ostream &OS) const {
  OS << "Context:\n";
  OS.indent(4) << Context << "\n";

  OS.indent(4) << "Assumed Context:\n";
  OS.indent(4) << AssumedContext << "\n";

  OS.indent(4) << "Invalid Context:\n";
  OS.indent(4) << InvalidContext << "\n";

  unsigned Dim = 0;
  for (const SCEV *Parameter : Parameters)
    OS.indent(4) << "p" << Dim++ << ": " << *Parameter << "\n";
}

/// Print the min/max alias groups; each read-only access forms its own group
/// together with all non-read-only accesses of its pair.
void Scop::printAliasAssumptions(raw_ostream &OS) const {
  int noOfGroups = 0;
  for (const MinMaxVectorPairTy &Pair : MinMaxAliasGroups) {
    if (Pair.second.size() == 0)
      noOfGroups += 1;
    else
      noOfGroups += Pair.second.size();
  }

  OS.indent(4) << "Alias Groups (" << noOfGroups << "):\n";
  if (MinMaxAliasGroups.empty()) {
    OS.indent(8) << "n/a\n";
    return;
  }

  for (const MinMaxVectorPairTy &Pair : MinMaxAliasGroups) {

    // If the group has no read only accesses print the write accesses.
    if (Pair.second.empty()) {
      OS.indent(8) << "[[";
      for (const MinMaxAccessTy &MMANonReadOnly : Pair.first) {
        OS << " <" << MMANonReadOnly.first << ", " << MMANonReadOnly.second
           << ">";
      }
      OS << " ]]\n";
    }

    for (const MinMaxAccessTy &MMAReadOnly : Pair.second) {
      OS.indent(8) << "[[";
      OS << " <" << MMAReadOnly.first << ", " << MMAReadOnly.second << ">";
      for (const MinMaxAccessTy &MMANonReadOnly : Pair.first) {
        OS << " <" << MMANonReadOnly.first << ", " << MMANonReadOnly.second
           << ">";
      }
      OS << " ]]\n";
    }
  }
}

void Scop::printStatements(raw_ostream &OS) const {
  OS << "Statements {\n";

  for (const ScopStmt &Stmt : *this)
    OS.indent(4) << Stmt;

  OS.indent(4) << "}\n";
}

void Scop::printArrayInfo(raw_ostream &OS) const {
  OS << "Arrays {\n";

  for (auto &Array : arrays())
    Array->print(OS);

  OS.indent(4) << "}\n";

  OS.indent(4) << "Arrays (Bounds as pw_affs) {\n";

  for (auto &Array : arrays())
    Array->print(OS, /* SizeAsPwAff */ true);

  OS.indent(4) << "}\n";
}

/// Print a full textual description of the SCoP: header data, invariant
/// access classes, contexts, arrays, alias groups and statements.
void Scop::print(raw_ostream &OS) const {
  OS.indent(4) << "Function: " << getFunction().getName() << "\n";
  OS.indent(4) << "Region: " << getNameStr() << "\n";
  OS.indent(4) << "Max Loop Depth: " << getMaxLoopDepth() << "\n";
  OS.indent(4) << "Invariant Accesses: {\n";
  for (const auto &IAClass : InvariantEquivClasses) {
    const auto &MAs = IAClass.InvariantAccesses;
    if (MAs.empty()) {
      OS.indent(12) << "Class Pointer: " << *IAClass.IdentifyingPointer << "\n";
    } else {
      MAs.front()->print(OS);
      OS.indent(12) << "Execution Context: " << IAClass.ExecutionContext
                    << "\n";
    }
  }
  OS.indent(4) << "}\n";
  printContext(OS.indent(4));
  printArrayInfo(OS.indent(4));
  printAliasAssumptions(OS);
  printStatements(OS.indent(4));
}
4097 4098 void Scop::dump() const { print(dbgs()); } 4099 4100 isl_ctx *Scop::getIslCtx() const { return IslCtx.get(); } 4101 4102 __isl_give PWACtx Scop::getPwAff(const SCEV *E, BasicBlock *BB, 4103 bool NonNegative) { 4104 // First try to use the SCEVAffinator to generate a piecewise defined 4105 // affine function from @p E in the context of @p BB. If that tasks becomes to 4106 // complex the affinator might return a nullptr. In such a case we invalidate 4107 // the SCoP and return a dummy value. This way we do not need to add error 4108 // handling code to all users of this function. 4109 auto PWAC = Affinator.getPwAff(E, BB); 4110 if (PWAC.first) { 4111 // TODO: We could use a heuristic and either use: 4112 // SCEVAffinator::takeNonNegativeAssumption 4113 // or 4114 // SCEVAffinator::interpretAsUnsigned 4115 // to deal with unsigned or "NonNegative" SCEVs. 4116 if (NonNegative) 4117 Affinator.takeNonNegativeAssumption(PWAC); 4118 return PWAC; 4119 } 4120 4121 auto DL = BB ? BB->getTerminator()->getDebugLoc() : DebugLoc(); 4122 invalidate(COMPLEXITY, DL); 4123 return Affinator.getPwAff(SE->getZero(E->getType()), BB); 4124 } 4125 4126 __isl_give isl_union_set *Scop::getDomains() const { 4127 isl_union_set *Domain = isl_union_set_empty(getParamSpace()); 4128 4129 for (const ScopStmt &Stmt : *this) 4130 Domain = isl_union_set_add_set(Domain, Stmt.getDomain()); 4131 4132 return Domain; 4133 } 4134 4135 __isl_give isl_pw_aff *Scop::getPwAffOnly(const SCEV *E, BasicBlock *BB) { 4136 PWACtx PWAC = getPwAff(E, BB); 4137 isl_set_free(PWAC.second); 4138 return PWAC.first; 4139 } 4140 4141 __isl_give isl_union_map * 4142 Scop::getAccessesOfType(std::function<bool(MemoryAccess &)> Predicate) { 4143 isl_union_map *Accesses = isl_union_map_empty(getParamSpace()); 4144 4145 for (ScopStmt &Stmt : *this) { 4146 for (MemoryAccess *MA : Stmt) { 4147 if (!Predicate(*MA)) 4148 continue; 4149 4150 isl_set *Domain = Stmt.getDomain(); 4151 isl_map *AccessDomain = 
MA->getAccessRelation(); 4152 AccessDomain = isl_map_intersect_domain(AccessDomain, Domain); 4153 Accesses = isl_union_map_add_map(Accesses, AccessDomain); 4154 } 4155 } 4156 return isl_union_map_coalesce(Accesses); 4157 } 4158 4159 __isl_give isl_union_map *Scop::getMustWrites() { 4160 return getAccessesOfType([](MemoryAccess &MA) { return MA.isMustWrite(); }); 4161 } 4162 4163 __isl_give isl_union_map *Scop::getMayWrites() { 4164 return getAccessesOfType([](MemoryAccess &MA) { return MA.isMayWrite(); }); 4165 } 4166 4167 __isl_give isl_union_map *Scop::getWrites() { 4168 return getAccessesOfType([](MemoryAccess &MA) { return MA.isWrite(); }); 4169 } 4170 4171 __isl_give isl_union_map *Scop::getReads() { 4172 return getAccessesOfType([](MemoryAccess &MA) { return MA.isRead(); }); 4173 } 4174 4175 __isl_give isl_union_map *Scop::getAccesses() { 4176 return getAccessesOfType([](MemoryAccess &MA) { return true; }); 4177 } 4178 4179 // Check whether @p Node is an extension node. 4180 // 4181 // @return true if @p Node is an extension node. 
isl_bool isNotExtNode(__isl_keep isl_schedule_node *Node, void *User) {
  // Returning isl_bool_error aborts the schedule-tree traversal; the caller
  // uses the aborted traversal as the "extension node found" signal.
  if (isl_schedule_node_get_type(Node) == isl_schedule_node_extension)
    return isl_bool_error;
  else
    return isl_bool_true;
}

/// Return true if @p Schedule contains an extension node (detected via the
/// aborted traversal in isNotExtNode).
bool Scop::containsExtensionNode(__isl_keep isl_schedule *Schedule) {
  return isl_schedule_foreach_schedule_node_top_down(Schedule, isNotExtNode,
                                                     nullptr) == isl_stat_error;
}

/// Return the schedule as a flat union map, or nullptr if the schedule tree
/// contains extension nodes (which cannot be represented as a union map).
__isl_give isl_union_map *Scop::getSchedule() const {
  auto *Tree = getScheduleTree();
  if (containsExtensionNode(Tree)) {
    isl_schedule_free(Tree);
    return nullptr;
  }
  auto *S = isl_schedule_get_map(Tree);
  isl_schedule_free(Tree);
  return S;
}

/// Return a copy of the schedule tree, restricted to the current statement
/// domains.
__isl_give isl_schedule *Scop::getScheduleTree() const {
  return isl_schedule_intersect_domain(isl_schedule_copy(Schedule),
                                       getDomains());
}

/// Replace the schedule tree by one built from @p NewSchedule over the
/// current domains. Takes ownership of @p NewSchedule.
void Scop::setSchedule(__isl_take isl_union_map *NewSchedule) {
  auto *S = isl_schedule_from_domain(getDomains());
  S = isl_schedule_insert_partial_schedule(
      S, isl_multi_union_pw_aff_from_union_map(NewSchedule));
  isl_schedule_free(Schedule);
  Schedule = S;
}

void Scop::setScheduleTree(__isl_take isl_schedule *NewSchedule) {
  isl_schedule_free(Schedule);
  Schedule = NewSchedule;
}

/// Intersect every statement domain with @p Domain; returns true if any
/// statement domain actually changed. Takes ownership of @p Domain.
bool Scop::restrictDomains(__isl_take isl_union_set *Domain) {
  bool Changed = false;
  for (ScopStmt &Stmt : *this) {
    isl_union_set *StmtDomain = isl_union_set_from_set(Stmt.getDomain());
    isl_union_set *NewStmtDomain = isl_union_set_intersect(
        isl_union_set_copy(StmtDomain), isl_union_set_copy(Domain));

    // No change if the old domain is already contained in the intersection.
    if (isl_union_set_is_subset(StmtDomain, NewStmtDomain)) {
      isl_union_set_free(StmtDomain);
      isl_union_set_free(NewStmtDomain);
      continue;
    }

    Changed = true;

    isl_union_set_free(StmtDomain);
    NewStmtDomain = isl_union_set_coalesce(NewStmtDomain);

    if (isl_union_set_is_empty(NewStmtDomain)) {
      Stmt.restrictDomain(isl_set_empty(Stmt.getDomainSpace()));
      isl_union_set_free(NewStmtDomain);
    } else
      Stmt.restrictDomain(isl_set_from_union_set(NewStmtDomain));
  }
  isl_union_set_free(Domain);
  return Changed;
}

ScalarEvolution *Scop::getSE() const { return SE; }

/// Callback payload for mapToDimension_AddSet: the target dimension N and the
/// accumulated result.
struct MapToDimensionDataTy {
  int N;
  isl_union_pw_multi_aff *Res;
};

// Create a function that maps the elements of 'Set' to its N-th dimension and
// add it to User->Res.
//
// @param Set       The input set.
// @param User->N   The dimension to map to.
// @param User->Res The isl_union_pw_multi_aff to which to add the result.
//
// @returns isl_stat_ok if no error occurred, otherwise isl_stat_error.
static isl_stat mapToDimension_AddSet(__isl_take isl_set *Set, void *User) {
  struct MapToDimensionDataTy *Data = (struct MapToDimensionDataTy *)User;
  int Dim;
  isl_space *Space;
  isl_pw_multi_aff *PMA;

  Dim = isl_set_dim(Set, isl_dim_set);
  Space = isl_set_get_space(Set);
  // Project away all dimensions behind N, then drop the ones before it so
  // that only dimension N remains in the output.
  PMA = isl_pw_multi_aff_project_out_map(Space, isl_dim_set, Data->N,
                                         Dim - Data->N);
  if (Data->N > 1)
    PMA = isl_pw_multi_aff_drop_dims(PMA, isl_dim_out, 0, Data->N - 1);
  Data->Res = isl_union_pw_multi_aff_add_pw_multi_aff(Data->Res, PMA);

  isl_set_free(Set);

  return isl_stat_ok;
}

// Create an isl_multi_union_aff that defines an identity mapping from the
// elements of USet to their N-th dimension.
//
// # Example:
//
//   Domain: { A[i,j]; B[i,j,k] }
//   N: 1
//
// Resulting Mapping: { {A[i,j] -> [(j)]; B[i,j,k] -> [(j)] }
//
// @param USet A union set describing the elements for which to generate a
//             mapping.
// @param N    The dimension to map to.
// @returns    A mapping from USet to its N-th dimension.
static __isl_give isl_multi_union_pw_aff *
mapToDimension(__isl_take isl_union_set *USet, int N) {
  assert(N >= 0);
  assert(USet);
  assert(!isl_union_set_is_empty(USet));

  struct MapToDimensionDataTy Data;

  auto *Space = isl_union_set_get_space(USet);
  auto *PwAff = isl_union_pw_multi_aff_empty(Space);

  Data = {N, PwAff};

  auto Res = isl_union_set_foreach_set(USet, &mapToDimension_AddSet, &Data);
  (void)Res; // Silence unused-variable warnings in NDEBUG builds.

  assert(Res == isl_stat_ok);

  isl_union_set_free(USet);
  return isl_multi_union_pw_aff_from_union_pw_multi_aff(Data.Res);
}

/// Create a new statement for @p BB and register it in the block-to-statement
/// map.
void Scop::addScopStmt(BasicBlock *BB) {
  assert(BB && "Unexpected nullptr!");
  Stmts.emplace_back(*this, *BB);
  auto *Stmt = &Stmts.back();
  StmtMap[BB] = Stmt;
}

/// Create a new statement for the (non-affine) region @p R; all blocks of the
/// region map to this one statement.
void Scop::addScopStmt(Region *R) {
  assert(R && "Unexpected nullptr!");
  Stmts.emplace_back(*this, *R);
  auto *Stmt = &Stmts.back();
  for (BasicBlock *BB : R->blocks())
    StmtMap[BB] = Stmt;
}

/// Create a copy statement described by a source and a target access relation
/// over @p Domain. Both relations must cover the full domain (asserted).
/// Takes ownership of all three isl objects.
ScopStmt *Scop::addScopStmt(__isl_take isl_map *SourceRel,
                            __isl_take isl_map *TargetRel,
                            __isl_take isl_set *Domain) {
#ifndef NDEBUG
  isl_set *SourceDomain = isl_map_domain(isl_map_copy(SourceRel));
  isl_set *TargetDomain = isl_map_domain(isl_map_copy(TargetRel));
  assert(isl_set_is_subset(Domain, TargetDomain) &&
         "Target access not defined for complete statement domain");
  assert(isl_set_is_subset(Domain, SourceDomain) &&
         "Source access not defined for complete statement domain");
  isl_set_free(SourceDomain);
  isl_set_free(TargetDomain);
#endif
  Stmts.emplace_back(*this, SourceRel, TargetRel, Domain);
  CopyStmtsNum++;
  return &(Stmts.back());
}

/// Entry point of schedule construction: seed the loop stack with the loop
/// surrounding the SCoP and recurse over the region tree.
void Scop::buildSchedule(LoopInfo &LI) {
  Loop *L = getLoopSurroundingScop(*this, LI);
  LoopStackTy LoopStack({LoopStackElementTy(L, nullptr, 0)});
  buildSchedule(getRegion().getNode(), LoopStack, LI);
  assert(LoopStack.size() == 1 && LoopStack.back().L == L);
  Schedule = LoopStack[0].Schedule;
}

/// To generate a schedule for the elements in a Region we traverse the Region
/// in reverse-post-order and add the contained RegionNodes in traversal order
/// to the schedule of the loop that is currently at the top of the LoopStack.
/// For loop-free codes, this results in a correct sequential ordering.
///
/// Example:
///           bb1(0)
///          /     \.
///      bb2(1)   bb3(2)
///         \    /  \.
///          bb4(3)  bb5(4)
///             \   /
///              bb6(5)
///
/// Including loops requires additional processing. Whenever a loop header is
/// encountered, the corresponding loop is added to the @p LoopStack. Starting
/// from an empty schedule, we first process all RegionNodes that are within
/// this loop and complete the sequential schedule at this loop-level before
/// processing any other nodes. To implement this
/// loop-nodes-first-processing, the reverse post-order traversal is
/// insufficient. Hence, we additionally check if the traversal yields
/// sub-regions or blocks that are outside the last loop on the @p LoopStack.
/// These region-nodes are then queued and only traversed after all nodes
/// within the current loop have been processed.
void Scop::buildSchedule(Region *R, LoopStackTy &LoopStack, LoopInfo &LI) {
  Loop *OuterScopLoop = getLoopSurroundingScop(*this, LI);

  ReversePostOrderTraversal<Region *> RTraversal(R);
  std::deque<RegionNode *> WorkList(RTraversal.begin(), RTraversal.end());
  std::deque<RegionNode *> DelayList;
  bool LastRNWaiting = false;

  // Iterate over the region @p R in reverse post-order but queue
  // sub-regions/blocks iff they are not part of the last encountered but not
  // completely traversed loop. The variable LastRNWaiting is a flag to
  // indicate that we queued the last sub-region/block from the reverse
  // post-order iterator. If it is set we have to explore the next
  // sub-region/block from the iterator (if any) to guarantee progress. If it
  // is not set we first try the next queued sub-region/blocks.
  while (!WorkList.empty() || !DelayList.empty()) {
    RegionNode *RN;

    if ((LastRNWaiting && !WorkList.empty()) || DelayList.size() == 0) {
      RN = WorkList.front();
      WorkList.pop_front();
      LastRNWaiting = false;
    } else {
      RN = DelayList.front();
      DelayList.pop_front();
    }

    Loop *L = getRegionNodeLoop(RN, LI);
    if (!contains(L))
      L = OuterScopLoop;

    Loop *LastLoop = LoopStack.back().L;
    if (LastLoop != L) {
      if (LastLoop && !LastLoop->contains(L)) {
        // RN belongs to a loop outside the one being processed: delay it.
        LastRNWaiting = true;
        DelayList.push_back(RN);
        continue;
      }
      LoopStack.push_back({L, nullptr, 0});
    }
    buildSchedule(RN, LoopStack, LI);
  }

  return;
}

/// Add the schedule contribution of a single region node to the loop at the
/// top of @p LoopStack, and finalize every loop that is completed by it.
void Scop::buildSchedule(RegionNode *RN, LoopStackTy &LoopStack, LoopInfo &LI) {

  // Affine sub-regions are traversed recursively; non-affine sub-regions are
  // scheduled as a single statement below.
  if (RN->isSubRegion()) {
    auto *LocalRegion = RN->getNodeAs<Region>();
    if (!isNonAffineSubRegion(LocalRegion)) {
      buildSchedule(LocalRegion, LoopStack, LI);
      return;
    }
  }

  auto &LoopData = LoopStack.back();
  LoopData.NumBlocksProcessed += getNumBlocksInRegionNode(RN);

  if (auto *Stmt = getStmtFor(RN)) {
    auto *UDomain = isl_union_set_from_set(Stmt->getDomain());
    auto *StmtSchedule = isl_schedule_from_domain(UDomain);
    LoopData.Schedule = combineInSequence(LoopData.Schedule, StmtSchedule);
  }

  // Check if we just processed the last node in this loop. If we did, finalize
  // the loop by:
  //
  //   - adding new schedule dimensions
  //   - folding the resulting schedule into the parent loop schedule
  //   - dropping the loop schedule from the LoopStack.
  //
  // Then continue to check surrounding loops, which might also have been
  // completed by this node.
  while (LoopData.L &&
         LoopData.NumBlocksProcessed == LoopData.L->getNumBlocks()) {
    auto *Schedule = LoopData.Schedule;
    auto NumBlocksProcessed = LoopData.NumBlocksProcessed;

    LoopStack.pop_back();
    auto &NextLoopData = LoopStack.back();

    if (Schedule) {
      auto *Domain = isl_schedule_get_domain(Schedule);
      auto *MUPA = mapToDimension(Domain, LoopStack.size());
      Schedule = isl_schedule_insert_partial_schedule(Schedule, MUPA);
      NextLoopData.Schedule =
          combineInSequence(NextLoopData.Schedule, Schedule);
    }

    NextLoopData.NumBlocksProcessed += NumBlocksProcessed;
    // NOTE(review): LoopData still refers to the element just removed by
    // pop_back(); this assignment writes through that reference so the loop
    // condition re-evaluates with the parent loop's data. It appears to rely
    // on the container not releasing the popped slot — confirm this is safe
    // with the LoopStackTy container used here.
    LoopData = NextLoopData;
  }
}

ScopStmt *Scop::getStmtFor(BasicBlock *BB) const {
  auto StmtMapIt = StmtMap.find(BB);
  if (StmtMapIt == StmtMap.end())
    return nullptr;
  return StmtMapIt->second;
}

ScopStmt *Scop::getStmtFor(RegionNode *RN) const {
  if (RN->isSubRegion())
    return getStmtFor(RN->getNodeAs<Region>());
  return getStmtFor(RN->getNodeAs<BasicBlock>());
}

ScopStmt *Scop::getStmtFor(Region *R) const {
  ScopStmt *Stmt = getStmtFor(R->getEntry());
  assert(!Stmt || Stmt->getRegion() == R);
  return Stmt;
}

/// Return the nesting depth of @p L relative to the outermost loop in the
/// region, or -1 if @p L is not contained in the region.
int Scop::getRelativeLoopDepth(const Loop *L) const {
  Loop *OuterLoop =
      L ? R.outermostLoopInRegion(const_cast<Loop *>(L)) : nullptr;
  if (!OuterLoop)
    return -1;
  return L->getLoopDepth() - OuterLoop->getLoopDepth();
}

/// Linear search for the array whose name equals @p BaseName; nullptr if none.
ScopArrayInfo *Scop::getArrayInfoByName(const std::string BaseName) {
  for (auto &SAI : arrays()) {
    if (SAI->getName() == BaseName)
      return SAI;
  }
  return nullptr;
}

//===----------------------------------------------------------------------===//
void ScopInfoRegionPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<LoopInfoWrapperPass>();
  AU.addRequired<RegionInfoPass>();
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
  AU.addRequiredTransitive<ScopDetection>();
  AU.addRequired<AAResultsWrapperPass>();
  AU.setPreservesAll();
}

bool ScopInfoRegionPass::runOnRegion(Region *R, RGPassManager &RGM) {
  auto &SD = getAnalysis<ScopDetection>();

  // Only build a Scop for maximal regions detected by ScopDetection.
  if (!SD.isMaxRegionInScop(*R))
    return false;

  Function *F = R->getEntry()->getParent();
  auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  auto const &DL = F->getParent()->getDataLayout();
  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();

  ScopBuilder SB(R, AA, DL, DT, LI, SD, SE);
  S = SB.getScop(); // take ownership of scop object
  return false;     // Analysis pass: the IR is never modified.
}

void ScopInfoRegionPass::print(raw_ostream &OS, const Module *) const {
  if (S)
    S->print(OS);
  else
    OS << "Invalid Scop!\n";
}

char ScopInfoRegionPass::ID = 0;

Pass *polly::createScopInfoRegionPassPass() { return new ScopInfoRegionPass(); }

INITIALIZE_PASS_BEGIN(ScopInfoRegionPass, "polly-scops",
                      "Polly - Create polyhedral description of Scops", false,
                      false);

INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass); 4560 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass); 4561 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass); 4562 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass); 4563 INITIALIZE_PASS_DEPENDENCY(ScopDetection); 4564 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass); 4565 INITIALIZE_PASS_END(ScopInfoRegionPass, "polly-scops", 4566 "Polly - Create polyhedral description of Scops", false, 4567 false) 4568 4569 //===----------------------------------------------------------------------===// 4570 void ScopInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { 4571 AU.addRequired<LoopInfoWrapperPass>(); 4572 AU.addRequired<RegionInfoPass>(); 4573 AU.addRequired<DominatorTreeWrapperPass>(); 4574 AU.addRequiredTransitive<ScalarEvolutionWrapperPass>(); 4575 AU.addRequiredTransitive<ScopDetection>(); 4576 AU.addRequired<AAResultsWrapperPass>(); 4577 AU.setPreservesAll(); 4578 } 4579 4580 bool ScopInfoWrapperPass::runOnFunction(Function &F) { 4581 auto &SD = getAnalysis<ScopDetection>(); 4582 4583 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 4584 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 4585 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); 4586 auto const &DL = F.getParent()->getDataLayout(); 4587 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 4588 4589 /// Create polyhedral descripton of scops for all the valid regions of a 4590 /// function. 
4591 for (auto &It : SD) { 4592 Region *R = const_cast<Region *>(It); 4593 if (!SD.isMaxRegionInScop(*R)) 4594 continue; 4595 4596 ScopBuilder SB(R, AA, DL, DT, LI, SD, SE); 4597 std::unique_ptr<Scop> S = SB.getScop(); 4598 if (!S) 4599 continue; 4600 bool Inserted = 4601 RegionToScopMap.insert(std::make_pair(R, std::move(S))).second; 4602 assert(Inserted && "Building Scop for the same region twice!"); 4603 (void)Inserted; 4604 } 4605 return false; 4606 } 4607 4608 void ScopInfoWrapperPass::print(raw_ostream &OS, const Module *) const { 4609 for (auto &It : RegionToScopMap) { 4610 if (It.second) 4611 It.second->print(OS); 4612 else 4613 OS << "Invalid Scop!\n"; 4614 } 4615 } 4616 4617 char ScopInfoWrapperPass::ID = 0; 4618 4619 Pass *polly::createScopInfoWrapperPassPass() { 4620 return new ScopInfoWrapperPass(); 4621 } 4622 4623 INITIALIZE_PASS_BEGIN( 4624 ScopInfoWrapperPass, "polly-function-scops", 4625 "Polly - Create polyhedral description of all Scops of a function", false, 4626 false); 4627 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass); 4628 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass); 4629 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass); 4630 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass); 4631 INITIALIZE_PASS_DEPENDENCY(ScopDetection); 4632 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass); 4633 INITIALIZE_PASS_END( 4634 ScopInfoWrapperPass, "polly-function-scops", 4635 "Polly - Create polyhedral description of all Scops of a function", false, 4636 false) 4637