1 //===- Schedule.cpp - Calculate an optimized schedule ---------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass the isl to calculate a schedule that is optimized for parallelism 11 // and tileablility. The algorithm used in isl is an optimized version of the 12 // algorithm described in following paper: 13 // 14 // U. Bondhugula, A. Hartono, J. Ramanujam, and P. Sadayappan. 15 // A Practical Automatic Polyhedral Parallelizer and Locality Optimizer. 16 // In Proceedings of the 2008 ACM SIGPLAN Conference On Programming Language 17 // Design and Implementation, PLDI ’08, pages 101–113. ACM, 2008. 18 //===----------------------------------------------------------------------===// 19 20 #include "polly/ScheduleOptimizer.h" 21 #include "isl/aff.h" 22 #include "isl/band.h" 23 #include "isl/constraint.h" 24 #include "isl/map.h" 25 #include "isl/options.h" 26 #include "isl/schedule.h" 27 #include "isl/space.h" 28 #include "polly/CodeGen/CodeGeneration.h" 29 #include "polly/DependenceInfo.h" 30 #include "polly/LinkAllPasses.h" 31 #include "polly/Options.h" 32 #include "polly/ScopInfo.h" 33 #include "polly/Support/GICHelper.h" 34 #include "llvm/Support/Debug.h" 35 36 using namespace llvm; 37 using namespace polly; 38 39 #define DEBUG_TYPE "polly-opt-isl" 40 41 namespace polly { 42 bool DisablePollyTiling; 43 } 44 static cl::opt<bool, true> 45 DisableTiling("polly-no-tiling", 46 cl::desc("Disable tiling in the scheduler"), 47 cl::location(polly::DisablePollyTiling), cl::init(false), 48 cl::ZeroOrMore, cl::cat(PollyCategory)); 49 50 static cl::opt<std::string> 51 OptimizeDeps("polly-opt-optimize-only", 52 cl::desc("Only a certain kind of dependences (all/raw)"), 53 cl::Hidden, cl::init("all"), cl::ZeroOrMore, 54 cl::cat(PollyCategory)); 55 56 static cl::opt<std::string> 57 SimplifyDeps("polly-opt-simplify-deps", 58 cl::desc("Dependences should be simplified (yes/no)"), 59 cl::Hidden, cl::init("yes"), cl::ZeroOrMore, 60 cl::cat(PollyCategory)); 61 62 static cl::opt<int> MaxConstantTerm( 63 "polly-opt-max-constant-term", 64 cl::desc("The maximal constant term allowed (-1 is unlimited)"), cl::Hidden, 65 cl::init(20), cl::ZeroOrMore, cl::cat(PollyCategory)); 66 67 static cl::opt<int> MaxCoefficient( 68 "polly-opt-max-coefficient", 69 cl::desc("The maximal coefficient allowed (-1 is unlimited)"), cl::Hidden, 70 cl::init(20), cl::ZeroOrMore, cl::cat(PollyCategory)); 71 72 static cl::opt<std::string> FusionStrategy( 73 "polly-opt-fusion", cl::desc("The fusion strategy to choose (min/max)"), 74 cl::Hidden, cl::init("min"), cl::ZeroOrMore, cl::cat(PollyCategory)); 75 76 static cl::opt<std::string> 77 MaximizeBandDepth("polly-opt-maximize-bands", 78 cl::desc("Maximize the band depth (yes/no)"), cl::Hidden, 79 cl::init("yes"), cl::ZeroOrMore, cl::cat(PollyCategory)); 80 81 static cl::opt<int> DefaultTileSize( 82 "polly-default-tile-size", 83 cl::desc("The default tile size (if not enough were provided by" 84 " --polly-tile-sizes)"), 85 cl::Hidden, cl::init(32), cl::ZeroOrMore, cl::cat(PollyCategory)); 86 87 static cl::list<int> TileSizes("polly-tile-sizes", 88 cl::desc("A tile size" 89 " for each loop dimension, filled with" 90 " --polly-default-tile-size"), 91 cl::Hidden, cl::ZeroOrMore, cl::CommaSeparated, 92 cl::cat(PollyCategory)); 93 namespace { 94 95 class IslScheduleOptimizer : public ScopPass { 96 public: 97 static char ID; 98 explicit IslScheduleOptimizer() : ScopPass(ID) { LastSchedule = nullptr; } 99 100 ~IslScheduleOptimizer() { isl_schedule_free(LastSchedule); } 101 102 bool runOnScop(Scop &S) override; 103 void printScop(raw_ostream &OS, Scop &S) const override; 104 void getAnalysisUsage(AnalysisUsage &AU) const override; 105 106 private: 107 isl_schedule *LastSchedule; 108 109 /// @brief Decide if the @p NewSchedule is profitable for @p S. 110 /// 111 /// @param S The SCoP we optimize. 112 /// @param NewSchedule The new schedule we computed. 113 /// 114 /// @return True, if we believe @p NewSchedule is an improvement for @p S. 115 bool isProfitableSchedule(Scop &S, __isl_keep isl_union_map *NewSchedule); 116 117 static void extendScattering(Scop &S, unsigned NewDimensions); 118 119 /// @brief Create a map that describes a n-dimensonal tiling. 120 /// 121 /// getTileMap creates a map from a n-dimensional scattering space into an 122 /// 2*n-dimensional scattering space. The map describes a rectangular 123 /// tiling. 124 /// 125 /// Example: 126 /// scheduleDimensions = 2, parameterDimensions = 1, TileSizes = <32, 64> 127 /// 128 /// tileMap := [p0] -> {[s0, s1] -> [t0, t1, s0, s1]: 129 /// t0 % 32 = 0 and t0 <= s0 < t0 + 32 and 130 /// t1 % 64 = 0 and t1 <= s1 < t1 + 64} 131 /// 132 /// Before tiling: 133 /// 134 /// for (i = 0; i < N; i++) 135 /// for (j = 0; j < M; j++) 136 /// S(i,j) 137 /// 138 /// After tiling: 139 /// 140 /// for (t_i = 0; t_i < N; i+=32) 141 /// for (t_j = 0; t_j < M; j+=64) 142 /// for (i = t_i; i < min(t_i + 32, N); i++) | Unknown that N % 32 = 0 143 /// for (j = t_j; j < t_j + 64; j++) | Known that M % 64 = 0 144 /// S(i,j) 145 /// 146 static isl_basic_map *getTileMap(isl_ctx *ctx, int scheduleDimensions); 147 148 /// @brief Get the schedule for this band. 149 /// 150 /// Polly applies transformations like tiling on top of the isl calculated 151 /// value. This can influence the number of scheduling dimension. The 152 /// number of schedule dimensions is returned in the parameter 'Dimension'. 153 static isl_union_map *getScheduleForBand(isl_band *Band, int *Dimensions); 154 155 /// @brief Create a map that pre-vectorizes one scheduling dimension. 156 /// 157 /// getPrevectorMap creates a map that maps each input dimension to the same 158 /// output dimension, except for the dimension DimToVectorize. 159 /// DimToVectorize is strip mined by 'VectorWidth' and the newly created 160 /// point loop of DimToVectorize is moved to the innermost level. 161 /// 162 /// Example (DimToVectorize=0, ScheduleDimensions=2, VectorWidth=4): 163 /// 164 /// | Before transformation 165 /// | 166 /// | A[i,j] -> [i,j] 167 /// | 168 /// | for (i = 0; i < 128; i++) 169 /// | for (j = 0; j < 128; j++) 170 /// | A(i,j); 171 /// 172 /// Prevector map: 173 /// [i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip 174 /// 175 /// | After transformation: 176 /// | 177 /// | A[i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip 178 /// | 179 /// | for (it = 0; it < 128; it+=4) 180 /// | for (j = 0; j < 128; j++) 181 /// | for (ip = max(0,it); ip < min(128, it + 3); ip++) 182 /// | A(ip,j); 183 /// 184 /// The goal of this transformation is to create a trivially vectorizable 185 /// loop. This means a parallel loop at the innermost level that has a 186 /// constant number of iterations corresponding to the target vector width. 187 /// 188 /// This transformation creates a loop at the innermost level. The loop has 189 /// a constant number of iterations, if the number of loop iterations at 190 /// DimToVectorize can be divided by VectorWidth. The default VectorWidth is 191 /// currently constant and not yet target specific. This function does not 192 /// reason about parallelism. 193 static isl_map *getPrevectorMap(isl_ctx *ctx, int DimToVectorize, 194 int ScheduleDimensions, int VectorWidth = 4); 195 196 /// @brief Get the scheduling map for a list of bands. 197 /// 198 /// Walk recursively the forest of bands to combine the schedules of the 199 /// individual bands to the overall schedule. In case tiling is requested, 200 /// the individual bands are tiled. 201 static isl_union_map *getScheduleForBandList(isl_band_list *BandList); 202 203 static isl_union_map *getScheduleMap(isl_schedule *Schedule); 204 205 using llvm::Pass::doFinalization; 206 207 virtual bool doFinalization() override { 208 isl_schedule_free(LastSchedule); 209 LastSchedule = nullptr; 210 return true; 211 } 212 }; 213 } 214 215 char IslScheduleOptimizer::ID = 0; 216 217 void IslScheduleOptimizer::extendScattering(Scop &S, unsigned NewDimensions) { 218 for (ScopStmt *Stmt : S) { 219 unsigned OldDimensions = Stmt->getNumScattering(); 220 isl_space *Space; 221 isl_map *Map, *New; 222 223 Space = isl_space_alloc(Stmt->getIslCtx(), 0, OldDimensions, NewDimensions); 224 Map = isl_map_universe(Space); 225 226 for (unsigned i = 0; i < OldDimensions; i++) 227 Map = isl_map_equate(Map, isl_dim_in, i, isl_dim_out, i); 228 229 for (unsigned i = OldDimensions; i < NewDimensions; i++) 230 Map = isl_map_fix_si(Map, isl_dim_out, i, 0); 231 232 Map = isl_map_align_params(Map, S.getParamSpace()); 233 New = isl_map_apply_range(Stmt->getScattering(), Map); 234 Stmt->setScattering(New); 235 } 236 } 237 238 isl_basic_map *IslScheduleOptimizer::getTileMap(isl_ctx *ctx, 239 int scheduleDimensions) { 240 // We construct 241 // 242 // tileMap := [p0] -> {[s0, s1] -> [t0, t1, p0, p1, a0, a1]: 243 // s0 = a0 * 32 and s0 = p0 and t0 <= p0 < t0 + 64 and 244 // s1 = a1 * 64 and s1 = p1 and t1 <= p1 < t1 + 64} 245 // 246 // and project out the auxilary dimensions a0 and a1. 247 isl_space *Space = 248 isl_space_alloc(ctx, 0, scheduleDimensions, scheduleDimensions * 3); 249 isl_basic_map *tileMap = isl_basic_map_universe(isl_space_copy(Space)); 250 251 isl_local_space *LocalSpace = isl_local_space_from_space(Space); 252 253 for (int x = 0; x < scheduleDimensions; x++) { 254 int sX = x; 255 int tX = x; 256 int pX = scheduleDimensions + x; 257 int aX = 2 * scheduleDimensions + x; 258 int tileSize = (int)TileSizes.size() > x ? TileSizes[x] : DefaultTileSize; 259 assert(tileSize > 0 && "Invalid tile size"); 260 261 isl_constraint *c; 262 263 // sX = aX * tileSize; 264 c = isl_equality_alloc(isl_local_space_copy(LocalSpace)); 265 isl_constraint_set_coefficient_si(c, isl_dim_out, sX, 1); 266 isl_constraint_set_coefficient_si(c, isl_dim_out, aX, -tileSize); 267 tileMap = isl_basic_map_add_constraint(tileMap, c); 268 269 // pX = sX; 270 c = isl_equality_alloc(isl_local_space_copy(LocalSpace)); 271 isl_constraint_set_coefficient_si(c, isl_dim_out, pX, 1); 272 isl_constraint_set_coefficient_si(c, isl_dim_in, sX, -1); 273 tileMap = isl_basic_map_add_constraint(tileMap, c); 274 275 // tX <= pX 276 c = isl_inequality_alloc(isl_local_space_copy(LocalSpace)); 277 isl_constraint_set_coefficient_si(c, isl_dim_out, pX, 1); 278 isl_constraint_set_coefficient_si(c, isl_dim_out, tX, -1); 279 tileMap = isl_basic_map_add_constraint(tileMap, c); 280 281 // pX <= tX + (tileSize - 1) 282 c = isl_inequality_alloc(isl_local_space_copy(LocalSpace)); 283 isl_constraint_set_coefficient_si(c, isl_dim_out, tX, 1); 284 isl_constraint_set_coefficient_si(c, isl_dim_out, pX, -1); 285 isl_constraint_set_constant_si(c, tileSize - 1); 286 tileMap = isl_basic_map_add_constraint(tileMap, c); 287 } 288 289 // Project out auxilary dimensions. 290 // 291 // The auxilary dimensions are transformed into existentially quantified ones. 292 // This reduces the number of visible scattering dimensions and allows Cloog 293 // to produces better code. 294 tileMap = isl_basic_map_project_out( 295 tileMap, isl_dim_out, 2 * scheduleDimensions, scheduleDimensions); 296 isl_local_space_free(LocalSpace); 297 return tileMap; 298 } 299 300 isl_union_map *IslScheduleOptimizer::getScheduleForBand(isl_band *Band, 301 int *Dimensions) { 302 isl_union_map *PartialSchedule; 303 isl_ctx *ctx; 304 isl_space *Space; 305 isl_basic_map *TileMap; 306 isl_union_map *TileUMap; 307 308 PartialSchedule = isl_band_get_partial_schedule(Band); 309 *Dimensions = isl_band_n_member(Band); 310 311 if (DisableTiling) 312 return PartialSchedule; 313 314 // It does not make any sense to tile a band with just one dimension. 315 if (*Dimensions == 1) 316 return PartialSchedule; 317 318 ctx = isl_union_map_get_ctx(PartialSchedule); 319 Space = isl_union_map_get_space(PartialSchedule); 320 321 TileMap = getTileMap(ctx, *Dimensions); 322 TileUMap = isl_union_map_from_map(isl_map_from_basic_map(TileMap)); 323 TileUMap = isl_union_map_align_params(TileUMap, Space); 324 *Dimensions = 2 * *Dimensions; 325 326 return isl_union_map_apply_range(PartialSchedule, TileUMap); 327 } 328 329 isl_map *IslScheduleOptimizer::getPrevectorMap(isl_ctx *ctx, int DimToVectorize, 330 int ScheduleDimensions, 331 int VectorWidth) { 332 isl_space *Space; 333 isl_local_space *LocalSpace, *LocalSpaceRange; 334 isl_set *Modulo; 335 isl_map *TilingMap; 336 isl_constraint *c; 337 isl_aff *Aff; 338 int PointDimension; /* ip */ 339 int TileDimension; /* it */ 340 isl_val *VectorWidthMP; 341 342 assert(0 <= DimToVectorize && DimToVectorize < ScheduleDimensions); 343 344 Space = isl_space_alloc(ctx, 0, ScheduleDimensions, ScheduleDimensions + 1); 345 TilingMap = isl_map_universe(isl_space_copy(Space)); 346 LocalSpace = isl_local_space_from_space(Space); 347 PointDimension = ScheduleDimensions; 348 TileDimension = DimToVectorize; 349 350 // Create an identity map for everything except DimToVectorize and map 351 // DimToVectorize to the point loop at the innermost dimension. 352 for (int i = 0; i < ScheduleDimensions; i++) { 353 c = isl_equality_alloc(isl_local_space_copy(LocalSpace)); 354 c = isl_constraint_set_coefficient_si(c, isl_dim_in, i, -1); 355 356 if (i == DimToVectorize) 357 c = isl_constraint_set_coefficient_si(c, isl_dim_out, PointDimension, 1); 358 else 359 c = isl_constraint_set_coefficient_si(c, isl_dim_out, i, 1); 360 361 TilingMap = isl_map_add_constraint(TilingMap, c); 362 } 363 364 // it % 'VectorWidth' = 0 365 LocalSpaceRange = isl_local_space_range(isl_local_space_copy(LocalSpace)); 366 Aff = isl_aff_zero_on_domain(LocalSpaceRange); 367 Aff = isl_aff_set_constant_si(Aff, VectorWidth); 368 Aff = isl_aff_set_coefficient_si(Aff, isl_dim_in, TileDimension, 1); 369 VectorWidthMP = isl_val_int_from_si(ctx, VectorWidth); 370 Aff = isl_aff_mod_val(Aff, VectorWidthMP); 371 Modulo = isl_pw_aff_zero_set(isl_pw_aff_from_aff(Aff)); 372 TilingMap = isl_map_intersect_range(TilingMap, Modulo); 373 374 // it <= ip 375 c = isl_inequality_alloc(isl_local_space_copy(LocalSpace)); 376 isl_constraint_set_coefficient_si(c, isl_dim_out, TileDimension, -1); 377 isl_constraint_set_coefficient_si(c, isl_dim_out, PointDimension, 1); 378 TilingMap = isl_map_add_constraint(TilingMap, c); 379 380 // ip <= it + ('VectorWidth' - 1) 381 c = isl_inequality_alloc(LocalSpace); 382 isl_constraint_set_coefficient_si(c, isl_dim_out, TileDimension, 1); 383 isl_constraint_set_coefficient_si(c, isl_dim_out, PointDimension, -1); 384 isl_constraint_set_constant_si(c, VectorWidth - 1); 385 TilingMap = isl_map_add_constraint(TilingMap, c); 386 387 return TilingMap; 388 } 389 390 isl_union_map * 391 IslScheduleOptimizer::getScheduleForBandList(isl_band_list *BandList) { 392 int NumBands; 393 isl_union_map *Schedule; 394 isl_ctx *ctx; 395 396 ctx = isl_band_list_get_ctx(BandList); 397 NumBands = isl_band_list_n_band(BandList); 398 Schedule = isl_union_map_empty(isl_space_params_alloc(ctx, 0)); 399 400 for (int i = 0; i < NumBands; i++) { 401 isl_band *Band; 402 isl_union_map *PartialSchedule; 403 int ScheduleDimensions; 404 isl_space *Space; 405 406 Band = isl_band_list_get_band(BandList, i); 407 PartialSchedule = getScheduleForBand(Band, &ScheduleDimensions); 408 Space = isl_union_map_get_space(PartialSchedule); 409 410 if (isl_band_has_children(Band)) { 411 isl_band_list *Children; 412 isl_union_map *SuffixSchedule; 413 414 Children = isl_band_get_children(Band); 415 SuffixSchedule = getScheduleForBandList(Children); 416 PartialSchedule = 417 isl_union_map_flat_range_product(PartialSchedule, SuffixSchedule); 418 isl_band_list_free(Children); 419 } else if (PollyVectorizerChoice != VECTORIZER_NONE) { 420 // In case we are at the innermost band, we try to prepare for 421 // vectorization. This means, we look for the innermost parallel loop 422 // and strip mine this loop to the innermost level using a strip-mine 423 // factor corresponding to the number of vector iterations. 424 int NumDims = isl_band_n_member(Band); 425 for (int j = NumDims - 1; j >= 0; j--) { 426 if (isl_band_member_is_coincident(Band, j)) { 427 isl_map *TileMap; 428 isl_union_map *TileUMap; 429 430 TileMap = getPrevectorMap(ctx, ScheduleDimensions - NumDims + j, 431 ScheduleDimensions); 432 TileUMap = isl_union_map_from_map(TileMap); 433 TileUMap = 434 isl_union_map_align_params(TileUMap, isl_space_copy(Space)); 435 PartialSchedule = 436 isl_union_map_apply_range(PartialSchedule, TileUMap); 437 break; 438 } 439 } 440 } 441 442 Schedule = isl_union_map_union(Schedule, PartialSchedule); 443 444 isl_band_free(Band); 445 isl_space_free(Space); 446 } 447 448 return Schedule; 449 } 450 451 isl_union_map *IslScheduleOptimizer::getScheduleMap(isl_schedule *Schedule) { 452 isl_band_list *BandList = isl_schedule_get_band_forest(Schedule); 453 isl_union_map *ScheduleMap = getScheduleForBandList(BandList); 454 isl_band_list_free(BandList); 455 return ScheduleMap; 456 } 457 458 bool IslScheduleOptimizer::isProfitableSchedule( 459 Scop &S, __isl_keep isl_union_map *NewSchedule) { 460 // To understand if the schedule has been optimized we check if the schedule 461 // has changed at all. 462 // TODO: We can improve this by tracking if any necessarily beneficial 463 // transformations have been performed. This can e.g. be tiling, loop 464 // interchange, or ...) We can track this either at the place where the 465 // transformation has been performed or, in case of automatic ILP based 466 // optimizations, by comparing (yet to be defined) performance metrics 467 // before/after the scheduling optimizer 468 // (e.g., #stride-one accesses) 469 isl_union_map *OldSchedule = S.getSchedule(); 470 bool changed = !isl_union_map_is_equal(OldSchedule, NewSchedule); 471 isl_union_map_free(OldSchedule); 472 return changed; 473 } 474 475 bool IslScheduleOptimizer::runOnScop(Scop &S) { 476 477 // Skip empty SCoPs but still allow code generation as it will delete the 478 // loops present but not needed. 479 if (S.getSize() == 0) { 480 S.markAsOptimized(); 481 return false; 482 } 483 484 const Dependences &D = getAnalysis<DependenceInfo>().getDependences(); 485 486 if (!D.hasValidDependences()) 487 return false; 488 489 isl_schedule_free(LastSchedule); 490 LastSchedule = nullptr; 491 492 // Build input data. 493 int ValidityKinds = 494 Dependences::TYPE_RAW | Dependences::TYPE_WAR | Dependences::TYPE_WAW; 495 int ProximityKinds; 496 497 if (OptimizeDeps == "all") 498 ProximityKinds = 499 Dependences::TYPE_RAW | Dependences::TYPE_WAR | Dependences::TYPE_WAW; 500 else if (OptimizeDeps == "raw") 501 ProximityKinds = Dependences::TYPE_RAW; 502 else { 503 errs() << "Do not know how to optimize for '" << OptimizeDeps << "'" 504 << " Falling back to optimizing all dependences.\n"; 505 ProximityKinds = 506 Dependences::TYPE_RAW | Dependences::TYPE_WAR | Dependences::TYPE_WAW; 507 } 508 509 isl_union_set *Domain = S.getDomains(); 510 511 if (!Domain) 512 return false; 513 514 isl_union_map *Validity = D.getDependences(ValidityKinds); 515 isl_union_map *Proximity = D.getDependences(ProximityKinds); 516 517 // Simplify the dependences by removing the constraints introduced by the 518 // domains. This can speed up the scheduling time significantly, as large 519 // constant coefficients will be removed from the dependences. The 520 // introduction of some additional dependences reduces the possible 521 // transformations, but in most cases, such transformation do not seem to be 522 // interesting anyway. In some cases this option may stop the scheduler to 523 // find any schedule. 524 if (SimplifyDeps == "yes") { 525 Validity = isl_union_map_gist_domain(Validity, isl_union_set_copy(Domain)); 526 Validity = isl_union_map_gist_range(Validity, isl_union_set_copy(Domain)); 527 Proximity = 528 isl_union_map_gist_domain(Proximity, isl_union_set_copy(Domain)); 529 Proximity = isl_union_map_gist_range(Proximity, isl_union_set_copy(Domain)); 530 } else if (SimplifyDeps != "no") { 531 errs() << "warning: Option -polly-opt-simplify-deps should either be 'yes' " 532 "or 'no'. Falling back to default: 'yes'\n"; 533 } 534 535 DEBUG(dbgs() << "\n\nCompute schedule from: "); 536 DEBUG(dbgs() << "Domain := " << stringFromIslObj(Domain) << ";\n"); 537 DEBUG(dbgs() << "Proximity := " << stringFromIslObj(Proximity) << ";\n"); 538 DEBUG(dbgs() << "Validity := " << stringFromIslObj(Validity) << ";\n"); 539 540 int IslFusionStrategy; 541 542 if (FusionStrategy == "max") { 543 IslFusionStrategy = ISL_SCHEDULE_FUSE_MAX; 544 } else if (FusionStrategy == "min") { 545 IslFusionStrategy = ISL_SCHEDULE_FUSE_MIN; 546 } else { 547 errs() << "warning: Unknown fusion strategy. Falling back to maximal " 548 "fusion.\n"; 549 IslFusionStrategy = ISL_SCHEDULE_FUSE_MAX; 550 } 551 552 int IslMaximizeBands; 553 554 if (MaximizeBandDepth == "yes") { 555 IslMaximizeBands = 1; 556 } else if (MaximizeBandDepth == "no") { 557 IslMaximizeBands = 0; 558 } else { 559 errs() << "warning: Option -polly-opt-maximize-bands should either be 'yes'" 560 " or 'no'. Falling back to default: 'yes'\n"; 561 IslMaximizeBands = 1; 562 } 563 564 isl_options_set_schedule_fuse(S.getIslCtx(), IslFusionStrategy); 565 isl_options_set_schedule_maximize_band_depth(S.getIslCtx(), IslMaximizeBands); 566 isl_options_set_schedule_max_constant_term(S.getIslCtx(), MaxConstantTerm); 567 isl_options_set_schedule_max_coefficient(S.getIslCtx(), MaxCoefficient); 568 569 isl_options_set_on_error(S.getIslCtx(), ISL_ON_ERROR_CONTINUE); 570 571 isl_schedule_constraints *ScheduleConstraints; 572 ScheduleConstraints = isl_schedule_constraints_on_domain(Domain); 573 ScheduleConstraints = 574 isl_schedule_constraints_set_proximity(ScheduleConstraints, Proximity); 575 ScheduleConstraints = isl_schedule_constraints_set_validity( 576 ScheduleConstraints, isl_union_map_copy(Validity)); 577 ScheduleConstraints = 578 isl_schedule_constraints_set_coincidence(ScheduleConstraints, Validity); 579 isl_schedule *Schedule; 580 Schedule = isl_schedule_constraints_compute_schedule(ScheduleConstraints); 581 isl_options_set_on_error(S.getIslCtx(), ISL_ON_ERROR_ABORT); 582 583 // In cases the scheduler is not able to optimize the code, we just do not 584 // touch the schedule. 585 if (!Schedule) 586 return false; 587 588 DEBUG(dbgs() << "Schedule := " << stringFromIslObj(Schedule) << ";\n"); 589 590 isl_union_map *NewSchedule = getScheduleMap(Schedule); 591 592 // Check if the optimizations performed were profitable, otherwise exit early. 593 if (!isProfitableSchedule(S, NewSchedule)) { 594 isl_schedule_free(Schedule); 595 isl_union_map_free(NewSchedule); 596 return false; 597 } 598 599 S.markAsOptimized(); 600 601 for (ScopStmt *Stmt : S) { 602 isl_map *StmtSchedule; 603 isl_set *Domain = Stmt->getDomain(); 604 isl_union_map *StmtBand; 605 StmtBand = isl_union_map_intersect_domain(isl_union_map_copy(NewSchedule), 606 isl_union_set_from_set(Domain)); 607 if (isl_union_map_is_empty(StmtBand)) { 608 StmtSchedule = isl_map_from_domain(isl_set_empty(Stmt->getDomainSpace())); 609 isl_union_map_free(StmtBand); 610 } else { 611 assert(isl_union_map_n_map(StmtBand) == 1); 612 StmtSchedule = isl_map_from_union_map(StmtBand); 613 } 614 615 Stmt->setScattering(StmtSchedule); 616 } 617 618 isl_union_map_free(NewSchedule); 619 LastSchedule = Schedule; 620 621 unsigned MaxScatDims = 0; 622 623 for (ScopStmt *Stmt : S) 624 MaxScatDims = std::max(Stmt->getNumScattering(), MaxScatDims); 625 626 extendScattering(S, MaxScatDims); 627 return false; 628 } 629 630 void IslScheduleOptimizer::printScop(raw_ostream &OS, Scop &) const { 631 isl_printer *p; 632 char *ScheduleStr; 633 634 OS << "Calculated schedule:\n"; 635 636 if (!LastSchedule) { 637 OS << "n/a\n"; 638 return; 639 } 640 641 p = isl_printer_to_str(isl_schedule_get_ctx(LastSchedule)); 642 p = isl_printer_print_schedule(p, LastSchedule); 643 ScheduleStr = isl_printer_get_str(p); 644 isl_printer_free(p); 645 646 OS << ScheduleStr << "\n"; 647 } 648 649 void IslScheduleOptimizer::getAnalysisUsage(AnalysisUsage &AU) const { 650 ScopPass::getAnalysisUsage(AU); 651 AU.addRequired<DependenceInfo>(); 652 } 653 654 Pass *polly::createIslScheduleOptimizerPass() { 655 return new IslScheduleOptimizer(); 656 } 657 658 INITIALIZE_PASS_BEGIN(IslScheduleOptimizer, "polly-opt-isl", 659 "Polly - Optimize schedule of SCoP", false, false); 660 INITIALIZE_PASS_DEPENDENCY(DependenceInfo); 661 INITIALIZE_PASS_DEPENDENCY(ScopInfo); 662 INITIALIZE_PASS_END(IslScheduleOptimizer, "polly-opt-isl", 663 "Polly - Optimize schedule of SCoP", false, false) 664