1 //===- Schedule.cpp - Calculate an optimized schedule ---------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass generates an entirey new schedule tree from the data dependences 11 // and iteration domains. The new schedule tree is computed in two steps: 12 // 13 // 1) The isl scheduling optimizer is run 14 // 15 // The isl scheduling optimizer creates a new schedule tree that maximizes 16 // parallelism and tileability and minimizes data-dependence distances. The 17 // algorithm used is a modified version of the ``Pluto'' algorithm: 18 // 19 // U. Bondhugula, A. Hartono, J. Ramanujam, and P. Sadayappan. 20 // A Practical Automatic Polyhedral Parallelizer and Locality Optimizer. 21 // In Proceedings of the 2008 ACM SIGPLAN Conference On Programming Language 22 // Design and Implementation, PLDI ’08, pages 101–113. ACM, 2008. 23 // 24 // 2) A set of post-scheduling transformations is applied on the schedule tree. 25 // 26 // These optimizations include: 27 // 28 // - Tiling of the innermost tilable bands 29 // - Prevectorization - The coice of a possible outer loop that is strip-mined 30 // to the innermost level to enable inner-loop 31 // vectorization. 32 // - Some optimizations for spatial locality are also planned. 33 // 34 // For a detailed description of the schedule tree itself please see section 6 35 // of: 36 // 37 // Polyhedral AST generation is more than scanning polyhedra 38 // Tobias Grosser, Sven Verdoolaege, Albert Cohen 39 // ACM Transations on Programming Languages and Systems (TOPLAS), 40 // 37(4), July 2015 41 // http://www.grosser.es/#pub-polyhedral-AST-generation 42 // 43 // This publication also contains a detailed discussion of the different options 44 // for polyhedral loop unrolling, full/partial tile separation and other uses 45 // of the schedule tree. 46 // 47 //===----------------------------------------------------------------------===// 48 49 #include "polly/ScheduleOptimizer.h" 50 #include "polly/CodeGen/CodeGeneration.h" 51 #include "polly/DependenceInfo.h" 52 #include "polly/LinkAllPasses.h" 53 #include "polly/Options.h" 54 #include "polly/ScopInfo.h" 55 #include "polly/Support/GICHelper.h" 56 #include "llvm/Support/Debug.h" 57 #include "isl/aff.h" 58 #include "isl/band.h" 59 #include "isl/constraint.h" 60 #include "isl/map.h" 61 #include "isl/options.h" 62 #include "isl/printer.h" 63 #include "isl/schedule.h" 64 #include "isl/schedule_node.h" 65 #include "isl/space.h" 66 #include "isl/union_map.h" 67 #include "isl/union_set.h" 68 69 using namespace llvm; 70 using namespace polly; 71 72 #define DEBUG_TYPE "polly-opt-isl" 73 74 static cl::opt<std::string> 75 OptimizeDeps("polly-opt-optimize-only", 76 cl::desc("Only a certain kind of dependences (all/raw)"), 77 cl::Hidden, cl::init("all"), cl::ZeroOrMore, 78 cl::cat(PollyCategory)); 79 80 static cl::opt<std::string> 81 SimplifyDeps("polly-opt-simplify-deps", 82 cl::desc("Dependences should be simplified (yes/no)"), 83 cl::Hidden, cl::init("yes"), cl::ZeroOrMore, 84 cl::cat(PollyCategory)); 85 86 static cl::opt<int> MaxConstantTerm( 87 "polly-opt-max-constant-term", 88 cl::desc("The maximal constant term allowed (-1 is unlimited)"), cl::Hidden, 89 cl::init(20), cl::ZeroOrMore, cl::cat(PollyCategory)); 90 91 static cl::opt<int> MaxCoefficient( 92 "polly-opt-max-coefficient", 93 cl::desc("The maximal coefficient allowed (-1 is unlimited)"), cl::Hidden, 94 cl::init(20), cl::ZeroOrMore, cl::cat(PollyCategory)); 95 96 static cl::opt<std::string> FusionStrategy( 97 "polly-opt-fusion", cl::desc("The fusion strategy to choose (min/max)"), 98 cl::Hidden, cl::init("min"), cl::ZeroOrMore, cl::cat(PollyCategory)); 99 100 static cl::opt<std::string> 101 MaximizeBandDepth("polly-opt-maximize-bands", 102 cl::desc("Maximize the band depth (yes/no)"), cl::Hidden, 103 cl::init("yes"), cl::ZeroOrMore, cl::cat(PollyCategory)); 104 105 static cl::opt<std::string> OuterCoincidence( 106 "polly-opt-outer-coincidence", 107 cl::desc("Try to construct schedules where the outer member of each band " 108 "satisfies the coincidence constraints (yes/no)"), 109 cl::Hidden, cl::init("no"), cl::ZeroOrMore, cl::cat(PollyCategory)); 110 111 static cl::opt<int> PrevectorWidth( 112 "polly-prevect-width", 113 cl::desc( 114 "The number of loop iterations to strip-mine for pre-vectorization"), 115 cl::Hidden, cl::init(4), cl::ZeroOrMore, cl::cat(PollyCategory)); 116 117 static cl::opt<bool> FirstLevelTiling("polly-tiling", 118 cl::desc("Enable loop tiling"), 119 cl::init(true), cl::ZeroOrMore, 120 cl::cat(PollyCategory)); 121 122 static cl::opt<int> FirstLevelDefaultTileSize( 123 "polly-default-tile-size", 124 cl::desc("The default tile size (if not enough were provided by" 125 " --polly-tile-sizes)"), 126 cl::Hidden, cl::init(32), cl::ZeroOrMore, cl::cat(PollyCategory)); 127 128 static cl::list<int> FirstLevelTileSizes( 129 "polly-tile-sizes", cl::desc("A tile size for each loop dimension, filled " 130 "with --polly-default-tile-size"), 131 cl::Hidden, cl::ZeroOrMore, cl::CommaSeparated, cl::cat(PollyCategory)); 132 133 static cl::opt<bool> 134 SecondLevelTiling("polly-2nd-level-tiling", 135 cl::desc("Enable a 2nd level loop of loop tiling"), 136 cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); 137 138 static cl::opt<int> SecondLevelDefaultTileSize( 139 "polly-2nd-level-default-tile-size", 140 cl::desc("The default 2nd-level tile size (if not enough were provided by" 141 " --polly-2nd-level-tile-sizes)"), 142 cl::Hidden, cl::init(16), cl::ZeroOrMore, cl::cat(PollyCategory)); 143 144 static cl::list<int> 145 SecondLevelTileSizes("polly-2nd-level-tile-sizes", 146 cl::desc("A tile size for each loop dimension, filled " 147 "with --polly-default-tile-size"), 148 cl::Hidden, cl::ZeroOrMore, cl::CommaSeparated, 149 cl::cat(PollyCategory)); 150 151 static cl::opt<bool> RegisterTiling("polly-register-tiling", 152 cl::desc("Enable register tiling"), 153 cl::init(false), cl::ZeroOrMore, 154 cl::cat(PollyCategory)); 155 156 static cl::opt<int> RegisterDefaultTileSize( 157 "polly-register-tiling-default-tile-size", 158 cl::desc("The default register tile size (if not enough were provided by" 159 " --polly-register-tile-sizes)"), 160 cl::Hidden, cl::init(2), cl::ZeroOrMore, cl::cat(PollyCategory)); 161 162 static cl::list<int> 163 RegisterTileSizes("polly-register-tile-sizes", 164 cl::desc("A tile size for each loop dimension, filled " 165 "with --polly-register-tile-size"), 166 cl::Hidden, cl::ZeroOrMore, cl::CommaSeparated, 167 cl::cat(PollyCategory)); 168 169 /// @brief Create an isl_union_set, which describes the isolate option based 170 /// on IsoalteDomain. 171 /// 172 /// @param IsolateDomain An isl_set whose last dimension is the only one that 173 /// should belong to the current band node. 174 static __isl_give isl_union_set * 175 getIsolateOptions(__isl_take isl_set *IsolateDomain) { 176 auto Dims = isl_set_dim(IsolateDomain, isl_dim_set); 177 auto *IsolateRelation = isl_map_from_domain(IsolateDomain); 178 IsolateRelation = isl_map_move_dims(IsolateRelation, isl_dim_out, 0, 179 isl_dim_in, Dims - 1, 1); 180 auto *IsolateOption = isl_map_wrap(IsolateRelation); 181 auto *Id = isl_id_alloc(isl_set_get_ctx(IsolateOption), "isolate", NULL); 182 return isl_union_set_from_set(isl_set_set_tuple_id(IsolateOption, Id)); 183 } 184 185 /// @brief Create an isl_union_set, which describes the atomic option for the 186 /// dimension of the current node. 187 /// 188 /// It may help to reduce the size of generated code. 189 /// 190 /// @param Ctx An isl_ctx, which is used to create the isl_union_set. 191 static __isl_give isl_union_set *getAtomicOptions(__isl_take isl_ctx *Ctx) { 192 auto *Space = isl_space_set_alloc(Ctx, 0, 1); 193 auto *AtomicOption = isl_set_universe(Space); 194 auto *Id = isl_id_alloc(Ctx, "atomic", NULL); 195 return isl_union_set_from_set(isl_set_set_tuple_id(AtomicOption, Id)); 196 } 197 198 /// @brief Make the last dimension of Set to take values 199 /// from 0 to VectorWidth - 1. 200 /// 201 /// @param Set A set, which should be modified. 202 /// @param VectorWidth A parameter, which determines the constraint. 203 static __isl_give isl_set *addExtentConstraints(__isl_take isl_set *Set, 204 int VectorWidth) { 205 auto Dims = isl_set_dim(Set, isl_dim_set); 206 auto Space = isl_set_get_space(Set); 207 auto *LocalSpace = isl_local_space_from_space(Space); 208 auto *ExtConstr = 209 isl_constraint_alloc_inequality(isl_local_space_copy(LocalSpace)); 210 ExtConstr = isl_constraint_set_constant_si(ExtConstr, 0); 211 ExtConstr = 212 isl_constraint_set_coefficient_si(ExtConstr, isl_dim_set, Dims - 1, 1); 213 Set = isl_set_add_constraint(Set, ExtConstr); 214 ExtConstr = isl_constraint_alloc_inequality(LocalSpace); 215 ExtConstr = isl_constraint_set_constant_si(ExtConstr, VectorWidth - 1); 216 ExtConstr = 217 isl_constraint_set_coefficient_si(ExtConstr, isl_dim_set, Dims - 1, -1); 218 return isl_set_add_constraint(Set, ExtConstr); 219 } 220 221 /// @brief Build the desired set of partial tile prefixes. 222 /// 223 /// We build a set of partial tile prefixes, which are prefixes of the vector 224 /// loop that have exactly VectorWidth iterations. 225 /// 226 /// 1. Get all prefixes of the vector loop. 227 /// 2. Extend it to a set, which has exactly VectorWidth iterations for 228 /// any prefix from the set that was built on the previous step. 229 /// 3. Subtract loop domain from it, project out the vector loop dimension and 230 /// get a set of prefixes, which don’t have exactly VectorWidth iterations. 231 /// 4. Subtract it from all prefixes of the vector loop and get the desired 232 /// set. 233 /// 234 /// @param ScheduleRange A range of a map, which describes a prefix schedule 235 /// relation. 236 static __isl_give isl_set * 237 getPartialTilePrefixes(__isl_take isl_set *ScheduleRange, int VectorWidth) { 238 auto Dims = isl_set_dim(ScheduleRange, isl_dim_set); 239 auto *LoopPrefixes = isl_set_project_out(isl_set_copy(ScheduleRange), 240 isl_dim_set, Dims - 1, 1); 241 auto *ExtentPrefixes = 242 isl_set_add_dims(isl_set_copy(LoopPrefixes), isl_dim_set, 1); 243 ExtentPrefixes = addExtentConstraints(ExtentPrefixes, VectorWidth); 244 auto *BadPrefixes = isl_set_subtract(ExtentPrefixes, ScheduleRange); 245 BadPrefixes = isl_set_project_out(BadPrefixes, isl_dim_set, Dims - 1, 1); 246 return isl_set_subtract(LoopPrefixes, BadPrefixes); 247 } 248 249 __isl_give isl_schedule_node *ScheduleTreeOptimizer::isolateFullPartialTiles( 250 __isl_take isl_schedule_node *Node, int VectorWidth) { 251 assert(isl_schedule_node_get_type(Node) == isl_schedule_node_band); 252 Node = isl_schedule_node_child(Node, 0); 253 Node = isl_schedule_node_child(Node, 0); 254 auto *SchedRelUMap = isl_schedule_node_get_prefix_schedule_relation(Node); 255 auto *ScheduleRelation = isl_map_from_union_map(SchedRelUMap); 256 auto *ScheduleRange = isl_map_range(ScheduleRelation); 257 auto *IsolateDomain = getPartialTilePrefixes(ScheduleRange, VectorWidth); 258 auto *AtomicOption = getAtomicOptions(isl_set_get_ctx(IsolateDomain)); 259 auto *IsolateOption = getIsolateOptions(IsolateDomain); 260 Node = isl_schedule_node_parent(Node); 261 Node = isl_schedule_node_parent(Node); 262 auto *Options = isl_union_set_union(IsolateOption, AtomicOption); 263 Node = isl_schedule_node_band_set_ast_build_options(Node, Options); 264 return Node; 265 } 266 267 __isl_give isl_schedule_node * 268 ScheduleTreeOptimizer::prevectSchedBand(__isl_take isl_schedule_node *Node, 269 unsigned DimToVectorize, 270 int VectorWidth) { 271 assert(isl_schedule_node_get_type(Node) == isl_schedule_node_band); 272 273 auto Space = isl_schedule_node_band_get_space(Node); 274 auto ScheduleDimensions = isl_space_dim(Space, isl_dim_set); 275 isl_space_free(Space); 276 assert(DimToVectorize < ScheduleDimensions); 277 278 if (DimToVectorize > 0) { 279 Node = isl_schedule_node_band_split(Node, DimToVectorize); 280 Node = isl_schedule_node_child(Node, 0); 281 } 282 if (DimToVectorize < ScheduleDimensions - 1) 283 Node = isl_schedule_node_band_split(Node, 1); 284 Space = isl_schedule_node_band_get_space(Node); 285 auto Sizes = isl_multi_val_zero(Space); 286 auto Ctx = isl_schedule_node_get_ctx(Node); 287 Sizes = 288 isl_multi_val_set_val(Sizes, 0, isl_val_int_from_si(Ctx, VectorWidth)); 289 Node = isl_schedule_node_band_tile(Node, Sizes); 290 Node = isolateFullPartialTiles(Node, VectorWidth); 291 Node = isl_schedule_node_child(Node, 0); 292 // Make sure the "trivially vectorizable loop" is not unrolled. Otherwise, 293 // we will have troubles to match it in the backend. 294 Node = isl_schedule_node_band_set_ast_build_options( 295 Node, isl_union_set_read_from_str(Ctx, "{ unroll[x]: 1 = 0 }")); 296 Node = isl_schedule_node_band_sink(Node); 297 Node = isl_schedule_node_child(Node, 0); 298 if (isl_schedule_node_get_type(Node) == isl_schedule_node_leaf) 299 Node = isl_schedule_node_parent(Node); 300 isl_id *LoopMarker = isl_id_alloc(Ctx, "SIMD", nullptr); 301 Node = isl_schedule_node_insert_mark(Node, LoopMarker); 302 return Node; 303 } 304 305 __isl_give isl_schedule_node * 306 ScheduleTreeOptimizer::tileNode(__isl_take isl_schedule_node *Node, 307 const char *Identifier, ArrayRef<int> TileSizes, 308 int DefaultTileSize) { 309 auto Ctx = isl_schedule_node_get_ctx(Node); 310 auto Space = isl_schedule_node_band_get_space(Node); 311 auto Dims = isl_space_dim(Space, isl_dim_set); 312 auto Sizes = isl_multi_val_zero(Space); 313 std::string IdentifierString(Identifier); 314 for (unsigned i = 0; i < Dims; i++) { 315 auto tileSize = i < TileSizes.size() ? TileSizes[i] : DefaultTileSize; 316 Sizes = isl_multi_val_set_val(Sizes, i, isl_val_int_from_si(Ctx, tileSize)); 317 } 318 auto TileLoopMarkerStr = IdentifierString + " - Tiles"; 319 isl_id *TileLoopMarker = 320 isl_id_alloc(Ctx, TileLoopMarkerStr.c_str(), nullptr); 321 Node = isl_schedule_node_insert_mark(Node, TileLoopMarker); 322 Node = isl_schedule_node_child(Node, 0); 323 Node = isl_schedule_node_band_tile(Node, Sizes); 324 Node = isl_schedule_node_child(Node, 0); 325 auto PointLoopMarkerStr = IdentifierString + " - Points"; 326 isl_id *PointLoopMarker = 327 isl_id_alloc(Ctx, PointLoopMarkerStr.c_str(), nullptr); 328 Node = isl_schedule_node_insert_mark(Node, PointLoopMarker); 329 Node = isl_schedule_node_child(Node, 0); 330 return Node; 331 } 332 333 bool ScheduleTreeOptimizer::isTileableBandNode( 334 __isl_keep isl_schedule_node *Node) { 335 if (isl_schedule_node_get_type(Node) != isl_schedule_node_band) 336 return false; 337 338 if (isl_schedule_node_n_children(Node) != 1) 339 return false; 340 341 if (!isl_schedule_node_band_get_permutable(Node)) 342 return false; 343 344 auto Space = isl_schedule_node_band_get_space(Node); 345 auto Dims = isl_space_dim(Space, isl_dim_set); 346 isl_space_free(Space); 347 348 if (Dims <= 1) 349 return false; 350 351 auto Child = isl_schedule_node_get_child(Node, 0); 352 auto Type = isl_schedule_node_get_type(Child); 353 isl_schedule_node_free(Child); 354 355 if (Type != isl_schedule_node_leaf) 356 return false; 357 358 return true; 359 } 360 361 __isl_give isl_schedule_node * 362 ScheduleTreeOptimizer::optimizeBand(__isl_take isl_schedule_node *Node, 363 void *User) { 364 if (!isTileableBandNode(Node)) 365 return Node; 366 367 if (FirstLevelTiling) 368 Node = tileNode(Node, "1st level tiling", FirstLevelTileSizes, 369 FirstLevelDefaultTileSize); 370 371 if (SecondLevelTiling) 372 Node = tileNode(Node, "2nd level tiling", SecondLevelTileSizes, 373 SecondLevelDefaultTileSize); 374 375 if (RegisterTiling) { 376 auto *Ctx = isl_schedule_node_get_ctx(Node); 377 Node = tileNode(Node, "Register tiling", RegisterTileSizes, 378 RegisterDefaultTileSize); 379 Node = isl_schedule_node_band_set_ast_build_options( 380 Node, isl_union_set_read_from_str(Ctx, "{unroll[x]}")); 381 } 382 383 if (PollyVectorizerChoice == VECTORIZER_NONE) 384 return Node; 385 386 auto Space = isl_schedule_node_band_get_space(Node); 387 auto Dims = isl_space_dim(Space, isl_dim_set); 388 isl_space_free(Space); 389 390 for (int i = Dims - 1; i >= 0; i--) 391 if (isl_schedule_node_band_member_get_coincident(Node, i)) { 392 Node = prevectSchedBand(Node, i, PrevectorWidth); 393 break; 394 } 395 396 return Node; 397 } 398 399 __isl_give isl_schedule * 400 ScheduleTreeOptimizer::optimizeSchedule(__isl_take isl_schedule *Schedule) { 401 isl_schedule_node *Root = isl_schedule_get_root(Schedule); 402 Root = optimizeScheduleNode(Root); 403 isl_schedule_free(Schedule); 404 auto S = isl_schedule_node_get_schedule(Root); 405 isl_schedule_node_free(Root); 406 return S; 407 } 408 409 __isl_give isl_schedule_node *ScheduleTreeOptimizer::optimizeScheduleNode( 410 __isl_take isl_schedule_node *Node) { 411 Node = isl_schedule_node_map_descendant_bottom_up(Node, optimizeBand, NULL); 412 return Node; 413 } 414 415 bool ScheduleTreeOptimizer::isProfitableSchedule( 416 Scop &S, __isl_keep isl_union_map *NewSchedule) { 417 // To understand if the schedule has been optimized we check if the schedule 418 // has changed at all. 419 // TODO: We can improve this by tracking if any necessarily beneficial 420 // transformations have been performed. This can e.g. be tiling, loop 421 // interchange, or ...) We can track this either at the place where the 422 // transformation has been performed or, in case of automatic ILP based 423 // optimizations, by comparing (yet to be defined) performance metrics 424 // before/after the scheduling optimizer 425 // (e.g., #stride-one accesses) 426 isl_union_map *OldSchedule = S.getSchedule(); 427 bool changed = !isl_union_map_is_equal(OldSchedule, NewSchedule); 428 isl_union_map_free(OldSchedule); 429 return changed; 430 } 431 432 namespace { 433 class IslScheduleOptimizer : public ScopPass { 434 public: 435 static char ID; 436 explicit IslScheduleOptimizer() : ScopPass(ID) { LastSchedule = nullptr; } 437 438 ~IslScheduleOptimizer() { isl_schedule_free(LastSchedule); } 439 440 /// @brief Optimize the schedule of the SCoP @p S. 441 bool runOnScop(Scop &S) override; 442 443 /// @brief Print the new schedule for the SCoP @p S. 444 void printScop(raw_ostream &OS, Scop &S) const override; 445 446 /// @brief Register all analyses and transformation required. 447 void getAnalysisUsage(AnalysisUsage &AU) const override; 448 449 /// @brief Release the internal memory. 450 void releaseMemory() override { 451 isl_schedule_free(LastSchedule); 452 LastSchedule = nullptr; 453 } 454 455 private: 456 isl_schedule *LastSchedule; 457 }; 458 } 459 460 char IslScheduleOptimizer::ID = 0; 461 462 bool IslScheduleOptimizer::runOnScop(Scop &S) { 463 464 // Skip empty SCoPs but still allow code generation as it will delete the 465 // loops present but not needed. 466 if (S.getSize() == 0) { 467 S.markAsOptimized(); 468 return false; 469 } 470 471 const Dependences &D = 472 getAnalysis<DependenceInfo>().getDependences(Dependences::AL_Statement); 473 474 if (!D.hasValidDependences()) 475 return false; 476 477 isl_schedule_free(LastSchedule); 478 LastSchedule = nullptr; 479 480 // Build input data. 481 int ValidityKinds = 482 Dependences::TYPE_RAW | Dependences::TYPE_WAR | Dependences::TYPE_WAW; 483 int ProximityKinds; 484 485 if (OptimizeDeps == "all") 486 ProximityKinds = 487 Dependences::TYPE_RAW | Dependences::TYPE_WAR | Dependences::TYPE_WAW; 488 else if (OptimizeDeps == "raw") 489 ProximityKinds = Dependences::TYPE_RAW; 490 else { 491 errs() << "Do not know how to optimize for '" << OptimizeDeps << "'" 492 << " Falling back to optimizing all dependences.\n"; 493 ProximityKinds = 494 Dependences::TYPE_RAW | Dependences::TYPE_WAR | Dependences::TYPE_WAW; 495 } 496 497 isl_union_set *Domain = S.getDomains(); 498 499 if (!Domain) 500 return false; 501 502 isl_union_map *Validity = D.getDependences(ValidityKinds); 503 isl_union_map *Proximity = D.getDependences(ProximityKinds); 504 505 // Simplify the dependences by removing the constraints introduced by the 506 // domains. This can speed up the scheduling time significantly, as large 507 // constant coefficients will be removed from the dependences. The 508 // introduction of some additional dependences reduces the possible 509 // transformations, but in most cases, such transformation do not seem to be 510 // interesting anyway. In some cases this option may stop the scheduler to 511 // find any schedule. 512 if (SimplifyDeps == "yes") { 513 Validity = isl_union_map_gist_domain(Validity, isl_union_set_copy(Domain)); 514 Validity = isl_union_map_gist_range(Validity, isl_union_set_copy(Domain)); 515 Proximity = 516 isl_union_map_gist_domain(Proximity, isl_union_set_copy(Domain)); 517 Proximity = isl_union_map_gist_range(Proximity, isl_union_set_copy(Domain)); 518 } else if (SimplifyDeps != "no") { 519 errs() << "warning: Option -polly-opt-simplify-deps should either be 'yes' " 520 "or 'no'. Falling back to default: 'yes'\n"; 521 } 522 523 DEBUG(dbgs() << "\n\nCompute schedule from: "); 524 DEBUG(dbgs() << "Domain := " << stringFromIslObj(Domain) << ";\n"); 525 DEBUG(dbgs() << "Proximity := " << stringFromIslObj(Proximity) << ";\n"); 526 DEBUG(dbgs() << "Validity := " << stringFromIslObj(Validity) << ";\n"); 527 528 unsigned IslSerializeSCCs; 529 530 if (FusionStrategy == "max") { 531 IslSerializeSCCs = 0; 532 } else if (FusionStrategy == "min") { 533 IslSerializeSCCs = 1; 534 } else { 535 errs() << "warning: Unknown fusion strategy. Falling back to maximal " 536 "fusion.\n"; 537 IslSerializeSCCs = 0; 538 } 539 540 int IslMaximizeBands; 541 542 if (MaximizeBandDepth == "yes") { 543 IslMaximizeBands = 1; 544 } else if (MaximizeBandDepth == "no") { 545 IslMaximizeBands = 0; 546 } else { 547 errs() << "warning: Option -polly-opt-maximize-bands should either be 'yes'" 548 " or 'no'. Falling back to default: 'yes'\n"; 549 IslMaximizeBands = 1; 550 } 551 552 int IslOuterCoincidence; 553 554 if (OuterCoincidence == "yes") { 555 IslOuterCoincidence = 1; 556 } else if (OuterCoincidence == "no") { 557 IslOuterCoincidence = 0; 558 } else { 559 errs() << "warning: Option -polly-opt-outer-coincidence should either be " 560 "'yes' or 'no'. Falling back to default: 'no'\n"; 561 IslOuterCoincidence = 0; 562 } 563 564 isl_options_set_schedule_outer_coincidence(S.getIslCtx(), 565 IslOuterCoincidence); 566 isl_options_set_schedule_serialize_sccs(S.getIslCtx(), IslSerializeSCCs); 567 isl_options_set_schedule_maximize_band_depth(S.getIslCtx(), IslMaximizeBands); 568 isl_options_set_schedule_max_constant_term(S.getIslCtx(), MaxConstantTerm); 569 isl_options_set_schedule_max_coefficient(S.getIslCtx(), MaxCoefficient); 570 isl_options_set_tile_scale_tile_loops(S.getIslCtx(), 0); 571 572 isl_options_set_on_error(S.getIslCtx(), ISL_ON_ERROR_CONTINUE); 573 574 isl_schedule_constraints *ScheduleConstraints; 575 ScheduleConstraints = isl_schedule_constraints_on_domain(Domain); 576 ScheduleConstraints = 577 isl_schedule_constraints_set_proximity(ScheduleConstraints, Proximity); 578 ScheduleConstraints = isl_schedule_constraints_set_validity( 579 ScheduleConstraints, isl_union_map_copy(Validity)); 580 ScheduleConstraints = 581 isl_schedule_constraints_set_coincidence(ScheduleConstraints, Validity); 582 isl_schedule *Schedule; 583 Schedule = isl_schedule_constraints_compute_schedule(ScheduleConstraints); 584 isl_options_set_on_error(S.getIslCtx(), ISL_ON_ERROR_ABORT); 585 586 // In cases the scheduler is not able to optimize the code, we just do not 587 // touch the schedule. 588 if (!Schedule) 589 return false; 590 591 DEBUG({ 592 auto *P = isl_printer_to_str(S.getIslCtx()); 593 P = isl_printer_set_yaml_style(P, ISL_YAML_STYLE_BLOCK); 594 P = isl_printer_print_schedule(P, Schedule); 595 dbgs() << "NewScheduleTree: \n" << isl_printer_get_str(P) << "\n"; 596 isl_printer_free(P); 597 }); 598 599 isl_schedule *NewSchedule = ScheduleTreeOptimizer::optimizeSchedule(Schedule); 600 isl_union_map *NewScheduleMap = isl_schedule_get_map(NewSchedule); 601 602 if (!ScheduleTreeOptimizer::isProfitableSchedule(S, NewScheduleMap)) { 603 isl_union_map_free(NewScheduleMap); 604 isl_schedule_free(NewSchedule); 605 return false; 606 } 607 608 S.setScheduleTree(NewSchedule); 609 S.markAsOptimized(); 610 611 isl_union_map_free(NewScheduleMap); 612 return false; 613 } 614 615 void IslScheduleOptimizer::printScop(raw_ostream &OS, Scop &) const { 616 isl_printer *p; 617 char *ScheduleStr; 618 619 OS << "Calculated schedule:\n"; 620 621 if (!LastSchedule) { 622 OS << "n/a\n"; 623 return; 624 } 625 626 p = isl_printer_to_str(isl_schedule_get_ctx(LastSchedule)); 627 p = isl_printer_print_schedule(p, LastSchedule); 628 ScheduleStr = isl_printer_get_str(p); 629 isl_printer_free(p); 630 631 OS << ScheduleStr << "\n"; 632 } 633 634 void IslScheduleOptimizer::getAnalysisUsage(AnalysisUsage &AU) const { 635 ScopPass::getAnalysisUsage(AU); 636 AU.addRequired<DependenceInfo>(); 637 } 638 639 Pass *polly::createIslScheduleOptimizerPass() { 640 return new IslScheduleOptimizer(); 641 } 642 643 INITIALIZE_PASS_BEGIN(IslScheduleOptimizer, "polly-opt-isl", 644 "Polly - Optimize schedule of SCoP", false, false); 645 INITIALIZE_PASS_DEPENDENCY(DependenceInfo); 646 INITIALIZE_PASS_DEPENDENCY(ScopInfo); 647 INITIALIZE_PASS_END(IslScheduleOptimizer, "polly-opt-isl", 648 "Polly - Optimize schedule of SCoP", false, false) 649