1 //===------ DeLICM.cpp -----------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Undo the effect of Loop Invariant Code Motion (LICM) and 10 // GVN Partial Redundancy Elimination (PRE) on SCoP-level. 11 // 12 // Namely, remove register/scalar dependencies by mapping them back to array 13 // elements. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "polly/DeLICM.h" 18 #include "polly/LinkAllPasses.h" 19 #include "polly/Options.h" 20 #include "polly/ScopInfo.h" 21 #include "polly/ScopPass.h" 22 #include "polly/Support/GICHelper.h" 23 #include "polly/Support/ISLOStream.h" 24 #include "polly/Support/ISLTools.h" 25 #include "polly/ZoneAlgo.h" 26 #include "llvm/ADT/Statistic.h" 27 #include "llvm/InitializePasses.h" 28 29 #define DEBUG_TYPE "polly-delicm" 30 31 using namespace polly; 32 using namespace llvm; 33 34 namespace { 35 36 cl::opt<int> 37 DelicmMaxOps("polly-delicm-max-ops", 38 cl::desc("Maximum number of isl operations to invest for " 39 "lifetime analysis; 0=no limit"), 40 cl::init(1000000), cl::cat(PollyCategory)); 41 42 cl::opt<bool> DelicmOverapproximateWrites( 43 "polly-delicm-overapproximate-writes", 44 cl::desc( 45 "Do more PHI writes than necessary in order to avoid partial accesses"), 46 cl::init(false), cl::Hidden, cl::cat(PollyCategory)); 47 48 cl::opt<bool> DelicmPartialWrites("polly-delicm-partial-writes", 49 cl::desc("Allow partial writes"), 50 cl::init(true), cl::Hidden, 51 cl::cat(PollyCategory)); 52 53 cl::opt<bool> 54 DelicmComputeKnown("polly-delicm-compute-known", 55 cl::desc("Compute known content of array elements"), 56 cl::init(true), cl::Hidden, cl::cat(PollyCategory)); 57 58 STATISTIC(DeLICMAnalyzed, "Number of successfully analyzed SCoPs"); 59 STATISTIC(DeLICMOutOfQuota, 60 "Analyses aborted because max_operations was reached"); 61 STATISTIC(MappedValueScalars, "Number of mapped Value scalars"); 62 STATISTIC(MappedPHIScalars, "Number of mapped PHI scalars"); 63 STATISTIC(TargetsMapped, "Number of stores used for at least one mapping"); 64 STATISTIC(DeLICMScopsModified, "Number of SCoPs optimized"); 65 66 STATISTIC(NumValueWrites, "Number of scalar value writes after DeLICM"); 67 STATISTIC(NumValueWritesInLoops, 68 "Number of scalar value writes nested in affine loops after DeLICM"); 69 STATISTIC(NumPHIWrites, "Number of scalar phi writes after DeLICM"); 70 STATISTIC(NumPHIWritesInLoops, 71 "Number of scalar phi writes nested in affine loops after DeLICM"); 72 STATISTIC(NumSingletonWrites, "Number of singleton writes after DeLICM"); 73 STATISTIC(NumSingletonWritesInLoops, 74 "Number of singleton writes nested in affine loops after DeLICM"); 75 76 isl::union_map computeReachingOverwrite(isl::union_map Schedule, 77 isl::union_map Writes, 78 bool InclPrevWrite, 79 bool InclOverwrite) { 80 return computeReachingWrite(Schedule, Writes, true, InclPrevWrite, 81 InclOverwrite); 82 } 83 84 /// Compute the next overwrite for a scalar. 85 /// 86 /// @param Schedule { DomainWrite[] -> Scatter[] } 87 /// Schedule of (at least) all writes. Instances not in @p 88 /// Writes are ignored. 89 /// @param Writes { DomainWrite[] } 90 /// The element instances that write to the scalar. 91 /// @param InclPrevWrite Whether to extend the timepoints to include 92 /// the timepoint where the previous write happens. 93 /// @param InclOverwrite Whether the reaching overwrite includes the timepoint 94 /// of the overwrite itself. 95 /// 96 /// @return { Scatter[] -> DomainDef[] } 97 isl::union_map computeScalarReachingOverwrite(isl::union_map Schedule, 98 isl::union_set Writes, 99 bool InclPrevWrite, 100 bool InclOverwrite) { 101 102 // { DomainWrite[] } 103 auto WritesMap = isl::union_map::from_domain(Writes); 104 105 // { [Element[] -> Scatter[]] -> DomainWrite[] } 106 auto Result = computeReachingOverwrite( 107 std::move(Schedule), std::move(WritesMap), InclPrevWrite, InclOverwrite); 108 109 return Result.domain_factor_range(); 110 } 111 112 /// Overload of computeScalarReachingOverwrite, with only one writing statement. 113 /// Consequently, the result consists of only one map space. 114 /// 115 /// @param Schedule { DomainWrite[] -> Scatter[] } 116 /// @param Writes { DomainWrite[] } 117 /// @param InclPrevWrite Include the previous write to result. 118 /// @param InclOverwrite Include the overwrite to the result. 119 /// 120 /// @return { Scatter[] -> DomainWrite[] } 121 isl::map computeScalarReachingOverwrite(isl::union_map Schedule, 122 isl::set Writes, bool InclPrevWrite, 123 bool InclOverwrite) { 124 isl::space ScatterSpace = getScatterSpace(Schedule); 125 isl::space DomSpace = Writes.get_space(); 126 127 isl::union_map ReachOverwrite = computeScalarReachingOverwrite( 128 Schedule, isl::union_set(Writes), InclPrevWrite, InclOverwrite); 129 130 isl::space ResultSpace = ScatterSpace.map_from_domain_and_range(DomSpace); 131 return singleton(std::move(ReachOverwrite), ResultSpace); 132 } 133 134 /// Try to find a 'natural' extension of a mapped to elements outside its 135 /// domain. 136 /// 137 /// @param Relevant The map with mapping that may not be modified. 138 /// @param Universe The domain to which @p Relevant needs to be extended. 139 /// 140 /// @return A map with that associates the domain elements of @p Relevant to the 141 /// same elements and in addition the elements of @p Universe to some 142 /// undefined elements. The function prefers to return simple maps. 143 isl::union_map expandMapping(isl::union_map Relevant, isl::union_set Universe) { 144 Relevant = Relevant.coalesce(); 145 isl::union_set RelevantDomain = Relevant.domain(); 146 isl::union_map Simplified = Relevant.gist_domain(RelevantDomain); 147 Simplified = Simplified.coalesce(); 148 return Simplified.intersect_domain(Universe); 149 } 150 151 /// Represent the knowledge of the contents of any array elements in any zone or 152 /// the knowledge we would add when mapping a scalar to an array element. 153 /// 154 /// Every array element at every zone unit has one of two states: 155 /// 156 /// - Unused: Not occupied by any value so a transformation can change it to 157 /// other values. 158 /// 159 /// - Occupied: The element contains a value that is still needed. 160 /// 161 /// The union of Unused and Unknown zones forms the universe, the set of all 162 /// elements at every timepoint. The universe can easily be derived from the 163 /// array elements that are accessed someway. Arrays that are never accessed 164 /// also never play a role in any computation and can hence be ignored. With a 165 /// given universe, only one of the sets needs to stored implicitly. Computing 166 /// the complement is also an expensive operation, hence this class has been 167 /// designed that only one of sets is needed while the other is assumed to be 168 /// implicit. It can still be given, but is mostly ignored. 169 /// 170 /// There are two use cases for the Knowledge class: 171 /// 172 /// 1) To represent the knowledge of the current state of ScopInfo. The unused 173 /// state means that an element is currently unused: there is no read of it 174 /// before the next overwrite. Also called 'Existing'. 175 /// 176 /// 2) To represent the requirements for mapping a scalar to array elements. The 177 /// unused state means that there is no change/requirement. Also called 178 /// 'Proposed'. 179 /// 180 /// In addition to these states at unit zones, Knowledge needs to know when 181 /// values are written. This is because written values may have no lifetime (one 182 /// reason is that the value is never read). Such writes would therefore never 183 /// conflict, but overwrite values that might still be required. Another source 184 /// of problems are multiple writes to the same element at the same timepoint, 185 /// because their order is undefined. 186 class Knowledge final { 187 private: 188 /// { [Element[] -> Zone[]] } 189 /// Set of array elements and when they are alive. 190 /// Can contain a nullptr; in this case the set is implicitly defined as the 191 /// complement of #Unused. 192 /// 193 /// The set of alive array elements is represented as zone, as the set of live 194 /// values can differ depending on how the elements are interpreted. 195 /// Assuming a value X is written at timestep [0] and read at timestep [1] 196 /// without being used at any later point, then the value is alive in the 197 /// interval ]0,1[. This interval cannot be represented by an integer set, as 198 /// it does not contain any integer point. Zones allow us to represent this 199 /// interval and can be converted to sets of timepoints when needed (e.g., in 200 /// isConflicting when comparing to the write sets). 201 /// @see convertZoneToTimepoints and this file's comment for more details. 202 isl::union_set Occupied; 203 204 /// { [Element[] -> Zone[]] } 205 /// Set of array elements when they are not alive, i.e. their memory can be 206 /// used for other purposed. Can contain a nullptr; in this case the set is 207 /// implicitly defined as the complement of #Occupied. 208 isl::union_set Unused; 209 210 /// { [Element[] -> Zone[]] -> ValInst[] } 211 /// Maps to the known content for each array element at any interval. 212 /// 213 /// Any element/interval can map to multiple known elements. This is due to 214 /// multiple llvm::Value referring to the same content. Examples are 215 /// 216 /// - A value stored and loaded again. The LoadInst represents the same value 217 /// as the StoreInst's value operand. 218 /// 219 /// - A PHINode is equal to any one of the incoming values. In case of 220 /// LCSSA-form, it is always equal to its single incoming value. 221 /// 222 /// Two Knowledges are considered not conflicting if at least one of the known 223 /// values match. Not known values are not stored as an unnamed tuple (as 224 /// #Written does), but maps to nothing. 225 /// 226 /// Known values are usually just defined for #Occupied elements. Knowing 227 /// #Unused contents has no advantage as it can be overwritten. 228 isl::union_map Known; 229 230 /// { [Element[] -> Scatter[]] -> ValInst[] } 231 /// The write actions currently in the scop or that would be added when 232 /// mapping a scalar. Maps to the value that is written. 233 /// 234 /// Written values that cannot be identified are represented by an unknown 235 /// ValInst[] (an unnamed tuple of 0 dimension). It conflicts with itself. 236 isl::union_map Written; 237 238 /// Check whether this Knowledge object is well-formed. 239 void checkConsistency() const { 240 #ifndef NDEBUG 241 // Default-initialized object 242 if (Occupied.is_null() && Unused.is_null() && Known.is_null() && 243 Written.is_null()) 244 return; 245 246 assert(!Occupied.is_null() || !Unused.is_null()); 247 assert(!Known.is_null()); 248 assert(!Written.is_null()); 249 250 // If not all fields are defined, we cannot derived the universe. 251 if (Occupied.is_null() || Unused.is_null()) 252 return; 253 254 assert(Occupied.is_disjoint(Unused)); 255 auto Universe = Occupied.unite(Unused); 256 257 assert(!Known.domain().is_subset(Universe).is_false()); 258 assert(!Written.domain().is_subset(Universe).is_false()); 259 #endif 260 } 261 262 public: 263 /// Initialize a nullptr-Knowledge. This is only provided for convenience; do 264 /// not use such an object. 265 Knowledge() {} 266 267 /// Create a new object with the given members. 268 Knowledge(isl::union_set Occupied, isl::union_set Unused, 269 isl::union_map Known, isl::union_map Written) 270 : Occupied(std::move(Occupied)), Unused(std::move(Unused)), 271 Known(std::move(Known)), Written(std::move(Written)) { 272 checkConsistency(); 273 } 274 275 /// Return whether this object was not default-constructed. 276 bool isUsable() const { 277 return (Occupied.is_null() || Unused.is_null()) && !Known.is_null() && 278 !Written.is_null(); 279 } 280 281 /// Print the content of this object to @p OS. 282 void print(llvm::raw_ostream &OS, unsigned Indent = 0) const { 283 if (isUsable()) { 284 if (!Occupied.is_null()) 285 OS.indent(Indent) << "Occupied: " << Occupied << "\n"; 286 else 287 OS.indent(Indent) << "Occupied: <Everything else not in Unused>\n"; 288 if (!Unused.is_null()) 289 OS.indent(Indent) << "Unused: " << Unused << "\n"; 290 else 291 OS.indent(Indent) << "Unused: <Everything else not in Occupied>\n"; 292 OS.indent(Indent) << "Known: " << Known << "\n"; 293 OS.indent(Indent) << "Written : " << Written << '\n'; 294 } else { 295 OS.indent(Indent) << "Invalid knowledge\n"; 296 } 297 } 298 299 /// Combine two knowledges, this and @p That. 300 void learnFrom(Knowledge That) { 301 assert(!isConflicting(*this, That)); 302 assert(!Unused.is_null() && !That.Occupied.is_null()); 303 assert( 304 That.Unused.is_null() && 305 "This function is only prepared to learn occupied elements from That"); 306 assert(Occupied.is_null() && "This function does not implement " 307 "`this->Occupied = " 308 "this->Occupied.unite(That.Occupied);`"); 309 310 Unused = Unused.subtract(That.Occupied); 311 Known = Known.unite(That.Known); 312 Written = Written.unite(That.Written); 313 314 checkConsistency(); 315 } 316 317 /// Determine whether two Knowledges conflict with each other. 318 /// 319 /// In theory @p Existing and @p Proposed are symmetric, but the 320 /// implementation is constrained by the implicit interpretation. That is, @p 321 /// Existing must have #Unused defined (use case 1) and @p Proposed must have 322 /// #Occupied defined (use case 1). 323 /// 324 /// A conflict is defined as non-preserved semantics when they are merged. For 325 /// instance, when for the same array and zone they assume different 326 /// llvm::Values. 327 /// 328 /// @param Existing One of the knowledges with #Unused defined. 329 /// @param Proposed One of the knowledges with #Occupied defined. 330 /// @param OS Dump the conflict reason to this output stream; use 331 /// nullptr to not output anything. 332 /// @param Indent Indention for the conflict reason. 333 /// 334 /// @return True, iff the two knowledges are conflicting. 335 static bool isConflicting(const Knowledge &Existing, 336 const Knowledge &Proposed, 337 llvm::raw_ostream *OS = nullptr, 338 unsigned Indent = 0) { 339 assert(!Existing.Unused.is_null()); 340 assert(!Proposed.Occupied.is_null()); 341 342 #ifndef NDEBUG 343 if (!Existing.Occupied.is_null() && !Proposed.Unused.is_null()) { 344 auto ExistingUniverse = Existing.Occupied.unite(Existing.Unused); 345 auto ProposedUniverse = Proposed.Occupied.unite(Proposed.Unused); 346 assert(ExistingUniverse.is_equal(ProposedUniverse) && 347 "Both inputs' Knowledges must be over the same universe"); 348 } 349 #endif 350 351 // Do the Existing and Proposed lifetimes conflict? 352 // 353 // Lifetimes are described as the cross-product of array elements and zone 354 // intervals in which they are alive (the space { [Element[] -> Zone[]] }). 355 // In the following we call this "element/lifetime interval". 356 // 357 // In order to not conflict, one of the following conditions must apply for 358 // each element/lifetime interval: 359 // 360 // 1. If occupied in one of the knowledges, it is unused in the other. 361 // 362 // - or - 363 // 364 // 2. Both contain the same value. 365 // 366 // Instead of partitioning the element/lifetime intervals into a part that 367 // both Knowledges occupy (which requires an expensive subtraction) and for 368 // these to check whether they are known to be the same value, we check only 369 // the second condition and ensure that it also applies when then first 370 // condition is true. This is done by adding a wildcard value to 371 // Proposed.Known and Existing.Unused such that they match as a common known 372 // value. We use the "unknown ValInst" for this purpose. Every 373 // Existing.Unused may match with an unknown Proposed.Occupied because these 374 // never are in conflict with each other. 375 auto ProposedOccupiedAnyVal = makeUnknownForDomain(Proposed.Occupied); 376 auto ProposedValues = Proposed.Known.unite(ProposedOccupiedAnyVal); 377 378 auto ExistingUnusedAnyVal = makeUnknownForDomain(Existing.Unused); 379 auto ExistingValues = Existing.Known.unite(ExistingUnusedAnyVal); 380 381 auto MatchingVals = ExistingValues.intersect(ProposedValues); 382 auto Matches = MatchingVals.domain(); 383 384 // Any Proposed.Occupied must either have a match between the known values 385 // of Existing and Occupied, or be in Existing.Unused. In the latter case, 386 // the previously added "AnyVal" will match each other. 387 if (!Proposed.Occupied.is_subset(Matches)) { 388 if (OS) { 389 auto Conflicting = Proposed.Occupied.subtract(Matches); 390 auto ExistingConflictingKnown = 391 Existing.Known.intersect_domain(Conflicting); 392 auto ProposedConflictingKnown = 393 Proposed.Known.intersect_domain(Conflicting); 394 395 OS->indent(Indent) << "Proposed lifetime conflicting with Existing's\n"; 396 OS->indent(Indent) << "Conflicting occupied: " << Conflicting << "\n"; 397 if (!ExistingConflictingKnown.is_empty()) 398 OS->indent(Indent) 399 << "Existing Known: " << ExistingConflictingKnown << "\n"; 400 if (!ProposedConflictingKnown.is_empty()) 401 OS->indent(Indent) 402 << "Proposed Known: " << ProposedConflictingKnown << "\n"; 403 } 404 return true; 405 } 406 407 // Do the writes in Existing conflict with occupied values in Proposed? 408 // 409 // In order to not conflict, it must either write to unused lifetime or 410 // write the same value. To check, we remove the writes that write into 411 // Proposed.Unused (they never conflict) and then see whether the written 412 // value is already in Proposed.Known. If there are multiple known values 413 // and a written value is known under different names, it is enough when one 414 // of the written values (assuming that they are the same value under 415 // different names, e.g. a PHINode and one of the incoming values) matches 416 // one of the known names. 417 // 418 // We convert here the set of lifetimes to actual timepoints. A lifetime is 419 // in conflict with a set of write timepoints, if either a live timepoint is 420 // clearly within the lifetime or if a write happens at the beginning of the 421 // lifetime (where it would conflict with the value that actually writes the 422 // value alive). There is no conflict at the end of a lifetime, as the alive 423 // value will always be read, before it is overwritten again. The last 424 // property holds in Polly for all scalar values and we expect all users of 425 // Knowledge to check this property also for accesses to MemoryKind::Array. 426 auto ProposedFixedDefs = 427 convertZoneToTimepoints(Proposed.Occupied, true, false); 428 auto ProposedFixedKnown = 429 convertZoneToTimepoints(Proposed.Known, isl::dim::in, true, false); 430 431 auto ExistingConflictingWrites = 432 Existing.Written.intersect_domain(ProposedFixedDefs); 433 auto ExistingConflictingWritesDomain = ExistingConflictingWrites.domain(); 434 435 auto CommonWrittenVal = 436 ProposedFixedKnown.intersect(ExistingConflictingWrites); 437 auto CommonWrittenValDomain = CommonWrittenVal.domain(); 438 439 if (!ExistingConflictingWritesDomain.is_subset(CommonWrittenValDomain)) { 440 if (OS) { 441 auto ExistingConflictingWritten = 442 ExistingConflictingWrites.subtract_domain(CommonWrittenValDomain); 443 auto ProposedConflictingKnown = ProposedFixedKnown.subtract_domain( 444 ExistingConflictingWritten.domain()); 445 446 OS->indent(Indent) 447 << "Proposed a lifetime where there is an Existing write into it\n"; 448 OS->indent(Indent) << "Existing conflicting writes: " 449 << ExistingConflictingWritten << "\n"; 450 if (!ProposedConflictingKnown.is_empty()) 451 OS->indent(Indent) 452 << "Proposed conflicting known: " << ProposedConflictingKnown 453 << "\n"; 454 } 455 return true; 456 } 457 458 // Do the writes in Proposed conflict with occupied values in Existing? 459 auto ExistingAvailableDefs = 460 convertZoneToTimepoints(Existing.Unused, true, false); 461 auto ExistingKnownDefs = 462 convertZoneToTimepoints(Existing.Known, isl::dim::in, true, false); 463 464 auto ProposedWrittenDomain = Proposed.Written.domain(); 465 auto KnownIdentical = ExistingKnownDefs.intersect(Proposed.Written); 466 auto IdenticalOrUnused = 467 ExistingAvailableDefs.unite(KnownIdentical.domain()); 468 if (!ProposedWrittenDomain.is_subset(IdenticalOrUnused)) { 469 if (OS) { 470 auto Conflicting = ProposedWrittenDomain.subtract(IdenticalOrUnused); 471 auto ExistingConflictingKnown = 472 ExistingKnownDefs.intersect_domain(Conflicting); 473 auto ProposedConflictingWritten = 474 Proposed.Written.intersect_domain(Conflicting); 475 476 OS->indent(Indent) << "Proposed writes into range used by Existing\n"; 477 OS->indent(Indent) << "Proposed conflicting writes: " 478 << ProposedConflictingWritten << "\n"; 479 if (!ExistingConflictingKnown.is_empty()) 480 OS->indent(Indent) 481 << "Existing conflicting known: " << ExistingConflictingKnown 482 << "\n"; 483 } 484 return true; 485 } 486 487 // Does Proposed write at the same time as Existing already does (order of 488 // writes is undefined)? Writing the same value is permitted. 489 auto ExistingWrittenDomain = Existing.Written.domain(); 490 auto BothWritten = 491 Existing.Written.domain().intersect(Proposed.Written.domain()); 492 auto ExistingKnownWritten = filterKnownValInst(Existing.Written); 493 auto ProposedKnownWritten = filterKnownValInst(Proposed.Written); 494 auto CommonWritten = 495 ExistingKnownWritten.intersect(ProposedKnownWritten).domain(); 496 497 if (!BothWritten.is_subset(CommonWritten)) { 498 if (OS) { 499 auto Conflicting = BothWritten.subtract(CommonWritten); 500 auto ExistingConflictingWritten = 501 Existing.Written.intersect_domain(Conflicting); 502 auto ProposedConflictingWritten = 503 Proposed.Written.intersect_domain(Conflicting); 504 505 OS->indent(Indent) << "Proposed writes at the same time as an already " 506 "Existing write\n"; 507 OS->indent(Indent) << "Conflicting writes: " << Conflicting << "\n"; 508 if (!ExistingConflictingWritten.is_empty()) 509 OS->indent(Indent) 510 << "Exiting write: " << ExistingConflictingWritten << "\n"; 511 if (!ProposedConflictingWritten.is_empty()) 512 OS->indent(Indent) 513 << "Proposed write: " << ProposedConflictingWritten << "\n"; 514 } 515 return true; 516 } 517 518 return false; 519 } 520 }; 521 522 /// Implementation of the DeLICM/DePRE transformation. 523 class DeLICMImpl final : public ZoneAlgorithm { 524 private: 525 /// Knowledge before any transformation took place. 526 Knowledge OriginalZone; 527 528 /// Current knowledge of the SCoP including all already applied 529 /// transformations. 530 Knowledge Zone; 531 532 /// Number of StoreInsts something can be mapped to. 533 int NumberOfCompatibleTargets = 0; 534 535 /// The number of StoreInsts to which at least one value or PHI has been 536 /// mapped to. 537 int NumberOfTargetsMapped = 0; 538 539 /// The number of llvm::Value mapped to some array element. 540 int NumberOfMappedValueScalars = 0; 541 542 /// The number of PHIs mapped to some array element. 543 int NumberOfMappedPHIScalars = 0; 544 545 /// Determine whether two knowledges are conflicting with each other. 546 /// 547 /// @see Knowledge::isConflicting 548 bool isConflicting(const Knowledge &Proposed) { 549 raw_ostream *OS = nullptr; 550 LLVM_DEBUG(OS = &llvm::dbgs()); 551 return Knowledge::isConflicting(Zone, Proposed, OS, 4); 552 } 553 554 /// Determine whether @p SAI is a scalar that can be mapped to an array 555 /// element. 556 bool isMappable(const ScopArrayInfo *SAI) { 557 assert(SAI); 558 559 if (SAI->isValueKind()) { 560 auto *MA = S->getValueDef(SAI); 561 if (!MA) { 562 LLVM_DEBUG( 563 dbgs() 564 << " Reject because value is read-only within the scop\n"); 565 return false; 566 } 567 568 // Mapping if value is used after scop is not supported. The code 569 // generator would need to reload the scalar after the scop, but it 570 // does not have the information to where it is mapped to. Only the 571 // MemoryAccesses have that information, not the ScopArrayInfo. 572 auto Inst = MA->getAccessInstruction(); 573 for (auto User : Inst->users()) { 574 if (!isa<Instruction>(User)) 575 return false; 576 auto UserInst = cast<Instruction>(User); 577 578 if (!S->contains(UserInst)) { 579 LLVM_DEBUG(dbgs() << " Reject because value is escaping\n"); 580 return false; 581 } 582 } 583 584 return true; 585 } 586 587 if (SAI->isPHIKind()) { 588 auto *MA = S->getPHIRead(SAI); 589 assert(MA); 590 591 // Mapping of an incoming block from before the SCoP is not supported by 592 // the code generator. 593 auto PHI = cast<PHINode>(MA->getAccessInstruction()); 594 for (auto Incoming : PHI->blocks()) { 595 if (!S->contains(Incoming)) { 596 LLVM_DEBUG(dbgs() 597 << " Reject because at least one incoming block is " 598 "not in the scop region\n"); 599 return false; 600 } 601 } 602 603 return true; 604 } 605 606 LLVM_DEBUG(dbgs() << " Reject ExitPHI or other non-value\n"); 607 return false; 608 } 609 610 /// Compute the uses of a MemoryKind::Value and its lifetime (from its 611 /// definition to the last use). 612 /// 613 /// @param SAI The ScopArrayInfo representing the value's storage. 614 /// 615 /// @return { DomainDef[] -> DomainUse[] }, { DomainDef[] -> Zone[] } 616 /// First element is the set of uses for each definition. 617 /// The second is the lifetime of each definition. 618 std::tuple<isl::union_map, isl::map> 619 computeValueUses(const ScopArrayInfo *SAI) { 620 assert(SAI->isValueKind()); 621 622 // { DomainRead[] } 623 auto Reads = makeEmptyUnionSet(); 624 625 // Find all uses. 626 for (auto *MA : S->getValueUses(SAI)) 627 Reads = Reads.unite(getDomainFor(MA)); 628 629 // { DomainRead[] -> Scatter[] } 630 auto ReadSchedule = getScatterFor(Reads); 631 632 auto *DefMA = S->getValueDef(SAI); 633 assert(DefMA); 634 635 // { DomainDef[] } 636 auto Writes = getDomainFor(DefMA); 637 638 // { DomainDef[] -> Scatter[] } 639 auto WriteScatter = getScatterFor(Writes); 640 641 // { Scatter[] -> DomainDef[] } 642 auto ReachDef = getScalarReachingDefinition(DefMA->getStatement()); 643 644 // { [DomainDef[] -> Scatter[]] -> DomainUse[] } 645 auto Uses = isl::union_map(ReachDef.reverse().range_map()) 646 .apply_range(ReadSchedule.reverse()); 647 648 // { DomainDef[] -> Scatter[] } 649 auto UseScatter = 650 singleton(Uses.domain().unwrap(), 651 Writes.get_space().map_from_domain_and_range(ScatterSpace)); 652 653 // { DomainDef[] -> Zone[] } 654 auto Lifetime = betweenScatter(WriteScatter, UseScatter, false, true); 655 656 // { DomainDef[] -> DomainRead[] } 657 auto DefUses = Uses.domain_factor_domain(); 658 659 return std::make_pair(DefUses, Lifetime); 660 } 661 662 /// Try to map a MemoryKind::Value to a given array element. 663 /// 664 /// @param SAI Representation of the scalar's memory to map. 665 /// @param TargetElt { Scatter[] -> Element[] } 666 /// Suggestion where to map a scalar to when at a timepoint. 667 /// 668 /// @return true if the scalar was successfully mapped. 669 bool tryMapValue(const ScopArrayInfo *SAI, isl::map TargetElt) { 670 assert(SAI->isValueKind()); 671 672 auto *DefMA = S->getValueDef(SAI); 673 assert(DefMA->isValueKind()); 674 assert(DefMA->isMustWrite()); 675 auto *V = DefMA->getAccessValue(); 676 auto *DefInst = DefMA->getAccessInstruction(); 677 678 // Stop if the scalar has already been mapped. 679 if (!DefMA->getLatestScopArrayInfo()->isValueKind()) 680 return false; 681 682 // { DomainDef[] -> Scatter[] } 683 auto DefSched = getScatterFor(DefMA); 684 685 // Where each write is mapped to, according to the suggestion. 686 // { DomainDef[] -> Element[] } 687 auto DefTarget = TargetElt.apply_domain(DefSched.reverse()); 688 simplify(DefTarget); 689 LLVM_DEBUG(dbgs() << " Def Mapping: " << DefTarget << '\n'); 690 691 auto OrigDomain = getDomainFor(DefMA); 692 auto MappedDomain = DefTarget.domain(); 693 if (!OrigDomain.is_subset(MappedDomain)) { 694 LLVM_DEBUG( 695 dbgs() 696 << " Reject because mapping does not encompass all instances\n"); 697 return false; 698 } 699 700 // { DomainDef[] -> Zone[] } 701 isl::map Lifetime; 702 703 // { DomainDef[] -> DomainUse[] } 704 isl::union_map DefUses; 705 706 std::tie(DefUses, Lifetime) = computeValueUses(SAI); 707 LLVM_DEBUG(dbgs() << " Lifetime: " << Lifetime << '\n'); 708 709 /// { [Element[] -> Zone[]] } 710 auto EltZone = Lifetime.apply_domain(DefTarget).wrap(); 711 simplify(EltZone); 712 713 // When known knowledge is disabled, just return the unknown value. It will 714 // either get filtered out or conflict with itself. 715 // { DomainDef[] -> ValInst[] } 716 isl::map ValInst; 717 if (DelicmComputeKnown) 718 ValInst = makeValInst(V, DefMA->getStatement(), 719 LI->getLoopFor(DefInst->getParent())); 720 else 721 ValInst = makeUnknownForDomain(DefMA->getStatement()); 722 723 // { DomainDef[] -> [Element[] -> Zone[]] } 724 auto EltKnownTranslator = DefTarget.range_product(Lifetime); 725 726 // { [Element[] -> Zone[]] -> ValInst[] } 727 auto EltKnown = ValInst.apply_domain(EltKnownTranslator); 728 simplify(EltKnown); 729 730 // { DomainDef[] -> [Element[] -> Scatter[]] } 731 auto WrittenTranslator = DefTarget.range_product(DefSched); 732 733 // { [Element[] -> Scatter[]] -> ValInst[] } 734 auto DefEltSched = ValInst.apply_domain(WrittenTranslator); 735 simplify(DefEltSched); 736 737 Knowledge Proposed(EltZone, {}, filterKnownValInst(EltKnown), DefEltSched); 738 if (isConflicting(Proposed)) 739 return false; 740 741 // { DomainUse[] -> Element[] } 742 auto UseTarget = DefUses.reverse().apply_range(DefTarget); 743 744 mapValue(SAI, std::move(DefTarget), std::move(UseTarget), 745 std::move(Lifetime), std::move(Proposed)); 746 return true; 747 } 748 749 /// After a scalar has been mapped, update the global knowledge. 750 void applyLifetime(Knowledge Proposed) { 751 Zone.learnFrom(std::move(Proposed)); 752 } 753 754 /// Map a MemoryKind::Value scalar to an array element. 755 /// 756 /// Callers must have ensured that the mapping is valid and not conflicting. 757 /// 758 /// @param SAI The ScopArrayInfo representing the scalar's memory to 759 /// map. 760 /// @param DefTarget { DomainDef[] -> Element[] } 761 /// The array element to map the scalar to. 762 /// @param UseTarget { DomainUse[] -> Element[] } 763 /// The array elements the uses are mapped to. 764 /// @param Lifetime { DomainDef[] -> Zone[] } 765 /// The lifetime of each llvm::Value definition for 766 /// reporting. 767 /// @param Proposed Mapping constraints for reporting. 768 void mapValue(const ScopArrayInfo *SAI, isl::map DefTarget, 769 isl::union_map UseTarget, isl::map Lifetime, 770 Knowledge Proposed) { 771 // Redirect the read accesses. 772 for (auto *MA : S->getValueUses(SAI)) { 773 // { DomainUse[] } 774 auto Domain = getDomainFor(MA); 775 776 // { DomainUse[] -> Element[] } 777 auto NewAccRel = UseTarget.intersect_domain(Domain); 778 simplify(NewAccRel); 779 780 assert(isl_union_map_n_map(NewAccRel.get()) == 1); 781 MA->setNewAccessRelation(isl::map::from_union_map(NewAccRel)); 782 } 783 784 auto *WA = S->getValueDef(SAI); 785 WA->setNewAccessRelation(DefTarget); 786 applyLifetime(Proposed); 787 788 MappedValueScalars++; 789 NumberOfMappedValueScalars += 1; 790 } 791 792 isl::map makeValInst(Value *Val, ScopStmt *UserStmt, Loop *Scope, 793 bool IsCertain = true) { 794 // When known knowledge is disabled, just return the unknown value. It will 795 // either get filtered out or conflict with itself. 796 if (!DelicmComputeKnown) 797 return makeUnknownForDomain(UserStmt); 798 return ZoneAlgorithm::makeValInst(Val, UserStmt, Scope, IsCertain); 799 } 800 801 /// Express the incoming values of a PHI for each incoming statement in an 802 /// isl::union_map. 803 /// 804 /// @param SAI The PHI scalar represented by a ScopArrayInfo. 805 /// 806 /// @return { PHIWriteDomain[] -> ValInst[] } 807 isl::union_map determinePHIWrittenValues(const ScopArrayInfo *SAI) { 808 auto Result = makeEmptyUnionMap(); 809 810 // Collect the incoming values. 811 for (auto *MA : S->getPHIIncomings(SAI)) { 812 // { DomainWrite[] -> ValInst[] } 813 isl::union_map ValInst; 814 auto *WriteStmt = MA->getStatement(); 815 816 auto Incoming = MA->getIncoming(); 817 assert(!Incoming.empty()); 818 if (Incoming.size() == 1) { 819 ValInst = makeValInst(Incoming[0].second, WriteStmt, 820 LI->getLoopFor(Incoming[0].first)); 821 } else { 822 // If the PHI is in a subregion's exit node it can have multiple 823 // incoming values (+ maybe another incoming edge from an unrelated 824 // block). We cannot directly represent it as a single llvm::Value. 825 // We currently model it as unknown value, but modeling as the PHIInst 826 // itself could be OK, too. 827 ValInst = makeUnknownForDomain(WriteStmt); 828 } 829 830 Result = Result.unite(ValInst); 831 } 832 833 assert(Result.is_single_valued() && 834 "Cannot have multiple incoming values for same incoming statement"); 835 return Result; 836 } 837 838 /// Try to map a MemoryKind::PHI scalar to a given array element. 839 /// 840 /// @param SAI Representation of the scalar's memory to map. 841 /// @param TargetElt { Scatter[] -> Element[] } 842 /// Suggestion where to map the scalar to when at a 843 /// timepoint. 844 /// 845 /// @return true if the PHI scalar has been mapped. 846 bool tryMapPHI(const ScopArrayInfo *SAI, isl::map TargetElt) { 847 auto *PHIRead = S->getPHIRead(SAI); 848 assert(PHIRead->isPHIKind()); 849 assert(PHIRead->isRead()); 850 851 // Skip if already been mapped. 852 if (!PHIRead->getLatestScopArrayInfo()->isPHIKind()) 853 return false; 854 855 // { DomainRead[] -> Scatter[] } 856 auto PHISched = getScatterFor(PHIRead); 857 858 // { DomainRead[] -> Element[] } 859 auto PHITarget = PHISched.apply_range(TargetElt); 860 simplify(PHITarget); 861 LLVM_DEBUG(dbgs() << " Mapping: " << PHITarget << '\n'); 862 863 auto OrigDomain = getDomainFor(PHIRead); 864 auto MappedDomain = PHITarget.domain(); 865 if (!OrigDomain.is_subset(MappedDomain)) { 866 LLVM_DEBUG( 867 dbgs() 868 << " Reject because mapping does not encompass all instances\n"); 869 return false; 870 } 871 872 // { DomainRead[] -> DomainWrite[] } 873 auto PerPHIWrites = computePerPHI(SAI); 874 if (PerPHIWrites.is_null()) { 875 LLVM_DEBUG( 876 dbgs() << " Reject because cannot determine incoming values\n"); 877 return false; 878 } 879 880 // { DomainWrite[] -> Element[] } 881 auto WritesTarget = PerPHIWrites.apply_domain(PHITarget).reverse(); 882 simplify(WritesTarget); 883 884 // { DomainWrite[] } 885 auto UniverseWritesDom = isl::union_set::empty(ParamSpace.ctx()); 886 887 for (auto *MA : S->getPHIIncomings(SAI)) 888 UniverseWritesDom = UniverseWritesDom.unite(getDomainFor(MA)); 889 890 auto RelevantWritesTarget = WritesTarget; 891 if (DelicmOverapproximateWrites) 892 WritesTarget = expandMapping(WritesTarget, UniverseWritesDom); 893 894 auto ExpandedWritesDom = WritesTarget.domain(); 895 if (!DelicmPartialWrites && 896 !UniverseWritesDom.is_subset(ExpandedWritesDom)) { 897 LLVM_DEBUG( 898 dbgs() << " Reject because did not find PHI write mapping for " 899 "all instances\n"); 900 if (DelicmOverapproximateWrites) 901 LLVM_DEBUG(dbgs() << " Relevant Mapping: " 902 << RelevantWritesTarget << '\n'); 903 LLVM_DEBUG(dbgs() << " Deduced Mapping: " << WritesTarget 904 << '\n'); 905 LLVM_DEBUG(dbgs() << " Missing instances: " 906 << UniverseWritesDom.subtract(ExpandedWritesDom) 907 << '\n'); 908 return false; 909 } 910 911 // { DomainRead[] -> Scatter[] } 912 isl::union_map PerPHIWriteScatterUmap = PerPHIWrites.apply_range(Schedule); 913 isl::map PerPHIWriteScatter = 914 singleton(PerPHIWriteScatterUmap, PHISched.get_space()); 915 916 // { DomainRead[] -> Zone[] } 917 auto Lifetime = betweenScatter(PerPHIWriteScatter, PHISched, false, true); 918 simplify(Lifetime); 919 LLVM_DEBUG(dbgs() << " Lifetime: " << Lifetime << "\n"); 920 921 // { DomainWrite[] -> Zone[] } 922 auto WriteLifetime = isl::union_map(Lifetime).apply_domain(PerPHIWrites); 923 924 // { DomainWrite[] -> ValInst[] } 925 auto WrittenValue = determinePHIWrittenValues(SAI); 926 927 // { DomainWrite[] -> [Element[] -> Scatter[]] } 928 auto WrittenTranslator = WritesTarget.range_product(Schedule); 929 930 // { [Element[] -> Scatter[]] -> ValInst[] } 931 auto Written = WrittenValue.apply_domain(WrittenTranslator); 932 simplify(Written); 933 934 // { DomainWrite[] -> [Element[] -> Zone[]] } 935 auto LifetimeTranslator = WritesTarget.range_product(WriteLifetime); 936 937 // { DomainWrite[] -> ValInst[] } 938 auto WrittenKnownValue = filterKnownValInst(WrittenValue); 939 940 // { [Element[] -> Zone[]] -> ValInst[] } 941 auto EltLifetimeInst = WrittenKnownValue.apply_domain(LifetimeTranslator); 942 simplify(EltLifetimeInst); 943 944 // { [Element[] -> Zone[] } 945 auto Occupied = LifetimeTranslator.range(); 946 simplify(Occupied); 947 948 Knowledge Proposed(Occupied, {}, EltLifetimeInst, Written); 949 if (isConflicting(Proposed)) 950 return false; 951 952 mapPHI(SAI, std::move(PHITarget), std::move(WritesTarget), 953 std::move(Lifetime), std::move(Proposed)); 954 return true; 955 } 956 957 /// Map a MemoryKind::PHI scalar to an array element. 958 /// 959 /// Callers must have ensured that the mapping is valid and not conflicting 960 /// with the common knowledge. 961 /// 962 /// @param SAI The ScopArrayInfo representing the scalar's memory to 963 /// map. 964 /// @param ReadTarget { DomainRead[] -> Element[] } 965 /// The array element to map the scalar to. 966 /// @param WriteTarget { DomainWrite[] -> Element[] } 967 /// New access target for each PHI incoming write. 968 /// @param Lifetime { DomainRead[] -> Zone[] } 969 /// The lifetime of each PHI for reporting. 970 /// @param Proposed Mapping constraints for reporting. 971 void mapPHI(const ScopArrayInfo *SAI, isl::map ReadTarget, 972 isl::union_map WriteTarget, isl::map Lifetime, 973 Knowledge Proposed) { 974 // { Element[] } 975 isl::space ElementSpace = ReadTarget.get_space().range(); 976 977 // Redirect the PHI incoming writes. 978 for (auto *MA : S->getPHIIncomings(SAI)) { 979 // { DomainWrite[] } 980 auto Domain = getDomainFor(MA); 981 982 // { DomainWrite[] -> Element[] } 983 auto NewAccRel = WriteTarget.intersect_domain(Domain); 984 simplify(NewAccRel); 985 986 isl::space NewAccRelSpace = 987 Domain.get_space().map_from_domain_and_range(ElementSpace); 988 isl::map NewAccRelMap = singleton(NewAccRel, NewAccRelSpace); 989 MA->setNewAccessRelation(NewAccRelMap); 990 } 991 992 // Redirect the PHI read. 993 auto *PHIRead = S->getPHIRead(SAI); 994 PHIRead->setNewAccessRelation(ReadTarget); 995 applyLifetime(Proposed); 996 997 MappedPHIScalars++; 998 NumberOfMappedPHIScalars++; 999 } 1000 1001 /// Search and map scalars to memory overwritten by @p TargetStoreMA. 1002 /// 1003 /// Start trying to map scalars that are used in the same statement as the 1004 /// store. For every successful mapping, try to also map scalars of the 1005 /// statements where those are written. Repeat, until no more mapping 1006 /// opportunity is found. 1007 /// 1008 /// There is currently no preference in which order scalars are tried. 1009 /// Ideally, we would direct it towards a load instruction of the same array 1010 /// element. 1011 bool collapseScalarsToStore(MemoryAccess *TargetStoreMA) { 1012 assert(TargetStoreMA->isLatestArrayKind()); 1013 assert(TargetStoreMA->isMustWrite()); 1014 1015 auto TargetStmt = TargetStoreMA->getStatement(); 1016 1017 // { DomTarget[] } 1018 auto TargetDom = getDomainFor(TargetStmt); 1019 1020 // { DomTarget[] -> Element[] } 1021 auto TargetAccRel = getAccessRelationFor(TargetStoreMA); 1022 1023 // { Zone[] -> DomTarget[] } 1024 // For each point in time, find the next target store instance. 1025 auto Target = 1026 computeScalarReachingOverwrite(Schedule, TargetDom, false, true); 1027 1028 // { Zone[] -> Element[] } 1029 // Use the target store's write location as a suggestion to map scalars to. 1030 auto EltTarget = Target.apply_range(TargetAccRel); 1031 simplify(EltTarget); 1032 LLVM_DEBUG(dbgs() << " Target mapping is " << EltTarget << '\n'); 1033 1034 // Stack of elements not yet processed. 1035 SmallVector<MemoryAccess *, 16> Worklist; 1036 1037 // Set of scalars already tested. 1038 SmallPtrSet<const ScopArrayInfo *, 16> Closed; 1039 1040 // Lambda to add all scalar reads to the work list. 1041 auto ProcessAllIncoming = [&](ScopStmt *Stmt) { 1042 for (auto *MA : *Stmt) { 1043 if (!MA->isLatestScalarKind()) 1044 continue; 1045 if (!MA->isRead()) 1046 continue; 1047 1048 Worklist.push_back(MA); 1049 } 1050 }; 1051 1052 auto *WrittenVal = TargetStoreMA->getAccessInstruction()->getOperand(0); 1053 if (auto *WrittenValInputMA = TargetStmt->lookupInputAccessOf(WrittenVal)) 1054 Worklist.push_back(WrittenValInputMA); 1055 else 1056 ProcessAllIncoming(TargetStmt); 1057 1058 auto AnyMapped = false; 1059 auto &DL = S->getRegion().getEntry()->getModule()->getDataLayout(); 1060 auto StoreSize = 1061 DL.getTypeAllocSize(TargetStoreMA->getAccessValue()->getType()); 1062 1063 while (!Worklist.empty()) { 1064 auto *MA = Worklist.pop_back_val(); 1065 1066 auto *SAI = MA->getScopArrayInfo(); 1067 if (Closed.count(SAI)) 1068 continue; 1069 Closed.insert(SAI); 1070 LLVM_DEBUG(dbgs() << "\n Trying to map " << MA << " (SAI: " << SAI 1071 << ")\n"); 1072 1073 // Skip non-mappable scalars. 1074 if (!isMappable(SAI)) 1075 continue; 1076 1077 auto MASize = DL.getTypeAllocSize(MA->getAccessValue()->getType()); 1078 if (MASize > StoreSize) { 1079 LLVM_DEBUG( 1080 dbgs() << " Reject because storage size is insufficient\n"); 1081 continue; 1082 } 1083 1084 // Try to map MemoryKind::Value scalars. 1085 if (SAI->isValueKind()) { 1086 if (!tryMapValue(SAI, EltTarget)) 1087 continue; 1088 1089 auto *DefAcc = S->getValueDef(SAI); 1090 ProcessAllIncoming(DefAcc->getStatement()); 1091 1092 AnyMapped = true; 1093 continue; 1094 } 1095 1096 // Try to map MemoryKind::PHI scalars. 1097 if (SAI->isPHIKind()) { 1098 if (!tryMapPHI(SAI, EltTarget)) 1099 continue; 1100 // Add inputs of all incoming statements to the worklist. Prefer the 1101 // input accesses of the incoming blocks. 1102 for (auto *PHIWrite : S->getPHIIncomings(SAI)) { 1103 auto *PHIWriteStmt = PHIWrite->getStatement(); 1104 bool FoundAny = false; 1105 for (auto Incoming : PHIWrite->getIncoming()) { 1106 auto *IncomingInputMA = 1107 PHIWriteStmt->lookupInputAccessOf(Incoming.second); 1108 if (!IncomingInputMA) 1109 continue; 1110 1111 Worklist.push_back(IncomingInputMA); 1112 FoundAny = true; 1113 } 1114 1115 if (!FoundAny) 1116 ProcessAllIncoming(PHIWrite->getStatement()); 1117 } 1118 1119 AnyMapped = true; 1120 continue; 1121 } 1122 } 1123 1124 if (AnyMapped) { 1125 TargetsMapped++; 1126 NumberOfTargetsMapped++; 1127 } 1128 return AnyMapped; 1129 } 1130 1131 /// Compute when an array element is unused. 1132 /// 1133 /// @return { [Element[] -> Zone[]] } 1134 isl::union_set computeLifetime() const { 1135 // { Element[] -> Zone[] } 1136 auto ArrayUnused = computeArrayUnused(Schedule, AllMustWrites, AllReads, 1137 false, false, true); 1138 1139 auto Result = ArrayUnused.wrap(); 1140 1141 simplify(Result); 1142 return Result; 1143 } 1144 1145 /// Determine when an array element is written to, and which value instance is 1146 /// written. 1147 /// 1148 /// @return { [Element[] -> Scatter[]] -> ValInst[] } 1149 isl::union_map computeWritten() const { 1150 // { [Element[] -> Scatter[]] -> ValInst[] } 1151 auto EltWritten = applyDomainRange(AllWriteValInst, Schedule); 1152 1153 simplify(EltWritten); 1154 return EltWritten; 1155 } 1156 1157 /// Determine whether an access touches at most one element. 1158 /// 1159 /// The accessed element could be a scalar or accessing an array with constant 1160 /// subscript, such that all instances access only that element. 1161 /// 1162 /// @param MA The access to test. 1163 /// 1164 /// @return True, if zero or one elements are accessed; False if at least two 1165 /// different elements are accessed. 1166 bool isScalarAccess(MemoryAccess *MA) { 1167 auto Map = getAccessRelationFor(MA); 1168 auto Set = Map.range(); 1169 return Set.is_singleton(); 1170 } 1171 1172 /// Print mapping statistics to @p OS. 1173 void printStatistics(llvm::raw_ostream &OS, int Indent = 0) const { 1174 OS.indent(Indent) << "Statistics {\n"; 1175 OS.indent(Indent + 4) << "Compatible overwrites: " 1176 << NumberOfCompatibleTargets << "\n"; 1177 OS.indent(Indent + 4) << "Overwrites mapped to: " << NumberOfTargetsMapped 1178 << '\n'; 1179 OS.indent(Indent + 4) << "Value scalars mapped: " 1180 << NumberOfMappedValueScalars << '\n'; 1181 OS.indent(Indent + 4) << "PHI scalars mapped: " 1182 << NumberOfMappedPHIScalars << '\n'; 1183 OS.indent(Indent) << "}\n"; 1184 } 1185 1186 public: 1187 DeLICMImpl(Scop *S, LoopInfo *LI) : ZoneAlgorithm("polly-delicm", S, LI) {} 1188 1189 /// Calculate the lifetime (definition to last use) of every array element. 1190 /// 1191 /// @return True if the computed lifetimes (#Zone) is usable. 1192 bool computeZone() { 1193 // Check that nothing strange occurs. 1194 collectCompatibleElts(); 1195 1196 isl::union_set EltUnused; 1197 isl::union_map EltKnown, EltWritten; 1198 1199 { 1200 IslMaxOperationsGuard MaxOpGuard(IslCtx.get(), DelicmMaxOps); 1201 1202 computeCommon(); 1203 1204 EltUnused = computeLifetime(); 1205 EltKnown = computeKnown(true, false); 1206 EltWritten = computeWritten(); 1207 } 1208 DeLICMAnalyzed++; 1209 1210 if (EltUnused.is_null() || EltKnown.is_null() || EltWritten.is_null()) { 1211 assert(isl_ctx_last_error(IslCtx.get()) == isl_error_quota && 1212 "The only reason that these things have not been computed should " 1213 "be if the max-operations limit hit"); 1214 DeLICMOutOfQuota++; 1215 LLVM_DEBUG(dbgs() << "DeLICM analysis exceeded max_operations\n"); 1216 DebugLoc Begin, End; 1217 getDebugLocations(getBBPairForRegion(&S->getRegion()), Begin, End); 1218 OptimizationRemarkAnalysis R(DEBUG_TYPE, "OutOfQuota", Begin, 1219 S->getEntry()); 1220 R << "maximal number of operations exceeded during zone analysis"; 1221 S->getFunction().getContext().diagnose(R); 1222 return false; 1223 } 1224 1225 Zone = OriginalZone = Knowledge({}, EltUnused, EltKnown, EltWritten); 1226 LLVM_DEBUG(dbgs() << "Computed Zone:\n"; OriginalZone.print(dbgs(), 4)); 1227 1228 assert(Zone.isUsable() && OriginalZone.isUsable()); 1229 return true; 1230 } 1231 1232 /// Try to map as many scalars to unused array elements as possible. 1233 /// 1234 /// Multiple scalars might be mappable to intersecting unused array element 1235 /// zones, but we can only chose one. This is a greedy algorithm, therefore 1236 /// the first processed element claims it. 1237 void greedyCollapse() { 1238 bool Modified = false; 1239 1240 for (auto &Stmt : *S) { 1241 for (auto *MA : Stmt) { 1242 if (!MA->isLatestArrayKind()) 1243 continue; 1244 if (!MA->isWrite()) 1245 continue; 1246 1247 if (MA->isMayWrite()) { 1248 LLVM_DEBUG(dbgs() << "Access " << MA 1249 << " pruned because it is a MAY_WRITE\n"); 1250 OptimizationRemarkMissed R(DEBUG_TYPE, "TargetMayWrite", 1251 MA->getAccessInstruction()); 1252 R << "Skipped possible mapping target because it is not an " 1253 "unconditional overwrite"; 1254 S->getFunction().getContext().diagnose(R); 1255 continue; 1256 } 1257 1258 if (Stmt.getNumIterators() == 0) { 1259 LLVM_DEBUG(dbgs() << "Access " << MA 1260 << " pruned because it is not in a loop\n"); 1261 OptimizationRemarkMissed R(DEBUG_TYPE, "WriteNotInLoop", 1262 MA->getAccessInstruction()); 1263 R << "skipped possible mapping target because it is not in a loop"; 1264 S->getFunction().getContext().diagnose(R); 1265 continue; 1266 } 1267 1268 if (isScalarAccess(MA)) { 1269 LLVM_DEBUG(dbgs() 1270 << "Access " << MA 1271 << " pruned because it writes only a single element\n"); 1272 OptimizationRemarkMissed R(DEBUG_TYPE, "ScalarWrite", 1273 MA->getAccessInstruction()); 1274 R << "skipped possible mapping target because the memory location " 1275 "written to does not depend on its outer loop"; 1276 S->getFunction().getContext().diagnose(R); 1277 continue; 1278 } 1279 1280 if (!isa<StoreInst>(MA->getAccessInstruction())) { 1281 LLVM_DEBUG(dbgs() << "Access " << MA 1282 << " pruned because it is not a StoreInst\n"); 1283 OptimizationRemarkMissed R(DEBUG_TYPE, "NotAStore", 1284 MA->getAccessInstruction()); 1285 R << "skipped possible mapping target because non-store instructions " 1286 "are not supported"; 1287 S->getFunction().getContext().diagnose(R); 1288 continue; 1289 } 1290 1291 // Check for more than one element acces per statement instance. 1292 // Currently we expect write accesses to be functional, eg. disallow 1293 // 1294 // { Stmt[0] -> [i] : 0 <= i < 2 } 1295 // 1296 // This may occur when some accesses to the element write/read only 1297 // parts of the element, eg. a single byte. Polly then divides each 1298 // element into subelements of the smallest access length, normal access 1299 // then touch multiple of such subelements. It is very common when the 1300 // array is accesses with memset, memcpy or memmove which take i8* 1301 // arguments. 1302 isl::union_map AccRel = MA->getLatestAccessRelation(); 1303 if (!AccRel.is_single_valued().is_true()) { 1304 LLVM_DEBUG(dbgs() << "Access " << MA 1305 << " is incompatible because it writes multiple " 1306 "elements per instance\n"); 1307 OptimizationRemarkMissed R(DEBUG_TYPE, "NonFunctionalAccRel", 1308 MA->getAccessInstruction()); 1309 R << "skipped possible mapping target because it writes more than " 1310 "one element"; 1311 S->getFunction().getContext().diagnose(R); 1312 continue; 1313 } 1314 1315 isl::union_set TouchedElts = AccRel.range(); 1316 if (!TouchedElts.is_subset(CompatibleElts)) { 1317 LLVM_DEBUG( 1318 dbgs() 1319 << "Access " << MA 1320 << " is incompatible because it touches incompatible elements\n"); 1321 OptimizationRemarkMissed R(DEBUG_TYPE, "IncompatibleElts", 1322 MA->getAccessInstruction()); 1323 R << "skipped possible mapping target because a target location " 1324 "cannot be reliably analyzed"; 1325 S->getFunction().getContext().diagnose(R); 1326 continue; 1327 } 1328 1329 assert(isCompatibleAccess(MA)); 1330 NumberOfCompatibleTargets++; 1331 LLVM_DEBUG(dbgs() << "Analyzing target access " << MA << "\n"); 1332 if (collapseScalarsToStore(MA)) 1333 Modified = true; 1334 } 1335 } 1336 1337 if (Modified) 1338 DeLICMScopsModified++; 1339 } 1340 1341 /// Dump the internal information about a performed DeLICM to @p OS. 1342 void print(llvm::raw_ostream &OS, int Indent = 0) { 1343 if (!Zone.isUsable()) { 1344 OS.indent(Indent) << "Zone not computed\n"; 1345 return; 1346 } 1347 1348 printStatistics(OS, Indent); 1349 if (!isModified()) { 1350 OS.indent(Indent) << "No modification has been made\n"; 1351 return; 1352 } 1353 printAccesses(OS, Indent); 1354 } 1355 1356 /// Return whether at least one transformation been applied. 1357 bool isModified() const { return NumberOfTargetsMapped > 0; } 1358 }; 1359 1360 static std::unique_ptr<DeLICMImpl> collapseToUnused(Scop &S, LoopInfo &LI) { 1361 std::unique_ptr<DeLICMImpl> Impl = std::make_unique<DeLICMImpl>(&S, &LI); 1362 1363 if (!Impl->computeZone()) { 1364 LLVM_DEBUG(dbgs() << "Abort because cannot reliably compute lifetimes\n"); 1365 return Impl; 1366 } 1367 1368 LLVM_DEBUG(dbgs() << "Collapsing scalars to unused array elements...\n"); 1369 Impl->greedyCollapse(); 1370 1371 LLVM_DEBUG(dbgs() << "\nFinal Scop:\n"); 1372 LLVM_DEBUG(dbgs() << S); 1373 1374 return Impl; 1375 } 1376 1377 static std::unique_ptr<DeLICMImpl> runDeLICM(Scop &S, LoopInfo &LI) { 1378 std::unique_ptr<DeLICMImpl> Impl = collapseToUnused(S, LI); 1379 1380 Scop::ScopStatistics ScopStats = S.getStatistics(); 1381 NumValueWrites += ScopStats.NumValueWrites; 1382 NumValueWritesInLoops += ScopStats.NumValueWritesInLoops; 1383 NumPHIWrites += ScopStats.NumPHIWrites; 1384 NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops; 1385 NumSingletonWrites += ScopStats.NumSingletonWrites; 1386 NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops; 1387 1388 return Impl; 1389 } 1390 1391 static PreservedAnalyses runDeLICMUsingNPM(Scop &S, ScopAnalysisManager &SAM, 1392 ScopStandardAnalysisResults &SAR, 1393 SPMUpdater &U, raw_ostream *OS) { 1394 LoopInfo &LI = SAR.LI; 1395 std::unique_ptr<DeLICMImpl> Impl = runDeLICM(S, LI); 1396 1397 if (OS) { 1398 *OS << "Printing analysis 'Polly - DeLICM/DePRE' for region: '" 1399 << S.getName() << "' in function '" << S.getFunction().getName() 1400 << "':\n"; 1401 if (Impl) { 1402 assert(Impl->getScop() == &S); 1403 1404 *OS << "DeLICM result:\n"; 1405 Impl->print(*OS); 1406 } 1407 } 1408 1409 if (!Impl->isModified()) 1410 return PreservedAnalyses::all(); 1411 1412 PreservedAnalyses PA; 1413 PA.preserveSet<AllAnalysesOn<Module>>(); 1414 PA.preserveSet<AllAnalysesOn<Function>>(); 1415 PA.preserveSet<AllAnalysesOn<Loop>>(); 1416 return PA; 1417 } 1418 1419 class DeLICMWrapperPass final : public ScopPass { 1420 private: 1421 DeLICMWrapperPass(const DeLICMWrapperPass &) = delete; 1422 const DeLICMWrapperPass &operator=(const DeLICMWrapperPass &) = delete; 1423 1424 /// The pass implementation, also holding per-scop data. 1425 std::unique_ptr<DeLICMImpl> Impl; 1426 1427 public: 1428 static char ID; 1429 explicit DeLICMWrapperPass() : ScopPass(ID) {} 1430 1431 virtual void getAnalysisUsage(AnalysisUsage &AU) const override { 1432 AU.addRequiredTransitive<ScopInfoRegionPass>(); 1433 AU.addRequired<LoopInfoWrapperPass>(); 1434 AU.setPreservesAll(); 1435 } 1436 1437 virtual bool runOnScop(Scop &S) override { 1438 // Free resources for previous scop's computation, if not yet done. 1439 releaseMemory(); 1440 1441 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 1442 Impl = runDeLICM(S, LI); 1443 1444 return Impl->isModified(); 1445 } 1446 1447 virtual void printScop(raw_ostream &OS, Scop &S) const override { 1448 if (!Impl) 1449 return; 1450 assert(Impl->getScop() == &S); 1451 1452 OS << "DeLICM result:\n"; 1453 Impl->print(OS); 1454 } 1455 1456 virtual void releaseMemory() override { Impl.reset(); } 1457 }; 1458 1459 char DeLICMWrapperPass::ID; 1460 1461 /// Print result from DeLICMWrapperPass. 1462 class DeLICMPrinterLegacyPass final : public ScopPass { 1463 public: 1464 static char ID; 1465 1466 DeLICMPrinterLegacyPass() : DeLICMPrinterLegacyPass(outs()){}; 1467 explicit DeLICMPrinterLegacyPass(llvm::raw_ostream &OS) 1468 : ScopPass(ID), OS(OS) {} 1469 1470 bool runOnScop(Scop &S) override { 1471 DeLICMWrapperPass &P = getAnalysis<DeLICMWrapperPass>(); 1472 1473 OS << "Printing analysis '" << P.getPassName() << "' for region: '" 1474 << S.getRegion().getNameStr() << "' in function '" 1475 << S.getFunction().getName() << "':\n"; 1476 P.printScop(OS, S); 1477 1478 return false; 1479 } 1480 1481 void getAnalysisUsage(AnalysisUsage &AU) const override { 1482 ScopPass::getAnalysisUsage(AU); 1483 AU.addRequired<DeLICMWrapperPass>(); 1484 AU.setPreservesAll(); 1485 } 1486 1487 private: 1488 llvm::raw_ostream &OS; 1489 }; 1490 1491 char DeLICMPrinterLegacyPass::ID = 0; 1492 } // anonymous namespace 1493 1494 Pass *polly::createDeLICMWrapperPass() { return new DeLICMWrapperPass(); } 1495 1496 llvm::Pass *polly::createDeLICMPrinterLegacyPass(llvm::raw_ostream &OS) { 1497 return new DeLICMPrinterLegacyPass(OS); 1498 } 1499 1500 llvm::PreservedAnalyses polly::DeLICMPass::run(Scop &S, 1501 ScopAnalysisManager &SAM, 1502 ScopStandardAnalysisResults &SAR, 1503 SPMUpdater &U) { 1504 return runDeLICMUsingNPM(S, SAM, SAR, U, nullptr); 1505 } 1506 1507 llvm::PreservedAnalyses DeLICMPrinterPass::run(Scop &S, 1508 ScopAnalysisManager &SAM, 1509 ScopStandardAnalysisResults &SAR, 1510 SPMUpdater &U) { 1511 return runDeLICMUsingNPM(S, SAM, SAR, U, &OS); 1512 } 1513 1514 bool polly::isConflicting( 1515 isl::union_set ExistingOccupied, isl::union_set ExistingUnused, 1516 isl::union_map ExistingKnown, isl::union_map ExistingWrites, 1517 isl::union_set ProposedOccupied, isl::union_set ProposedUnused, 1518 isl::union_map ProposedKnown, isl::union_map ProposedWrites, 1519 llvm::raw_ostream *OS, unsigned Indent) { 1520 Knowledge Existing(std::move(ExistingOccupied), std::move(ExistingUnused), 1521 std::move(ExistingKnown), std::move(ExistingWrites)); 1522 Knowledge Proposed(std::move(ProposedOccupied), std::move(ProposedUnused), 1523 std::move(ProposedKnown), std::move(ProposedWrites)); 1524 1525 return Knowledge::isConflicting(Existing, Proposed, OS, Indent); 1526 } 1527 1528 INITIALIZE_PASS_BEGIN(DeLICMWrapperPass, "polly-delicm", "Polly - DeLICM/DePRE", 1529 false, false) 1530 INITIALIZE_PASS_DEPENDENCY(ScopInfoWrapperPass) 1531 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 1532 INITIALIZE_PASS_END(DeLICMWrapperPass, "polly-delicm", "Polly - DeLICM/DePRE", 1533 false, false) 1534 1535 INITIALIZE_PASS_BEGIN(DeLICMPrinterLegacyPass, "polly-print-delicm", 1536 "Polly - Print DeLICM/DePRE", false, false) 1537 INITIALIZE_PASS_DEPENDENCY(ScopInfoWrapperPass) 1538 INITIALIZE_PASS_END(DeLICMPrinterLegacyPass, "polly-print-delicm", 1539 "Polly - Print DeLICM/DePRE", false, false) 1540