1 //===------ DeLICM.cpp -----------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Undo the effect of Loop Invariant Code Motion (LICM) and 10 // GVN Partial Redundancy Elimination (PRE) on SCoP-level. 11 // 12 // Namely, remove register/scalar dependencies by mapping them back to array 13 // elements. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "polly/DeLICM.h" 18 #include "polly/LinkAllPasses.h" 19 #include "polly/Options.h" 20 #include "polly/ScopInfo.h" 21 #include "polly/ScopPass.h" 22 #include "polly/Support/GICHelper.h" 23 #include "polly/Support/ISLOStream.h" 24 #include "polly/Support/ISLTools.h" 25 #include "polly/ZoneAlgo.h" 26 #include "llvm/ADT/Statistic.h" 27 #include "llvm/InitializePasses.h" 28 29 #define DEBUG_TYPE "polly-delicm" 30 31 using namespace polly; 32 using namespace llvm; 33 34 namespace { 35 36 cl::opt<int> 37 DelicmMaxOps("polly-delicm-max-ops", 38 cl::desc("Maximum number of isl operations to invest for " 39 "lifetime analysis; 0=no limit"), 40 cl::init(1000000), cl::cat(PollyCategory)); 41 42 cl::opt<bool> DelicmOverapproximateWrites( 43 "polly-delicm-overapproximate-writes", 44 cl::desc( 45 "Do more PHI writes than necessary in order to avoid partial accesses"), 46 cl::init(false), cl::Hidden, cl::cat(PollyCategory)); 47 48 cl::opt<bool> DelicmPartialWrites("polly-delicm-partial-writes", 49 cl::desc("Allow partial writes"), 50 cl::init(true), cl::Hidden, 51 cl::cat(PollyCategory)); 52 53 cl::opt<bool> 54 DelicmComputeKnown("polly-delicm-compute-known", 55 cl::desc("Compute known content of array elements"), 56 cl::init(true), cl::Hidden, cl::cat(PollyCategory)); 57 58 STATISTIC(DeLICMAnalyzed, "Number of successfully analyzed SCoPs"); 59 STATISTIC(DeLICMOutOfQuota, 60 "Analyses aborted because max_operations was reached"); 61 STATISTIC(MappedValueScalars, "Number of mapped Value scalars"); 62 STATISTIC(MappedPHIScalars, "Number of mapped PHI scalars"); 63 STATISTIC(TargetsMapped, "Number of stores used for at least one mapping"); 64 STATISTIC(DeLICMScopsModified, "Number of SCoPs optimized"); 65 66 STATISTIC(NumValueWrites, "Number of scalar value writes after DeLICM"); 67 STATISTIC(NumValueWritesInLoops, 68 "Number of scalar value writes nested in affine loops after DeLICM"); 69 STATISTIC(NumPHIWrites, "Number of scalar phi writes after DeLICM"); 70 STATISTIC(NumPHIWritesInLoops, 71 "Number of scalar phi writes nested in affine loops after DeLICM"); 72 STATISTIC(NumSingletonWrites, "Number of singleton writes after DeLICM"); 73 STATISTIC(NumSingletonWritesInLoops, 74 "Number of singleton writes nested in affine loops after DeLICM"); 75 76 isl::union_map computeReachingOverwrite(isl::union_map Schedule, 77 isl::union_map Writes, 78 bool InclPrevWrite, 79 bool InclOverwrite) { 80 return computeReachingWrite(Schedule, Writes, true, InclPrevWrite, 81 InclOverwrite); 82 } 83 84 /// Compute the next overwrite for a scalar. 85 /// 86 /// @param Schedule { DomainWrite[] -> Scatter[] } 87 /// Schedule of (at least) all writes. Instances not in @p 88 /// Writes are ignored. 89 /// @param Writes { DomainWrite[] } 90 /// The element instances that write to the scalar. 91 /// @param InclPrevWrite Whether to extend the timepoints to include 92 /// the timepoint where the previous write happens. 93 /// @param InclOverwrite Whether the reaching overwrite includes the timepoint 94 /// of the overwrite itself. 95 /// 96 /// @return { Scatter[] -> DomainDef[] } 97 isl::union_map computeScalarReachingOverwrite(isl::union_map Schedule, 98 isl::union_set Writes, 99 bool InclPrevWrite, 100 bool InclOverwrite) { 101 102 // { DomainWrite[] } 103 auto WritesMap = isl::union_map::from_domain(Writes); 104 105 // { [Element[] -> Scatter[]] -> DomainWrite[] } 106 auto Result = computeReachingOverwrite( 107 std::move(Schedule), std::move(WritesMap), InclPrevWrite, InclOverwrite); 108 109 return Result.domain_factor_range(); 110 } 111 112 /// Overload of computeScalarReachingOverwrite, with only one writing statement. 113 /// Consequently, the result consists of only one map space. 114 /// 115 /// @param Schedule { DomainWrite[] -> Scatter[] } 116 /// @param Writes { DomainWrite[] } 117 /// @param InclPrevWrite Include the previous write to result. 118 /// @param InclOverwrite Include the overwrite to the result. 119 /// 120 /// @return { Scatter[] -> DomainWrite[] } 121 isl::map computeScalarReachingOverwrite(isl::union_map Schedule, 122 isl::set Writes, bool InclPrevWrite, 123 bool InclOverwrite) { 124 isl::space ScatterSpace = getScatterSpace(Schedule); 125 isl::space DomSpace = Writes.get_space(); 126 127 isl::union_map ReachOverwrite = computeScalarReachingOverwrite( 128 Schedule, isl::union_set(Writes), InclPrevWrite, InclOverwrite); 129 130 isl::space ResultSpace = ScatterSpace.map_from_domain_and_range(DomSpace); 131 return singleton(std::move(ReachOverwrite), ResultSpace); 132 } 133 134 /// Try to find a 'natural' extension of a mapped to elements outside its 135 /// domain. 136 /// 137 /// @param Relevant The map with mapping that may not be modified. 138 /// @param Universe The domain to which @p Relevant needs to be extended. 139 /// 140 /// @return A map with that associates the domain elements of @p Relevant to the 141 /// same elements and in addition the elements of @p Universe to some 142 /// undefined elements. The function prefers to return simple maps. 143 isl::union_map expandMapping(isl::union_map Relevant, isl::union_set Universe) { 144 Relevant = Relevant.coalesce(); 145 isl::union_set RelevantDomain = Relevant.domain(); 146 isl::union_map Simplified = Relevant.gist_domain(RelevantDomain); 147 Simplified = Simplified.coalesce(); 148 return Simplified.intersect_domain(Universe); 149 } 150 151 /// Represent the knowledge of the contents of any array elements in any zone or 152 /// the knowledge we would add when mapping a scalar to an array element. 153 /// 154 /// Every array element at every zone unit has one of two states: 155 /// 156 /// - Unused: Not occupied by any value so a transformation can change it to 157 /// other values. 158 /// 159 /// - Occupied: The element contains a value that is still needed. 160 /// 161 /// The union of Unused and Unknown zones forms the universe, the set of all 162 /// elements at every timepoint. The universe can easily be derived from the 163 /// array elements that are accessed someway. Arrays that are never accessed 164 /// also never play a role in any computation and can hence be ignored. With a 165 /// given universe, only one of the sets needs to stored implicitly. Computing 166 /// the complement is also an expensive operation, hence this class has been 167 /// designed that only one of sets is needed while the other is assumed to be 168 /// implicit. It can still be given, but is mostly ignored. 169 /// 170 /// There are two use cases for the Knowledge class: 171 /// 172 /// 1) To represent the knowledge of the current state of ScopInfo. The unused 173 /// state means that an element is currently unused: there is no read of it 174 /// before the next overwrite. Also called 'Existing'. 175 /// 176 /// 2) To represent the requirements for mapping a scalar to array elements. The 177 /// unused state means that there is no change/requirement. Also called 178 /// 'Proposed'. 179 /// 180 /// In addition to these states at unit zones, Knowledge needs to know when 181 /// values are written. This is because written values may have no lifetime (one 182 /// reason is that the value is never read). Such writes would therefore never 183 /// conflict, but overwrite values that might still be required. Another source 184 /// of problems are multiple writes to the same element at the same timepoint, 185 /// because their order is undefined. 186 class Knowledge { 187 private: 188 /// { [Element[] -> Zone[]] } 189 /// Set of array elements and when they are alive. 190 /// Can contain a nullptr; in this case the set is implicitly defined as the 191 /// complement of #Unused. 192 /// 193 /// The set of alive array elements is represented as zone, as the set of live 194 /// values can differ depending on how the elements are interpreted. 195 /// Assuming a value X is written at timestep [0] and read at timestep [1] 196 /// without being used at any later point, then the value is alive in the 197 /// interval ]0,1[. This interval cannot be represented by an integer set, as 198 /// it does not contain any integer point. Zones allow us to represent this 199 /// interval and can be converted to sets of timepoints when needed (e.g., in 200 /// isConflicting when comparing to the write sets). 201 /// @see convertZoneToTimepoints and this file's comment for more details. 202 isl::union_set Occupied; 203 204 /// { [Element[] -> Zone[]] } 205 /// Set of array elements when they are not alive, i.e. their memory can be 206 /// used for other purposed. Can contain a nullptr; in this case the set is 207 /// implicitly defined as the complement of #Occupied. 208 isl::union_set Unused; 209 210 /// { [Element[] -> Zone[]] -> ValInst[] } 211 /// Maps to the known content for each array element at any interval. 212 /// 213 /// Any element/interval can map to multiple known elements. This is due to 214 /// multiple llvm::Value referring to the same content. Examples are 215 /// 216 /// - A value stored and loaded again. The LoadInst represents the same value 217 /// as the StoreInst's value operand. 218 /// 219 /// - A PHINode is equal to any one of the incoming values. In case of 220 /// LCSSA-form, it is always equal to its single incoming value. 221 /// 222 /// Two Knowledges are considered not conflicting if at least one of the known 223 /// values match. Not known values are not stored as an unnamed tuple (as 224 /// #Written does), but maps to nothing. 225 /// 226 /// Known values are usually just defined for #Occupied elements. Knowing 227 /// #Unused contents has no advantage as it can be overwritten. 228 isl::union_map Known; 229 230 /// { [Element[] -> Scatter[]] -> ValInst[] } 231 /// The write actions currently in the scop or that would be added when 232 /// mapping a scalar. Maps to the value that is written. 233 /// 234 /// Written values that cannot be identified are represented by an unknown 235 /// ValInst[] (an unnamed tuple of 0 dimension). It conflicts with itself. 236 isl::union_map Written; 237 238 /// Check whether this Knowledge object is well-formed. 239 void checkConsistency() const { 240 #ifndef NDEBUG 241 // Default-initialized object 242 if (!Occupied && !Unused && !Known && !Written) 243 return; 244 245 assert(Occupied || Unused); 246 assert(Known); 247 assert(Written); 248 249 // If not all fields are defined, we cannot derived the universe. 250 if (!Occupied || !Unused) 251 return; 252 253 assert(Occupied.is_disjoint(Unused)); 254 auto Universe = Occupied.unite(Unused); 255 256 assert(!Known.domain().is_subset(Universe).is_false()); 257 assert(!Written.domain().is_subset(Universe).is_false()); 258 #endif 259 } 260 261 public: 262 /// Initialize a nullptr-Knowledge. This is only provided for convenience; do 263 /// not use such an object. 264 Knowledge() {} 265 266 /// Create a new object with the given members. 267 Knowledge(isl::union_set Occupied, isl::union_set Unused, 268 isl::union_map Known, isl::union_map Written) 269 : Occupied(std::move(Occupied)), Unused(std::move(Unused)), 270 Known(std::move(Known)), Written(std::move(Written)) { 271 checkConsistency(); 272 } 273 274 /// Return whether this object was not default-constructed. 275 bool isUsable() const { return (Occupied || Unused) && Known && Written; } 276 277 /// Print the content of this object to @p OS. 278 void print(llvm::raw_ostream &OS, unsigned Indent = 0) const { 279 if (isUsable()) { 280 if (Occupied) 281 OS.indent(Indent) << "Occupied: " << Occupied << "\n"; 282 else 283 OS.indent(Indent) << "Occupied: <Everything else not in Unused>\n"; 284 if (Unused) 285 OS.indent(Indent) << "Unused: " << Unused << "\n"; 286 else 287 OS.indent(Indent) << "Unused: <Everything else not in Occupied>\n"; 288 OS.indent(Indent) << "Known: " << Known << "\n"; 289 OS.indent(Indent) << "Written : " << Written << '\n'; 290 } else { 291 OS.indent(Indent) << "Invalid knowledge\n"; 292 } 293 } 294 295 /// Combine two knowledges, this and @p That. 296 void learnFrom(Knowledge That) { 297 assert(!isConflicting(*this, That)); 298 assert(Unused && That.Occupied); 299 assert( 300 !That.Unused && 301 "This function is only prepared to learn occupied elements from That"); 302 assert(!Occupied && "This function does not implement " 303 "`this->Occupied = " 304 "this->Occupied.unite(That.Occupied);`"); 305 306 Unused = Unused.subtract(That.Occupied); 307 Known = Known.unite(That.Known); 308 Written = Written.unite(That.Written); 309 310 checkConsistency(); 311 } 312 313 /// Determine whether two Knowledges conflict with each other. 314 /// 315 /// In theory @p Existing and @p Proposed are symmetric, but the 316 /// implementation is constrained by the implicit interpretation. That is, @p 317 /// Existing must have #Unused defined (use case 1) and @p Proposed must have 318 /// #Occupied defined (use case 1). 319 /// 320 /// A conflict is defined as non-preserved semantics when they are merged. For 321 /// instance, when for the same array and zone they assume different 322 /// llvm::Values. 323 /// 324 /// @param Existing One of the knowledges with #Unused defined. 325 /// @param Proposed One of the knowledges with #Occupied defined. 326 /// @param OS Dump the conflict reason to this output stream; use 327 /// nullptr to not output anything. 328 /// @param Indent Indention for the conflict reason. 329 /// 330 /// @return True, iff the two knowledges are conflicting. 331 static bool isConflicting(const Knowledge &Existing, 332 const Knowledge &Proposed, 333 llvm::raw_ostream *OS = nullptr, 334 unsigned Indent = 0) { 335 assert(Existing.Unused); 336 assert(Proposed.Occupied); 337 338 #ifndef NDEBUG 339 if (Existing.Occupied && Proposed.Unused) { 340 auto ExistingUniverse = Existing.Occupied.unite(Existing.Unused); 341 auto ProposedUniverse = Proposed.Occupied.unite(Proposed.Unused); 342 assert(ExistingUniverse.is_equal(ProposedUniverse) && 343 "Both inputs' Knowledges must be over the same universe"); 344 } 345 #endif 346 347 // Do the Existing and Proposed lifetimes conflict? 348 // 349 // Lifetimes are described as the cross-product of array elements and zone 350 // intervals in which they are alive (the space { [Element[] -> Zone[]] }). 351 // In the following we call this "element/lifetime interval". 352 // 353 // In order to not conflict, one of the following conditions must apply for 354 // each element/lifetime interval: 355 // 356 // 1. If occupied in one of the knowledges, it is unused in the other. 357 // 358 // - or - 359 // 360 // 2. Both contain the same value. 361 // 362 // Instead of partitioning the element/lifetime intervals into a part that 363 // both Knowledges occupy (which requires an expensive subtraction) and for 364 // these to check whether they are known to be the same value, we check only 365 // the second condition and ensure that it also applies when then first 366 // condition is true. This is done by adding a wildcard value to 367 // Proposed.Known and Existing.Unused such that they match as a common known 368 // value. We use the "unknown ValInst" for this purpose. Every 369 // Existing.Unused may match with an unknown Proposed.Occupied because these 370 // never are in conflict with each other. 371 auto ProposedOccupiedAnyVal = makeUnknownForDomain(Proposed.Occupied); 372 auto ProposedValues = Proposed.Known.unite(ProposedOccupiedAnyVal); 373 374 auto ExistingUnusedAnyVal = makeUnknownForDomain(Existing.Unused); 375 auto ExistingValues = Existing.Known.unite(ExistingUnusedAnyVal); 376 377 auto MatchingVals = ExistingValues.intersect(ProposedValues); 378 auto Matches = MatchingVals.domain(); 379 380 // Any Proposed.Occupied must either have a match between the known values 381 // of Existing and Occupied, or be in Existing.Unused. In the latter case, 382 // the previously added "AnyVal" will match each other. 383 if (!Proposed.Occupied.is_subset(Matches)) { 384 if (OS) { 385 auto Conflicting = Proposed.Occupied.subtract(Matches); 386 auto ExistingConflictingKnown = 387 Existing.Known.intersect_domain(Conflicting); 388 auto ProposedConflictingKnown = 389 Proposed.Known.intersect_domain(Conflicting); 390 391 OS->indent(Indent) << "Proposed lifetime conflicting with Existing's\n"; 392 OS->indent(Indent) << "Conflicting occupied: " << Conflicting << "\n"; 393 if (!ExistingConflictingKnown.is_empty()) 394 OS->indent(Indent) 395 << "Existing Known: " << ExistingConflictingKnown << "\n"; 396 if (!ProposedConflictingKnown.is_empty()) 397 OS->indent(Indent) 398 << "Proposed Known: " << ProposedConflictingKnown << "\n"; 399 } 400 return true; 401 } 402 403 // Do the writes in Existing conflict with occupied values in Proposed? 404 // 405 // In order to not conflict, it must either write to unused lifetime or 406 // write the same value. To check, we remove the writes that write into 407 // Proposed.Unused (they never conflict) and then see whether the written 408 // value is already in Proposed.Known. If there are multiple known values 409 // and a written value is known under different names, it is enough when one 410 // of the written values (assuming that they are the same value under 411 // different names, e.g. a PHINode and one of the incoming values) matches 412 // one of the known names. 413 // 414 // We convert here the set of lifetimes to actual timepoints. A lifetime is 415 // in conflict with a set of write timepoints, if either a live timepoint is 416 // clearly within the lifetime or if a write happens at the beginning of the 417 // lifetime (where it would conflict with the value that actually writes the 418 // value alive). There is no conflict at the end of a lifetime, as the alive 419 // value will always be read, before it is overwritten again. The last 420 // property holds in Polly for all scalar values and we expect all users of 421 // Knowledge to check this property also for accesses to MemoryKind::Array. 422 auto ProposedFixedDefs = 423 convertZoneToTimepoints(Proposed.Occupied, true, false); 424 auto ProposedFixedKnown = 425 convertZoneToTimepoints(Proposed.Known, isl::dim::in, true, false); 426 427 auto ExistingConflictingWrites = 428 Existing.Written.intersect_domain(ProposedFixedDefs); 429 auto ExistingConflictingWritesDomain = ExistingConflictingWrites.domain(); 430 431 auto CommonWrittenVal = 432 ProposedFixedKnown.intersect(ExistingConflictingWrites); 433 auto CommonWrittenValDomain = CommonWrittenVal.domain(); 434 435 if (!ExistingConflictingWritesDomain.is_subset(CommonWrittenValDomain)) { 436 if (OS) { 437 auto ExistingConflictingWritten = 438 ExistingConflictingWrites.subtract_domain(CommonWrittenValDomain); 439 auto ProposedConflictingKnown = ProposedFixedKnown.subtract_domain( 440 ExistingConflictingWritten.domain()); 441 442 OS->indent(Indent) 443 << "Proposed a lifetime where there is an Existing write into it\n"; 444 OS->indent(Indent) << "Existing conflicting writes: " 445 << ExistingConflictingWritten << "\n"; 446 if (!ProposedConflictingKnown.is_empty()) 447 OS->indent(Indent) 448 << "Proposed conflicting known: " << ProposedConflictingKnown 449 << "\n"; 450 } 451 return true; 452 } 453 454 // Do the writes in Proposed conflict with occupied values in Existing? 455 auto ExistingAvailableDefs = 456 convertZoneToTimepoints(Existing.Unused, true, false); 457 auto ExistingKnownDefs = 458 convertZoneToTimepoints(Existing.Known, isl::dim::in, true, false); 459 460 auto ProposedWrittenDomain = Proposed.Written.domain(); 461 auto KnownIdentical = ExistingKnownDefs.intersect(Proposed.Written); 462 auto IdenticalOrUnused = 463 ExistingAvailableDefs.unite(KnownIdentical.domain()); 464 if (!ProposedWrittenDomain.is_subset(IdenticalOrUnused)) { 465 if (OS) { 466 auto Conflicting = ProposedWrittenDomain.subtract(IdenticalOrUnused); 467 auto ExistingConflictingKnown = 468 ExistingKnownDefs.intersect_domain(Conflicting); 469 auto ProposedConflictingWritten = 470 Proposed.Written.intersect_domain(Conflicting); 471 472 OS->indent(Indent) << "Proposed writes into range used by Existing\n"; 473 OS->indent(Indent) << "Proposed conflicting writes: " 474 << ProposedConflictingWritten << "\n"; 475 if (!ExistingConflictingKnown.is_empty()) 476 OS->indent(Indent) 477 << "Existing conflicting known: " << ExistingConflictingKnown 478 << "\n"; 479 } 480 return true; 481 } 482 483 // Does Proposed write at the same time as Existing already does (order of 484 // writes is undefined)? Writing the same value is permitted. 485 auto ExistingWrittenDomain = Existing.Written.domain(); 486 auto BothWritten = 487 Existing.Written.domain().intersect(Proposed.Written.domain()); 488 auto ExistingKnownWritten = filterKnownValInst(Existing.Written); 489 auto ProposedKnownWritten = filterKnownValInst(Proposed.Written); 490 auto CommonWritten = 491 ExistingKnownWritten.intersect(ProposedKnownWritten).domain(); 492 493 if (!BothWritten.is_subset(CommonWritten)) { 494 if (OS) { 495 auto Conflicting = BothWritten.subtract(CommonWritten); 496 auto ExistingConflictingWritten = 497 Existing.Written.intersect_domain(Conflicting); 498 auto ProposedConflictingWritten = 499 Proposed.Written.intersect_domain(Conflicting); 500 501 OS->indent(Indent) << "Proposed writes at the same time as an already " 502 "Existing write\n"; 503 OS->indent(Indent) << "Conflicting writes: " << Conflicting << "\n"; 504 if (!ExistingConflictingWritten.is_empty()) 505 OS->indent(Indent) 506 << "Exiting write: " << ExistingConflictingWritten << "\n"; 507 if (!ProposedConflictingWritten.is_empty()) 508 OS->indent(Indent) 509 << "Proposed write: " << ProposedConflictingWritten << "\n"; 510 } 511 return true; 512 } 513 514 return false; 515 } 516 }; 517 518 /// Implementation of the DeLICM/DePRE transformation. 519 class DeLICMImpl : public ZoneAlgorithm { 520 private: 521 /// Knowledge before any transformation took place. 522 Knowledge OriginalZone; 523 524 /// Current knowledge of the SCoP including all already applied 525 /// transformations. 526 Knowledge Zone; 527 528 /// Number of StoreInsts something can be mapped to. 529 int NumberOfCompatibleTargets = 0; 530 531 /// The number of StoreInsts to which at least one value or PHI has been 532 /// mapped to. 533 int NumberOfTargetsMapped = 0; 534 535 /// The number of llvm::Value mapped to some array element. 536 int NumberOfMappedValueScalars = 0; 537 538 /// The number of PHIs mapped to some array element. 539 int NumberOfMappedPHIScalars = 0; 540 541 /// Determine whether two knowledges are conflicting with each other. 542 /// 543 /// @see Knowledge::isConflicting 544 bool isConflicting(const Knowledge &Proposed) { 545 raw_ostream *OS = nullptr; 546 LLVM_DEBUG(OS = &llvm::dbgs()); 547 return Knowledge::isConflicting(Zone, Proposed, OS, 4); 548 } 549 550 /// Determine whether @p SAI is a scalar that can be mapped to an array 551 /// element. 552 bool isMappable(const ScopArrayInfo *SAI) { 553 assert(SAI); 554 555 if (SAI->isValueKind()) { 556 auto *MA = S->getValueDef(SAI); 557 if (!MA) { 558 LLVM_DEBUG( 559 dbgs() 560 << " Reject because value is read-only within the scop\n"); 561 return false; 562 } 563 564 // Mapping if value is used after scop is not supported. The code 565 // generator would need to reload the scalar after the scop, but it 566 // does not have the information to where it is mapped to. Only the 567 // MemoryAccesses have that information, not the ScopArrayInfo. 568 auto Inst = MA->getAccessInstruction(); 569 for (auto User : Inst->users()) { 570 if (!isa<Instruction>(User)) 571 return false; 572 auto UserInst = cast<Instruction>(User); 573 574 if (!S->contains(UserInst)) { 575 LLVM_DEBUG(dbgs() << " Reject because value is escaping\n"); 576 return false; 577 } 578 } 579 580 return true; 581 } 582 583 if (SAI->isPHIKind()) { 584 auto *MA = S->getPHIRead(SAI); 585 assert(MA); 586 587 // Mapping of an incoming block from before the SCoP is not supported by 588 // the code generator. 589 auto PHI = cast<PHINode>(MA->getAccessInstruction()); 590 for (auto Incoming : PHI->blocks()) { 591 if (!S->contains(Incoming)) { 592 LLVM_DEBUG(dbgs() 593 << " Reject because at least one incoming block is " 594 "not in the scop region\n"); 595 return false; 596 } 597 } 598 599 return true; 600 } 601 602 LLVM_DEBUG(dbgs() << " Reject ExitPHI or other non-value\n"); 603 return false; 604 } 605 606 /// Compute the uses of a MemoryKind::Value and its lifetime (from its 607 /// definition to the last use). 608 /// 609 /// @param SAI The ScopArrayInfo representing the value's storage. 610 /// 611 /// @return { DomainDef[] -> DomainUse[] }, { DomainDef[] -> Zone[] } 612 /// First element is the set of uses for each definition. 613 /// The second is the lifetime of each definition. 614 std::tuple<isl::union_map, isl::map> 615 computeValueUses(const ScopArrayInfo *SAI) { 616 assert(SAI->isValueKind()); 617 618 // { DomainRead[] } 619 auto Reads = makeEmptyUnionSet(); 620 621 // Find all uses. 622 for (auto *MA : S->getValueUses(SAI)) 623 Reads = Reads.add_set(getDomainFor(MA)); 624 625 // { DomainRead[] -> Scatter[] } 626 auto ReadSchedule = getScatterFor(Reads); 627 628 auto *DefMA = S->getValueDef(SAI); 629 assert(DefMA); 630 631 // { DomainDef[] } 632 auto Writes = getDomainFor(DefMA); 633 634 // { DomainDef[] -> Scatter[] } 635 auto WriteScatter = getScatterFor(Writes); 636 637 // { Scatter[] -> DomainDef[] } 638 auto ReachDef = getScalarReachingDefinition(DefMA->getStatement()); 639 640 // { [DomainDef[] -> Scatter[]] -> DomainUse[] } 641 auto Uses = isl::union_map(ReachDef.reverse().range_map()) 642 .apply_range(ReadSchedule.reverse()); 643 644 // { DomainDef[] -> Scatter[] } 645 auto UseScatter = 646 singleton(Uses.domain().unwrap(), 647 Writes.get_space().map_from_domain_and_range(ScatterSpace)); 648 649 // { DomainDef[] -> Zone[] } 650 auto Lifetime = betweenScatter(WriteScatter, UseScatter, false, true); 651 652 // { DomainDef[] -> DomainRead[] } 653 auto DefUses = Uses.domain_factor_domain(); 654 655 return std::make_pair(DefUses, Lifetime); 656 } 657 658 /// Try to map a MemoryKind::Value to a given array element. 659 /// 660 /// @param SAI Representation of the scalar's memory to map. 661 /// @param TargetElt { Scatter[] -> Element[] } 662 /// Suggestion where to map a scalar to when at a timepoint. 663 /// 664 /// @return true if the scalar was successfully mapped. 665 bool tryMapValue(const ScopArrayInfo *SAI, isl::map TargetElt) { 666 assert(SAI->isValueKind()); 667 668 auto *DefMA = S->getValueDef(SAI); 669 assert(DefMA->isValueKind()); 670 assert(DefMA->isMustWrite()); 671 auto *V = DefMA->getAccessValue(); 672 auto *DefInst = DefMA->getAccessInstruction(); 673 674 // Stop if the scalar has already been mapped. 675 if (!DefMA->getLatestScopArrayInfo()->isValueKind()) 676 return false; 677 678 // { DomainDef[] -> Scatter[] } 679 auto DefSched = getScatterFor(DefMA); 680 681 // Where each write is mapped to, according to the suggestion. 682 // { DomainDef[] -> Element[] } 683 auto DefTarget = TargetElt.apply_domain(DefSched.reverse()); 684 simplify(DefTarget); 685 LLVM_DEBUG(dbgs() << " Def Mapping: " << DefTarget << '\n'); 686 687 auto OrigDomain = getDomainFor(DefMA); 688 auto MappedDomain = DefTarget.domain(); 689 if (!OrigDomain.is_subset(MappedDomain)) { 690 LLVM_DEBUG( 691 dbgs() 692 << " Reject because mapping does not encompass all instances\n"); 693 return false; 694 } 695 696 // { DomainDef[] -> Zone[] } 697 isl::map Lifetime; 698 699 // { DomainDef[] -> DomainUse[] } 700 isl::union_map DefUses; 701 702 std::tie(DefUses, Lifetime) = computeValueUses(SAI); 703 LLVM_DEBUG(dbgs() << " Lifetime: " << Lifetime << '\n'); 704 705 /// { [Element[] -> Zone[]] } 706 auto EltZone = Lifetime.apply_domain(DefTarget).wrap(); 707 simplify(EltZone); 708 709 // When known knowledge is disabled, just return the unknown value. It will 710 // either get filtered out or conflict with itself. 711 // { DomainDef[] -> ValInst[] } 712 isl::map ValInst; 713 if (DelicmComputeKnown) 714 ValInst = makeValInst(V, DefMA->getStatement(), 715 LI->getLoopFor(DefInst->getParent())); 716 else 717 ValInst = makeUnknownForDomain(DefMA->getStatement()); 718 719 // { DomainDef[] -> [Element[] -> Zone[]] } 720 auto EltKnownTranslator = DefTarget.range_product(Lifetime); 721 722 // { [Element[] -> Zone[]] -> ValInst[] } 723 auto EltKnown = ValInst.apply_domain(EltKnownTranslator); 724 simplify(EltKnown); 725 726 // { DomainDef[] -> [Element[] -> Scatter[]] } 727 auto WrittenTranslator = DefTarget.range_product(DefSched); 728 729 // { [Element[] -> Scatter[]] -> ValInst[] } 730 auto DefEltSched = ValInst.apply_domain(WrittenTranslator); 731 simplify(DefEltSched); 732 733 Knowledge Proposed(EltZone, nullptr, filterKnownValInst(EltKnown), 734 DefEltSched); 735 if (isConflicting(Proposed)) 736 return false; 737 738 // { DomainUse[] -> Element[] } 739 auto UseTarget = DefUses.reverse().apply_range(DefTarget); 740 741 mapValue(SAI, std::move(DefTarget), std::move(UseTarget), 742 std::move(Lifetime), std::move(Proposed)); 743 return true; 744 } 745 746 /// After a scalar has been mapped, update the global knowledge. 747 void applyLifetime(Knowledge Proposed) { 748 Zone.learnFrom(std::move(Proposed)); 749 } 750 751 /// Map a MemoryKind::Value scalar to an array element. 752 /// 753 /// Callers must have ensured that the mapping is valid and not conflicting. 754 /// 755 /// @param SAI The ScopArrayInfo representing the scalar's memory to 756 /// map. 757 /// @param DefTarget { DomainDef[] -> Element[] } 758 /// The array element to map the scalar to. 759 /// @param UseTarget { DomainUse[] -> Element[] } 760 /// The array elements the uses are mapped to. 761 /// @param Lifetime { DomainDef[] -> Zone[] } 762 /// The lifetime of each llvm::Value definition for 763 /// reporting. 764 /// @param Proposed Mapping constraints for reporting. 765 void mapValue(const ScopArrayInfo *SAI, isl::map DefTarget, 766 isl::union_map UseTarget, isl::map Lifetime, 767 Knowledge Proposed) { 768 // Redirect the read accesses. 769 for (auto *MA : S->getValueUses(SAI)) { 770 // { DomainUse[] } 771 auto Domain = getDomainFor(MA); 772 773 // { DomainUse[] -> Element[] } 774 auto NewAccRel = UseTarget.intersect_domain(Domain); 775 simplify(NewAccRel); 776 777 assert(isl_union_map_n_map(NewAccRel.get()) == 1); 778 MA->setNewAccessRelation(isl::map::from_union_map(NewAccRel)); 779 } 780 781 auto *WA = S->getValueDef(SAI); 782 WA->setNewAccessRelation(DefTarget); 783 applyLifetime(Proposed); 784 785 MappedValueScalars++; 786 NumberOfMappedValueScalars += 1; 787 } 788 789 isl::map makeValInst(Value *Val, ScopStmt *UserStmt, Loop *Scope, 790 bool IsCertain = true) { 791 // When known knowledge is disabled, just return the unknown value. It will 792 // either get filtered out or conflict with itself. 793 if (!DelicmComputeKnown) 794 return makeUnknownForDomain(UserStmt); 795 return ZoneAlgorithm::makeValInst(Val, UserStmt, Scope, IsCertain); 796 } 797 798 /// Express the incoming values of a PHI for each incoming statement in an 799 /// isl::union_map. 800 /// 801 /// @param SAI The PHI scalar represented by a ScopArrayInfo. 802 /// 803 /// @return { PHIWriteDomain[] -> ValInst[] } 804 isl::union_map determinePHIWrittenValues(const ScopArrayInfo *SAI) { 805 auto Result = makeEmptyUnionMap(); 806 807 // Collect the incoming values. 808 for (auto *MA : S->getPHIIncomings(SAI)) { 809 // { DomainWrite[] -> ValInst[] } 810 isl::union_map ValInst; 811 auto *WriteStmt = MA->getStatement(); 812 813 auto Incoming = MA->getIncoming(); 814 assert(!Incoming.empty()); 815 if (Incoming.size() == 1) { 816 ValInst = makeValInst(Incoming[0].second, WriteStmt, 817 LI->getLoopFor(Incoming[0].first)); 818 } else { 819 // If the PHI is in a subregion's exit node it can have multiple 820 // incoming values (+ maybe another incoming edge from an unrelated 821 // block). We cannot directly represent it as a single llvm::Value. 822 // We currently model it as unknown value, but modeling as the PHIInst 823 // itself could be OK, too. 824 ValInst = makeUnknownForDomain(WriteStmt); 825 } 826 827 Result = Result.unite(ValInst); 828 } 829 830 assert(Result.is_single_valued() && 831 "Cannot have multiple incoming values for same incoming statement"); 832 return Result; 833 } 834 835 /// Try to map a MemoryKind::PHI scalar to a given array element. 836 /// 837 /// @param SAI Representation of the scalar's memory to map. 838 /// @param TargetElt { Scatter[] -> Element[] } 839 /// Suggestion where to map the scalar to when at a 840 /// timepoint. 841 /// 842 /// @return true if the PHI scalar has been mapped. 843 bool tryMapPHI(const ScopArrayInfo *SAI, isl::map TargetElt) { 844 auto *PHIRead = S->getPHIRead(SAI); 845 assert(PHIRead->isPHIKind()); 846 assert(PHIRead->isRead()); 847 848 // Skip if already been mapped. 849 if (!PHIRead->getLatestScopArrayInfo()->isPHIKind()) 850 return false; 851 852 // { DomainRead[] -> Scatter[] } 853 auto PHISched = getScatterFor(PHIRead); 854 855 // { DomainRead[] -> Element[] } 856 auto PHITarget = PHISched.apply_range(TargetElt); 857 simplify(PHITarget); 858 LLVM_DEBUG(dbgs() << " Mapping: " << PHITarget << '\n'); 859 860 auto OrigDomain = getDomainFor(PHIRead); 861 auto MappedDomain = PHITarget.domain(); 862 if (!OrigDomain.is_subset(MappedDomain)) { 863 LLVM_DEBUG( 864 dbgs() 865 << " Reject because mapping does not encompass all instances\n"); 866 return false; 867 } 868 869 // { DomainRead[] -> DomainWrite[] } 870 auto PerPHIWrites = computePerPHI(SAI); 871 if (!PerPHIWrites) { 872 LLVM_DEBUG( 873 dbgs() << " Reject because cannot determine incoming values\n"); 874 return false; 875 } 876 877 // { DomainWrite[] -> Element[] } 878 auto WritesTarget = PerPHIWrites.apply_domain(PHITarget).reverse(); 879 simplify(WritesTarget); 880 881 // { DomainWrite[] } 882 auto UniverseWritesDom = isl::union_set::empty(ParamSpace); 883 884 for (auto *MA : S->getPHIIncomings(SAI)) 885 UniverseWritesDom = UniverseWritesDom.add_set(getDomainFor(MA)); 886 887 auto RelevantWritesTarget = WritesTarget; 888 if (DelicmOverapproximateWrites) 889 WritesTarget = expandMapping(WritesTarget, UniverseWritesDom); 890 891 auto ExpandedWritesDom = WritesTarget.domain(); 892 if (!DelicmPartialWrites && 893 !UniverseWritesDom.is_subset(ExpandedWritesDom)) { 894 LLVM_DEBUG( 895 dbgs() << " Reject because did not find PHI write mapping for " 896 "all instances\n"); 897 if (DelicmOverapproximateWrites) 898 LLVM_DEBUG(dbgs() << " Relevant Mapping: " 899 << RelevantWritesTarget << '\n'); 900 LLVM_DEBUG(dbgs() << " Deduced Mapping: " << WritesTarget 901 << '\n'); 902 LLVM_DEBUG(dbgs() << " Missing instances: " 903 << UniverseWritesDom.subtract(ExpandedWritesDom) 904 << '\n'); 905 return false; 906 } 907 908 // { DomainRead[] -> Scatter[] } 909 isl::union_map PerPHIWriteScatterUmap = PerPHIWrites.apply_range(Schedule); 910 isl::map PerPHIWriteScatter = 911 singleton(PerPHIWriteScatterUmap, PHISched.get_space()); 912 913 // { DomainRead[] -> Zone[] } 914 auto Lifetime = betweenScatter(PerPHIWriteScatter, PHISched, false, true); 915 simplify(Lifetime); 916 LLVM_DEBUG(dbgs() << " Lifetime: " << Lifetime << "\n"); 917 918 // { DomainWrite[] -> Zone[] } 919 auto WriteLifetime = isl::union_map(Lifetime).apply_domain(PerPHIWrites); 920 921 // { DomainWrite[] -> ValInst[] } 922 auto WrittenValue = determinePHIWrittenValues(SAI); 923 924 // { DomainWrite[] -> [Element[] -> Scatter[]] } 925 auto WrittenTranslator = WritesTarget.range_product(Schedule); 926 927 // { [Element[] -> Scatter[]] -> ValInst[] } 928 auto Written = WrittenValue.apply_domain(WrittenTranslator); 929 simplify(Written); 930 931 // { DomainWrite[] -> [Element[] -> Zone[]] } 932 auto LifetimeTranslator = WritesTarget.range_product(WriteLifetime); 933 934 // { DomainWrite[] -> ValInst[] } 935 auto WrittenKnownValue = filterKnownValInst(WrittenValue); 936 937 // { [Element[] -> Zone[]] -> ValInst[] } 938 auto EltLifetimeInst = WrittenKnownValue.apply_domain(LifetimeTranslator); 939 simplify(EltLifetimeInst); 940 941 // { [Element[] -> Zone[] } 942 auto Occupied = LifetimeTranslator.range(); 943 simplify(Occupied); 944 945 Knowledge Proposed(Occupied, nullptr, EltLifetimeInst, Written); 946 if (isConflicting(Proposed)) 947 return false; 948 949 mapPHI(SAI, std::move(PHITarget), std::move(WritesTarget), 950 std::move(Lifetime), std::move(Proposed)); 951 return true; 952 } 953 954 /// Map a MemoryKind::PHI scalar to an array element. 955 /// 956 /// Callers must have ensured that the mapping is valid and not conflicting 957 /// with the common knowledge. 958 /// 959 /// @param SAI The ScopArrayInfo representing the scalar's memory to 960 /// map. 961 /// @param ReadTarget { DomainRead[] -> Element[] } 962 /// The array element to map the scalar to. 963 /// @param WriteTarget { DomainWrite[] -> Element[] } 964 /// New access target for each PHI incoming write. 965 /// @param Lifetime { DomainRead[] -> Zone[] } 966 /// The lifetime of each PHI for reporting. 967 /// @param Proposed Mapping constraints for reporting. 968 void mapPHI(const ScopArrayInfo *SAI, isl::map ReadTarget, 969 isl::union_map WriteTarget, isl::map Lifetime, 970 Knowledge Proposed) { 971 // { Element[] } 972 isl::space ElementSpace = ReadTarget.get_space().range(); 973 974 // Redirect the PHI incoming writes. 975 for (auto *MA : S->getPHIIncomings(SAI)) { 976 // { DomainWrite[] } 977 auto Domain = getDomainFor(MA); 978 979 // { DomainWrite[] -> Element[] } 980 auto NewAccRel = WriteTarget.intersect_domain(Domain); 981 simplify(NewAccRel); 982 983 isl::space NewAccRelSpace = 984 Domain.get_space().map_from_domain_and_range(ElementSpace); 985 isl::map NewAccRelMap = singleton(NewAccRel, NewAccRelSpace); 986 MA->setNewAccessRelation(NewAccRelMap); 987 } 988 989 // Redirect the PHI read. 990 auto *PHIRead = S->getPHIRead(SAI); 991 PHIRead->setNewAccessRelation(ReadTarget); 992 applyLifetime(Proposed); 993 994 MappedPHIScalars++; 995 NumberOfMappedPHIScalars++; 996 } 997 998 /// Search and map scalars to memory overwritten by @p TargetStoreMA. 999 /// 1000 /// Start trying to map scalars that are used in the same statement as the 1001 /// store. For every successful mapping, try to also map scalars of the 1002 /// statements where those are written. Repeat, until no more mapping 1003 /// opportunity is found. 1004 /// 1005 /// There is currently no preference in which order scalars are tried. 1006 /// Ideally, we would direct it towards a load instruction of the same array 1007 /// element. 1008 bool collapseScalarsToStore(MemoryAccess *TargetStoreMA) { 1009 assert(TargetStoreMA->isLatestArrayKind()); 1010 assert(TargetStoreMA->isMustWrite()); 1011 1012 auto TargetStmt = TargetStoreMA->getStatement(); 1013 1014 // { DomTarget[] } 1015 auto TargetDom = getDomainFor(TargetStmt); 1016 1017 // { DomTarget[] -> Element[] } 1018 auto TargetAccRel = getAccessRelationFor(TargetStoreMA); 1019 1020 // { Zone[] -> DomTarget[] } 1021 // For each point in time, find the next target store instance. 1022 auto Target = 1023 computeScalarReachingOverwrite(Schedule, TargetDom, false, true); 1024 1025 // { Zone[] -> Element[] } 1026 // Use the target store's write location as a suggestion to map scalars to. 1027 auto EltTarget = Target.apply_range(TargetAccRel); 1028 simplify(EltTarget); 1029 LLVM_DEBUG(dbgs() << " Target mapping is " << EltTarget << '\n'); 1030 1031 // Stack of elements not yet processed. 1032 SmallVector<MemoryAccess *, 16> Worklist; 1033 1034 // Set of scalars already tested. 1035 SmallPtrSet<const ScopArrayInfo *, 16> Closed; 1036 1037 // Lambda to add all scalar reads to the work list. 1038 auto ProcessAllIncoming = [&](ScopStmt *Stmt) { 1039 for (auto *MA : *Stmt) { 1040 if (!MA->isLatestScalarKind()) 1041 continue; 1042 if (!MA->isRead()) 1043 continue; 1044 1045 Worklist.push_back(MA); 1046 } 1047 }; 1048 1049 auto *WrittenVal = TargetStoreMA->getAccessInstruction()->getOperand(0); 1050 if (auto *WrittenValInputMA = TargetStmt->lookupInputAccessOf(WrittenVal)) 1051 Worklist.push_back(WrittenValInputMA); 1052 else 1053 ProcessAllIncoming(TargetStmt); 1054 1055 auto AnyMapped = false; 1056 auto &DL = S->getRegion().getEntry()->getModule()->getDataLayout(); 1057 auto StoreSize = 1058 DL.getTypeAllocSize(TargetStoreMA->getAccessValue()->getType()); 1059 1060 while (!Worklist.empty()) { 1061 auto *MA = Worklist.pop_back_val(); 1062 1063 auto *SAI = MA->getScopArrayInfo(); 1064 if (Closed.count(SAI)) 1065 continue; 1066 Closed.insert(SAI); 1067 LLVM_DEBUG(dbgs() << "\n Trying to map " << MA << " (SAI: " << SAI 1068 << ")\n"); 1069 1070 // Skip non-mappable scalars. 1071 if (!isMappable(SAI)) 1072 continue; 1073 1074 auto MASize = DL.getTypeAllocSize(MA->getAccessValue()->getType()); 1075 if (MASize > StoreSize) { 1076 LLVM_DEBUG( 1077 dbgs() << " Reject because storage size is insufficient\n"); 1078 continue; 1079 } 1080 1081 // Try to map MemoryKind::Value scalars. 1082 if (SAI->isValueKind()) { 1083 if (!tryMapValue(SAI, EltTarget)) 1084 continue; 1085 1086 auto *DefAcc = S->getValueDef(SAI); 1087 ProcessAllIncoming(DefAcc->getStatement()); 1088 1089 AnyMapped = true; 1090 continue; 1091 } 1092 1093 // Try to map MemoryKind::PHI scalars. 1094 if (SAI->isPHIKind()) { 1095 if (!tryMapPHI(SAI, EltTarget)) 1096 continue; 1097 // Add inputs of all incoming statements to the worklist. Prefer the 1098 // input accesses of the incoming blocks. 1099 for (auto *PHIWrite : S->getPHIIncomings(SAI)) { 1100 auto *PHIWriteStmt = PHIWrite->getStatement(); 1101 bool FoundAny = false; 1102 for (auto Incoming : PHIWrite->getIncoming()) { 1103 auto *IncomingInputMA = 1104 PHIWriteStmt->lookupInputAccessOf(Incoming.second); 1105 if (!IncomingInputMA) 1106 continue; 1107 1108 Worklist.push_back(IncomingInputMA); 1109 FoundAny = true; 1110 } 1111 1112 if (!FoundAny) 1113 ProcessAllIncoming(PHIWrite->getStatement()); 1114 } 1115 1116 AnyMapped = true; 1117 continue; 1118 } 1119 } 1120 1121 if (AnyMapped) { 1122 TargetsMapped++; 1123 NumberOfTargetsMapped++; 1124 } 1125 return AnyMapped; 1126 } 1127 1128 /// Compute when an array element is unused. 1129 /// 1130 /// @return { [Element[] -> Zone[]] } 1131 isl::union_set computeLifetime() const { 1132 // { Element[] -> Zone[] } 1133 auto ArrayUnused = computeArrayUnused(Schedule, AllMustWrites, AllReads, 1134 false, false, true); 1135 1136 auto Result = ArrayUnused.wrap(); 1137 1138 simplify(Result); 1139 return Result; 1140 } 1141 1142 /// Determine when an array element is written to, and which value instance is 1143 /// written. 1144 /// 1145 /// @return { [Element[] -> Scatter[]] -> ValInst[] } 1146 isl::union_map computeWritten() const { 1147 // { [Element[] -> Scatter[]] -> ValInst[] } 1148 auto EltWritten = applyDomainRange(AllWriteValInst, Schedule); 1149 1150 simplify(EltWritten); 1151 return EltWritten; 1152 } 1153 1154 /// Determine whether an access touches at most one element. 1155 /// 1156 /// The accessed element could be a scalar or accessing an array with constant 1157 /// subscript, such that all instances access only that element. 1158 /// 1159 /// @param MA The access to test. 1160 /// 1161 /// @return True, if zero or one elements are accessed; False if at least two 1162 /// different elements are accessed. 1163 bool isScalarAccess(MemoryAccess *MA) { 1164 auto Map = getAccessRelationFor(MA); 1165 auto Set = Map.range(); 1166 return Set.is_singleton(); 1167 } 1168 1169 /// Print mapping statistics to @p OS. 1170 void printStatistics(llvm::raw_ostream &OS, int Indent = 0) const { 1171 OS.indent(Indent) << "Statistics {\n"; 1172 OS.indent(Indent + 4) << "Compatible overwrites: " 1173 << NumberOfCompatibleTargets << "\n"; 1174 OS.indent(Indent + 4) << "Overwrites mapped to: " << NumberOfTargetsMapped 1175 << '\n'; 1176 OS.indent(Indent + 4) << "Value scalars mapped: " 1177 << NumberOfMappedValueScalars << '\n'; 1178 OS.indent(Indent + 4) << "PHI scalars mapped: " 1179 << NumberOfMappedPHIScalars << '\n'; 1180 OS.indent(Indent) << "}\n"; 1181 } 1182 1183 public: 1184 DeLICMImpl(Scop *S, LoopInfo *LI) : ZoneAlgorithm("polly-delicm", S, LI) {} 1185 1186 /// Calculate the lifetime (definition to last use) of every array element. 1187 /// 1188 /// @return True if the computed lifetimes (#Zone) is usable. 1189 bool computeZone() { 1190 // Check that nothing strange occurs. 1191 collectCompatibleElts(); 1192 1193 isl::union_set EltUnused; 1194 isl::union_map EltKnown, EltWritten; 1195 1196 { 1197 IslMaxOperationsGuard MaxOpGuard(IslCtx.get(), DelicmMaxOps); 1198 1199 computeCommon(); 1200 1201 EltUnused = computeLifetime(); 1202 EltKnown = computeKnown(true, false); 1203 EltWritten = computeWritten(); 1204 } 1205 DeLICMAnalyzed++; 1206 1207 if (!EltUnused || !EltKnown || !EltWritten) { 1208 assert(isl_ctx_last_error(IslCtx.get()) == isl_error_quota && 1209 "The only reason that these things have not been computed should " 1210 "be if the max-operations limit hit"); 1211 DeLICMOutOfQuota++; 1212 LLVM_DEBUG(dbgs() << "DeLICM analysis exceeded max_operations\n"); 1213 DebugLoc Begin, End; 1214 getDebugLocations(getBBPairForRegion(&S->getRegion()), Begin, End); 1215 OptimizationRemarkAnalysis R(DEBUG_TYPE, "OutOfQuota", Begin, 1216 S->getEntry()); 1217 R << "maximal number of operations exceeded during zone analysis"; 1218 S->getFunction().getContext().diagnose(R); 1219 return false; 1220 } 1221 1222 Zone = OriginalZone = Knowledge(nullptr, EltUnused, EltKnown, EltWritten); 1223 LLVM_DEBUG(dbgs() << "Computed Zone:\n"; OriginalZone.print(dbgs(), 4)); 1224 1225 assert(Zone.isUsable() && OriginalZone.isUsable()); 1226 return true; 1227 } 1228 1229 /// Try to map as many scalars to unused array elements as possible. 1230 /// 1231 /// Multiple scalars might be mappable to intersecting unused array element 1232 /// zones, but we can only chose one. This is a greedy algorithm, therefore 1233 /// the first processed element claims it. 1234 void greedyCollapse() { 1235 bool Modified = false; 1236 1237 for (auto &Stmt : *S) { 1238 for (auto *MA : Stmt) { 1239 if (!MA->isLatestArrayKind()) 1240 continue; 1241 if (!MA->isWrite()) 1242 continue; 1243 1244 if (MA->isMayWrite()) { 1245 LLVM_DEBUG(dbgs() << "Access " << MA 1246 << " pruned because it is a MAY_WRITE\n"); 1247 OptimizationRemarkMissed R(DEBUG_TYPE, "TargetMayWrite", 1248 MA->getAccessInstruction()); 1249 R << "Skipped possible mapping target because it is not an " 1250 "unconditional overwrite"; 1251 S->getFunction().getContext().diagnose(R); 1252 continue; 1253 } 1254 1255 if (Stmt.getNumIterators() == 0) { 1256 LLVM_DEBUG(dbgs() << "Access " << MA 1257 << " pruned because it is not in a loop\n"); 1258 OptimizationRemarkMissed R(DEBUG_TYPE, "WriteNotInLoop", 1259 MA->getAccessInstruction()); 1260 R << "skipped possible mapping target because it is not in a loop"; 1261 S->getFunction().getContext().diagnose(R); 1262 continue; 1263 } 1264 1265 if (isScalarAccess(MA)) { 1266 LLVM_DEBUG(dbgs() 1267 << "Access " << MA 1268 << " pruned because it writes only a single element\n"); 1269 OptimizationRemarkMissed R(DEBUG_TYPE, "ScalarWrite", 1270 MA->getAccessInstruction()); 1271 R << "skipped possible mapping target because the memory location " 1272 "written to does not depend on its outer loop"; 1273 S->getFunction().getContext().diagnose(R); 1274 continue; 1275 } 1276 1277 if (!isa<StoreInst>(MA->getAccessInstruction())) { 1278 LLVM_DEBUG(dbgs() << "Access " << MA 1279 << " pruned because it is not a StoreInst\n"); 1280 OptimizationRemarkMissed R(DEBUG_TYPE, "NotAStore", 1281 MA->getAccessInstruction()); 1282 R << "skipped possible mapping target because non-store instructions " 1283 "are not supported"; 1284 S->getFunction().getContext().diagnose(R); 1285 continue; 1286 } 1287 1288 // Check for more than one element acces per statement instance. 1289 // Currently we expect write accesses to be functional, eg. disallow 1290 // 1291 // { Stmt[0] -> [i] : 0 <= i < 2 } 1292 // 1293 // This may occur when some accesses to the element write/read only 1294 // parts of the element, eg. a single byte. Polly then divides each 1295 // element into subelements of the smallest access length, normal access 1296 // then touch multiple of such subelements. It is very common when the 1297 // array is accesses with memset, memcpy or memmove which take i8* 1298 // arguments. 1299 isl::union_map AccRel = MA->getLatestAccessRelation(); 1300 if (!AccRel.is_single_valued().is_true()) { 1301 LLVM_DEBUG(dbgs() << "Access " << MA 1302 << " is incompatible because it writes multiple " 1303 "elements per instance\n"); 1304 OptimizationRemarkMissed R(DEBUG_TYPE, "NonFunctionalAccRel", 1305 MA->getAccessInstruction()); 1306 R << "skipped possible mapping target because it writes more than " 1307 "one element"; 1308 S->getFunction().getContext().diagnose(R); 1309 continue; 1310 } 1311 1312 isl::union_set TouchedElts = AccRel.range(); 1313 if (!TouchedElts.is_subset(CompatibleElts)) { 1314 LLVM_DEBUG( 1315 dbgs() 1316 << "Access " << MA 1317 << " is incompatible because it touches incompatible elements\n"); 1318 OptimizationRemarkMissed R(DEBUG_TYPE, "IncompatibleElts", 1319 MA->getAccessInstruction()); 1320 R << "skipped possible mapping target because a target location " 1321 "cannot be reliably analyzed"; 1322 S->getFunction().getContext().diagnose(R); 1323 continue; 1324 } 1325 1326 assert(isCompatibleAccess(MA)); 1327 NumberOfCompatibleTargets++; 1328 LLVM_DEBUG(dbgs() << "Analyzing target access " << MA << "\n"); 1329 if (collapseScalarsToStore(MA)) 1330 Modified = true; 1331 } 1332 } 1333 1334 if (Modified) 1335 DeLICMScopsModified++; 1336 } 1337 1338 /// Dump the internal information about a performed DeLICM to @p OS. 1339 void print(llvm::raw_ostream &OS, int Indent = 0) { 1340 if (!Zone.isUsable()) { 1341 OS.indent(Indent) << "Zone not computed\n"; 1342 return; 1343 } 1344 1345 printStatistics(OS, Indent); 1346 if (!isModified()) { 1347 OS.indent(Indent) << "No modification has been made\n"; 1348 return; 1349 } 1350 printAccesses(OS, Indent); 1351 } 1352 1353 /// Return whether at least one transformation been applied. 1354 bool isModified() const { return NumberOfTargetsMapped > 0; } 1355 }; 1356 1357 static std::unique_ptr<DeLICMImpl> collapseToUnused(Scop &S, LoopInfo &LI) { 1358 std::unique_ptr<DeLICMImpl> Impl = std::make_unique<DeLICMImpl>(&S, &LI); 1359 1360 if (!Impl->computeZone()) { 1361 LLVM_DEBUG(dbgs() << "Abort because cannot reliably compute lifetimes\n"); 1362 return Impl; 1363 } 1364 1365 LLVM_DEBUG(dbgs() << "Collapsing scalars to unused array elements...\n"); 1366 Impl->greedyCollapse(); 1367 1368 LLVM_DEBUG(dbgs() << "\nFinal Scop:\n"); 1369 LLVM_DEBUG(dbgs() << S); 1370 1371 return Impl; 1372 } 1373 1374 static std::unique_ptr<DeLICMImpl> runDeLICM(Scop &S, LoopInfo &LI) { 1375 std::unique_ptr<DeLICMImpl> Impl = collapseToUnused(S, LI); 1376 1377 Scop::ScopStatistics ScopStats = S.getStatistics(); 1378 NumValueWrites += ScopStats.NumValueWrites; 1379 NumValueWritesInLoops += ScopStats.NumValueWritesInLoops; 1380 NumPHIWrites += ScopStats.NumPHIWrites; 1381 NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops; 1382 NumSingletonWrites += ScopStats.NumSingletonWrites; 1383 NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops; 1384 1385 return Impl; 1386 } 1387 1388 static PreservedAnalyses runDeLICMUsingNPM(Scop &S, ScopAnalysisManager &SAM, 1389 ScopStandardAnalysisResults &SAR, 1390 SPMUpdater &U, raw_ostream *OS) { 1391 LoopInfo &LI = SAR.LI; 1392 std::unique_ptr<DeLICMImpl> Impl = runDeLICM(S, LI); 1393 1394 if (OS) { 1395 *OS << "Printing analysis 'Polly - DeLICM/DePRE' for region: '" 1396 << S.getName() << "' in function '" << S.getFunction().getName() 1397 << "':\n"; 1398 if (Impl) { 1399 assert(Impl->getScop() == &S); 1400 1401 *OS << "DeLICM result:\n"; 1402 Impl->print(*OS); 1403 } 1404 } 1405 1406 if (!Impl->isModified()) 1407 return PreservedAnalyses::all(); 1408 1409 PreservedAnalyses PA; 1410 PA.preserveSet<AllAnalysesOn<Module>>(); 1411 PA.preserveSet<AllAnalysesOn<Function>>(); 1412 PA.preserveSet<AllAnalysesOn<Loop>>(); 1413 return PA; 1414 } 1415 1416 class DeLICMWrapperPass : public ScopPass { 1417 private: 1418 DeLICMWrapperPass(const DeLICMWrapperPass &) = delete; 1419 const DeLICMWrapperPass &operator=(const DeLICMWrapperPass &) = delete; 1420 1421 /// The pass implementation, also holding per-scop data. 1422 std::unique_ptr<DeLICMImpl> Impl; 1423 1424 public: 1425 static char ID; 1426 explicit DeLICMWrapperPass() : ScopPass(ID) {} 1427 1428 virtual void getAnalysisUsage(AnalysisUsage &AU) const override { 1429 AU.addRequiredTransitive<ScopInfoRegionPass>(); 1430 AU.addRequired<LoopInfoWrapperPass>(); 1431 AU.setPreservesAll(); 1432 } 1433 1434 virtual bool runOnScop(Scop &S) override { 1435 // Free resources for previous scop's computation, if not yet done. 1436 releaseMemory(); 1437 1438 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 1439 Impl = runDeLICM(S, LI); 1440 1441 return Impl->isModified(); 1442 } 1443 1444 virtual void printScop(raw_ostream &OS, Scop &S) const override { 1445 if (!Impl) 1446 return; 1447 assert(Impl->getScop() == &S); 1448 1449 OS << "DeLICM result:\n"; 1450 Impl->print(OS); 1451 } 1452 1453 virtual void releaseMemory() override { Impl.reset(); } 1454 }; 1455 1456 char DeLICMWrapperPass::ID; 1457 } // anonymous namespace 1458 1459 Pass *polly::createDeLICMWrapperPass() { return new DeLICMWrapperPass(); } 1460 1461 INITIALIZE_PASS_BEGIN(DeLICMWrapperPass, "polly-delicm", "Polly - DeLICM/DePRE", 1462 false, false) 1463 INITIALIZE_PASS_DEPENDENCY(ScopInfoWrapperPass) 1464 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 1465 INITIALIZE_PASS_END(DeLICMWrapperPass, "polly-delicm", "Polly - DeLICM/DePRE", 1466 false, false) 1467 1468 llvm::PreservedAnalyses DeLICMPass::run(Scop &S, ScopAnalysisManager &SAM, 1469 ScopStandardAnalysisResults &SAR, 1470 SPMUpdater &U) { 1471 return runDeLICMUsingNPM(S, SAM, SAR, U, nullptr); 1472 } 1473 1474 llvm::PreservedAnalyses DeLICMPrinterPass::run(Scop &S, 1475 ScopAnalysisManager &SAM, 1476 ScopStandardAnalysisResults &SAR, 1477 SPMUpdater &U) { 1478 return runDeLICMUsingNPM(S, SAM, SAR, U, &OS); 1479 } 1480 1481 bool polly::isConflicting( 1482 isl::union_set ExistingOccupied, isl::union_set ExistingUnused, 1483 isl::union_map ExistingKnown, isl::union_map ExistingWrites, 1484 isl::union_set ProposedOccupied, isl::union_set ProposedUnused, 1485 isl::union_map ProposedKnown, isl::union_map ProposedWrites, 1486 llvm::raw_ostream *OS, unsigned Indent) { 1487 Knowledge Existing(std::move(ExistingOccupied), std::move(ExistingUnused), 1488 std::move(ExistingKnown), std::move(ExistingWrites)); 1489 Knowledge Proposed(std::move(ProposedOccupied), std::move(ProposedUnused), 1490 std::move(ProposedKnown), std::move(ProposedWrites)); 1491 1492 return Knowledge::isConflicting(Existing, Proposed, OS, Indent); 1493 } 1494