1 //===------ DeLICM.cpp -----------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Undo the effect of Loop Invariant Code Motion (LICM) and 10 // GVN Partial Redundancy Elimination (PRE) on SCoP-level. 11 // 12 // Namely, remove register/scalar dependencies by mapping them back to array 13 // elements. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "polly/DeLICM.h" 18 #include "polly/LinkAllPasses.h" 19 #include "polly/Options.h" 20 #include "polly/ScopInfo.h" 21 #include "polly/ScopPass.h" 22 #include "polly/Support/GICHelper.h" 23 #include "polly/Support/ISLOStream.h" 24 #include "polly/Support/ISLTools.h" 25 #include "polly/ZoneAlgo.h" 26 #include "llvm/ADT/Statistic.h" 27 28 #define DEBUG_TYPE "polly-delicm" 29 30 using namespace polly; 31 using namespace llvm; 32 33 namespace { 34 35 cl::opt<int> 36 DelicmMaxOps("polly-delicm-max-ops", 37 cl::desc("Maximum number of isl operations to invest for " 38 "lifetime analysis; 0=no limit"), 39 cl::init(1000000), cl::cat(PollyCategory)); 40 41 cl::opt<bool> DelicmOverapproximateWrites( 42 "polly-delicm-overapproximate-writes", 43 cl::desc( 44 "Do more PHI writes than necessary in order to avoid partial accesses"), 45 cl::init(false), cl::Hidden, cl::cat(PollyCategory)); 46 47 cl::opt<bool> DelicmPartialWrites("polly-delicm-partial-writes", 48 cl::desc("Allow partial writes"), 49 cl::init(true), cl::Hidden, 50 cl::cat(PollyCategory)); 51 52 cl::opt<bool> 53 DelicmComputeKnown("polly-delicm-compute-known", 54 cl::desc("Compute known content of array elements"), 55 cl::init(true), cl::Hidden, cl::cat(PollyCategory)); 56 57 STATISTIC(DeLICMAnalyzed, "Number of successfully analyzed SCoPs"); 58 STATISTIC(DeLICMOutOfQuota, 59 "Analyses aborted because max_operations was reached"); 60 STATISTIC(MappedValueScalars, "Number of mapped Value scalars"); 61 STATISTIC(MappedPHIScalars, "Number of mapped PHI scalars"); 62 STATISTIC(TargetsMapped, "Number of stores used for at least one mapping"); 63 STATISTIC(DeLICMScopsModified, "Number of SCoPs optimized"); 64 65 STATISTIC(NumValueWrites, "Number of scalar value writes after DeLICM"); 66 STATISTIC(NumValueWritesInLoops, 67 "Number of scalar value writes nested in affine loops after DeLICM"); 68 STATISTIC(NumPHIWrites, "Number of scalar phi writes after DeLICM"); 69 STATISTIC(NumPHIWritesInLoops, 70 "Number of scalar phi writes nested in affine loops after DeLICM"); 71 STATISTIC(NumSingletonWrites, "Number of singleton writes after DeLICM"); 72 STATISTIC(NumSingletonWritesInLoops, 73 "Number of singleton writes nested in affine loops after DeLICM"); 74 75 isl::union_map computeReachingOverwrite(isl::union_map Schedule, 76 isl::union_map Writes, 77 bool InclPrevWrite, 78 bool InclOverwrite) { 79 return computeReachingWrite(Schedule, Writes, true, InclPrevWrite, 80 InclOverwrite); 81 } 82 83 /// Compute the next overwrite for a scalar. 84 /// 85 /// @param Schedule { DomainWrite[] -> Scatter[] } 86 /// Schedule of (at least) all writes. Instances not in @p 87 /// Writes are ignored. 88 /// @param Writes { DomainWrite[] } 89 /// The element instances that write to the scalar. 90 /// @param InclPrevWrite Whether to extend the timepoints to include 91 /// the timepoint where the previous write happens. 92 /// @param InclOverwrite Whether the reaching overwrite includes the timepoint 93 /// of the overwrite itself. 94 /// 95 /// @return { Scatter[] -> DomainDef[] } 96 isl::union_map computeScalarReachingOverwrite(isl::union_map Schedule, 97 isl::union_set Writes, 98 bool InclPrevWrite, 99 bool InclOverwrite) { 100 101 // { DomainWrite[] } 102 auto WritesMap = isl::union_map::from_domain(Writes); 103 104 // { [Element[] -> Scatter[]] -> DomainWrite[] } 105 auto Result = computeReachingOverwrite( 106 std::move(Schedule), std::move(WritesMap), InclPrevWrite, InclOverwrite); 107 108 return Result.domain_factor_range(); 109 } 110 111 /// Overload of computeScalarReachingOverwrite, with only one writing statement. 112 /// Consequently, the result consists of only one map space. 113 /// 114 /// @param Schedule { DomainWrite[] -> Scatter[] } 115 /// @param Writes { DomainWrite[] } 116 /// @param InclPrevWrite Include the previous write to result. 117 /// @param InclOverwrite Include the overwrite to the result. 118 /// 119 /// @return { Scatter[] -> DomainWrite[] } 120 isl::map computeScalarReachingOverwrite(isl::union_map Schedule, 121 isl::set Writes, bool InclPrevWrite, 122 bool InclOverwrite) { 123 isl::space ScatterSpace = getScatterSpace(Schedule); 124 isl::space DomSpace = Writes.get_space(); 125 126 isl::union_map ReachOverwrite = computeScalarReachingOverwrite( 127 Schedule, isl::union_set(Writes), InclPrevWrite, InclOverwrite); 128 129 isl::space ResultSpace = ScatterSpace.map_from_domain_and_range(DomSpace); 130 return singleton(std::move(ReachOverwrite), ResultSpace); 131 } 132 133 /// Try to find a 'natural' extension of a mapped to elements outside its 134 /// domain. 135 /// 136 /// @param Relevant The map with mapping that may not be modified. 137 /// @param Universe The domain to which @p Relevant needs to be extended. 138 /// 139 /// @return A map with that associates the domain elements of @p Relevant to the 140 /// same elements and in addition the elements of @p Universe to some 141 /// undefined elements. The function prefers to return simple maps. 142 isl::union_map expandMapping(isl::union_map Relevant, isl::union_set Universe) { 143 Relevant = Relevant.coalesce(); 144 isl::union_set RelevantDomain = Relevant.domain(); 145 isl::union_map Simplified = Relevant.gist_domain(RelevantDomain); 146 Simplified = Simplified.coalesce(); 147 return Simplified.intersect_domain(Universe); 148 } 149 150 /// Represent the knowledge of the contents of any array elements in any zone or 151 /// the knowledge we would add when mapping a scalar to an array element. 152 /// 153 /// Every array element at every zone unit has one of two states: 154 /// 155 /// - Unused: Not occupied by any value so a transformation can change it to 156 /// other values. 157 /// 158 /// - Occupied: The element contains a value that is still needed. 159 /// 160 /// The union of Unused and Unknown zones forms the universe, the set of all 161 /// elements at every timepoint. The universe can easily be derived from the 162 /// array elements that are accessed someway. Arrays that are never accessed 163 /// also never play a role in any computation and can hence be ignored. With a 164 /// given universe, only one of the sets needs to stored implicitly. Computing 165 /// the complement is also an expensive operation, hence this class has been 166 /// designed that only one of sets is needed while the other is assumed to be 167 /// implicit. It can still be given, but is mostly ignored. 168 /// 169 /// There are two use cases for the Knowledge class: 170 /// 171 /// 1) To represent the knowledge of the current state of ScopInfo. The unused 172 /// state means that an element is currently unused: there is no read of it 173 /// before the next overwrite. Also called 'Existing'. 174 /// 175 /// 2) To represent the requirements for mapping a scalar to array elements. The 176 /// unused state means that there is no change/requirement. Also called 177 /// 'Proposed'. 178 /// 179 /// In addition to these states at unit zones, Knowledge needs to know when 180 /// values are written. This is because written values may have no lifetime (one 181 /// reason is that the value is never read). Such writes would therefore never 182 /// conflict, but overwrite values that might still be required. Another source 183 /// of problems are multiple writes to the same element at the same timepoint, 184 /// because their order is undefined. 185 class Knowledge { 186 private: 187 /// { [Element[] -> Zone[]] } 188 /// Set of array elements and when they are alive. 189 /// Can contain a nullptr; in this case the set is implicitly defined as the 190 /// complement of #Unused. 191 /// 192 /// The set of alive array elements is represented as zone, as the set of live 193 /// values can differ depending on how the elements are interpreted. 194 /// Assuming a value X is written at timestep [0] and read at timestep [1] 195 /// without being used at any later point, then the value is alive in the 196 /// interval ]0,1[. This interval cannot be represented by an integer set, as 197 /// it does not contain any integer point. Zones allow us to represent this 198 /// interval and can be converted to sets of timepoints when needed (e.g., in 199 /// isConflicting when comparing to the write sets). 200 /// @see convertZoneToTimepoints and this file's comment for more details. 201 isl::union_set Occupied; 202 203 /// { [Element[] -> Zone[]] } 204 /// Set of array elements when they are not alive, i.e. their memory can be 205 /// used for other purposed. Can contain a nullptr; in this case the set is 206 /// implicitly defined as the complement of #Occupied. 207 isl::union_set Unused; 208 209 /// { [Element[] -> Zone[]] -> ValInst[] } 210 /// Maps to the known content for each array element at any interval. 211 /// 212 /// Any element/interval can map to multiple known elements. This is due to 213 /// multiple llvm::Value referring to the same content. Examples are 214 /// 215 /// - A value stored and loaded again. The LoadInst represents the same value 216 /// as the StoreInst's value operand. 217 /// 218 /// - A PHINode is equal to any one of the incoming values. In case of 219 /// LCSSA-form, it is always equal to its single incoming value. 220 /// 221 /// Two Knowledges are considered not conflicting if at least one of the known 222 /// values match. Not known values are not stored as an unnamed tuple (as 223 /// #Written does), but maps to nothing. 224 /// 225 /// Known values are usually just defined for #Occupied elements. Knowing 226 /// #Unused contents has no advantage as it can be overwritten. 227 isl::union_map Known; 228 229 /// { [Element[] -> Scatter[]] -> ValInst[] } 230 /// The write actions currently in the scop or that would be added when 231 /// mapping a scalar. Maps to the value that is written. 232 /// 233 /// Written values that cannot be identified are represented by an unknown 234 /// ValInst[] (an unnamed tuple of 0 dimension). It conflicts with itself. 235 isl::union_map Written; 236 237 /// Check whether this Knowledge object is well-formed. 238 void checkConsistency() const { 239 #ifndef NDEBUG 240 // Default-initialized object 241 if (!Occupied && !Unused && !Known && !Written) 242 return; 243 244 assert(Occupied || Unused); 245 assert(Known); 246 assert(Written); 247 248 // If not all fields are defined, we cannot derived the universe. 249 if (!Occupied || !Unused) 250 return; 251 252 assert(Occupied.is_disjoint(Unused)); 253 auto Universe = Occupied.unite(Unused); 254 255 assert(!Known.domain().is_subset(Universe).is_false()); 256 assert(!Written.domain().is_subset(Universe).is_false()); 257 #endif 258 } 259 260 public: 261 /// Initialize a nullptr-Knowledge. This is only provided for convenience; do 262 /// not use such an object. 263 Knowledge() {} 264 265 /// Create a new object with the given members. 266 Knowledge(isl::union_set Occupied, isl::union_set Unused, 267 isl::union_map Known, isl::union_map Written) 268 : Occupied(std::move(Occupied)), Unused(std::move(Unused)), 269 Known(std::move(Known)), Written(std::move(Written)) { 270 checkConsistency(); 271 } 272 273 /// Return whether this object was not default-constructed. 274 bool isUsable() const { return (Occupied || Unused) && Known && Written; } 275 276 /// Print the content of this object to @p OS. 277 void print(llvm::raw_ostream &OS, unsigned Indent = 0) const { 278 if (isUsable()) { 279 if (Occupied) 280 OS.indent(Indent) << "Occupied: " << Occupied << "\n"; 281 else 282 OS.indent(Indent) << "Occupied: <Everything else not in Unused>\n"; 283 if (Unused) 284 OS.indent(Indent) << "Unused: " << Unused << "\n"; 285 else 286 OS.indent(Indent) << "Unused: <Everything else not in Occupied>\n"; 287 OS.indent(Indent) << "Known: " << Known << "\n"; 288 OS.indent(Indent) << "Written : " << Written << '\n'; 289 } else { 290 OS.indent(Indent) << "Invalid knowledge\n"; 291 } 292 } 293 294 /// Combine two knowledges, this and @p That. 295 void learnFrom(Knowledge That) { 296 assert(!isConflicting(*this, That)); 297 assert(Unused && That.Occupied); 298 assert( 299 !That.Unused && 300 "This function is only prepared to learn occupied elements from That"); 301 assert(!Occupied && "This function does not implement " 302 "`this->Occupied = " 303 "this->Occupied.unite(That.Occupied);`"); 304 305 Unused = Unused.subtract(That.Occupied); 306 Known = Known.unite(That.Known); 307 Written = Written.unite(That.Written); 308 309 checkConsistency(); 310 } 311 312 /// Determine whether two Knowledges conflict with each other. 313 /// 314 /// In theory @p Existing and @p Proposed are symmetric, but the 315 /// implementation is constrained by the implicit interpretation. That is, @p 316 /// Existing must have #Unused defined (use case 1) and @p Proposed must have 317 /// #Occupied defined (use case 1). 318 /// 319 /// A conflict is defined as non-preserved semantics when they are merged. For 320 /// instance, when for the same array and zone they assume different 321 /// llvm::Values. 322 /// 323 /// @param Existing One of the knowledges with #Unused defined. 324 /// @param Proposed One of the knowledges with #Occupied defined. 325 /// @param OS Dump the conflict reason to this output stream; use 326 /// nullptr to not output anything. 327 /// @param Indent Indention for the conflict reason. 328 /// 329 /// @return True, iff the two knowledges are conflicting. 330 static bool isConflicting(const Knowledge &Existing, 331 const Knowledge &Proposed, 332 llvm::raw_ostream *OS = nullptr, 333 unsigned Indent = 0) { 334 assert(Existing.Unused); 335 assert(Proposed.Occupied); 336 337 #ifndef NDEBUG 338 if (Existing.Occupied && Proposed.Unused) { 339 auto ExistingUniverse = Existing.Occupied.unite(Existing.Unused); 340 auto ProposedUniverse = Proposed.Occupied.unite(Proposed.Unused); 341 assert(ExistingUniverse.is_equal(ProposedUniverse) && 342 "Both inputs' Knowledges must be over the same universe"); 343 } 344 #endif 345 346 // Do the Existing and Proposed lifetimes conflict? 347 // 348 // Lifetimes are described as the cross-product of array elements and zone 349 // intervals in which they are alive (the space { [Element[] -> Zone[]] }). 350 // In the following we call this "element/lifetime interval". 351 // 352 // In order to not conflict, one of the following conditions must apply for 353 // each element/lifetime interval: 354 // 355 // 1. If occupied in one of the knowledges, it is unused in the other. 356 // 357 // - or - 358 // 359 // 2. Both contain the same value. 360 // 361 // Instead of partitioning the element/lifetime intervals into a part that 362 // both Knowledges occupy (which requires an expensive subtraction) and for 363 // these to check whether they are known to be the same value, we check only 364 // the second condition and ensure that it also applies when then first 365 // condition is true. This is done by adding a wildcard value to 366 // Proposed.Known and Existing.Unused such that they match as a common known 367 // value. We use the "unknown ValInst" for this purpose. Every 368 // Existing.Unused may match with an unknown Proposed.Occupied because these 369 // never are in conflict with each other. 370 auto ProposedOccupiedAnyVal = makeUnknownForDomain(Proposed.Occupied); 371 auto ProposedValues = Proposed.Known.unite(ProposedOccupiedAnyVal); 372 373 auto ExistingUnusedAnyVal = makeUnknownForDomain(Existing.Unused); 374 auto ExistingValues = Existing.Known.unite(ExistingUnusedAnyVal); 375 376 auto MatchingVals = ExistingValues.intersect(ProposedValues); 377 auto Matches = MatchingVals.domain(); 378 379 // Any Proposed.Occupied must either have a match between the known values 380 // of Existing and Occupied, or be in Existing.Unused. In the latter case, 381 // the previously added "AnyVal" will match each other. 382 if (!Proposed.Occupied.is_subset(Matches)) { 383 if (OS) { 384 auto Conflicting = Proposed.Occupied.subtract(Matches); 385 auto ExistingConflictingKnown = 386 Existing.Known.intersect_domain(Conflicting); 387 auto ProposedConflictingKnown = 388 Proposed.Known.intersect_domain(Conflicting); 389 390 OS->indent(Indent) << "Proposed lifetime conflicting with Existing's\n"; 391 OS->indent(Indent) << "Conflicting occupied: " << Conflicting << "\n"; 392 if (!ExistingConflictingKnown.is_empty()) 393 OS->indent(Indent) 394 << "Existing Known: " << ExistingConflictingKnown << "\n"; 395 if (!ProposedConflictingKnown.is_empty()) 396 OS->indent(Indent) 397 << "Proposed Known: " << ProposedConflictingKnown << "\n"; 398 } 399 return true; 400 } 401 402 // Do the writes in Existing conflict with occupied values in Proposed? 403 // 404 // In order to not conflict, it must either write to unused lifetime or 405 // write the same value. To check, we remove the writes that write into 406 // Proposed.Unused (they never conflict) and then see whether the written 407 // value is already in Proposed.Known. If there are multiple known values 408 // and a written value is known under different names, it is enough when one 409 // of the written values (assuming that they are the same value under 410 // different names, e.g. a PHINode and one of the incoming values) matches 411 // one of the known names. 412 // 413 // We convert here the set of lifetimes to actual timepoints. A lifetime is 414 // in conflict with a set of write timepoints, if either a live timepoint is 415 // clearly within the lifetime or if a write happens at the beginning of the 416 // lifetime (where it would conflict with the value that actually writes the 417 // value alive). There is no conflict at the end of a lifetime, as the alive 418 // value will always be read, before it is overwritten again. The last 419 // property holds in Polly for all scalar values and we expect all users of 420 // Knowledge to check this property also for accesses to MemoryKind::Array. 421 auto ProposedFixedDefs = 422 convertZoneToTimepoints(Proposed.Occupied, true, false); 423 auto ProposedFixedKnown = 424 convertZoneToTimepoints(Proposed.Known, isl::dim::in, true, false); 425 426 auto ExistingConflictingWrites = 427 Existing.Written.intersect_domain(ProposedFixedDefs); 428 auto ExistingConflictingWritesDomain = ExistingConflictingWrites.domain(); 429 430 auto CommonWrittenVal = 431 ProposedFixedKnown.intersect(ExistingConflictingWrites); 432 auto CommonWrittenValDomain = CommonWrittenVal.domain(); 433 434 if (!ExistingConflictingWritesDomain.is_subset(CommonWrittenValDomain)) { 435 if (OS) { 436 auto ExistingConflictingWritten = 437 ExistingConflictingWrites.subtract_domain(CommonWrittenValDomain); 438 auto ProposedConflictingKnown = ProposedFixedKnown.subtract_domain( 439 ExistingConflictingWritten.domain()); 440 441 OS->indent(Indent) 442 << "Proposed a lifetime where there is an Existing write into it\n"; 443 OS->indent(Indent) << "Existing conflicting writes: " 444 << ExistingConflictingWritten << "\n"; 445 if (!ProposedConflictingKnown.is_empty()) 446 OS->indent(Indent) 447 << "Proposed conflicting known: " << ProposedConflictingKnown 448 << "\n"; 449 } 450 return true; 451 } 452 453 // Do the writes in Proposed conflict with occupied values in Existing? 454 auto ExistingAvailableDefs = 455 convertZoneToTimepoints(Existing.Unused, true, false); 456 auto ExistingKnownDefs = 457 convertZoneToTimepoints(Existing.Known, isl::dim::in, true, false); 458 459 auto ProposedWrittenDomain = Proposed.Written.domain(); 460 auto KnownIdentical = ExistingKnownDefs.intersect(Proposed.Written); 461 auto IdenticalOrUnused = 462 ExistingAvailableDefs.unite(KnownIdentical.domain()); 463 if (!ProposedWrittenDomain.is_subset(IdenticalOrUnused)) { 464 if (OS) { 465 auto Conflicting = ProposedWrittenDomain.subtract(IdenticalOrUnused); 466 auto ExistingConflictingKnown = 467 ExistingKnownDefs.intersect_domain(Conflicting); 468 auto ProposedConflictingWritten = 469 Proposed.Written.intersect_domain(Conflicting); 470 471 OS->indent(Indent) << "Proposed writes into range used by Existing\n"; 472 OS->indent(Indent) << "Proposed conflicting writes: " 473 << ProposedConflictingWritten << "\n"; 474 if (!ExistingConflictingKnown.is_empty()) 475 OS->indent(Indent) 476 << "Existing conflicting known: " << ExistingConflictingKnown 477 << "\n"; 478 } 479 return true; 480 } 481 482 // Does Proposed write at the same time as Existing already does (order of 483 // writes is undefined)? Writing the same value is permitted. 484 auto ExistingWrittenDomain = Existing.Written.domain(); 485 auto BothWritten = 486 Existing.Written.domain().intersect(Proposed.Written.domain()); 487 auto ExistingKnownWritten = filterKnownValInst(Existing.Written); 488 auto ProposedKnownWritten = filterKnownValInst(Proposed.Written); 489 auto CommonWritten = 490 ExistingKnownWritten.intersect(ProposedKnownWritten).domain(); 491 492 if (!BothWritten.is_subset(CommonWritten)) { 493 if (OS) { 494 auto Conflicting = BothWritten.subtract(CommonWritten); 495 auto ExistingConflictingWritten = 496 Existing.Written.intersect_domain(Conflicting); 497 auto ProposedConflictingWritten = 498 Proposed.Written.intersect_domain(Conflicting); 499 500 OS->indent(Indent) << "Proposed writes at the same time as an already " 501 "Existing write\n"; 502 OS->indent(Indent) << "Conflicting writes: " << Conflicting << "\n"; 503 if (!ExistingConflictingWritten.is_empty()) 504 OS->indent(Indent) 505 << "Exiting write: " << ExistingConflictingWritten << "\n"; 506 if (!ProposedConflictingWritten.is_empty()) 507 OS->indent(Indent) 508 << "Proposed write: " << ProposedConflictingWritten << "\n"; 509 } 510 return true; 511 } 512 513 return false; 514 } 515 }; 516 517 /// Implementation of the DeLICM/DePRE transformation. 518 class DeLICMImpl : public ZoneAlgorithm { 519 private: 520 /// Knowledge before any transformation took place. 521 Knowledge OriginalZone; 522 523 /// Current knowledge of the SCoP including all already applied 524 /// transformations. 525 Knowledge Zone; 526 527 /// Number of StoreInsts something can be mapped to. 528 int NumberOfCompatibleTargets = 0; 529 530 /// The number of StoreInsts to which at least one value or PHI has been 531 /// mapped to. 532 int NumberOfTargetsMapped = 0; 533 534 /// The number of llvm::Value mapped to some array element. 535 int NumberOfMappedValueScalars = 0; 536 537 /// The number of PHIs mapped to some array element. 538 int NumberOfMappedPHIScalars = 0; 539 540 /// Determine whether two knowledges are conflicting with each other. 541 /// 542 /// @see Knowledge::isConflicting 543 bool isConflicting(const Knowledge &Proposed) { 544 raw_ostream *OS = nullptr; 545 LLVM_DEBUG(OS = &llvm::dbgs()); 546 return Knowledge::isConflicting(Zone, Proposed, OS, 4); 547 } 548 549 /// Determine whether @p SAI is a scalar that can be mapped to an array 550 /// element. 551 bool isMappable(const ScopArrayInfo *SAI) { 552 assert(SAI); 553 554 if (SAI->isValueKind()) { 555 auto *MA = S->getValueDef(SAI); 556 if (!MA) { 557 LLVM_DEBUG( 558 dbgs() 559 << " Reject because value is read-only within the scop\n"); 560 return false; 561 } 562 563 // Mapping if value is used after scop is not supported. The code 564 // generator would need to reload the scalar after the scop, but it 565 // does not have the information to where it is mapped to. Only the 566 // MemoryAccesses have that information, not the ScopArrayInfo. 567 auto Inst = MA->getAccessInstruction(); 568 for (auto User : Inst->users()) { 569 if (!isa<Instruction>(User)) 570 return false; 571 auto UserInst = cast<Instruction>(User); 572 573 if (!S->contains(UserInst)) { 574 LLVM_DEBUG(dbgs() << " Reject because value is escaping\n"); 575 return false; 576 } 577 } 578 579 return true; 580 } 581 582 if (SAI->isPHIKind()) { 583 auto *MA = S->getPHIRead(SAI); 584 assert(MA); 585 586 // Mapping of an incoming block from before the SCoP is not supported by 587 // the code generator. 588 auto PHI = cast<PHINode>(MA->getAccessInstruction()); 589 for (auto Incoming : PHI->blocks()) { 590 if (!S->contains(Incoming)) { 591 LLVM_DEBUG(dbgs() 592 << " Reject because at least one incoming block is " 593 "not in the scop region\n"); 594 return false; 595 } 596 } 597 598 return true; 599 } 600 601 LLVM_DEBUG(dbgs() << " Reject ExitPHI or other non-value\n"); 602 return false; 603 } 604 605 /// Compute the uses of a MemoryKind::Value and its lifetime (from its 606 /// definition to the last use). 607 /// 608 /// @param SAI The ScopArrayInfo representing the value's storage. 609 /// 610 /// @return { DomainDef[] -> DomainUse[] }, { DomainDef[] -> Zone[] } 611 /// First element is the set of uses for each definition. 612 /// The second is the lifetime of each definition. 613 std::tuple<isl::union_map, isl::map> 614 computeValueUses(const ScopArrayInfo *SAI) { 615 assert(SAI->isValueKind()); 616 617 // { DomainRead[] } 618 auto Reads = makeEmptyUnionSet(); 619 620 // Find all uses. 621 for (auto *MA : S->getValueUses(SAI)) 622 Reads = Reads.add_set(getDomainFor(MA)); 623 624 // { DomainRead[] -> Scatter[] } 625 auto ReadSchedule = getScatterFor(Reads); 626 627 auto *DefMA = S->getValueDef(SAI); 628 assert(DefMA); 629 630 // { DomainDef[] } 631 auto Writes = getDomainFor(DefMA); 632 633 // { DomainDef[] -> Scatter[] } 634 auto WriteScatter = getScatterFor(Writes); 635 636 // { Scatter[] -> DomainDef[] } 637 auto ReachDef = getScalarReachingDefinition(DefMA->getStatement()); 638 639 // { [DomainDef[] -> Scatter[]] -> DomainUse[] } 640 auto Uses = isl::union_map(ReachDef.reverse().range_map()) 641 .apply_range(ReadSchedule.reverse()); 642 643 // { DomainDef[] -> Scatter[] } 644 auto UseScatter = 645 singleton(Uses.domain().unwrap(), 646 Writes.get_space().map_from_domain_and_range(ScatterSpace)); 647 648 // { DomainDef[] -> Zone[] } 649 auto Lifetime = betweenScatter(WriteScatter, UseScatter, false, true); 650 651 // { DomainDef[] -> DomainRead[] } 652 auto DefUses = Uses.domain_factor_domain(); 653 654 return std::make_pair(DefUses, Lifetime); 655 } 656 657 /// Try to map a MemoryKind::Value to a given array element. 658 /// 659 /// @param SAI Representation of the scalar's memory to map. 660 /// @param TargetElt { Scatter[] -> Element[] } 661 /// Suggestion where to map a scalar to when at a timepoint. 662 /// 663 /// @return true if the scalar was successfully mapped. 664 bool tryMapValue(const ScopArrayInfo *SAI, isl::map TargetElt) { 665 assert(SAI->isValueKind()); 666 667 auto *DefMA = S->getValueDef(SAI); 668 assert(DefMA->isValueKind()); 669 assert(DefMA->isMustWrite()); 670 auto *V = DefMA->getAccessValue(); 671 auto *DefInst = DefMA->getAccessInstruction(); 672 673 // Stop if the scalar has already been mapped. 674 if (!DefMA->getLatestScopArrayInfo()->isValueKind()) 675 return false; 676 677 // { DomainDef[] -> Scatter[] } 678 auto DefSched = getScatterFor(DefMA); 679 680 // Where each write is mapped to, according to the suggestion. 681 // { DomainDef[] -> Element[] } 682 auto DefTarget = TargetElt.apply_domain(DefSched.reverse()); 683 simplify(DefTarget); 684 LLVM_DEBUG(dbgs() << " Def Mapping: " << DefTarget << '\n'); 685 686 auto OrigDomain = getDomainFor(DefMA); 687 auto MappedDomain = DefTarget.domain(); 688 if (!OrigDomain.is_subset(MappedDomain)) { 689 LLVM_DEBUG( 690 dbgs() 691 << " Reject because mapping does not encompass all instances\n"); 692 return false; 693 } 694 695 // { DomainDef[] -> Zone[] } 696 isl::map Lifetime; 697 698 // { DomainDef[] -> DomainUse[] } 699 isl::union_map DefUses; 700 701 std::tie(DefUses, Lifetime) = computeValueUses(SAI); 702 LLVM_DEBUG(dbgs() << " Lifetime: " << Lifetime << '\n'); 703 704 /// { [Element[] -> Zone[]] } 705 auto EltZone = Lifetime.apply_domain(DefTarget).wrap(); 706 simplify(EltZone); 707 708 // When known knowledge is disabled, just return the unknown value. It will 709 // either get filtered out or conflict with itself. 710 // { DomainDef[] -> ValInst[] } 711 isl::map ValInst; 712 if (DelicmComputeKnown) 713 ValInst = makeValInst(V, DefMA->getStatement(), 714 LI->getLoopFor(DefInst->getParent())); 715 else 716 ValInst = makeUnknownForDomain(DefMA->getStatement()); 717 718 // { DomainDef[] -> [Element[] -> Zone[]] } 719 auto EltKnownTranslator = DefTarget.range_product(Lifetime); 720 721 // { [Element[] -> Zone[]] -> ValInst[] } 722 auto EltKnown = ValInst.apply_domain(EltKnownTranslator); 723 simplify(EltKnown); 724 725 // { DomainDef[] -> [Element[] -> Scatter[]] } 726 auto WrittenTranslator = DefTarget.range_product(DefSched); 727 728 // { [Element[] -> Scatter[]] -> ValInst[] } 729 auto DefEltSched = ValInst.apply_domain(WrittenTranslator); 730 simplify(DefEltSched); 731 732 Knowledge Proposed(EltZone, nullptr, filterKnownValInst(EltKnown), 733 DefEltSched); 734 if (isConflicting(Proposed)) 735 return false; 736 737 // { DomainUse[] -> Element[] } 738 auto UseTarget = DefUses.reverse().apply_range(DefTarget); 739 740 mapValue(SAI, std::move(DefTarget), std::move(UseTarget), 741 std::move(Lifetime), std::move(Proposed)); 742 return true; 743 } 744 745 /// After a scalar has been mapped, update the global knowledge. 746 void applyLifetime(Knowledge Proposed) { 747 Zone.learnFrom(std::move(Proposed)); 748 } 749 750 /// Map a MemoryKind::Value scalar to an array element. 751 /// 752 /// Callers must have ensured that the mapping is valid and not conflicting. 753 /// 754 /// @param SAI The ScopArrayInfo representing the scalar's memory to 755 /// map. 756 /// @param DefTarget { DomainDef[] -> Element[] } 757 /// The array element to map the scalar to. 758 /// @param UseTarget { DomainUse[] -> Element[] } 759 /// The array elements the uses are mapped to. 760 /// @param Lifetime { DomainDef[] -> Zone[] } 761 /// The lifetime of each llvm::Value definition for 762 /// reporting. 763 /// @param Proposed Mapping constraints for reporting. 764 void mapValue(const ScopArrayInfo *SAI, isl::map DefTarget, 765 isl::union_map UseTarget, isl::map Lifetime, 766 Knowledge Proposed) { 767 // Redirect the read accesses. 768 for (auto *MA : S->getValueUses(SAI)) { 769 // { DomainUse[] } 770 auto Domain = getDomainFor(MA); 771 772 // { DomainUse[] -> Element[] } 773 auto NewAccRel = UseTarget.intersect_domain(Domain); 774 simplify(NewAccRel); 775 776 assert(isl_union_map_n_map(NewAccRel.get()) == 1); 777 MA->setNewAccessRelation(isl::map::from_union_map(NewAccRel)); 778 } 779 780 auto *WA = S->getValueDef(SAI); 781 WA->setNewAccessRelation(DefTarget); 782 applyLifetime(Proposed); 783 784 MappedValueScalars++; 785 NumberOfMappedValueScalars += 1; 786 } 787 788 isl::map makeValInst(Value *Val, ScopStmt *UserStmt, Loop *Scope, 789 bool IsCertain = true) { 790 // When known knowledge is disabled, just return the unknown value. It will 791 // either get filtered out or conflict with itself. 792 if (!DelicmComputeKnown) 793 return makeUnknownForDomain(UserStmt); 794 return ZoneAlgorithm::makeValInst(Val, UserStmt, Scope, IsCertain); 795 } 796 797 /// Express the incoming values of a PHI for each incoming statement in an 798 /// isl::union_map. 799 /// 800 /// @param SAI The PHI scalar represented by a ScopArrayInfo. 801 /// 802 /// @return { PHIWriteDomain[] -> ValInst[] } 803 isl::union_map determinePHIWrittenValues(const ScopArrayInfo *SAI) { 804 auto Result = makeEmptyUnionMap(); 805 806 // Collect the incoming values. 807 for (auto *MA : S->getPHIIncomings(SAI)) { 808 // { DomainWrite[] -> ValInst[] } 809 isl::union_map ValInst; 810 auto *WriteStmt = MA->getStatement(); 811 812 auto Incoming = MA->getIncoming(); 813 assert(!Incoming.empty()); 814 if (Incoming.size() == 1) { 815 ValInst = makeValInst(Incoming[0].second, WriteStmt, 816 LI->getLoopFor(Incoming[0].first)); 817 } else { 818 // If the PHI is in a subregion's exit node it can have multiple 819 // incoming values (+ maybe another incoming edge from an unrelated 820 // block). We cannot directly represent it as a single llvm::Value. 821 // We currently model it as unknown value, but modeling as the PHIInst 822 // itself could be OK, too. 823 ValInst = makeUnknownForDomain(WriteStmt); 824 } 825 826 Result = Result.unite(ValInst); 827 } 828 829 assert(Result.is_single_valued() && 830 "Cannot have multiple incoming values for same incoming statement"); 831 return Result; 832 } 833 834 /// Try to map a MemoryKind::PHI scalar to a given array element. 835 /// 836 /// @param SAI Representation of the scalar's memory to map. 837 /// @param TargetElt { Scatter[] -> Element[] } 838 /// Suggestion where to map the scalar to when at a 839 /// timepoint. 840 /// 841 /// @return true if the PHI scalar has been mapped. 842 bool tryMapPHI(const ScopArrayInfo *SAI, isl::map TargetElt) { 843 auto *PHIRead = S->getPHIRead(SAI); 844 assert(PHIRead->isPHIKind()); 845 assert(PHIRead->isRead()); 846 847 // Skip if already been mapped. 848 if (!PHIRead->getLatestScopArrayInfo()->isPHIKind()) 849 return false; 850 851 // { DomainRead[] -> Scatter[] } 852 auto PHISched = getScatterFor(PHIRead); 853 854 // { DomainRead[] -> Element[] } 855 auto PHITarget = PHISched.apply_range(TargetElt); 856 simplify(PHITarget); 857 LLVM_DEBUG(dbgs() << " Mapping: " << PHITarget << '\n'); 858 859 auto OrigDomain = getDomainFor(PHIRead); 860 auto MappedDomain = PHITarget.domain(); 861 if (!OrigDomain.is_subset(MappedDomain)) { 862 LLVM_DEBUG( 863 dbgs() 864 << " Reject because mapping does not encompass all instances\n"); 865 return false; 866 } 867 868 // { DomainRead[] -> DomainWrite[] } 869 auto PerPHIWrites = computePerPHI(SAI); 870 871 // { DomainWrite[] -> Element[] } 872 auto WritesTarget = PerPHIWrites.apply_domain(PHITarget).reverse(); 873 simplify(WritesTarget); 874 875 // { DomainWrite[] } 876 auto UniverseWritesDom = isl::union_set::empty(ParamSpace); 877 878 for (auto *MA : S->getPHIIncomings(SAI)) 879 UniverseWritesDom = UniverseWritesDom.add_set(getDomainFor(MA)); 880 881 auto RelevantWritesTarget = WritesTarget; 882 if (DelicmOverapproximateWrites) 883 WritesTarget = expandMapping(WritesTarget, UniverseWritesDom); 884 885 auto ExpandedWritesDom = WritesTarget.domain(); 886 if (!DelicmPartialWrites && 887 !UniverseWritesDom.is_subset(ExpandedWritesDom)) { 888 LLVM_DEBUG( 889 dbgs() << " Reject because did not find PHI write mapping for " 890 "all instances\n"); 891 if (DelicmOverapproximateWrites) 892 LLVM_DEBUG(dbgs() << " Relevant Mapping: " 893 << RelevantWritesTarget << '\n'); 894 LLVM_DEBUG(dbgs() << " Deduced Mapping: " << WritesTarget 895 << '\n'); 896 LLVM_DEBUG(dbgs() << " Missing instances: " 897 << UniverseWritesDom.subtract(ExpandedWritesDom) 898 << '\n'); 899 return false; 900 } 901 902 // { DomainRead[] -> Scatter[] } 903 isl::union_map PerPHIWriteScatterUmap = PerPHIWrites.apply_range(Schedule); 904 isl::map PerPHIWriteScatter = 905 singleton(PerPHIWriteScatterUmap, PHISched.get_space()); 906 907 // { DomainRead[] -> Zone[] } 908 auto Lifetime = betweenScatter(PerPHIWriteScatter, PHISched, false, true); 909 simplify(Lifetime); 910 LLVM_DEBUG(dbgs() << " Lifetime: " << Lifetime << "\n"); 911 912 // { DomainWrite[] -> Zone[] } 913 auto WriteLifetime = isl::union_map(Lifetime).apply_domain(PerPHIWrites); 914 915 // { DomainWrite[] -> ValInst[] } 916 auto WrittenValue = determinePHIWrittenValues(SAI); 917 918 // { DomainWrite[] -> [Element[] -> Scatter[]] } 919 auto WrittenTranslator = WritesTarget.range_product(Schedule); 920 921 // { [Element[] -> Scatter[]] -> ValInst[] } 922 auto Written = WrittenValue.apply_domain(WrittenTranslator); 923 simplify(Written); 924 925 // { DomainWrite[] -> [Element[] -> Zone[]] } 926 auto LifetimeTranslator = WritesTarget.range_product(WriteLifetime); 927 928 // { DomainWrite[] -> ValInst[] } 929 auto WrittenKnownValue = filterKnownValInst(WrittenValue); 930 931 // { [Element[] -> Zone[]] -> ValInst[] } 932 auto EltLifetimeInst = WrittenKnownValue.apply_domain(LifetimeTranslator); 933 simplify(EltLifetimeInst); 934 935 // { [Element[] -> Zone[] } 936 auto Occupied = LifetimeTranslator.range(); 937 simplify(Occupied); 938 939 Knowledge Proposed(Occupied, nullptr, EltLifetimeInst, Written); 940 if (isConflicting(Proposed)) 941 return false; 942 943 mapPHI(SAI, std::move(PHITarget), std::move(WritesTarget), 944 std::move(Lifetime), std::move(Proposed)); 945 return true; 946 } 947 948 /// Map a MemoryKind::PHI scalar to an array element. 949 /// 950 /// Callers must have ensured that the mapping is valid and not conflicting 951 /// with the common knowledge. 952 /// 953 /// @param SAI The ScopArrayInfo representing the scalar's memory to 954 /// map. 955 /// @param ReadTarget { DomainRead[] -> Element[] } 956 /// The array element to map the scalar to. 957 /// @param WriteTarget { DomainWrite[] -> Element[] } 958 /// New access target for each PHI incoming write. 959 /// @param Lifetime { DomainRead[] -> Zone[] } 960 /// The lifetime of each PHI for reporting. 961 /// @param Proposed Mapping constraints for reporting. 962 void mapPHI(const ScopArrayInfo *SAI, isl::map ReadTarget, 963 isl::union_map WriteTarget, isl::map Lifetime, 964 Knowledge Proposed) { 965 // { Element[] } 966 isl::space ElementSpace = ReadTarget.get_space().range(); 967 968 // Redirect the PHI incoming writes. 969 for (auto *MA : S->getPHIIncomings(SAI)) { 970 // { DomainWrite[] } 971 auto Domain = getDomainFor(MA); 972 973 // { DomainWrite[] -> Element[] } 974 auto NewAccRel = WriteTarget.intersect_domain(Domain); 975 simplify(NewAccRel); 976 977 isl::space NewAccRelSpace = 978 Domain.get_space().map_from_domain_and_range(ElementSpace); 979 isl::map NewAccRelMap = singleton(NewAccRel, NewAccRelSpace); 980 MA->setNewAccessRelation(NewAccRelMap); 981 } 982 983 // Redirect the PHI read. 984 auto *PHIRead = S->getPHIRead(SAI); 985 PHIRead->setNewAccessRelation(ReadTarget); 986 applyLifetime(Proposed); 987 988 MappedPHIScalars++; 989 NumberOfMappedPHIScalars++; 990 } 991 992 /// Search and map scalars to memory overwritten by @p TargetStoreMA. 993 /// 994 /// Start trying to map scalars that are used in the same statement as the 995 /// store. For every successful mapping, try to also map scalars of the 996 /// statements where those are written. Repeat, until no more mapping 997 /// opportunity is found. 998 /// 999 /// There is currently no preference in which order scalars are tried. 1000 /// Ideally, we would direct it towards a load instruction of the same array 1001 /// element. 1002 bool collapseScalarsToStore(MemoryAccess *TargetStoreMA) { 1003 assert(TargetStoreMA->isLatestArrayKind()); 1004 assert(TargetStoreMA->isMustWrite()); 1005 1006 auto TargetStmt = TargetStoreMA->getStatement(); 1007 1008 // { DomTarget[] } 1009 auto TargetDom = getDomainFor(TargetStmt); 1010 1011 // { DomTarget[] -> Element[] } 1012 auto TargetAccRel = getAccessRelationFor(TargetStoreMA); 1013 1014 // { Zone[] -> DomTarget[] } 1015 // For each point in time, find the next target store instance. 1016 auto Target = 1017 computeScalarReachingOverwrite(Schedule, TargetDom, false, true); 1018 1019 // { Zone[] -> Element[] } 1020 // Use the target store's write location as a suggestion to map scalars to. 1021 auto EltTarget = Target.apply_range(TargetAccRel); 1022 simplify(EltTarget); 1023 LLVM_DEBUG(dbgs() << " Target mapping is " << EltTarget << '\n'); 1024 1025 // Stack of elements not yet processed. 1026 SmallVector<MemoryAccess *, 16> Worklist; 1027 1028 // Set of scalars already tested. 1029 SmallPtrSet<const ScopArrayInfo *, 16> Closed; 1030 1031 // Lambda to add all scalar reads to the work list. 1032 auto ProcessAllIncoming = [&](ScopStmt *Stmt) { 1033 for (auto *MA : *Stmt) { 1034 if (!MA->isLatestScalarKind()) 1035 continue; 1036 if (!MA->isRead()) 1037 continue; 1038 1039 Worklist.push_back(MA); 1040 } 1041 }; 1042 1043 auto *WrittenVal = TargetStoreMA->getAccessInstruction()->getOperand(0); 1044 if (auto *WrittenValInputMA = TargetStmt->lookupInputAccessOf(WrittenVal)) 1045 Worklist.push_back(WrittenValInputMA); 1046 else 1047 ProcessAllIncoming(TargetStmt); 1048 1049 auto AnyMapped = false; 1050 auto &DL = S->getRegion().getEntry()->getModule()->getDataLayout(); 1051 auto StoreSize = 1052 DL.getTypeAllocSize(TargetStoreMA->getAccessValue()->getType()); 1053 1054 while (!Worklist.empty()) { 1055 auto *MA = Worklist.pop_back_val(); 1056 1057 auto *SAI = MA->getScopArrayInfo(); 1058 if (Closed.count(SAI)) 1059 continue; 1060 Closed.insert(SAI); 1061 LLVM_DEBUG(dbgs() << "\n Trying to map " << MA << " (SAI: " << SAI 1062 << ")\n"); 1063 1064 // Skip non-mappable scalars. 1065 if (!isMappable(SAI)) 1066 continue; 1067 1068 auto MASize = DL.getTypeAllocSize(MA->getAccessValue()->getType()); 1069 if (MASize > StoreSize) { 1070 LLVM_DEBUG( 1071 dbgs() << " Reject because storage size is insufficient\n"); 1072 continue; 1073 } 1074 1075 // Try to map MemoryKind::Value scalars. 1076 if (SAI->isValueKind()) { 1077 if (!tryMapValue(SAI, EltTarget)) 1078 continue; 1079 1080 auto *DefAcc = S->getValueDef(SAI); 1081 ProcessAllIncoming(DefAcc->getStatement()); 1082 1083 AnyMapped = true; 1084 continue; 1085 } 1086 1087 // Try to map MemoryKind::PHI scalars. 1088 if (SAI->isPHIKind()) { 1089 if (!tryMapPHI(SAI, EltTarget)) 1090 continue; 1091 // Add inputs of all incoming statements to the worklist. Prefer the 1092 // input accesses of the incoming blocks. 1093 for (auto *PHIWrite : S->getPHIIncomings(SAI)) { 1094 auto *PHIWriteStmt = PHIWrite->getStatement(); 1095 bool FoundAny = false; 1096 for (auto Incoming : PHIWrite->getIncoming()) { 1097 auto *IncomingInputMA = 1098 PHIWriteStmt->lookupInputAccessOf(Incoming.second); 1099 if (!IncomingInputMA) 1100 continue; 1101 1102 Worklist.push_back(IncomingInputMA); 1103 FoundAny = true; 1104 } 1105 1106 if (!FoundAny) 1107 ProcessAllIncoming(PHIWrite->getStatement()); 1108 } 1109 1110 AnyMapped = true; 1111 continue; 1112 } 1113 } 1114 1115 if (AnyMapped) { 1116 TargetsMapped++; 1117 NumberOfTargetsMapped++; 1118 } 1119 return AnyMapped; 1120 } 1121 1122 /// Compute when an array element is unused. 1123 /// 1124 /// @return { [Element[] -> Zone[]] } 1125 isl::union_set computeLifetime() const { 1126 // { Element[] -> Zone[] } 1127 auto ArrayUnused = computeArrayUnused(Schedule, AllMustWrites, AllReads, 1128 false, false, true); 1129 1130 auto Result = ArrayUnused.wrap(); 1131 1132 simplify(Result); 1133 return Result; 1134 } 1135 1136 /// Determine when an array element is written to, and which value instance is 1137 /// written. 1138 /// 1139 /// @return { [Element[] -> Scatter[]] -> ValInst[] } 1140 isl::union_map computeWritten() const { 1141 // { [Element[] -> Scatter[]] -> ValInst[] } 1142 auto EltWritten = applyDomainRange(AllWriteValInst, Schedule); 1143 1144 simplify(EltWritten); 1145 return EltWritten; 1146 } 1147 1148 /// Determine whether an access touches at most one element. 1149 /// 1150 /// The accessed element could be a scalar or accessing an array with constant 1151 /// subscript, such that all instances access only that element. 1152 /// 1153 /// @param MA The access to test. 1154 /// 1155 /// @return True, if zero or one elements are accessed; False if at least two 1156 /// different elements are accessed. 1157 bool isScalarAccess(MemoryAccess *MA) { 1158 auto Map = getAccessRelationFor(MA); 1159 auto Set = Map.range(); 1160 return Set.is_singleton(); 1161 } 1162 1163 /// Print mapping statistics to @p OS. 1164 void printStatistics(llvm::raw_ostream &OS, int Indent = 0) const { 1165 OS.indent(Indent) << "Statistics {\n"; 1166 OS.indent(Indent + 4) << "Compatible overwrites: " 1167 << NumberOfCompatibleTargets << "\n"; 1168 OS.indent(Indent + 4) << "Overwrites mapped to: " << NumberOfTargetsMapped 1169 << '\n'; 1170 OS.indent(Indent + 4) << "Value scalars mapped: " 1171 << NumberOfMappedValueScalars << '\n'; 1172 OS.indent(Indent + 4) << "PHI scalars mapped: " 1173 << NumberOfMappedPHIScalars << '\n'; 1174 OS.indent(Indent) << "}\n"; 1175 } 1176 1177 /// Return whether at least one transformation been applied. 1178 bool isModified() const { return NumberOfTargetsMapped > 0; } 1179 1180 public: 1181 DeLICMImpl(Scop *S, LoopInfo *LI) : ZoneAlgorithm("polly-delicm", S, LI) {} 1182 1183 /// Calculate the lifetime (definition to last use) of every array element. 1184 /// 1185 /// @return True if the computed lifetimes (#Zone) is usable. 1186 bool computeZone() { 1187 // Check that nothing strange occurs. 1188 collectCompatibleElts(); 1189 1190 isl::union_set EltUnused; 1191 isl::union_map EltKnown, EltWritten; 1192 1193 { 1194 IslMaxOperationsGuard MaxOpGuard(IslCtx.get(), DelicmMaxOps); 1195 1196 computeCommon(); 1197 1198 EltUnused = computeLifetime(); 1199 EltKnown = computeKnown(true, false); 1200 EltWritten = computeWritten(); 1201 } 1202 DeLICMAnalyzed++; 1203 1204 if (!EltUnused || !EltKnown || !EltWritten) { 1205 assert(isl_ctx_last_error(IslCtx.get()) == isl_error_quota && 1206 "The only reason that these things have not been computed should " 1207 "be if the max-operations limit hit"); 1208 DeLICMOutOfQuota++; 1209 LLVM_DEBUG(dbgs() << "DeLICM analysis exceeded max_operations\n"); 1210 DebugLoc Begin, End; 1211 getDebugLocations(getBBPairForRegion(&S->getRegion()), Begin, End); 1212 OptimizationRemarkAnalysis R(DEBUG_TYPE, "OutOfQuota", Begin, 1213 S->getEntry()); 1214 R << "maximal number of operations exceeded during zone analysis"; 1215 S->getFunction().getContext().diagnose(R); 1216 return false; 1217 } 1218 1219 Zone = OriginalZone = Knowledge(nullptr, EltUnused, EltKnown, EltWritten); 1220 LLVM_DEBUG(dbgs() << "Computed Zone:\n"; OriginalZone.print(dbgs(), 4)); 1221 1222 assert(Zone.isUsable() && OriginalZone.isUsable()); 1223 return true; 1224 } 1225 1226 /// Try to map as many scalars to unused array elements as possible. 1227 /// 1228 /// Multiple scalars might be mappable to intersecting unused array element 1229 /// zones, but we can only chose one. This is a greedy algorithm, therefore 1230 /// the first processed element claims it. 1231 void greedyCollapse() { 1232 bool Modified = false; 1233 1234 for (auto &Stmt : *S) { 1235 for (auto *MA : Stmt) { 1236 if (!MA->isLatestArrayKind()) 1237 continue; 1238 if (!MA->isWrite()) 1239 continue; 1240 1241 if (MA->isMayWrite()) { 1242 LLVM_DEBUG(dbgs() << "Access " << MA 1243 << " pruned because it is a MAY_WRITE\n"); 1244 OptimizationRemarkMissed R(DEBUG_TYPE, "TargetMayWrite", 1245 MA->getAccessInstruction()); 1246 R << "Skipped possible mapping target because it is not an " 1247 "unconditional overwrite"; 1248 S->getFunction().getContext().diagnose(R); 1249 continue; 1250 } 1251 1252 if (Stmt.getNumIterators() == 0) { 1253 LLVM_DEBUG(dbgs() << "Access " << MA 1254 << " pruned because it is not in a loop\n"); 1255 OptimizationRemarkMissed R(DEBUG_TYPE, "WriteNotInLoop", 1256 MA->getAccessInstruction()); 1257 R << "skipped possible mapping target because it is not in a loop"; 1258 S->getFunction().getContext().diagnose(R); 1259 continue; 1260 } 1261 1262 if (isScalarAccess(MA)) { 1263 LLVM_DEBUG(dbgs() 1264 << "Access " << MA 1265 << " pruned because it writes only a single element\n"); 1266 OptimizationRemarkMissed R(DEBUG_TYPE, "ScalarWrite", 1267 MA->getAccessInstruction()); 1268 R << "skipped possible mapping target because the memory location " 1269 "written to does not depend on its outer loop"; 1270 S->getFunction().getContext().diagnose(R); 1271 continue; 1272 } 1273 1274 if (!isa<StoreInst>(MA->getAccessInstruction())) { 1275 LLVM_DEBUG(dbgs() << "Access " << MA 1276 << " pruned because it is not a StoreInst\n"); 1277 OptimizationRemarkMissed R(DEBUG_TYPE, "NotAStore", 1278 MA->getAccessInstruction()); 1279 R << "skipped possible mapping target because non-store instructions " 1280 "are not supported"; 1281 S->getFunction().getContext().diagnose(R); 1282 continue; 1283 } 1284 1285 // Check for more than one element acces per statement instance. 1286 // Currently we expect write accesses to be functional, eg. disallow 1287 // 1288 // { Stmt[0] -> [i] : 0 <= i < 2 } 1289 // 1290 // This may occur when some accesses to the element write/read only 1291 // parts of the element, eg. a single byte. Polly then divides each 1292 // element into subelements of the smallest access length, normal access 1293 // then touch multiple of such subelements. It is very common when the 1294 // array is accesses with memset, memcpy or memmove which take i8* 1295 // arguments. 1296 isl::union_map AccRel = MA->getLatestAccessRelation(); 1297 if (!AccRel.is_single_valued().is_true()) { 1298 LLVM_DEBUG(dbgs() << "Access " << MA 1299 << " is incompatible because it writes multiple " 1300 "elements per instance\n"); 1301 OptimizationRemarkMissed R(DEBUG_TYPE, "NonFunctionalAccRel", 1302 MA->getAccessInstruction()); 1303 R << "skipped possible mapping target because it writes more than " 1304 "one element"; 1305 S->getFunction().getContext().diagnose(R); 1306 continue; 1307 } 1308 1309 isl::union_set TouchedElts = AccRel.range(); 1310 if (!TouchedElts.is_subset(CompatibleElts)) { 1311 LLVM_DEBUG( 1312 dbgs() 1313 << "Access " << MA 1314 << " is incompatible because it touches incompatible elements\n"); 1315 OptimizationRemarkMissed R(DEBUG_TYPE, "IncompatibleElts", 1316 MA->getAccessInstruction()); 1317 R << "skipped possible mapping target because a target location " 1318 "cannot be reliably analyzed"; 1319 S->getFunction().getContext().diagnose(R); 1320 continue; 1321 } 1322 1323 assert(isCompatibleAccess(MA)); 1324 NumberOfCompatibleTargets++; 1325 LLVM_DEBUG(dbgs() << "Analyzing target access " << MA << "\n"); 1326 if (collapseScalarsToStore(MA)) 1327 Modified = true; 1328 } 1329 } 1330 1331 if (Modified) 1332 DeLICMScopsModified++; 1333 } 1334 1335 /// Dump the internal information about a performed DeLICM to @p OS. 1336 void print(llvm::raw_ostream &OS, int Indent = 0) { 1337 if (!Zone.isUsable()) { 1338 OS.indent(Indent) << "Zone not computed\n"; 1339 return; 1340 } 1341 1342 printStatistics(OS, Indent); 1343 if (!isModified()) { 1344 OS.indent(Indent) << "No modification has been made\n"; 1345 return; 1346 } 1347 printAccesses(OS, Indent); 1348 } 1349 }; 1350 1351 class DeLICM : public ScopPass { 1352 private: 1353 DeLICM(const DeLICM &) = delete; 1354 const DeLICM &operator=(const DeLICM &) = delete; 1355 1356 /// The pass implementation, also holding per-scop data. 1357 std::unique_ptr<DeLICMImpl> Impl; 1358 1359 void collapseToUnused(Scop &S) { 1360 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 1361 Impl = std::make_unique<DeLICMImpl>(&S, &LI); 1362 1363 if (!Impl->computeZone()) { 1364 LLVM_DEBUG(dbgs() << "Abort because cannot reliably compute lifetimes\n"); 1365 return; 1366 } 1367 1368 LLVM_DEBUG(dbgs() << "Collapsing scalars to unused array elements...\n"); 1369 Impl->greedyCollapse(); 1370 1371 LLVM_DEBUG(dbgs() << "\nFinal Scop:\n"); 1372 LLVM_DEBUG(dbgs() << S); 1373 } 1374 1375 public: 1376 static char ID; 1377 explicit DeLICM() : ScopPass(ID) {} 1378 1379 virtual void getAnalysisUsage(AnalysisUsage &AU) const override { 1380 AU.addRequiredTransitive<ScopInfoRegionPass>(); 1381 AU.addRequired<LoopInfoWrapperPass>(); 1382 AU.setPreservesAll(); 1383 } 1384 1385 virtual bool runOnScop(Scop &S) override { 1386 // Free resources for previous scop's computation, if not yet done. 1387 releaseMemory(); 1388 1389 collapseToUnused(S); 1390 1391 auto ScopStats = S.getStatistics(); 1392 NumValueWrites += ScopStats.NumValueWrites; 1393 NumValueWritesInLoops += ScopStats.NumValueWritesInLoops; 1394 NumPHIWrites += ScopStats.NumPHIWrites; 1395 NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops; 1396 NumSingletonWrites += ScopStats.NumSingletonWrites; 1397 NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops; 1398 1399 return false; 1400 } 1401 1402 virtual void printScop(raw_ostream &OS, Scop &S) const override { 1403 if (!Impl) 1404 return; 1405 assert(Impl->getScop() == &S); 1406 1407 OS << "DeLICM result:\n"; 1408 Impl->print(OS); 1409 } 1410 1411 virtual void releaseMemory() override { Impl.reset(); } 1412 }; 1413 1414 char DeLICM::ID; 1415 } // anonymous namespace 1416 1417 Pass *polly::createDeLICMPass() { return new DeLICM(); } 1418 1419 INITIALIZE_PASS_BEGIN(DeLICM, "polly-delicm", "Polly - DeLICM/DePRE", false, 1420 false) 1421 INITIALIZE_PASS_DEPENDENCY(ScopInfoWrapperPass) 1422 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 1423 INITIALIZE_PASS_END(DeLICM, "polly-delicm", "Polly - DeLICM/DePRE", false, 1424 false) 1425 1426 bool polly::isConflicting( 1427 isl::union_set ExistingOccupied, isl::union_set ExistingUnused, 1428 isl::union_map ExistingKnown, isl::union_map ExistingWrites, 1429 isl::union_set ProposedOccupied, isl::union_set ProposedUnused, 1430 isl::union_map ProposedKnown, isl::union_map ProposedWrites, 1431 llvm::raw_ostream *OS, unsigned Indent) { 1432 Knowledge Existing(std::move(ExistingOccupied), std::move(ExistingUnused), 1433 std::move(ExistingKnown), std::move(ExistingWrites)); 1434 Knowledge Proposed(std::move(ProposedOccupied), std::move(ProposedUnused), 1435 std::move(ProposedKnown), std::move(ProposedWrites)); 1436 1437 return Knowledge::isConflicting(Existing, Proposed, OS, Indent); 1438 } 1439