1 //===------ DeLICM.cpp -----------------------------------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Undo the effect of Loop Invariant Code Motion (LICM) and 11 // GVN Partial Redundancy Elimination (PRE) on SCoP-level. 12 // 13 // Namely, remove register/scalar dependencies by mapping them back to array 14 // elements. 15 // 16 //===----------------------------------------------------------------------===// 17 18 #include "polly/DeLICM.h" 19 #include "polly/Options.h" 20 #include "polly/ScopInfo.h" 21 #include "polly/ScopPass.h" 22 #include "polly/Support/ISLOStream.h" 23 #include "polly/Support/ISLTools.h" 24 #include "polly/ZoneAlgo.h" 25 #include "llvm/ADT/Statistic.h" 26 #define DEBUG_TYPE "polly-delicm" 27 28 using namespace polly; 29 using namespace llvm; 30 31 namespace { 32 33 cl::opt<int> 34 DelicmMaxOps("polly-delicm-max-ops", 35 cl::desc("Maximum number of isl operations to invest for " 36 "lifetime analysis; 0=no limit"), 37 cl::init(1000000), cl::cat(PollyCategory)); 38 39 cl::opt<bool> DelicmOverapproximateWrites( 40 "polly-delicm-overapproximate-writes", 41 cl::desc( 42 "Do more PHI writes than necessary in order to avoid partial accesses"), 43 cl::init(false), cl::Hidden, cl::cat(PollyCategory)); 44 45 cl::opt<bool> DelicmPartialWrites("polly-delicm-partial-writes", 46 cl::desc("Allow partial writes"), 47 cl::init(true), cl::Hidden, 48 cl::cat(PollyCategory)); 49 50 cl::opt<bool> 51 DelicmComputeKnown("polly-delicm-compute-known", 52 cl::desc("Compute known content of array elements"), 53 cl::init(true), cl::Hidden, cl::cat(PollyCategory)); 54 55 STATISTIC(DeLICMAnalyzed, "Number of successfully analyzed SCoPs"); 56 STATISTIC(DeLICMOutOfQuota, 57 "Analyses aborted because max_operations was reached"); 58 STATISTIC(MappedValueScalars, "Number of mapped Value scalars"); 59 STATISTIC(MappedPHIScalars, "Number of mapped PHI scalars"); 60 STATISTIC(TargetsMapped, "Number of stores used for at least one mapping"); 61 STATISTIC(DeLICMScopsModified, "Number of SCoPs optimized"); 62 63 STATISTIC(NumValueWrites, "Number of scalar value writes after DeLICM"); 64 STATISTIC(NumValueWritesInLoops, 65 "Number of scalar value writes nested in affine loops after DeLICM"); 66 STATISTIC(NumPHIWrites, "Number of scalar phi writes after DeLICM"); 67 STATISTIC(NumPHIWritesInLoops, 68 "Number of scalar phi writes nested in affine loops after DeLICM"); 69 STATISTIC(NumSingletonWrites, "Number of singleton writes after DeLICM"); 70 STATISTIC(NumSingletonWritesInLoops, 71 "Number of singleton writes nested in affine loops after DeLICM"); 72 73 isl::union_map computeReachingOverwrite(isl::union_map Schedule, 74 isl::union_map Writes, 75 bool InclPrevWrite, 76 bool InclOverwrite) { 77 return computeReachingWrite(Schedule, Writes, true, InclPrevWrite, 78 InclOverwrite); 79 } 80 81 /// Compute the next overwrite for a scalar. 82 /// 83 /// @param Schedule { DomainWrite[] -> Scatter[] } 84 /// Schedule of (at least) all writes. Instances not in @p 85 /// Writes are ignored. 86 /// @param Writes { DomainWrite[] } 87 /// The element instances that write to the scalar. 88 /// @param InclPrevWrite Whether to extend the timepoints to include 89 /// the timepoint where the previous write happens. 90 /// @param InclOverwrite Whether the reaching overwrite includes the timepoint 91 /// of the overwrite itself. 92 /// 93 /// @return { Scatter[] -> DomainDef[] } 94 isl::union_map computeScalarReachingOverwrite(isl::union_map Schedule, 95 isl::union_set Writes, 96 bool InclPrevWrite, 97 bool InclOverwrite) { 98 99 // { DomainWrite[] } 100 auto WritesMap = isl::union_map::from_domain(Writes); 101 102 // { [Element[] -> Scatter[]] -> DomainWrite[] } 103 auto Result = computeReachingOverwrite( 104 std::move(Schedule), std::move(WritesMap), InclPrevWrite, InclOverwrite); 105 106 return Result.domain_factor_range(); 107 } 108 109 /// Overload of computeScalarReachingOverwrite, with only one writing statement. 110 /// Consequently, the result consists of only one map space. 111 /// 112 /// @param Schedule { DomainWrite[] -> Scatter[] } 113 /// @param Writes { DomainWrite[] } 114 /// @param InclPrevWrite Include the previous write to result. 115 /// @param InclOverwrite Include the overwrite to the result. 116 /// 117 /// @return { Scatter[] -> DomainWrite[] } 118 isl::map computeScalarReachingOverwrite(isl::union_map Schedule, 119 isl::set Writes, bool InclPrevWrite, 120 bool InclOverwrite) { 121 isl::space ScatterSpace = getScatterSpace(Schedule); 122 isl::space DomSpace = Writes.get_space(); 123 124 isl::union_map ReachOverwrite = computeScalarReachingOverwrite( 125 Schedule, isl::union_set(Writes), InclPrevWrite, InclOverwrite); 126 127 isl::space ResultSpace = ScatterSpace.map_from_domain_and_range(DomSpace); 128 return singleton(std::move(ReachOverwrite), ResultSpace); 129 } 130 131 /// Try to find a 'natural' extension of a mapped to elements outside its 132 /// domain. 133 /// 134 /// @param Relevant The map with mapping that may not be modified. 135 /// @param Universe The domain to which @p Relevant needs to be extended. 136 /// 137 /// @return A map with that associates the domain elements of @p Relevant to the 138 /// same elements and in addition the elements of @p Universe to some 139 /// undefined elements. The function prefers to return simple maps. 140 isl::union_map expandMapping(isl::union_map Relevant, isl::union_set Universe) { 141 Relevant = Relevant.coalesce(); 142 isl::union_set RelevantDomain = Relevant.domain(); 143 isl::union_map Simplified = Relevant.gist_domain(RelevantDomain); 144 Simplified = Simplified.coalesce(); 145 return Simplified.intersect_domain(Universe); 146 } 147 148 /// Represent the knowledge of the contents of any array elements in any zone or 149 /// the knowledge we would add when mapping a scalar to an array element. 150 /// 151 /// Every array element at every zone unit has one of two states: 152 /// 153 /// - Unused: Not occupied by any value so a transformation can change it to 154 /// other values. 155 /// 156 /// - Occupied: The element contains a value that is still needed. 157 /// 158 /// The union of Unused and Unknown zones forms the universe, the set of all 159 /// elements at every timepoint. The universe can easily be derived from the 160 /// array elements that are accessed someway. Arrays that are never accessed 161 /// also never play a role in any computation and can hence be ignored. With a 162 /// given universe, only one of the sets needs to stored implicitly. Computing 163 /// the complement is also an expensive operation, hence this class has been 164 /// designed that only one of sets is needed while the other is assumed to be 165 /// implicit. It can still be given, but is mostly ignored. 166 /// 167 /// There are two use cases for the Knowledge class: 168 /// 169 /// 1) To represent the knowledge of the current state of ScopInfo. The unused 170 /// state means that an element is currently unused: there is no read of it 171 /// before the next overwrite. Also called 'Existing'. 172 /// 173 /// 2) To represent the requirements for mapping a scalar to array elements. The 174 /// unused state means that there is no change/requirement. Also called 175 /// 'Proposed'. 176 /// 177 /// In addition to these states at unit zones, Knowledge needs to know when 178 /// values are written. This is because written values may have no lifetime (one 179 /// reason is that the value is never read). Such writes would therefore never 180 /// conflict, but overwrite values that might still be required. Another source 181 /// of problems are multiple writes to the same element at the same timepoint, 182 /// because their order is undefined. 183 class Knowledge { 184 private: 185 /// { [Element[] -> Zone[]] } 186 /// Set of array elements and when they are alive. 187 /// Can contain a nullptr; in this case the set is implicitly defined as the 188 /// complement of #Unused. 189 /// 190 /// The set of alive array elements is represented as zone, as the set of live 191 /// values can differ depending on how the elements are interpreted. 192 /// Assuming a value X is written at timestep [0] and read at timestep [1] 193 /// without being used at any later point, then the value is alive in the 194 /// interval ]0,1[. This interval cannot be represented by an integer set, as 195 /// it does not contain any integer point. Zones allow us to represent this 196 /// interval and can be converted to sets of timepoints when needed (e.g., in 197 /// isConflicting when comparing to the write sets). 198 /// @see convertZoneToTimepoints and this file's comment for more details. 199 isl::union_set Occupied; 200 201 /// { [Element[] -> Zone[]] } 202 /// Set of array elements when they are not alive, i.e. their memory can be 203 /// used for other purposed. Can contain a nullptr; in this case the set is 204 /// implicitly defined as the complement of #Occupied. 205 isl::union_set Unused; 206 207 /// { [Element[] -> Zone[]] -> ValInst[] } 208 /// Maps to the known content for each array element at any interval. 209 /// 210 /// Any element/interval can map to multiple known elements. This is due to 211 /// multiple llvm::Value referring to the same content. Examples are 212 /// 213 /// - A value stored and loaded again. The LoadInst represents the same value 214 /// as the StoreInst's value operand. 215 /// 216 /// - A PHINode is equal to any one of the incoming values. In case of 217 /// LCSSA-form, it is always equal to its single incoming value. 218 /// 219 /// Two Knowledges are considered not conflicting if at least one of the known 220 /// values match. Not known values are not stored as an unnamed tuple (as 221 /// #Written does), but maps to nothing. 222 /// 223 /// Known values are usually just defined for #Occupied elements. Knowing 224 /// #Unused contents has no advantage as it can be overwritten. 225 isl::union_map Known; 226 227 /// { [Element[] -> Scatter[]] -> ValInst[] } 228 /// The write actions currently in the scop or that would be added when 229 /// mapping a scalar. Maps to the value that is written. 230 /// 231 /// Written values that cannot be identified are represented by an unknown 232 /// ValInst[] (an unnamed tuple of 0 dimension). It conflicts with itself. 233 isl::union_map Written; 234 235 /// Check whether this Knowledge object is well-formed. 236 void checkConsistency() const { 237 #ifndef NDEBUG 238 // Default-initialized object 239 if (!Occupied && !Unused && !Known && !Written) 240 return; 241 242 assert(Occupied || Unused); 243 assert(Known); 244 assert(Written); 245 246 // If not all fields are defined, we cannot derived the universe. 247 if (!Occupied || !Unused) 248 return; 249 250 assert(Occupied.is_disjoint(Unused)); 251 auto Universe = Occupied.unite(Unused); 252 253 assert(!Known.domain().is_subset(Universe).is_false()); 254 assert(!Written.domain().is_subset(Universe).is_false()); 255 #endif 256 } 257 258 public: 259 /// Initialize a nullptr-Knowledge. This is only provided for convenience; do 260 /// not use such an object. 261 Knowledge() {} 262 263 /// Create a new object with the given members. 264 Knowledge(isl::union_set Occupied, isl::union_set Unused, 265 isl::union_map Known, isl::union_map Written) 266 : Occupied(std::move(Occupied)), Unused(std::move(Unused)), 267 Known(std::move(Known)), Written(std::move(Written)) { 268 checkConsistency(); 269 } 270 271 /// Return whether this object was not default-constructed. 272 bool isUsable() const { return (Occupied || Unused) && Known && Written; } 273 274 /// Print the content of this object to @p OS. 275 void print(llvm::raw_ostream &OS, unsigned Indent = 0) const { 276 if (isUsable()) { 277 if (Occupied) 278 OS.indent(Indent) << "Occupied: " << Occupied << "\n"; 279 else 280 OS.indent(Indent) << "Occupied: <Everything else not in Unused>\n"; 281 if (Unused) 282 OS.indent(Indent) << "Unused: " << Unused << "\n"; 283 else 284 OS.indent(Indent) << "Unused: <Everything else not in Occupied>\n"; 285 OS.indent(Indent) << "Known: " << Known << "\n"; 286 OS.indent(Indent) << "Written : " << Written << '\n'; 287 } else { 288 OS.indent(Indent) << "Invalid knowledge\n"; 289 } 290 } 291 292 /// Combine two knowledges, this and @p That. 293 void learnFrom(Knowledge That) { 294 assert(!isConflicting(*this, That)); 295 assert(Unused && That.Occupied); 296 assert( 297 !That.Unused && 298 "This function is only prepared to learn occupied elements from That"); 299 assert(!Occupied && "This function does not implement " 300 "`this->Occupied = " 301 "this->Occupied.unite(That.Occupied);`"); 302 303 Unused = Unused.subtract(That.Occupied); 304 Known = Known.unite(That.Known); 305 Written = Written.unite(That.Written); 306 307 checkConsistency(); 308 } 309 310 /// Determine whether two Knowledges conflict with each other. 311 /// 312 /// In theory @p Existing and @p Proposed are symmetric, but the 313 /// implementation is constrained by the implicit interpretation. That is, @p 314 /// Existing must have #Unused defined (use case 1) and @p Proposed must have 315 /// #Occupied defined (use case 1). 316 /// 317 /// A conflict is defined as non-preserved semantics when they are merged. For 318 /// instance, when for the same array and zone they assume different 319 /// llvm::Values. 320 /// 321 /// @param Existing One of the knowledges with #Unused defined. 322 /// @param Proposed One of the knowledges with #Occupied defined. 323 /// @param OS Dump the conflict reason to this output stream; use 324 /// nullptr to not output anything. 325 /// @param Indent Indention for the conflict reason. 326 /// 327 /// @return True, iff the two knowledges are conflicting. 328 static bool isConflicting(const Knowledge &Existing, 329 const Knowledge &Proposed, 330 llvm::raw_ostream *OS = nullptr, 331 unsigned Indent = 0) { 332 assert(Existing.Unused); 333 assert(Proposed.Occupied); 334 335 #ifndef NDEBUG 336 if (Existing.Occupied && Proposed.Unused) { 337 auto ExistingUniverse = Existing.Occupied.unite(Existing.Unused); 338 auto ProposedUniverse = Proposed.Occupied.unite(Proposed.Unused); 339 assert(ExistingUniverse.is_equal(ProposedUniverse) && 340 "Both inputs' Knowledges must be over the same universe"); 341 } 342 #endif 343 344 // Do the Existing and Proposed lifetimes conflict? 345 // 346 // Lifetimes are described as the cross-product of array elements and zone 347 // intervals in which they are alive (the space { [Element[] -> Zone[]] }). 348 // In the following we call this "element/lifetime interval". 349 // 350 // In order to not conflict, one of the following conditions must apply for 351 // each element/lifetime interval: 352 // 353 // 1. If occupied in one of the knowledges, it is unused in the other. 354 // 355 // - or - 356 // 357 // 2. Both contain the same value. 358 // 359 // Instead of partitioning the element/lifetime intervals into a part that 360 // both Knowledges occupy (which requires an expensive subtraction) and for 361 // these to check whether they are known to be the same value, we check only 362 // the second condition and ensure that it also applies when then first 363 // condition is true. This is done by adding a wildcard value to 364 // Proposed.Known and Existing.Unused such that they match as a common known 365 // value. We use the "unknown ValInst" for this purpose. Every 366 // Existing.Unused may match with an unknown Proposed.Occupied because these 367 // never are in conflict with each other. 368 auto ProposedOccupiedAnyVal = makeUnknownForDomain(Proposed.Occupied); 369 auto ProposedValues = Proposed.Known.unite(ProposedOccupiedAnyVal); 370 371 auto ExistingUnusedAnyVal = makeUnknownForDomain(Existing.Unused); 372 auto ExistingValues = Existing.Known.unite(ExistingUnusedAnyVal); 373 374 auto MatchingVals = ExistingValues.intersect(ProposedValues); 375 auto Matches = MatchingVals.domain(); 376 377 // Any Proposed.Occupied must either have a match between the known values 378 // of Existing and Occupied, or be in Existing.Unused. In the latter case, 379 // the previously added "AnyVal" will match each other. 380 if (!Proposed.Occupied.is_subset(Matches)) { 381 if (OS) { 382 auto Conflicting = Proposed.Occupied.subtract(Matches); 383 auto ExistingConflictingKnown = 384 Existing.Known.intersect_domain(Conflicting); 385 auto ProposedConflictingKnown = 386 Proposed.Known.intersect_domain(Conflicting); 387 388 OS->indent(Indent) << "Proposed lifetime conflicting with Existing's\n"; 389 OS->indent(Indent) << "Conflicting occupied: " << Conflicting << "\n"; 390 if (!ExistingConflictingKnown.is_empty()) 391 OS->indent(Indent) 392 << "Existing Known: " << ExistingConflictingKnown << "\n"; 393 if (!ProposedConflictingKnown.is_empty()) 394 OS->indent(Indent) 395 << "Proposed Known: " << ProposedConflictingKnown << "\n"; 396 } 397 return true; 398 } 399 400 // Do the writes in Existing conflict with occupied values in Proposed? 401 // 402 // In order to not conflict, it must either write to unused lifetime or 403 // write the same value. To check, we remove the writes that write into 404 // Proposed.Unused (they never conflict) and then see whether the written 405 // value is already in Proposed.Known. If there are multiple known values 406 // and a written value is known under different names, it is enough when one 407 // of the written values (assuming that they are the same value under 408 // different names, e.g. a PHINode and one of the incoming values) matches 409 // one of the known names. 410 // 411 // We convert here the set of lifetimes to actual timepoints. A lifetime is 412 // in conflict with a set of write timepoints, if either a live timepoint is 413 // clearly within the lifetime or if a write happens at the beginning of the 414 // lifetime (where it would conflict with the value that actually writes the 415 // value alive). There is no conflict at the end of a lifetime, as the alive 416 // value will always be read, before it is overwritten again. The last 417 // property holds in Polly for all scalar values and we expect all users of 418 // Knowledge to check this property also for accesses to MemoryKind::Array. 419 auto ProposedFixedDefs = 420 convertZoneToTimepoints(Proposed.Occupied, true, false); 421 auto ProposedFixedKnown = 422 convertZoneToTimepoints(Proposed.Known, isl::dim::in, true, false); 423 424 auto ExistingConflictingWrites = 425 Existing.Written.intersect_domain(ProposedFixedDefs); 426 auto ExistingConflictingWritesDomain = ExistingConflictingWrites.domain(); 427 428 auto CommonWrittenVal = 429 ProposedFixedKnown.intersect(ExistingConflictingWrites); 430 auto CommonWrittenValDomain = CommonWrittenVal.domain(); 431 432 if (!ExistingConflictingWritesDomain.is_subset(CommonWrittenValDomain)) { 433 if (OS) { 434 auto ExistingConflictingWritten = 435 ExistingConflictingWrites.subtract_domain(CommonWrittenValDomain); 436 auto ProposedConflictingKnown = ProposedFixedKnown.subtract_domain( 437 ExistingConflictingWritten.domain()); 438 439 OS->indent(Indent) 440 << "Proposed a lifetime where there is an Existing write into it\n"; 441 OS->indent(Indent) << "Existing conflicting writes: " 442 << ExistingConflictingWritten << "\n"; 443 if (!ProposedConflictingKnown.is_empty()) 444 OS->indent(Indent) 445 << "Proposed conflicting known: " << ProposedConflictingKnown 446 << "\n"; 447 } 448 return true; 449 } 450 451 // Do the writes in Proposed conflict with occupied values in Existing? 452 auto ExistingAvailableDefs = 453 convertZoneToTimepoints(Existing.Unused, true, false); 454 auto ExistingKnownDefs = 455 convertZoneToTimepoints(Existing.Known, isl::dim::in, true, false); 456 457 auto ProposedWrittenDomain = Proposed.Written.domain(); 458 auto KnownIdentical = ExistingKnownDefs.intersect(Proposed.Written); 459 auto IdenticalOrUnused = 460 ExistingAvailableDefs.unite(KnownIdentical.domain()); 461 if (!ProposedWrittenDomain.is_subset(IdenticalOrUnused)) { 462 if (OS) { 463 auto Conflicting = ProposedWrittenDomain.subtract(IdenticalOrUnused); 464 auto ExistingConflictingKnown = 465 ExistingKnownDefs.intersect_domain(Conflicting); 466 auto ProposedConflictingWritten = 467 Proposed.Written.intersect_domain(Conflicting); 468 469 OS->indent(Indent) << "Proposed writes into range used by Existing\n"; 470 OS->indent(Indent) << "Proposed conflicting writes: " 471 << ProposedConflictingWritten << "\n"; 472 if (!ExistingConflictingKnown.is_empty()) 473 OS->indent(Indent) 474 << "Existing conflicting known: " << ExistingConflictingKnown 475 << "\n"; 476 } 477 return true; 478 } 479 480 // Does Proposed write at the same time as Existing already does (order of 481 // writes is undefined)? Writing the same value is permitted. 482 auto ExistingWrittenDomain = Existing.Written.domain(); 483 auto BothWritten = 484 Existing.Written.domain().intersect(Proposed.Written.domain()); 485 auto ExistingKnownWritten = filterKnownValInst(Existing.Written); 486 auto ProposedKnownWritten = filterKnownValInst(Proposed.Written); 487 auto CommonWritten = 488 ExistingKnownWritten.intersect(ProposedKnownWritten).domain(); 489 490 if (!BothWritten.is_subset(CommonWritten)) { 491 if (OS) { 492 auto Conflicting = BothWritten.subtract(CommonWritten); 493 auto ExistingConflictingWritten = 494 Existing.Written.intersect_domain(Conflicting); 495 auto ProposedConflictingWritten = 496 Proposed.Written.intersect_domain(Conflicting); 497 498 OS->indent(Indent) << "Proposed writes at the same time as an already " 499 "Existing write\n"; 500 OS->indent(Indent) << "Conflicting writes: " << Conflicting << "\n"; 501 if (!ExistingConflictingWritten.is_empty()) 502 OS->indent(Indent) 503 << "Exiting write: " << ExistingConflictingWritten << "\n"; 504 if (!ProposedConflictingWritten.is_empty()) 505 OS->indent(Indent) 506 << "Proposed write: " << ProposedConflictingWritten << "\n"; 507 } 508 return true; 509 } 510 511 return false; 512 } 513 }; 514 515 /// Implementation of the DeLICM/DePRE transformation. 516 class DeLICMImpl : public ZoneAlgorithm { 517 private: 518 /// Knowledge before any transformation took place. 519 Knowledge OriginalZone; 520 521 /// Current knowledge of the SCoP including all already applied 522 /// transformations. 523 Knowledge Zone; 524 525 /// Number of StoreInsts something can be mapped to. 526 int NumberOfCompatibleTargets = 0; 527 528 /// The number of StoreInsts to which at least one value or PHI has been 529 /// mapped to. 530 int NumberOfTargetsMapped = 0; 531 532 /// The number of llvm::Value mapped to some array element. 533 int NumberOfMappedValueScalars = 0; 534 535 /// The number of PHIs mapped to some array element. 536 int NumberOfMappedPHIScalars = 0; 537 538 /// Determine whether two knowledges are conflicting with each other. 539 /// 540 /// @see Knowledge::isConflicting 541 bool isConflicting(const Knowledge &Proposed) { 542 raw_ostream *OS = nullptr; 543 LLVM_DEBUG(OS = &llvm::dbgs()); 544 return Knowledge::isConflicting(Zone, Proposed, OS, 4); 545 } 546 547 /// Determine whether @p SAI is a scalar that can be mapped to an array 548 /// element. 549 bool isMappable(const ScopArrayInfo *SAI) { 550 assert(SAI); 551 552 if (SAI->isValueKind()) { 553 auto *MA = S->getValueDef(SAI); 554 if (!MA) { 555 LLVM_DEBUG( 556 dbgs() 557 << " Reject because value is read-only within the scop\n"); 558 return false; 559 } 560 561 // Mapping if value is used after scop is not supported. The code 562 // generator would need to reload the scalar after the scop, but it 563 // does not have the information to where it is mapped to. Only the 564 // MemoryAccesses have that information, not the ScopArrayInfo. 565 auto Inst = MA->getAccessInstruction(); 566 for (auto User : Inst->users()) { 567 if (!isa<Instruction>(User)) 568 return false; 569 auto UserInst = cast<Instruction>(User); 570 571 if (!S->contains(UserInst)) { 572 LLVM_DEBUG(dbgs() << " Reject because value is escaping\n"); 573 return false; 574 } 575 } 576 577 return true; 578 } 579 580 if (SAI->isPHIKind()) { 581 auto *MA = S->getPHIRead(SAI); 582 assert(MA); 583 584 // Mapping of an incoming block from before the SCoP is not supported by 585 // the code generator. 586 auto PHI = cast<PHINode>(MA->getAccessInstruction()); 587 for (auto Incoming : PHI->blocks()) { 588 if (!S->contains(Incoming)) { 589 LLVM_DEBUG(dbgs() 590 << " Reject because at least one incoming block is " 591 "not in the scop region\n"); 592 return false; 593 } 594 } 595 596 return true; 597 } 598 599 LLVM_DEBUG(dbgs() << " Reject ExitPHI or other non-value\n"); 600 return false; 601 } 602 603 /// Compute the uses of a MemoryKind::Value and its lifetime (from its 604 /// definition to the last use). 605 /// 606 /// @param SAI The ScopArrayInfo representing the value's storage. 607 /// 608 /// @return { DomainDef[] -> DomainUse[] }, { DomainDef[] -> Zone[] } 609 /// First element is the set of uses for each definition. 610 /// The second is the lifetime of each definition. 611 std::tuple<isl::union_map, isl::map> 612 computeValueUses(const ScopArrayInfo *SAI) { 613 assert(SAI->isValueKind()); 614 615 // { DomainRead[] } 616 auto Reads = makeEmptyUnionSet(); 617 618 // Find all uses. 619 for (auto *MA : S->getValueUses(SAI)) 620 Reads = Reads.add_set(getDomainFor(MA)); 621 622 // { DomainRead[] -> Scatter[] } 623 auto ReadSchedule = getScatterFor(Reads); 624 625 auto *DefMA = S->getValueDef(SAI); 626 assert(DefMA); 627 628 // { DomainDef[] } 629 auto Writes = getDomainFor(DefMA); 630 631 // { DomainDef[] -> Scatter[] } 632 auto WriteScatter = getScatterFor(Writes); 633 634 // { Scatter[] -> DomainDef[] } 635 auto ReachDef = getScalarReachingDefinition(DefMA->getStatement()); 636 637 // { [DomainDef[] -> Scatter[]] -> DomainUse[] } 638 auto Uses = isl::union_map(ReachDef.reverse().range_map()) 639 .apply_range(ReadSchedule.reverse()); 640 641 // { DomainDef[] -> Scatter[] } 642 auto UseScatter = 643 singleton(Uses.domain().unwrap(), 644 Writes.get_space().map_from_domain_and_range(ScatterSpace)); 645 646 // { DomainDef[] -> Zone[] } 647 auto Lifetime = betweenScatter(WriteScatter, UseScatter, false, true); 648 649 // { DomainDef[] -> DomainRead[] } 650 auto DefUses = Uses.domain_factor_domain(); 651 652 return std::make_pair(DefUses, Lifetime); 653 } 654 655 /// Try to map a MemoryKind::Value to a given array element. 656 /// 657 /// @param SAI Representation of the scalar's memory to map. 658 /// @param TargetElt { Scatter[] -> Element[] } 659 /// Suggestion where to map a scalar to when at a timepoint. 660 /// 661 /// @return true if the scalar was successfully mapped. 662 bool tryMapValue(const ScopArrayInfo *SAI, isl::map TargetElt) { 663 assert(SAI->isValueKind()); 664 665 auto *DefMA = S->getValueDef(SAI); 666 assert(DefMA->isValueKind()); 667 assert(DefMA->isMustWrite()); 668 auto *V = DefMA->getAccessValue(); 669 auto *DefInst = DefMA->getAccessInstruction(); 670 671 // Stop if the scalar has already been mapped. 672 if (!DefMA->getLatestScopArrayInfo()->isValueKind()) 673 return false; 674 675 // { DomainDef[] -> Scatter[] } 676 auto DefSched = getScatterFor(DefMA); 677 678 // Where each write is mapped to, according to the suggestion. 679 // { DomainDef[] -> Element[] } 680 auto DefTarget = TargetElt.apply_domain(DefSched.reverse()); 681 simplify(DefTarget); 682 LLVM_DEBUG(dbgs() << " Def Mapping: " << DefTarget << '\n'); 683 684 auto OrigDomain = getDomainFor(DefMA); 685 auto MappedDomain = DefTarget.domain(); 686 if (!OrigDomain.is_subset(MappedDomain)) { 687 LLVM_DEBUG( 688 dbgs() 689 << " Reject because mapping does not encompass all instances\n"); 690 return false; 691 } 692 693 // { DomainDef[] -> Zone[] } 694 isl::map Lifetime; 695 696 // { DomainDef[] -> DomainUse[] } 697 isl::union_map DefUses; 698 699 std::tie(DefUses, Lifetime) = computeValueUses(SAI); 700 LLVM_DEBUG(dbgs() << " Lifetime: " << Lifetime << '\n'); 701 702 /// { [Element[] -> Zone[]] } 703 auto EltZone = Lifetime.apply_domain(DefTarget).wrap(); 704 simplify(EltZone); 705 706 // When known knowledge is disabled, just return the unknown value. It will 707 // either get filtered out or conflict with itself. 708 // { DomainDef[] -> ValInst[] } 709 isl::map ValInst; 710 if (DelicmComputeKnown) 711 ValInst = makeValInst(V, DefMA->getStatement(), 712 LI->getLoopFor(DefInst->getParent())); 713 else 714 ValInst = makeUnknownForDomain(DefMA->getStatement()); 715 716 // { DomainDef[] -> [Element[] -> Zone[]] } 717 auto EltKnownTranslator = DefTarget.range_product(Lifetime); 718 719 // { [Element[] -> Zone[]] -> ValInst[] } 720 auto EltKnown = ValInst.apply_domain(EltKnownTranslator); 721 simplify(EltKnown); 722 723 // { DomainDef[] -> [Element[] -> Scatter[]] } 724 auto WrittenTranslator = DefTarget.range_product(DefSched); 725 726 // { [Element[] -> Scatter[]] -> ValInst[] } 727 auto DefEltSched = ValInst.apply_domain(WrittenTranslator); 728 simplify(DefEltSched); 729 730 Knowledge Proposed(EltZone, nullptr, filterKnownValInst(EltKnown), 731 DefEltSched); 732 if (isConflicting(Proposed)) 733 return false; 734 735 // { DomainUse[] -> Element[] } 736 auto UseTarget = DefUses.reverse().apply_range(DefTarget); 737 738 mapValue(SAI, std::move(DefTarget), std::move(UseTarget), 739 std::move(Lifetime), std::move(Proposed)); 740 return true; 741 } 742 743 /// After a scalar has been mapped, update the global knowledge. 744 void applyLifetime(Knowledge Proposed) { 745 Zone.learnFrom(std::move(Proposed)); 746 } 747 748 /// Map a MemoryKind::Value scalar to an array element. 749 /// 750 /// Callers must have ensured that the mapping is valid and not conflicting. 751 /// 752 /// @param SAI The ScopArrayInfo representing the scalar's memory to 753 /// map. 754 /// @param DefTarget { DomainDef[] -> Element[] } 755 /// The array element to map the scalar to. 756 /// @param UseTarget { DomainUse[] -> Element[] } 757 /// The array elements the uses are mapped to. 758 /// @param Lifetime { DomainDef[] -> Zone[] } 759 /// The lifetime of each llvm::Value definition for 760 /// reporting. 761 /// @param Proposed Mapping constraints for reporting. 762 void mapValue(const ScopArrayInfo *SAI, isl::map DefTarget, 763 isl::union_map UseTarget, isl::map Lifetime, 764 Knowledge Proposed) { 765 // Redirect the read accesses. 766 for (auto *MA : S->getValueUses(SAI)) { 767 // { DomainUse[] } 768 auto Domain = getDomainFor(MA); 769 770 // { DomainUse[] -> Element[] } 771 auto NewAccRel = UseTarget.intersect_domain(Domain); 772 simplify(NewAccRel); 773 774 assert(isl_union_map_n_map(NewAccRel.get()) == 1); 775 MA->setNewAccessRelation(isl::map::from_union_map(NewAccRel)); 776 } 777 778 auto *WA = S->getValueDef(SAI); 779 WA->setNewAccessRelation(DefTarget); 780 applyLifetime(Proposed); 781 782 MappedValueScalars++; 783 NumberOfMappedValueScalars += 1; 784 } 785 786 isl::map makeValInst(Value *Val, ScopStmt *UserStmt, Loop *Scope, 787 bool IsCertain = true) { 788 // When known knowledge is disabled, just return the unknown value. It will 789 // either get filtered out or conflict with itself. 790 if (!DelicmComputeKnown) 791 return makeUnknownForDomain(UserStmt); 792 return ZoneAlgorithm::makeValInst(Val, UserStmt, Scope, IsCertain); 793 } 794 795 /// Express the incoming values of a PHI for each incoming statement in an 796 /// isl::union_map. 797 /// 798 /// @param SAI The PHI scalar represented by a ScopArrayInfo. 799 /// 800 /// @return { PHIWriteDomain[] -> ValInst[] } 801 isl::union_map determinePHIWrittenValues(const ScopArrayInfo *SAI) { 802 auto Result = makeEmptyUnionMap(); 803 804 // Collect the incoming values. 805 for (auto *MA : S->getPHIIncomings(SAI)) { 806 // { DomainWrite[] -> ValInst[] } 807 isl::union_map ValInst; 808 auto *WriteStmt = MA->getStatement(); 809 810 auto Incoming = MA->getIncoming(); 811 assert(!Incoming.empty()); 812 if (Incoming.size() == 1) { 813 ValInst = makeValInst(Incoming[0].second, WriteStmt, 814 LI->getLoopFor(Incoming[0].first)); 815 } else { 816 // If the PHI is in a subregion's exit node it can have multiple 817 // incoming values (+ maybe another incoming edge from an unrelated 818 // block). We cannot directly represent it as a single llvm::Value. 819 // We currently model it as unknown value, but modeling as the PHIInst 820 // itself could be OK, too. 821 ValInst = makeUnknownForDomain(WriteStmt); 822 } 823 824 Result = Result.unite(ValInst); 825 } 826 827 assert(Result.is_single_valued() && 828 "Cannot have multiple incoming values for same incoming statement"); 829 return Result; 830 } 831 832 /// Try to map a MemoryKind::PHI scalar to a given array element. 833 /// 834 /// @param SAI Representation of the scalar's memory to map. 835 /// @param TargetElt { Scatter[] -> Element[] } 836 /// Suggestion where to map the scalar to when at a 837 /// timepoint. 838 /// 839 /// @return true if the PHI scalar has been mapped. 840 bool tryMapPHI(const ScopArrayInfo *SAI, isl::map TargetElt) { 841 auto *PHIRead = S->getPHIRead(SAI); 842 assert(PHIRead->isPHIKind()); 843 assert(PHIRead->isRead()); 844 845 // Skip if already been mapped. 846 if (!PHIRead->getLatestScopArrayInfo()->isPHIKind()) 847 return false; 848 849 // { DomainRead[] -> Scatter[] } 850 auto PHISched = getScatterFor(PHIRead); 851 852 // { DomainRead[] -> Element[] } 853 auto PHITarget = PHISched.apply_range(TargetElt); 854 simplify(PHITarget); 855 LLVM_DEBUG(dbgs() << " Mapping: " << PHITarget << '\n'); 856 857 auto OrigDomain = getDomainFor(PHIRead); 858 auto MappedDomain = PHITarget.domain(); 859 if (!OrigDomain.is_subset(MappedDomain)) { 860 LLVM_DEBUG( 861 dbgs() 862 << " Reject because mapping does not encompass all instances\n"); 863 return false; 864 } 865 866 // { DomainRead[] -> DomainWrite[] } 867 auto PerPHIWrites = computePerPHI(SAI); 868 869 // { DomainWrite[] -> Element[] } 870 auto WritesTarget = PerPHIWrites.apply_domain(PHITarget).reverse(); 871 simplify(WritesTarget); 872 873 // { DomainWrite[] } 874 auto UniverseWritesDom = isl::union_set::empty(ParamSpace); 875 876 for (auto *MA : S->getPHIIncomings(SAI)) 877 UniverseWritesDom = UniverseWritesDom.add_set(getDomainFor(MA)); 878 879 auto RelevantWritesTarget = WritesTarget; 880 if (DelicmOverapproximateWrites) 881 WritesTarget = expandMapping(WritesTarget, UniverseWritesDom); 882 883 auto ExpandedWritesDom = WritesTarget.domain(); 884 if (!DelicmPartialWrites && 885 !UniverseWritesDom.is_subset(ExpandedWritesDom)) { 886 LLVM_DEBUG( 887 dbgs() << " Reject because did not find PHI write mapping for " 888 "all instances\n"); 889 if (DelicmOverapproximateWrites) 890 LLVM_DEBUG(dbgs() << " Relevant Mapping: " 891 << RelevantWritesTarget << '\n'); 892 LLVM_DEBUG(dbgs() << " Deduced Mapping: " << WritesTarget 893 << '\n'); 894 LLVM_DEBUG(dbgs() << " Missing instances: " 895 << UniverseWritesDom.subtract(ExpandedWritesDom) 896 << '\n'); 897 return false; 898 } 899 900 // { DomainRead[] -> Scatter[] } 901 auto PerPHIWriteScatter = 902 isl::map::from_union_map(PerPHIWrites.apply_range(Schedule)); 903 904 // { DomainRead[] -> Zone[] } 905 auto Lifetime = betweenScatter(PerPHIWriteScatter, PHISched, false, true); 906 simplify(Lifetime); 907 LLVM_DEBUG(dbgs() << " Lifetime: " << Lifetime << "\n"); 908 909 // { DomainWrite[] -> Zone[] } 910 auto WriteLifetime = isl::union_map(Lifetime).apply_domain(PerPHIWrites); 911 912 // { DomainWrite[] -> ValInst[] } 913 auto WrittenValue = determinePHIWrittenValues(SAI); 914 915 // { DomainWrite[] -> [Element[] -> Scatter[]] } 916 auto WrittenTranslator = WritesTarget.range_product(Schedule); 917 918 // { [Element[] -> Scatter[]] -> ValInst[] } 919 auto Written = WrittenValue.apply_domain(WrittenTranslator); 920 simplify(Written); 921 922 // { DomainWrite[] -> [Element[] -> Zone[]] } 923 auto LifetimeTranslator = WritesTarget.range_product(WriteLifetime); 924 925 // { DomainWrite[] -> ValInst[] } 926 auto WrittenKnownValue = filterKnownValInst(WrittenValue); 927 928 // { [Element[] -> Zone[]] -> ValInst[] } 929 auto EltLifetimeInst = WrittenKnownValue.apply_domain(LifetimeTranslator); 930 simplify(EltLifetimeInst); 931 932 // { [Element[] -> Zone[] } 933 auto Occupied = LifetimeTranslator.range(); 934 simplify(Occupied); 935 936 Knowledge Proposed(Occupied, nullptr, EltLifetimeInst, Written); 937 if (isConflicting(Proposed)) 938 return false; 939 940 mapPHI(SAI, std::move(PHITarget), std::move(WritesTarget), 941 std::move(Lifetime), std::move(Proposed)); 942 return true; 943 } 944 945 /// Map a MemoryKind::PHI scalar to an array element. 946 /// 947 /// Callers must have ensured that the mapping is valid and not conflicting 948 /// with the common knowledge. 949 /// 950 /// @param SAI The ScopArrayInfo representing the scalar's memory to 951 /// map. 952 /// @param ReadTarget { DomainRead[] -> Element[] } 953 /// The array element to map the scalar to. 954 /// @param WriteTarget { DomainWrite[] -> Element[] } 955 /// New access target for each PHI incoming write. 956 /// @param Lifetime { DomainRead[] -> Zone[] } 957 /// The lifetime of each PHI for reporting. 958 /// @param Proposed Mapping constraints for reporting. 959 void mapPHI(const ScopArrayInfo *SAI, isl::map ReadTarget, 960 isl::union_map WriteTarget, isl::map Lifetime, 961 Knowledge Proposed) { 962 // { Element[] } 963 isl::space ElementSpace = ReadTarget.get_space().range(); 964 965 // Redirect the PHI incoming writes. 966 for (auto *MA : S->getPHIIncomings(SAI)) { 967 // { DomainWrite[] } 968 auto Domain = getDomainFor(MA); 969 970 // { DomainWrite[] -> Element[] } 971 auto NewAccRel = WriteTarget.intersect_domain(Domain); 972 simplify(NewAccRel); 973 974 isl::space NewAccRelSpace = 975 Domain.get_space().map_from_domain_and_range(ElementSpace); 976 isl::map NewAccRelMap = singleton(NewAccRel, NewAccRelSpace); 977 MA->setNewAccessRelation(NewAccRelMap); 978 } 979 980 // Redirect the PHI read. 981 auto *PHIRead = S->getPHIRead(SAI); 982 PHIRead->setNewAccessRelation(ReadTarget); 983 applyLifetime(Proposed); 984 985 MappedPHIScalars++; 986 NumberOfMappedPHIScalars++; 987 } 988 989 /// Search and map scalars to memory overwritten by @p TargetStoreMA. 990 /// 991 /// Start trying to map scalars that are used in the same statement as the 992 /// store. For every successful mapping, try to also map scalars of the 993 /// statements where those are written. Repeat, until no more mapping 994 /// opportunity is found. 995 /// 996 /// There is currently no preference in which order scalars are tried. 997 /// Ideally, we would direct it towards a load instruction of the same array 998 /// element. 999 bool collapseScalarsToStore(MemoryAccess *TargetStoreMA) { 1000 assert(TargetStoreMA->isLatestArrayKind()); 1001 assert(TargetStoreMA->isMustWrite()); 1002 1003 auto TargetStmt = TargetStoreMA->getStatement(); 1004 1005 // { DomTarget[] } 1006 auto TargetDom = getDomainFor(TargetStmt); 1007 1008 // { DomTarget[] -> Element[] } 1009 auto TargetAccRel = getAccessRelationFor(TargetStoreMA); 1010 1011 // { Zone[] -> DomTarget[] } 1012 // For each point in time, find the next target store instance. 1013 auto Target = 1014 computeScalarReachingOverwrite(Schedule, TargetDom, false, true); 1015 1016 // { Zone[] -> Element[] } 1017 // Use the target store's write location as a suggestion to map scalars to. 1018 auto EltTarget = Target.apply_range(TargetAccRel); 1019 simplify(EltTarget); 1020 LLVM_DEBUG(dbgs() << " Target mapping is " << EltTarget << '\n'); 1021 1022 // Stack of elements not yet processed. 1023 SmallVector<MemoryAccess *, 16> Worklist; 1024 1025 // Set of scalars already tested. 1026 SmallPtrSet<const ScopArrayInfo *, 16> Closed; 1027 1028 // Lambda to add all scalar reads to the work list. 1029 auto ProcessAllIncoming = [&](ScopStmt *Stmt) { 1030 for (auto *MA : *Stmt) { 1031 if (!MA->isLatestScalarKind()) 1032 continue; 1033 if (!MA->isRead()) 1034 continue; 1035 1036 Worklist.push_back(MA); 1037 } 1038 }; 1039 1040 auto *WrittenVal = TargetStoreMA->getAccessInstruction()->getOperand(0); 1041 if (auto *WrittenValInputMA = TargetStmt->lookupInputAccessOf(WrittenVal)) 1042 Worklist.push_back(WrittenValInputMA); 1043 else 1044 ProcessAllIncoming(TargetStmt); 1045 1046 auto AnyMapped = false; 1047 auto &DL = S->getRegion().getEntry()->getModule()->getDataLayout(); 1048 auto StoreSize = 1049 DL.getTypeAllocSize(TargetStoreMA->getAccessValue()->getType()); 1050 1051 while (!Worklist.empty()) { 1052 auto *MA = Worklist.pop_back_val(); 1053 1054 auto *SAI = MA->getScopArrayInfo(); 1055 if (Closed.count(SAI)) 1056 continue; 1057 Closed.insert(SAI); 1058 LLVM_DEBUG(dbgs() << "\n Trying to map " << MA << " (SAI: " << SAI 1059 << ")\n"); 1060 1061 // Skip non-mappable scalars. 1062 if (!isMappable(SAI)) 1063 continue; 1064 1065 auto MASize = DL.getTypeAllocSize(MA->getAccessValue()->getType()); 1066 if (MASize > StoreSize) { 1067 LLVM_DEBUG( 1068 dbgs() << " Reject because storage size is insufficient\n"); 1069 continue; 1070 } 1071 1072 // Try to map MemoryKind::Value scalars. 1073 if (SAI->isValueKind()) { 1074 if (!tryMapValue(SAI, EltTarget)) 1075 continue; 1076 1077 auto *DefAcc = S->getValueDef(SAI); 1078 ProcessAllIncoming(DefAcc->getStatement()); 1079 1080 AnyMapped = true; 1081 continue; 1082 } 1083 1084 // Try to map MemoryKind::PHI scalars. 1085 if (SAI->isPHIKind()) { 1086 if (!tryMapPHI(SAI, EltTarget)) 1087 continue; 1088 // Add inputs of all incoming statements to the worklist. Prefer the 1089 // input accesses of the incoming blocks. 1090 for (auto *PHIWrite : S->getPHIIncomings(SAI)) { 1091 auto *PHIWriteStmt = PHIWrite->getStatement(); 1092 bool FoundAny = false; 1093 for (auto Incoming : PHIWrite->getIncoming()) { 1094 auto *IncomingInputMA = 1095 PHIWriteStmt->lookupInputAccessOf(Incoming.second); 1096 if (!IncomingInputMA) 1097 continue; 1098 1099 Worklist.push_back(IncomingInputMA); 1100 FoundAny = true; 1101 } 1102 1103 if (!FoundAny) 1104 ProcessAllIncoming(PHIWrite->getStatement()); 1105 } 1106 1107 AnyMapped = true; 1108 continue; 1109 } 1110 } 1111 1112 if (AnyMapped) { 1113 TargetsMapped++; 1114 NumberOfTargetsMapped++; 1115 } 1116 return AnyMapped; 1117 } 1118 1119 /// Compute when an array element is unused. 1120 /// 1121 /// @return { [Element[] -> Zone[]] } 1122 isl::union_set computeLifetime() const { 1123 // { Element[] -> Zone[] } 1124 auto ArrayUnused = computeArrayUnused(Schedule, AllMustWrites, AllReads, 1125 false, false, true); 1126 1127 auto Result = ArrayUnused.wrap(); 1128 1129 simplify(Result); 1130 return Result; 1131 } 1132 1133 /// Determine when an array element is written to, and which value instance is 1134 /// written. 1135 /// 1136 /// @return { [Element[] -> Scatter[]] -> ValInst[] } 1137 isl::union_map computeWritten() const { 1138 // { [Element[] -> Scatter[]] -> ValInst[] } 1139 auto EltWritten = applyDomainRange(AllWriteValInst, Schedule); 1140 1141 simplify(EltWritten); 1142 return EltWritten; 1143 } 1144 1145 /// Determine whether an access touches at most one element. 1146 /// 1147 /// The accessed element could be a scalar or accessing an array with constant 1148 /// subscript, such that all instances access only that element. 1149 /// 1150 /// @param MA The access to test. 1151 /// 1152 /// @return True, if zero or one elements are accessed; False if at least two 1153 /// different elements are accessed. 1154 bool isScalarAccess(MemoryAccess *MA) { 1155 auto Map = getAccessRelationFor(MA); 1156 auto Set = Map.range(); 1157 return Set.is_singleton(); 1158 } 1159 1160 /// Print mapping statistics to @p OS. 1161 void printStatistics(llvm::raw_ostream &OS, int Indent = 0) const { 1162 OS.indent(Indent) << "Statistics {\n"; 1163 OS.indent(Indent + 4) << "Compatible overwrites: " 1164 << NumberOfCompatibleTargets << "\n"; 1165 OS.indent(Indent + 4) << "Overwrites mapped to: " << NumberOfTargetsMapped 1166 << '\n'; 1167 OS.indent(Indent + 4) << "Value scalars mapped: " 1168 << NumberOfMappedValueScalars << '\n'; 1169 OS.indent(Indent + 4) << "PHI scalars mapped: " 1170 << NumberOfMappedPHIScalars << '\n'; 1171 OS.indent(Indent) << "}\n"; 1172 } 1173 1174 /// Return whether at least one transformation been applied. 1175 bool isModified() const { return NumberOfTargetsMapped > 0; } 1176 1177 public: 1178 DeLICMImpl(Scop *S, LoopInfo *LI) : ZoneAlgorithm("polly-delicm", S, LI) {} 1179 1180 /// Calculate the lifetime (definition to last use) of every array element. 1181 /// 1182 /// @return True if the computed lifetimes (#Zone) is usable. 1183 bool computeZone() { 1184 // Check that nothing strange occurs. 1185 collectCompatibleElts(); 1186 1187 isl::union_set EltUnused; 1188 isl::union_map EltKnown, EltWritten; 1189 1190 { 1191 IslMaxOperationsGuard MaxOpGuard(IslCtx.get(), DelicmMaxOps); 1192 1193 computeCommon(); 1194 1195 EltUnused = computeLifetime(); 1196 EltKnown = computeKnown(true, false); 1197 EltWritten = computeWritten(); 1198 } 1199 DeLICMAnalyzed++; 1200 1201 if (!EltUnused || !EltKnown || !EltWritten) { 1202 assert(isl_ctx_last_error(IslCtx.get()) == isl_error_quota && 1203 "The only reason that these things have not been computed should " 1204 "be if the max-operations limit hit"); 1205 DeLICMOutOfQuota++; 1206 LLVM_DEBUG(dbgs() << "DeLICM analysis exceeded max_operations\n"); 1207 DebugLoc Begin, End; 1208 getDebugLocations(getBBPairForRegion(&S->getRegion()), Begin, End); 1209 OptimizationRemarkAnalysis R(DEBUG_TYPE, "OutOfQuota", Begin, 1210 S->getEntry()); 1211 R << "maximal number of operations exceeded during zone analysis"; 1212 S->getFunction().getContext().diagnose(R); 1213 return false; 1214 } 1215 1216 Zone = OriginalZone = Knowledge(nullptr, EltUnused, EltKnown, EltWritten); 1217 LLVM_DEBUG(dbgs() << "Computed Zone:\n"; OriginalZone.print(dbgs(), 4)); 1218 1219 assert(Zone.isUsable() && OriginalZone.isUsable()); 1220 return true; 1221 } 1222 1223 /// Try to map as many scalars to unused array elements as possible. 1224 /// 1225 /// Multiple scalars might be mappable to intersecting unused array element 1226 /// zones, but we can only chose one. This is a greedy algorithm, therefore 1227 /// the first processed element claims it. 1228 void greedyCollapse() { 1229 bool Modified = false; 1230 1231 for (auto &Stmt : *S) { 1232 for (auto *MA : Stmt) { 1233 if (!MA->isLatestArrayKind()) 1234 continue; 1235 if (!MA->isWrite()) 1236 continue; 1237 1238 if (MA->isMayWrite()) { 1239 LLVM_DEBUG(dbgs() << "Access " << MA 1240 << " pruned because it is a MAY_WRITE\n"); 1241 OptimizationRemarkMissed R(DEBUG_TYPE, "TargetMayWrite", 1242 MA->getAccessInstruction()); 1243 R << "Skipped possible mapping target because it is not an " 1244 "unconditional overwrite"; 1245 S->getFunction().getContext().diagnose(R); 1246 continue; 1247 } 1248 1249 if (Stmt.getNumIterators() == 0) { 1250 LLVM_DEBUG(dbgs() << "Access " << MA 1251 << " pruned because it is not in a loop\n"); 1252 OptimizationRemarkMissed R(DEBUG_TYPE, "WriteNotInLoop", 1253 MA->getAccessInstruction()); 1254 R << "skipped possible mapping target because it is not in a loop"; 1255 S->getFunction().getContext().diagnose(R); 1256 continue; 1257 } 1258 1259 if (isScalarAccess(MA)) { 1260 LLVM_DEBUG(dbgs() 1261 << "Access " << MA 1262 << " pruned because it writes only a single element\n"); 1263 OptimizationRemarkMissed R(DEBUG_TYPE, "ScalarWrite", 1264 MA->getAccessInstruction()); 1265 R << "skipped possible mapping target because the memory location " 1266 "written to does not depend on its outer loop"; 1267 S->getFunction().getContext().diagnose(R); 1268 continue; 1269 } 1270 1271 if (!isa<StoreInst>(MA->getAccessInstruction())) { 1272 LLVM_DEBUG(dbgs() << "Access " << MA 1273 << " pruned because it is not a StoreInst\n"); 1274 OptimizationRemarkMissed R(DEBUG_TYPE, "NotAStore", 1275 MA->getAccessInstruction()); 1276 R << "skipped possible mapping target because non-store instructions " 1277 "are not supported"; 1278 S->getFunction().getContext().diagnose(R); 1279 continue; 1280 } 1281 1282 // Check for more than one element acces per statement instance. 1283 // Currently we expect write accesses to be functional, eg. disallow 1284 // 1285 // { Stmt[0] -> [i] : 0 <= i < 2 } 1286 // 1287 // This may occur when some accesses to the element write/read only 1288 // parts of the element, eg. a single byte. Polly then divides each 1289 // element into subelements of the smallest access length, normal access 1290 // then touch multiple of such subelements. It is very common when the 1291 // array is accesses with memset, memcpy or memmove which take i8* 1292 // arguments. 1293 isl::union_map AccRel = MA->getLatestAccessRelation(); 1294 if (!AccRel.is_single_valued().is_true()) { 1295 LLVM_DEBUG(dbgs() << "Access " << MA 1296 << " is incompatible because it writes multiple " 1297 "elements per instance\n"); 1298 OptimizationRemarkMissed R(DEBUG_TYPE, "NonFunctionalAccRel", 1299 MA->getAccessInstruction()); 1300 R << "skipped possible mapping target because it writes more than " 1301 "one element"; 1302 S->getFunction().getContext().diagnose(R); 1303 continue; 1304 } 1305 1306 isl::union_set TouchedElts = AccRel.range(); 1307 if (!TouchedElts.is_subset(CompatibleElts)) { 1308 LLVM_DEBUG( 1309 dbgs() 1310 << "Access " << MA 1311 << " is incompatible because it touches incompatible elements\n"); 1312 OptimizationRemarkMissed R(DEBUG_TYPE, "IncompatibleElts", 1313 MA->getAccessInstruction()); 1314 R << "skipped possible mapping target because a target location " 1315 "cannot be reliably analyzed"; 1316 S->getFunction().getContext().diagnose(R); 1317 continue; 1318 } 1319 1320 assert(isCompatibleAccess(MA)); 1321 NumberOfCompatibleTargets++; 1322 LLVM_DEBUG(dbgs() << "Analyzing target access " << MA << "\n"); 1323 if (collapseScalarsToStore(MA)) 1324 Modified = true; 1325 } 1326 } 1327 1328 if (Modified) 1329 DeLICMScopsModified++; 1330 } 1331 1332 /// Dump the internal information about a performed DeLICM to @p OS. 1333 void print(llvm::raw_ostream &OS, int Indent = 0) { 1334 if (!Zone.isUsable()) { 1335 OS.indent(Indent) << "Zone not computed\n"; 1336 return; 1337 } 1338 1339 printStatistics(OS, Indent); 1340 if (!isModified()) { 1341 OS.indent(Indent) << "No modification has been made\n"; 1342 return; 1343 } 1344 printAccesses(OS, Indent); 1345 } 1346 }; 1347 1348 class DeLICM : public ScopPass { 1349 private: 1350 DeLICM(const DeLICM &) = delete; 1351 const DeLICM &operator=(const DeLICM &) = delete; 1352 1353 /// The pass implementation, also holding per-scop data. 1354 std::unique_ptr<DeLICMImpl> Impl; 1355 1356 void collapseToUnused(Scop &S) { 1357 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 1358 Impl = make_unique<DeLICMImpl>(&S, &LI); 1359 1360 if (!Impl->computeZone()) { 1361 LLVM_DEBUG(dbgs() << "Abort because cannot reliably compute lifetimes\n"); 1362 return; 1363 } 1364 1365 LLVM_DEBUG(dbgs() << "Collapsing scalars to unused array elements...\n"); 1366 Impl->greedyCollapse(); 1367 1368 LLVM_DEBUG(dbgs() << "\nFinal Scop:\n"); 1369 LLVM_DEBUG(dbgs() << S); 1370 } 1371 1372 public: 1373 static char ID; 1374 explicit DeLICM() : ScopPass(ID) {} 1375 1376 virtual void getAnalysisUsage(AnalysisUsage &AU) const override { 1377 AU.addRequiredTransitive<ScopInfoRegionPass>(); 1378 AU.addRequired<LoopInfoWrapperPass>(); 1379 AU.setPreservesAll(); 1380 } 1381 1382 virtual bool runOnScop(Scop &S) override { 1383 // Free resources for previous scop's computation, if not yet done. 1384 releaseMemory(); 1385 1386 collapseToUnused(S); 1387 1388 auto ScopStats = S.getStatistics(); 1389 NumValueWrites += ScopStats.NumValueWrites; 1390 NumValueWritesInLoops += ScopStats.NumValueWritesInLoops; 1391 NumPHIWrites += ScopStats.NumPHIWrites; 1392 NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops; 1393 NumSingletonWrites += ScopStats.NumSingletonWrites; 1394 NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops; 1395 1396 return false; 1397 } 1398 1399 virtual void printScop(raw_ostream &OS, Scop &S) const override { 1400 if (!Impl) 1401 return; 1402 assert(Impl->getScop() == &S); 1403 1404 OS << "DeLICM result:\n"; 1405 Impl->print(OS); 1406 } 1407 1408 virtual void releaseMemory() override { Impl.reset(); } 1409 }; 1410 1411 char DeLICM::ID; 1412 } // anonymous namespace 1413 1414 Pass *polly::createDeLICMPass() { return new DeLICM(); } 1415 1416 INITIALIZE_PASS_BEGIN(DeLICM, "polly-delicm", "Polly - DeLICM/DePRE", false, 1417 false) 1418 INITIALIZE_PASS_DEPENDENCY(ScopInfoWrapperPass) 1419 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 1420 INITIALIZE_PASS_END(DeLICM, "polly-delicm", "Polly - DeLICM/DePRE", false, 1421 false) 1422 1423 bool polly::isConflicting( 1424 isl::union_set ExistingOccupied, isl::union_set ExistingUnused, 1425 isl::union_map ExistingKnown, isl::union_map ExistingWrites, 1426 isl::union_set ProposedOccupied, isl::union_set ProposedUnused, 1427 isl::union_map ProposedKnown, isl::union_map ProposedWrites, 1428 llvm::raw_ostream *OS, unsigned Indent) { 1429 Knowledge Existing(std::move(ExistingOccupied), std::move(ExistingUnused), 1430 std::move(ExistingKnown), std::move(ExistingWrites)); 1431 Knowledge Proposed(std::move(ProposedOccupied), std::move(ProposedUnused), 1432 std::move(ProposedKnown), std::move(ProposedWrites)); 1433 1434 return Knowledge::isConflicting(Existing, Proposed, OS, Indent); 1435 } 1436