1 //===------ Simplify.cpp ----------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Simplify a SCoP by removing unnecessary statements and accesses. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "polly/Simplify.h" 14 #include "polly/ScopInfo.h" 15 #include "polly/ScopPass.h" 16 #include "polly/Support/GICHelper.h" 17 #include "polly/Support/ISLOStream.h" 18 #include "polly/Support/ISLTools.h" 19 #include "polly/Support/VirtualInstruction.h" 20 #include "llvm/ADT/Statistic.h" 21 #include "llvm/InitializePasses.h" 22 #include "llvm/Support/Debug.h" 23 24 #define DEBUG_TYPE "polly-simplify" 25 26 using namespace llvm; 27 using namespace polly; 28 29 namespace { 30 31 #define TWO_STATISTICS(VARNAME, DESC) \ 32 static llvm::Statistic VARNAME[2] = { \ 33 {DEBUG_TYPE, #VARNAME "0", DESC " (first)"}, \ 34 {DEBUG_TYPE, #VARNAME "1", DESC " (second)"}} 35 36 /// Number of max disjuncts we allow in removeOverwrites(). This is to avoid 37 /// that the analysis of accesses in a statement is becoming too complex. Chosen 38 /// to be relatively small because all the common cases should access only few 39 /// array elements per statement. 40 static int const SimplifyMaxDisjuncts = 4; 41 42 TWO_STATISTICS(ScopsProcessed, "Number of SCoPs processed"); 43 TWO_STATISTICS(ScopsModified, "Number of SCoPs simplified"); 44 45 TWO_STATISTICS(TotalEmptyDomainsRemoved, 46 "Number of statement with empty domains removed in any SCoP"); 47 TWO_STATISTICS(TotalOverwritesRemoved, "Number of removed overwritten writes"); 48 TWO_STATISTICS(TotalWritesCoalesced, "Number of writes coalesced with another"); 49 TWO_STATISTICS(TotalRedundantWritesRemoved, 50 "Number of writes of same value removed in any SCoP"); 51 TWO_STATISTICS(TotalEmptyPartialAccessesRemoved, 52 "Number of empty partial accesses removed"); 53 TWO_STATISTICS(TotalDeadAccessesRemoved, "Number of dead accesses removed"); 54 TWO_STATISTICS(TotalDeadInstructionsRemoved, 55 "Number of unused instructions removed"); 56 TWO_STATISTICS(TotalStmtsRemoved, "Number of statements removed in any SCoP"); 57 58 TWO_STATISTICS(NumValueWrites, "Number of scalar value writes after Simplify"); 59 TWO_STATISTICS( 60 NumValueWritesInLoops, 61 "Number of scalar value writes nested in affine loops after Simplify"); 62 TWO_STATISTICS(NumPHIWrites, 63 "Number of scalar phi writes after the first simplification"); 64 TWO_STATISTICS( 65 NumPHIWritesInLoops, 66 "Number of scalar phi writes nested in affine loops after Simplify"); 67 TWO_STATISTICS(NumSingletonWrites, "Number of singleton writes after Simplify"); 68 TWO_STATISTICS( 69 NumSingletonWritesInLoops, 70 "Number of singleton writes nested in affine loops after Simplify"); 71 72 static bool isImplicitRead(MemoryAccess *MA) { 73 return MA->isRead() && MA->isOriginalScalarKind(); 74 } 75 76 static bool isExplicitAccess(MemoryAccess *MA) { 77 return MA->isOriginalArrayKind(); 78 } 79 80 static bool isImplicitWrite(MemoryAccess *MA) { 81 return MA->isWrite() && MA->isOriginalScalarKind(); 82 } 83 84 /// Like isl::union_map::unite, but may also return an underapproximated 85 /// result if getting too complex. 86 /// 87 /// This is implemented by adding disjuncts to the results until the limit is 88 /// reached. 89 static isl::union_map underapproximatedAddMap(isl::union_map UMap, 90 isl::map Map) { 91 if (UMap.is_null() || Map.is_null()) 92 return {}; 93 94 isl::map PrevMap = UMap.extract_map(Map.get_space()); 95 96 // Fast path: If known that we cannot exceed the disjunct limit, just add 97 // them. 98 if (isl_map_n_basic_map(PrevMap.get()) + isl_map_n_basic_map(Map.get()) <= 99 SimplifyMaxDisjuncts) 100 return UMap.unite(Map); 101 102 isl::map Result = isl::map::empty(PrevMap.get_space()); 103 for (isl::basic_map BMap : PrevMap.get_basic_map_list()) { 104 if (Result.n_basic_map().release() > SimplifyMaxDisjuncts) 105 break; 106 Result = Result.unite(BMap); 107 } 108 for (isl::basic_map BMap : Map.get_basic_map_list()) { 109 if (isl_map_n_basic_map(Result.get()) > SimplifyMaxDisjuncts) 110 break; 111 Result = Result.unite(BMap); 112 } 113 114 isl::union_map UResult = 115 UMap.subtract(isl::map::universe(PrevMap.get_space())); 116 UResult.unite(Result); 117 118 return UResult; 119 } 120 121 class SimplifyImpl { 122 private: 123 /// The invocation id (if there are multiple instances in the pass manager's 124 /// pipeline) to determine which statistics to update. 125 int CallNo; 126 127 /// The last/current SCoP that is/has been processed. 128 Scop *S = nullptr; 129 130 /// Number of statements with empty domains removed from the SCoP. 131 int EmptyDomainsRemoved = 0; 132 133 /// Number of writes that are overwritten anyway. 134 int OverwritesRemoved = 0; 135 136 /// Number of combined writes. 137 int WritesCoalesced = 0; 138 139 /// Number of redundant writes removed from this SCoP. 140 int RedundantWritesRemoved = 0; 141 142 /// Number of writes with empty access domain removed. 143 int EmptyPartialAccessesRemoved = 0; 144 145 /// Number of unused accesses removed from this SCoP. 146 int DeadAccessesRemoved = 0; 147 148 /// Number of unused instructions removed from this SCoP. 149 int DeadInstructionsRemoved = 0; 150 151 /// Number of unnecessary statements removed from the SCoP. 152 int StmtsRemoved = 0; 153 154 /// Remove statements that are never executed due to their domains being 155 /// empty. 156 /// 157 /// In contrast to Scop::simplifySCoP, this removes based on the SCoP's 158 /// effective domain, i.e. including the SCoP's context as used by some other 159 /// simplification methods in this pass. This is necessary because the 160 /// analysis on empty domains is unreliable, e.g. remove a scalar value 161 /// definition MemoryAccesses, but not its use. 162 void removeEmptyDomainStmts(); 163 164 /// Remove writes that are overwritten unconditionally later in the same 165 /// statement. 166 /// 167 /// There must be no read of the same value between the write (that is to be 168 /// removed) and the overwrite. 169 void removeOverwrites(); 170 171 /// Combine writes that write the same value if possible. 172 /// 173 /// This function is able to combine: 174 /// - Partial writes with disjoint domain. 175 /// - Writes that write to the same array element. 176 /// 177 /// In all cases, both writes must write the same values. 178 void coalesceWrites(); 179 180 /// Remove writes that just write the same value already stored in the 181 /// element. 182 void removeRedundantWrites(); 183 184 /// Remove statements without side effects. 185 void removeUnnecessaryStmts(); 186 187 /// Remove accesses that have an empty domain. 188 void removeEmptyPartialAccesses(); 189 190 /// Mark all reachable instructions and access, and sweep those that are not 191 /// reachable. 192 void markAndSweep(LoopInfo *LI); 193 194 /// Print simplification statistics to @p OS. 195 void printStatistics(llvm::raw_ostream &OS, int Indent = 0) const; 196 197 /// Print the current state of all MemoryAccesses to @p OS. 198 void printAccesses(llvm::raw_ostream &OS, int Indent = 0) const; 199 200 public: 201 explicit SimplifyImpl(int CallNo = 0) : CallNo(CallNo) {} 202 203 void run(Scop &S, LoopInfo *LI); 204 205 void printScop(raw_ostream &OS, Scop &S) const; 206 207 /// Return whether at least one simplification has been applied. 208 bool isModified() const; 209 }; 210 211 /// Return whether at least one simplification has been applied. 212 bool SimplifyImpl::isModified() const { 213 return EmptyDomainsRemoved > 0 || OverwritesRemoved > 0 || 214 WritesCoalesced > 0 || RedundantWritesRemoved > 0 || 215 EmptyPartialAccessesRemoved > 0 || DeadAccessesRemoved > 0 || 216 DeadInstructionsRemoved > 0 || StmtsRemoved > 0; 217 } 218 219 /// Remove statements that are never executed due to their domains being 220 /// empty. 221 /// 222 /// In contrast to Scop::simplifySCoP, this removes based on the SCoP's 223 /// effective domain, i.e. including the SCoP's context as used by some other 224 /// simplification methods in this pass. This is necessary because the 225 /// analysis on empty domains is unreliable, e.g. remove a scalar value 226 /// definition MemoryAccesses, but not its use. 227 void SimplifyImpl::removeEmptyDomainStmts() { 228 size_t NumStmtsBefore = S->getSize(); 229 230 S->removeStmts([](ScopStmt &Stmt) -> bool { 231 auto EffectiveDomain = 232 Stmt.getDomain().intersect_params(Stmt.getParent()->getContext()); 233 return EffectiveDomain.is_empty(); 234 }); 235 236 assert(NumStmtsBefore >= S->getSize()); 237 EmptyDomainsRemoved = NumStmtsBefore - S->getSize(); 238 LLVM_DEBUG(dbgs() << "Removed " << EmptyDomainsRemoved << " (of " 239 << NumStmtsBefore << ") statements with empty domains \n"); 240 TotalEmptyDomainsRemoved[CallNo] += EmptyDomainsRemoved; 241 } 242 243 /// Remove writes that are overwritten unconditionally later in the same 244 /// statement. 245 /// 246 /// There must be no read of the same value between the write (that is to be 247 /// removed) and the overwrite. 248 void SimplifyImpl::removeOverwrites() { 249 for (auto &Stmt : *S) { 250 isl::set Domain = Stmt.getDomain(); 251 isl::union_map WillBeOverwritten = isl::union_map::empty(S->getIslCtx()); 252 253 SmallVector<MemoryAccess *, 32> Accesses(getAccessesInOrder(Stmt)); 254 255 // Iterate in reverse order, so the overwrite comes before the write that 256 // is to be removed. 257 for (auto *MA : reverse(Accesses)) { 258 259 // In region statements, the explicit accesses can be in blocks that are 260 // can be executed in any order. We therefore process only the implicit 261 // writes and stop after that. 262 if (Stmt.isRegionStmt() && isExplicitAccess(MA)) 263 break; 264 265 auto AccRel = MA->getAccessRelation(); 266 AccRel = AccRel.intersect_domain(Domain); 267 AccRel = AccRel.intersect_params(S->getContext()); 268 269 // If a value is read in-between, do not consider it as overwritten. 270 if (MA->isRead()) { 271 // Invalidate all overwrites for the array it accesses to avoid too 272 // complex isl sets. 273 isl::map AccRelUniv = isl::map::universe(AccRel.get_space()); 274 WillBeOverwritten = WillBeOverwritten.subtract(AccRelUniv); 275 continue; 276 } 277 278 // If all of a write's elements are overwritten, remove it. 279 isl::union_map AccRelUnion = AccRel; 280 if (AccRelUnion.is_subset(WillBeOverwritten)) { 281 LLVM_DEBUG(dbgs() << "Removing " << MA 282 << " which will be overwritten anyway\n"); 283 284 Stmt.removeSingleMemoryAccess(MA); 285 OverwritesRemoved++; 286 TotalOverwritesRemoved[CallNo]++; 287 } 288 289 // Unconditional writes overwrite other values. 290 if (MA->isMustWrite()) { 291 // Avoid too complex isl sets. If necessary, throw away some of the 292 // knowledge. 293 WillBeOverwritten = underapproximatedAddMap(WillBeOverwritten, AccRel); 294 } 295 } 296 } 297 } 298 299 /// Combine writes that write the same value if possible. 300 /// 301 /// This function is able to combine: 302 /// - Partial writes with disjoint domain. 303 /// - Writes that write to the same array element. 304 /// 305 /// In all cases, both writes must write the same values. 306 void SimplifyImpl::coalesceWrites() { 307 for (auto &Stmt : *S) { 308 isl::set Domain = Stmt.getDomain().intersect_params(S->getContext()); 309 310 // We let isl do the lookup for the same-value condition. For this, we 311 // wrap llvm::Value into an isl::set such that isl can do the lookup in 312 // its hashtable implementation. llvm::Values are only compared within a 313 // ScopStmt, so the map can be local to this scope. TODO: Refactor with 314 // ZoneAlgorithm::makeValueSet() 315 SmallDenseMap<Value *, isl::set> ValueSets; 316 auto makeValueSet = [&ValueSets, this](Value *V) -> isl::set { 317 assert(V); 318 isl::set &Result = ValueSets[V]; 319 if (Result.is_null()) { 320 isl::ctx Ctx = S->getIslCtx(); 321 std::string Name = getIslCompatibleName( 322 "Val", V, ValueSets.size() - 1, std::string(), UseInstructionNames); 323 isl::id Id = isl::id::alloc(Ctx, Name, V); 324 Result = isl::set::universe( 325 isl::space(Ctx, 0, 0).set_tuple_id(isl::dim::set, Id)); 326 } 327 return Result; 328 }; 329 330 // List of all eligible (for coalescing) writes of the future. 331 // { [Domain[] -> Element[]] -> [Value[] -> MemoryAccess[]] } 332 isl::union_map FutureWrites = isl::union_map::empty(S->getIslCtx()); 333 334 // Iterate over accesses from the last to the first. 335 SmallVector<MemoryAccess *, 32> Accesses(getAccessesInOrder(Stmt)); 336 for (MemoryAccess *MA : reverse(Accesses)) { 337 // In region statements, the explicit accesses can be in blocks that can 338 // be executed in any order. We therefore process only the implicit 339 // writes and stop after that. 340 if (Stmt.isRegionStmt() && isExplicitAccess(MA)) 341 break; 342 343 // { Domain[] -> Element[] } 344 isl::map AccRel = MA->getLatestAccessRelation().intersect_domain(Domain); 345 346 // { [Domain[] -> Element[]] } 347 isl::set AccRelWrapped = AccRel.wrap(); 348 349 // { Value[] } 350 isl::set ValSet; 351 352 if (MA->isMustWrite() && (MA->isOriginalScalarKind() || 353 isa<StoreInst>(MA->getAccessInstruction()))) { 354 // Normally, tryGetValueStored() should be used to determine which 355 // element is written, but it can return nullptr; For PHI accesses, 356 // getAccessValue() returns the PHI instead of the PHI's incoming 357 // value. In this case, where we only compare values of a single 358 // statement, this is fine, because within a statement, a PHI in a 359 // successor block has always the same value as the incoming write. We 360 // still preferably use the incoming value directly so we also catch 361 // direct uses of that. 362 Value *StoredVal = MA->tryGetValueStored(); 363 if (!StoredVal) 364 StoredVal = MA->getAccessValue(); 365 ValSet = makeValueSet(StoredVal); 366 367 // { Domain[] } 368 isl::set AccDomain = AccRel.domain(); 369 370 // Parts of the statement's domain that is not written by this access. 371 isl::set UndefDomain = Domain.subtract(AccDomain); 372 373 // { Element[] } 374 isl::set ElementUniverse = 375 isl::set::universe(AccRel.get_space().range()); 376 377 // { Domain[] -> Element[] } 378 isl::map UndefAnything = 379 isl::map::from_domain_and_range(UndefDomain, ElementUniverse); 380 381 // We are looking a compatible write access. The other write can 382 // access these elements... 383 isl::map AllowedAccesses = AccRel.unite(UndefAnything); 384 385 // ... and must write the same value. 386 // { [Domain[] -> Element[]] -> Value[] } 387 isl::map Filter = 388 isl::map::from_domain_and_range(AllowedAccesses.wrap(), ValSet); 389 390 // Lookup future write that fulfills these conditions. 391 // { [[Domain[] -> Element[]] -> Value[]] -> MemoryAccess[] } 392 isl::union_map Filtered = 393 FutureWrites.uncurry().intersect_domain(Filter.wrap()); 394 395 // Iterate through the candidates. 396 for (isl::map Map : Filtered.get_map_list()) { 397 MemoryAccess *OtherMA = (MemoryAccess *)Map.get_space() 398 .get_tuple_id(isl::dim::out) 399 .get_user(); 400 401 isl::map OtherAccRel = 402 OtherMA->getLatestAccessRelation().intersect_domain(Domain); 403 404 // The filter only guaranteed that some of OtherMA's accessed 405 // elements are allowed. Verify that it only accesses allowed 406 // elements. Otherwise, continue with the next candidate. 407 if (!OtherAccRel.is_subset(AllowedAccesses).is_true()) 408 continue; 409 410 // The combined access relation. 411 // { Domain[] -> Element[] } 412 isl::map NewAccRel = AccRel.unite(OtherAccRel); 413 simplify(NewAccRel); 414 415 // Carry out the coalescing. 416 Stmt.removeSingleMemoryAccess(MA); 417 OtherMA->setNewAccessRelation(NewAccRel); 418 419 // We removed MA, OtherMA takes its role. 420 MA = OtherMA; 421 422 TotalWritesCoalesced[CallNo]++; 423 WritesCoalesced++; 424 425 // Don't look for more candidates. 426 break; 427 } 428 } 429 430 // Two writes cannot be coalesced if there is another access (to some of 431 // the written elements) between them. Remove all visited write accesses 432 // from the list of eligible writes. Don't just remove the accessed 433 // elements, but any MemoryAccess that touches any of the invalidated 434 // elements. 435 SmallPtrSet<MemoryAccess *, 2> TouchedAccesses; 436 for (isl::map Map : 437 FutureWrites.intersect_domain(AccRelWrapped).get_map_list()) { 438 MemoryAccess *MA = (MemoryAccess *)Map.get_space() 439 .range() 440 .unwrap() 441 .get_tuple_id(isl::dim::out) 442 .get_user(); 443 TouchedAccesses.insert(MA); 444 } 445 isl::union_map NewFutureWrites = 446 isl::union_map::empty(FutureWrites.ctx()); 447 for (isl::map FutureWrite : FutureWrites.get_map_list()) { 448 MemoryAccess *MA = (MemoryAccess *)FutureWrite.get_space() 449 .range() 450 .unwrap() 451 .get_tuple_id(isl::dim::out) 452 .get_user(); 453 if (!TouchedAccesses.count(MA)) 454 NewFutureWrites = NewFutureWrites.unite(FutureWrite); 455 } 456 FutureWrites = NewFutureWrites; 457 458 if (MA->isMustWrite() && !ValSet.is_null()) { 459 // { MemoryAccess[] } 460 auto AccSet = 461 isl::set::universe(isl::space(S->getIslCtx(), 0, 0) 462 .set_tuple_id(isl::dim::set, MA->getId())); 463 464 // { Val[] -> MemoryAccess[] } 465 isl::map ValAccSet = isl::map::from_domain_and_range(ValSet, AccSet); 466 467 // { [Domain[] -> Element[]] -> [Value[] -> MemoryAccess[]] } 468 isl::map AccRelValAcc = 469 isl::map::from_domain_and_range(AccRelWrapped, ValAccSet.wrap()); 470 FutureWrites = FutureWrites.unite(AccRelValAcc); 471 } 472 } 473 } 474 } 475 476 /// Remove writes that just write the same value already stored in the 477 /// element. 478 void SimplifyImpl::removeRedundantWrites() { 479 for (auto &Stmt : *S) { 480 SmallDenseMap<Value *, isl::set> ValueSets; 481 auto makeValueSet = [&ValueSets, this](Value *V) -> isl::set { 482 assert(V); 483 isl::set &Result = ValueSets[V]; 484 if (Result.is_null()) { 485 isl_ctx *Ctx = S->getIslCtx().get(); 486 std::string Name = getIslCompatibleName( 487 "Val", V, ValueSets.size() - 1, std::string(), UseInstructionNames); 488 isl::id Id = isl::manage(isl_id_alloc(Ctx, Name.c_str(), V)); 489 Result = isl::set::universe( 490 isl::space(Ctx, 0, 0).set_tuple_id(isl::dim::set, Id)); 491 } 492 return Result; 493 }; 494 495 isl::set Domain = Stmt.getDomain(); 496 Domain = Domain.intersect_params(S->getContext()); 497 498 // List of element reads that still have the same value while iterating 499 // through the MemoryAccesses. 500 // { [Domain[] -> Element[]] -> Val[] } 501 isl::union_map Known = isl::union_map::empty(S->getIslCtx()); 502 503 SmallVector<MemoryAccess *, 32> Accesses(getAccessesInOrder(Stmt)); 504 for (MemoryAccess *MA : Accesses) { 505 // Is the memory access in a defined order relative to the other 506 // accesses? In region statements, only the first and the last accesses 507 // have defined order. Execution of those in the middle may depend on 508 // runtime conditions an therefore cannot be modified. 509 bool IsOrdered = 510 Stmt.isBlockStmt() || MA->isOriginalScalarKind() || 511 (!S->getBoxedLoops().size() && MA->getAccessInstruction() && 512 Stmt.getEntryBlock() == MA->getAccessInstruction()->getParent()); 513 514 isl::map AccRel = MA->getAccessRelation(); 515 AccRel = AccRel.intersect_domain(Domain); 516 isl::set AccRelWrapped = AccRel.wrap(); 517 518 // Determine whether a write is redundant (stores only values that are 519 // already present in the written array elements) and remove it if this 520 // is the case. 521 if (IsOrdered && MA->isMustWrite() && 522 (isa<StoreInst>(MA->getAccessInstruction()) || 523 MA->isOriginalScalarKind())) { 524 Value *StoredVal = MA->tryGetValueStored(); 525 if (!StoredVal) 526 StoredVal = MA->getAccessValue(); 527 528 if (StoredVal) { 529 // Lookup in the set of known values. 530 isl::map AccRelStoredVal = isl::map::from_domain_and_range( 531 AccRelWrapped, makeValueSet(StoredVal)); 532 if (isl::union_map(AccRelStoredVal).is_subset(Known)) { 533 LLVM_DEBUG(dbgs() << "Cleanup of " << MA << ":\n"); 534 LLVM_DEBUG(dbgs() << " Scalar: " << *StoredVal << "\n"); 535 LLVM_DEBUG(dbgs() << " AccRel: " << AccRel << "\n"); 536 537 Stmt.removeSingleMemoryAccess(MA); 538 539 RedundantWritesRemoved++; 540 TotalRedundantWritesRemoved[CallNo]++; 541 } 542 } 543 } 544 545 // Update the know values set. 546 if (MA->isRead()) { 547 // Loaded values are the currently known values of the array element 548 // it was loaded from. 549 Value *LoadedVal = MA->getAccessValue(); 550 if (LoadedVal && IsOrdered) { 551 isl::map AccRelVal = isl::map::from_domain_and_range( 552 AccRelWrapped, makeValueSet(LoadedVal)); 553 554 Known = Known.unite(AccRelVal); 555 } 556 } else if (MA->isWrite()) { 557 // Remove (possibly) overwritten values from the known elements set. 558 // We remove all elements of the accessed array to avoid too complex 559 // isl sets. 560 isl::set AccRelUniv = isl::set::universe(AccRelWrapped.get_space()); 561 Known = Known.subtract_domain(AccRelUniv); 562 563 // At this point, we could add the written value of must-writes. 564 // However, writing same values is already handled by 565 // coalesceWrites(). 566 } 567 } 568 } 569 } 570 571 /// Remove statements without side effects. 572 void SimplifyImpl::removeUnnecessaryStmts() { 573 auto NumStmtsBefore = S->getSize(); 574 S->simplifySCoP(true); 575 assert(NumStmtsBefore >= S->getSize()); 576 StmtsRemoved = NumStmtsBefore - S->getSize(); 577 LLVM_DEBUG(dbgs() << "Removed " << StmtsRemoved << " (of " << NumStmtsBefore 578 << ") statements\n"); 579 TotalStmtsRemoved[CallNo] += StmtsRemoved; 580 } 581 582 /// Remove accesses that have an empty domain. 583 void SimplifyImpl::removeEmptyPartialAccesses() { 584 for (ScopStmt &Stmt : *S) { 585 // Defer the actual removal to not invalidate iterators. 586 SmallVector<MemoryAccess *, 8> DeferredRemove; 587 588 for (MemoryAccess *MA : Stmt) { 589 if (!MA->isWrite()) 590 continue; 591 592 isl::map AccRel = MA->getAccessRelation(); 593 if (!AccRel.is_empty().is_true()) 594 continue; 595 596 LLVM_DEBUG( 597 dbgs() << "Removing " << MA 598 << " because it's a partial access that never occurs\n"); 599 DeferredRemove.push_back(MA); 600 } 601 602 for (MemoryAccess *MA : DeferredRemove) { 603 Stmt.removeSingleMemoryAccess(MA); 604 EmptyPartialAccessesRemoved++; 605 TotalEmptyPartialAccessesRemoved[CallNo]++; 606 } 607 } 608 } 609 610 /// Mark all reachable instructions and access, and sweep those that are not 611 /// reachable. 612 void SimplifyImpl::markAndSweep(LoopInfo *LI) { 613 DenseSet<MemoryAccess *> UsedMA; 614 DenseSet<VirtualInstruction> UsedInsts; 615 616 // Get all reachable instructions and accesses. 617 markReachable(S, LI, UsedInsts, UsedMA); 618 619 // Remove all non-reachable accesses. 620 // We need get all MemoryAccesses first, in order to not invalidate the 621 // iterators when removing them. 622 SmallVector<MemoryAccess *, 64> AllMAs; 623 for (ScopStmt &Stmt : *S) 624 AllMAs.append(Stmt.begin(), Stmt.end()); 625 626 for (MemoryAccess *MA : AllMAs) { 627 if (UsedMA.count(MA)) 628 continue; 629 LLVM_DEBUG(dbgs() << "Removing " << MA 630 << " because its value is not used\n"); 631 ScopStmt *Stmt = MA->getStatement(); 632 Stmt->removeSingleMemoryAccess(MA); 633 634 DeadAccessesRemoved++; 635 TotalDeadAccessesRemoved[CallNo]++; 636 } 637 638 // Remove all non-reachable instructions. 639 for (ScopStmt &Stmt : *S) { 640 // Note that for region statements, we can only remove the non-terminator 641 // instructions of the entry block. All other instructions are not in the 642 // instructions list, but implicitly always part of the statement. 643 644 SmallVector<Instruction *, 32> AllInsts(Stmt.insts_begin(), 645 Stmt.insts_end()); 646 SmallVector<Instruction *, 32> RemainInsts; 647 648 for (Instruction *Inst : AllInsts) { 649 auto It = UsedInsts.find({&Stmt, Inst}); 650 if (It == UsedInsts.end()) { 651 LLVM_DEBUG(dbgs() << "Removing "; Inst->print(dbgs()); 652 dbgs() << " because it is not used\n"); 653 DeadInstructionsRemoved++; 654 TotalDeadInstructionsRemoved[CallNo]++; 655 continue; 656 } 657 658 RemainInsts.push_back(Inst); 659 660 // If instructions appear multiple times, keep only the first. 661 UsedInsts.erase(It); 662 } 663 664 // Set the new instruction list to be only those we did not remove. 665 Stmt.setInstructions(RemainInsts); 666 } 667 } 668 669 /// Print simplification statistics to @p OS. 670 void SimplifyImpl::printStatistics(llvm::raw_ostream &OS, int Indent) const { 671 OS.indent(Indent) << "Statistics {\n"; 672 OS.indent(Indent + 4) << "Empty domains removed: " << EmptyDomainsRemoved 673 << '\n'; 674 OS.indent(Indent + 4) << "Overwrites removed: " << OverwritesRemoved << '\n'; 675 OS.indent(Indent + 4) << "Partial writes coalesced: " << WritesCoalesced 676 << "\n"; 677 OS.indent(Indent + 4) << "Redundant writes removed: " 678 << RedundantWritesRemoved << "\n"; 679 OS.indent(Indent + 4) << "Accesses with empty domains removed: " 680 << EmptyPartialAccessesRemoved << "\n"; 681 OS.indent(Indent + 4) << "Dead accesses removed: " << DeadAccessesRemoved 682 << '\n'; 683 OS.indent(Indent + 4) << "Dead instructions removed: " 684 << DeadInstructionsRemoved << '\n'; 685 OS.indent(Indent + 4) << "Stmts removed: " << StmtsRemoved << "\n"; 686 OS.indent(Indent) << "}\n"; 687 } 688 689 /// Print the current state of all MemoryAccesses to @p OS. 690 void SimplifyImpl::printAccesses(llvm::raw_ostream &OS, int Indent) const { 691 OS.indent(Indent) << "After accesses {\n"; 692 for (auto &Stmt : *S) { 693 OS.indent(Indent + 4) << Stmt.getBaseName() << "\n"; 694 for (auto *MA : Stmt) 695 MA->print(OS); 696 } 697 OS.indent(Indent) << "}\n"; 698 } 699 700 void SimplifyImpl::run(Scop &S, LoopInfo *LI) { 701 // Must not have run before. 702 assert(!this->S); 703 assert(!isModified()); 704 705 // Prepare processing of this SCoP. 706 this->S = &S; 707 ScopsProcessed[CallNo]++; 708 709 LLVM_DEBUG(dbgs() << "Removing statements that are never executed...\n"); 710 removeEmptyDomainStmts(); 711 712 LLVM_DEBUG(dbgs() << "Removing partial writes that never happen...\n"); 713 removeEmptyPartialAccesses(); 714 715 LLVM_DEBUG(dbgs() << "Removing overwrites...\n"); 716 removeOverwrites(); 717 718 LLVM_DEBUG(dbgs() << "Coalesce partial writes...\n"); 719 coalesceWrites(); 720 721 LLVM_DEBUG(dbgs() << "Removing redundant writes...\n"); 722 removeRedundantWrites(); 723 724 LLVM_DEBUG(dbgs() << "Cleanup unused accesses...\n"); 725 markAndSweep(LI); 726 727 LLVM_DEBUG(dbgs() << "Removing statements without side effects...\n"); 728 removeUnnecessaryStmts(); 729 730 if (isModified()) 731 ScopsModified[CallNo]++; 732 LLVM_DEBUG(dbgs() << "\nFinal Scop:\n"); 733 LLVM_DEBUG(dbgs() << S); 734 735 auto ScopStats = S.getStatistics(); 736 NumValueWrites[CallNo] += ScopStats.NumValueWrites; 737 NumValueWritesInLoops[CallNo] += ScopStats.NumValueWritesInLoops; 738 NumPHIWrites[CallNo] += ScopStats.NumPHIWrites; 739 NumPHIWritesInLoops[CallNo] += ScopStats.NumPHIWritesInLoops; 740 NumSingletonWrites[CallNo] += ScopStats.NumSingletonWrites; 741 NumSingletonWritesInLoops[CallNo] += ScopStats.NumSingletonWritesInLoops; 742 } 743 744 void SimplifyImpl::printScop(raw_ostream &OS, Scop &S) const { 745 assert(&S == this->S && 746 "Can only print analysis for the last processed SCoP"); 747 printStatistics(OS); 748 749 if (!isModified()) { 750 OS << "SCoP could not be simplified\n"; 751 return; 752 } 753 printAccesses(OS); 754 } 755 756 class SimplifyWrapperPass : public ScopPass { 757 public: 758 static char ID; 759 int CallNo; 760 Optional<SimplifyImpl> Impl; 761 762 explicit SimplifyWrapperPass(int CallNo = 0) : ScopPass(ID), CallNo(CallNo) {} 763 764 virtual void getAnalysisUsage(AnalysisUsage &AU) const override { 765 AU.addRequiredTransitive<ScopInfoRegionPass>(); 766 AU.addRequired<LoopInfoWrapperPass>(); 767 AU.setPreservesAll(); 768 } 769 770 virtual bool runOnScop(Scop &S) override { 771 LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 772 773 Impl.emplace(CallNo); 774 Impl->run(S, LI); 775 776 return false; 777 } 778 779 virtual void printScop(raw_ostream &OS, Scop &S) const override { 780 if (Impl) 781 Impl->printScop(OS, S); 782 } 783 784 virtual void releaseMemory() override { Impl.reset(); } 785 }; 786 787 char SimplifyWrapperPass::ID; 788 789 static llvm::PreservedAnalyses 790 runSimplifyUsingNPM(Scop &S, ScopAnalysisManager &SAM, 791 ScopStandardAnalysisResults &SAR, SPMUpdater &U, int CallNo, 792 raw_ostream *OS) { 793 SimplifyImpl Impl(CallNo); 794 Impl.run(S, &SAR.LI); 795 if (OS) { 796 *OS << "Printing analysis 'Polly - Simplify' for region: '" << S.getName() 797 << "' in function '" << S.getFunction().getName() << "':\n"; 798 Impl.printScop(*OS, S); 799 } 800 801 if (!Impl.isModified()) 802 return llvm::PreservedAnalyses::all(); 803 804 PreservedAnalyses PA; 805 PA.preserveSet<AllAnalysesOn<Module>>(); 806 PA.preserveSet<AllAnalysesOn<Function>>(); 807 PA.preserveSet<AllAnalysesOn<Loop>>(); 808 return PA; 809 } 810 811 } // anonymous namespace 812 813 llvm::PreservedAnalyses SimplifyPass::run(Scop &S, ScopAnalysisManager &SAM, 814 ScopStandardAnalysisResults &SAR, 815 SPMUpdater &U) { 816 return runSimplifyUsingNPM(S, SAM, SAR, U, CallNo, nullptr); 817 } 818 819 llvm::PreservedAnalyses 820 SimplifyPrinterPass::run(Scop &S, ScopAnalysisManager &SAM, 821 ScopStandardAnalysisResults &SAR, SPMUpdater &U) { 822 return runSimplifyUsingNPM(S, SAM, SAR, U, CallNo, &OS); 823 } 824 825 SmallVector<MemoryAccess *, 32> polly::getAccessesInOrder(ScopStmt &Stmt) { 826 SmallVector<MemoryAccess *, 32> Accesses; 827 828 for (MemoryAccess *MemAcc : Stmt) 829 if (isImplicitRead(MemAcc)) 830 Accesses.push_back(MemAcc); 831 832 for (MemoryAccess *MemAcc : Stmt) 833 if (isExplicitAccess(MemAcc)) 834 Accesses.push_back(MemAcc); 835 836 for (MemoryAccess *MemAcc : Stmt) 837 if (isImplicitWrite(MemAcc)) 838 Accesses.push_back(MemAcc); 839 840 return Accesses; 841 } 842 843 Pass *polly::createSimplifyWrapperPass(int CallNo) { 844 return new SimplifyWrapperPass(CallNo); 845 } 846 847 INITIALIZE_PASS_BEGIN(SimplifyWrapperPass, "polly-simplify", "Polly - Simplify", 848 false, false) 849 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 850 INITIALIZE_PASS_END(SimplifyWrapperPass, "polly-simplify", "Polly - Simplify", 851 false, false) 852