1 //===- ForwardOpTree.h ------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Move instructions between statements. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "polly/ForwardOpTree.h" 14 #include "polly/Options.h" 15 #include "polly/ScopBuilder.h" 16 #include "polly/ScopInfo.h" 17 #include "polly/ScopPass.h" 18 #include "polly/Support/GICHelper.h" 19 #include "polly/Support/ISLOStream.h" 20 #include "polly/Support/ISLTools.h" 21 #include "polly/Support/VirtualInstruction.h" 22 #include "polly/ZoneAlgo.h" 23 #include "llvm/ADT/STLExtras.h" 24 #include "llvm/ADT/SmallVector.h" 25 #include "llvm/ADT/Statistic.h" 26 #include "llvm/Analysis/LoopInfo.h" 27 #include "llvm/Analysis/ValueTracking.h" 28 #include "llvm/IR/Instruction.h" 29 #include "llvm/IR/Instructions.h" 30 #include "llvm/IR/Value.h" 31 #include "llvm/InitializePasses.h" 32 #include "llvm/Support/Casting.h" 33 #include "llvm/Support/CommandLine.h" 34 #include "llvm/Support/Compiler.h" 35 #include "llvm/Support/Debug.h" 36 #include "llvm/Support/ErrorHandling.h" 37 #include "llvm/Support/raw_ostream.h" 38 #include "isl/ctx.h" 39 #include "isl/isl-noexceptions.h" 40 #include <cassert> 41 #include <memory> 42 43 #define DEBUG_TYPE "polly-optree" 44 45 using namespace llvm; 46 using namespace polly; 47 48 static cl::opt<bool> 49 AnalyzeKnown("polly-optree-analyze-known", 50 cl::desc("Analyze array contents for load forwarding"), 51 cl::cat(PollyCategory), cl::init(true), cl::Hidden); 52 53 static cl::opt<bool> 54 NormalizePHIs("polly-optree-normalize-phi", 55 cl::desc("Replace PHIs by their incoming values"), 56 cl::cat(PollyCategory), cl::init(false), cl::Hidden); 57 58 static cl::opt<unsigned> 59 MaxOps("polly-optree-max-ops", 60 cl::desc("Maximum number of ISL operations to invest for known " 61 "analysis; 0=no limit"), 62 cl::init(1000000), cl::cat(PollyCategory), cl::Hidden); 63 64 STATISTIC(KnownAnalyzed, "Number of successfully analyzed SCoPs"); 65 STATISTIC(KnownOutOfQuota, 66 "Analyses aborted because max_operations was reached"); 67 68 STATISTIC(TotalInstructionsCopied, "Number of copied instructions"); 69 STATISTIC(TotalKnownLoadsForwarded, 70 "Number of forwarded loads because their value was known"); 71 STATISTIC(TotalReloads, "Number of reloaded values"); 72 STATISTIC(TotalReadOnlyCopied, "Number of copied read-only accesses"); 73 STATISTIC(TotalForwardedTrees, "Number of forwarded operand trees"); 74 STATISTIC(TotalModifiedStmts, 75 "Number of statements with at least one forwarded tree"); 76 77 STATISTIC(ScopsModified, "Number of SCoPs with at least one forwarded tree"); 78 79 STATISTIC(NumValueWrites, "Number of scalar value writes after OpTree"); 80 STATISTIC(NumValueWritesInLoops, 81 "Number of scalar value writes nested in affine loops after OpTree"); 82 STATISTIC(NumPHIWrites, "Number of scalar phi writes after OpTree"); 83 STATISTIC(NumPHIWritesInLoops, 84 "Number of scalar phi writes nested in affine loops after OpTree"); 85 STATISTIC(NumSingletonWrites, "Number of singleton writes after OpTree"); 86 STATISTIC(NumSingletonWritesInLoops, 87 "Number of singleton writes nested in affine loops after OpTree"); 88 89 namespace { 90 91 /// The state of whether an operand tree was/can be forwarded. 92 /// 93 /// The items apply to an instructions and its operand tree with the instruction 94 /// as the root element. If the value in question is not an instruction in the 95 /// SCoP, it can be a leaf of an instruction's operand tree. 96 enum ForwardingDecision { 97 /// An uninitialized value. 98 FD_Unknown, 99 100 /// The root instruction or value cannot be forwarded at all. 101 FD_CannotForward, 102 103 /// The root instruction or value can be forwarded as a leaf of a larger 104 /// operand tree. 105 /// It does not make sense to move the value itself, it would just replace it 106 /// by a use of itself. For instance, a constant "5" used in a statement can 107 /// be forwarded, but it would just replace it by the same constant "5". 108 /// However, it makes sense to move as an operand of 109 /// 110 /// %add = add 5, 5 111 /// 112 /// where "5" is moved as part of a larger operand tree. "5" would be placed 113 /// (disregarding for a moment that literal constants don't have a location 114 /// and can be used anywhere) into the same statement as %add would. 115 FD_CanForwardLeaf, 116 117 /// The root instruction can be forwarded and doing so avoids a scalar 118 /// dependency. 119 /// 120 /// This can be either because the operand tree can be moved to the target 121 /// statement, or a memory access is redirected to read from a different 122 /// location. 123 FD_CanForwardProfitably, 124 125 /// A forwarding method cannot be applied to the operand tree. 126 /// The difference to FD_CannotForward is that there might be other methods 127 /// that can handle it. 128 FD_NotApplicable 129 }; 130 131 /// Represents the evaluation of and action to taken when forwarding a value 132 /// from an operand tree. 133 struct ForwardingAction { 134 using KeyTy = std::pair<Value *, ScopStmt *>; 135 136 /// Evaluation of forwarding a value. 137 ForwardingDecision Decision = FD_Unknown; 138 139 /// Callback to execute the forwarding. 140 /// Returning true allows deleting the polly::MemoryAccess if the value is the 141 /// root of the operand tree (and its elimination the reason why the 142 /// forwarding is done). Return false if the MemoryAccess is reused or there 143 /// might be other users of the read accesses. In the letter case the 144 /// polly::SimplifyPass can remove dead MemoryAccesses. 145 std::function<bool()> Execute = []() -> bool { 146 llvm_unreachable("unspecified how to forward"); 147 }; 148 149 /// Other values that need to be forwarded if this action is executed. Their 150 /// actions are executed after this one. 151 SmallVector<KeyTy, 4> Depends; 152 153 /// Named ctor: The method creating this object does not apply to the kind of 154 /// value, but other methods may. 155 static ForwardingAction notApplicable() { 156 ForwardingAction Result; 157 Result.Decision = FD_NotApplicable; 158 return Result; 159 } 160 161 /// Named ctor: The value cannot be forwarded. 162 static ForwardingAction cannotForward() { 163 ForwardingAction Result; 164 Result.Decision = FD_CannotForward; 165 return Result; 166 } 167 168 /// Named ctor: The value can just be used without any preparation. 169 static ForwardingAction triviallyForwardable(bool IsProfitable) { 170 ForwardingAction Result; 171 Result.Decision = 172 IsProfitable ? FD_CanForwardProfitably : FD_CanForwardLeaf; 173 Result.Execute = []() { return true; }; 174 return Result; 175 } 176 177 /// Name ctor: The value can be forwarded by executing an action. 178 static ForwardingAction canForward(std::function<bool()> Execute, 179 ArrayRef<KeyTy> Depends, 180 bool IsProfitable) { 181 ForwardingAction Result; 182 Result.Decision = 183 IsProfitable ? FD_CanForwardProfitably : FD_CanForwardLeaf; 184 Result.Execute = std::move(Execute); 185 Result.Depends.append(Depends.begin(), Depends.end()); 186 return Result; 187 } 188 }; 189 190 /// Implementation of operand tree forwarding for a specific SCoP. 191 /// 192 /// For a statement that requires a scalar value (through a value read 193 /// MemoryAccess), see if its operand can be moved into the statement. If so, 194 /// the MemoryAccess is removed and the all the operand tree instructions are 195 /// moved into the statement. All original instructions are left in the source 196 /// statements. The simplification pass can clean these up. 197 class ForwardOpTreeImpl : ZoneAlgorithm { 198 private: 199 using MemoizationTy = DenseMap<ForwardingAction::KeyTy, ForwardingAction>; 200 201 /// Scope guard to limit the number of isl operations for this pass. 202 IslMaxOperationsGuard &MaxOpGuard; 203 204 /// How many instructions have been copied to other statements. 205 int NumInstructionsCopied = 0; 206 207 /// Number of loads forwarded because their value was known. 208 int NumKnownLoadsForwarded = 0; 209 210 /// Number of values reloaded from known array elements. 211 int NumReloads = 0; 212 213 /// How many read-only accesses have been copied. 214 int NumReadOnlyCopied = 0; 215 216 /// How many operand trees have been forwarded. 217 int NumForwardedTrees = 0; 218 219 /// Number of statements with at least one forwarded operand tree. 220 int NumModifiedStmts = 0; 221 222 /// Whether we carried out at least one change to the SCoP. 223 bool Modified = false; 224 225 /// Cache of how to forward values. 226 /// The key of this map is the llvm::Value to be forwarded and the 227 /// polly::ScopStmt it is forwarded from. This is because the same llvm::Value 228 /// can evaluate differently depending on where it is evaluate. For instance, 229 /// a synthesizable Scev represents a recurrence with an loop but the loop's 230 /// exit value if evaluated after the loop. 231 /// The cached results are only valid for the current TargetStmt. 232 /// CHECKME: ScalarEvolution::getScevAtScope should take care for getting the 233 /// exit value when instantiated outside of the loop. The primary concern is 234 /// ambiguity when crossing PHI nodes, which currently is not supported. 235 MemoizationTy ForwardingActions; 236 237 /// Contains the zones where array elements are known to contain a specific 238 /// value. 239 /// { [Element[] -> Zone[]] -> ValInst[] } 240 /// @see computeKnown() 241 isl::union_map Known; 242 243 /// Translator for newly introduced ValInsts to already existing ValInsts such 244 /// that new introduced load instructions can reuse the Known analysis of its 245 /// original load. { ValInst[] -> ValInst[] } 246 isl::union_map Translator; 247 248 /// Get list of array elements that do contain the same ValInst[] at Domain[]. 249 /// 250 /// @param ValInst { Domain[] -> ValInst[] } 251 /// The values for which we search for alternative locations, 252 /// per statement instance. 253 /// 254 /// @return { Domain[] -> Element[] } 255 /// For each statement instance, the array elements that contain the 256 /// same ValInst. 257 isl::union_map findSameContentElements(isl::union_map ValInst) { 258 assert(!ValInst.is_single_valued().is_false()); 259 260 // { Domain[] } 261 isl::union_set Domain = ValInst.domain(); 262 263 // { Domain[] -> Scatter[] } 264 isl::union_map Schedule = getScatterFor(Domain); 265 266 // { Element[] -> [Scatter[] -> ValInst[]] } 267 isl::union_map MustKnownCurried = 268 convertZoneToTimepoints(Known, isl::dim::in, false, true).curry(); 269 270 // { [Domain[] -> ValInst[]] -> Scatter[] } 271 isl::union_map DomValSched = ValInst.domain_map().apply_range(Schedule); 272 273 // { [Scatter[] -> ValInst[]] -> [Domain[] -> ValInst[]] } 274 isl::union_map SchedValDomVal = 275 DomValSched.range_product(ValInst.range_map()).reverse(); 276 277 // { Element[] -> [Domain[] -> ValInst[]] } 278 isl::union_map MustKnownInst = MustKnownCurried.apply_range(SchedValDomVal); 279 280 // { Domain[] -> Element[] } 281 isl::union_map MustKnownMap = 282 MustKnownInst.uncurry().domain().unwrap().reverse(); 283 simplify(MustKnownMap); 284 285 return MustKnownMap; 286 } 287 288 /// Find a single array element for each statement instance, within a single 289 /// array. 290 /// 291 /// @param MustKnown { Domain[] -> Element[] } 292 /// Set of candidate array elements. 293 /// @param Domain { Domain[] } 294 /// The statement instance for which we need elements for. 295 /// 296 /// @return { Domain[] -> Element[] } 297 /// For each statement instance, an array element out of @p MustKnown. 298 /// All array elements must be in the same array (Polly does not yet 299 /// support reading from different accesses using the same 300 /// MemoryAccess). If no mapping for all of @p Domain exists, returns 301 /// null. 302 isl::map singleLocation(isl::union_map MustKnown, isl::set Domain) { 303 // { Domain[] -> Element[] } 304 isl::map Result; 305 306 // MemoryAccesses can read only elements from a single array 307 // (i.e. not: { Dom[0] -> A[0]; Dom[1] -> B[1] }). 308 // Look through all spaces until we find one that contains at least the 309 // wanted statement instance.s 310 for (isl::map Map : MustKnown.get_map_list()) { 311 // Get the array this is accessing. 312 isl::id ArrayId = Map.get_tuple_id(isl::dim::out); 313 ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(ArrayId.get_user()); 314 315 // No support for generation of indirect array accesses. 316 if (SAI->getBasePtrOriginSAI()) 317 continue; 318 319 // Determine whether this map contains all wanted values. 320 isl::set MapDom = Map.domain(); 321 if (!Domain.is_subset(MapDom).is_true()) 322 continue; 323 324 // There might be multiple array elements that contain the same value, but 325 // choose only one of them. lexmin is used because it returns a one-value 326 // mapping, we do not care about which one. 327 // TODO: Get the simplest access function. 328 Result = Map.lexmin(); 329 break; 330 } 331 332 return Result; 333 } 334 335 public: 336 ForwardOpTreeImpl(Scop *S, LoopInfo *LI, IslMaxOperationsGuard &MaxOpGuard) 337 : ZoneAlgorithm("polly-optree", S, LI), MaxOpGuard(MaxOpGuard) {} 338 339 /// Compute the zones of known array element contents. 340 /// 341 /// @return True if the computed #Known is usable. 342 bool computeKnownValues() { 343 isl::union_map MustKnown, KnownFromLoad, KnownFromInit; 344 345 // Check that nothing strange occurs. 346 collectCompatibleElts(); 347 348 { 349 IslQuotaScope QuotaScope = MaxOpGuard.enter(); 350 351 computeCommon(); 352 if (NormalizePHIs) 353 computeNormalizedPHIs(); 354 Known = computeKnown(true, true); 355 356 // Preexisting ValInsts use the known content analysis of themselves. 357 Translator = makeIdentityMap(Known.range(), false); 358 } 359 360 if (!Known || !Translator || !NormalizeMap) { 361 assert(isl_ctx_last_error(IslCtx.get()) == isl_error_quota); 362 Known = nullptr; 363 Translator = nullptr; 364 NormalizeMap = nullptr; 365 LLVM_DEBUG(dbgs() << "Known analysis exceeded max_operations\n"); 366 return false; 367 } 368 369 KnownAnalyzed++; 370 LLVM_DEBUG(dbgs() << "All known: " << Known << "\n"); 371 372 return true; 373 } 374 375 void printStatistics(raw_ostream &OS, int Indent = 0) { 376 OS.indent(Indent) << "Statistics {\n"; 377 OS.indent(Indent + 4) << "Instructions copied: " << NumInstructionsCopied 378 << '\n'; 379 OS.indent(Indent + 4) << "Known loads forwarded: " << NumKnownLoadsForwarded 380 << '\n'; 381 OS.indent(Indent + 4) << "Reloads: " << NumReloads << '\n'; 382 OS.indent(Indent + 4) << "Read-only accesses copied: " << NumReadOnlyCopied 383 << '\n'; 384 OS.indent(Indent + 4) << "Operand trees forwarded: " << NumForwardedTrees 385 << '\n'; 386 OS.indent(Indent + 4) << "Statements with forwarded operand trees: " 387 << NumModifiedStmts << '\n'; 388 OS.indent(Indent) << "}\n"; 389 } 390 391 void printStatements(raw_ostream &OS, int Indent = 0) const { 392 OS.indent(Indent) << "After statements {\n"; 393 for (auto &Stmt : *S) { 394 OS.indent(Indent + 4) << Stmt.getBaseName() << "\n"; 395 for (auto *MA : Stmt) 396 MA->print(OS); 397 398 OS.indent(Indent + 12); 399 Stmt.printInstructions(OS); 400 } 401 OS.indent(Indent) << "}\n"; 402 } 403 404 /// Create a new MemoryAccess of type read and MemoryKind::Array. 405 /// 406 /// @param Stmt The statement in which the access occurs. 407 /// @param LI The instruction that does the access. 408 /// @param AccessRelation The array element that each statement instance 409 /// accesses. 410 /// 411 /// @param The newly created access. 412 MemoryAccess *makeReadArrayAccess(ScopStmt *Stmt, LoadInst *LI, 413 isl::map AccessRelation) { 414 isl::id ArrayId = AccessRelation.get_tuple_id(isl::dim::out); 415 ScopArrayInfo *SAI = reinterpret_cast<ScopArrayInfo *>(ArrayId.get_user()); 416 417 // Create a dummy SCEV access, to be replaced anyway. 418 SmallVector<const SCEV *, 4> Sizes; 419 Sizes.reserve(SAI->getNumberOfDimensions()); 420 SmallVector<const SCEV *, 4> Subscripts; 421 Subscripts.reserve(SAI->getNumberOfDimensions()); 422 for (unsigned i = 0; i < SAI->getNumberOfDimensions(); i += 1) { 423 Sizes.push_back(SAI->getDimensionSize(i)); 424 Subscripts.push_back(nullptr); 425 } 426 427 MemoryAccess *Access = 428 new MemoryAccess(Stmt, LI, MemoryAccess::READ, SAI->getBasePtr(), 429 LI->getType(), true, {}, Sizes, LI, MemoryKind::Array); 430 S->addAccessFunction(Access); 431 Stmt->addAccess(Access, true); 432 433 Access->setNewAccessRelation(AccessRelation); 434 435 return Access; 436 } 437 438 /// Forward a load by reading from an array element that contains the same 439 /// value. Typically the location it was loaded from. 440 /// 441 /// @param TargetStmt The statement the operand tree will be copied to. 442 /// @param Inst The (possibly speculatable) instruction to forward. 443 /// @param UseStmt The statement that uses @p Inst. 444 /// @param UseLoop The loop @p Inst is used in. 445 /// @param DefStmt The statement @p Inst is defined in. 446 /// @param DefLoop The loop which contains @p Inst. 447 /// 448 /// @return A ForwardingAction object describing the feasibility and 449 /// profitability evaluation and the callback carrying-out the value 450 /// forwarding. 451 ForwardingAction forwardKnownLoad(ScopStmt *TargetStmt, Instruction *Inst, 452 ScopStmt *UseStmt, Loop *UseLoop, 453 ScopStmt *DefStmt, Loop *DefLoop) { 454 // Cannot do anything without successful known analysis. 455 if (Known.is_null() || Translator.is_null() || 456 MaxOpGuard.hasQuotaExceeded()) 457 return ForwardingAction::notApplicable(); 458 459 LoadInst *LI = dyn_cast<LoadInst>(Inst); 460 if (!LI) 461 return ForwardingAction::notApplicable(); 462 463 ForwardingDecision OpDecision = 464 forwardTree(TargetStmt, LI->getPointerOperand(), DefStmt, DefLoop); 465 switch (OpDecision) { 466 case FD_CanForwardProfitably: 467 case FD_CanForwardLeaf: 468 break; 469 case FD_CannotForward: 470 return ForwardingAction::cannotForward(); 471 default: 472 llvm_unreachable("Shouldn't return this"); 473 } 474 475 MemoryAccess *Access = TargetStmt->getArrayAccessOrNULLFor(LI); 476 if (Access) { 477 // If the load is already in the statement, no forwarding is necessary. 478 // However, it might happen that the LoadInst is already present in the 479 // statement's instruction list. In that case we do as follows: 480 // - For the evaluation, we can trivially forward it as it is 481 // benefit of forwarding an already present instruction. 482 // - For the execution, prepend the instruction (to make it 483 // available to all instructions following in the instruction list), but 484 // do not add another MemoryAccess. 485 auto ExecAction = [this, TargetStmt, LI, Access]() -> bool { 486 TargetStmt->prependInstruction(LI); 487 LLVM_DEBUG( 488 dbgs() << " forwarded known load with preexisting MemoryAccess" 489 << Access << "\n"); 490 (void)Access; 491 492 NumKnownLoadsForwarded++; 493 TotalKnownLoadsForwarded++; 494 return true; 495 }; 496 return ForwardingAction::canForward( 497 ExecAction, {{LI->getPointerOperand(), DefStmt}}, true); 498 } 499 500 // Allow the following Isl calculations (until we return the 501 // ForwardingAction, excluding the code inside the lambda that will be 502 // executed later) to fail. 503 IslQuotaScope QuotaScope = MaxOpGuard.enter(); 504 505 // { DomainDef[] -> ValInst[] } 506 isl::map ExpectedVal = makeValInst(Inst, UseStmt, UseLoop); 507 assert(!isNormalized(ExpectedVal).is_false() && 508 "LoadInsts are always normalized"); 509 510 // { DomainUse[] -> DomainTarget[] } 511 isl::map UseToTarget = getDefToTarget(UseStmt, TargetStmt); 512 513 // { DomainTarget[] -> ValInst[] } 514 isl::map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget); 515 isl::union_map TranslatedExpectedVal = 516 isl::union_map(TargetExpectedVal).apply_range(Translator); 517 518 // { DomainTarget[] -> Element[] } 519 isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal); 520 521 isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt)); 522 if (!SameVal) 523 return ForwardingAction::notApplicable(); 524 525 LLVM_DEBUG(dbgs() << " expected values where " << TargetExpectedVal 526 << "\n"); 527 LLVM_DEBUG(dbgs() << " candidate elements where " << Candidates 528 << "\n"); 529 530 // { ValInst[] } 531 isl::space ValInstSpace = ExpectedVal.get_space().range(); 532 533 // After adding a new load to the SCoP, also update the Known content 534 // about it. The new load will have a known ValInst of 535 // { [DomainTarget[] -> Value[]] } 536 // but which -- because it is a copy of it -- has same value as the 537 // { [DomainDef[] -> Value[]] } 538 // that it replicates. Instead of cloning the known content of 539 // [DomainDef[] -> Value[]] 540 // for DomainTarget[], we add a 'translator' that maps 541 // [DomainTarget[] -> Value[]] to [DomainDef[] -> Value[]] 542 // before comparing to the known content. 543 // TODO: 'Translator' could also be used to map PHINodes to their incoming 544 // ValInsts. 545 isl::map LocalTranslator; 546 if (!ValInstSpace.is_wrapping().is_false()) { 547 // { DefDomain[] -> Value[] } 548 isl::map ValInsts = ExpectedVal.range().unwrap(); 549 550 // { DefDomain[] } 551 isl::set DefDomain = ValInsts.domain(); 552 553 // { Value[] } 554 isl::space ValSpace = ValInstSpace.unwrap().range(); 555 556 // { Value[] -> Value[] } 557 isl::map ValToVal = 558 isl::map::identity(ValSpace.map_from_domain_and_range(ValSpace)); 559 560 // { DomainDef[] -> DomainTarget[] } 561 isl::map DefToTarget = getDefToTarget(DefStmt, TargetStmt); 562 563 // { [TargetDomain[] -> Value[]] -> [DefDomain[] -> Value] } 564 LocalTranslator = DefToTarget.reverse().product(ValToVal); 565 LLVM_DEBUG(dbgs() << " local translator is " << LocalTranslator 566 << "\n"); 567 568 if (!LocalTranslator) 569 return ForwardingAction::notApplicable(); 570 } 571 572 auto ExecAction = [this, TargetStmt, LI, SameVal, 573 LocalTranslator]() -> bool { 574 TargetStmt->prependInstruction(LI); 575 MemoryAccess *Access = makeReadArrayAccess(TargetStmt, LI, SameVal); 576 LLVM_DEBUG(dbgs() << " forwarded known load with new MemoryAccess" 577 << Access << "\n"); 578 (void)Access; 579 580 if (LocalTranslator) 581 Translator = Translator.add_map(LocalTranslator); 582 583 NumKnownLoadsForwarded++; 584 TotalKnownLoadsForwarded++; 585 return true; 586 }; 587 return ForwardingAction::canForward( 588 ExecAction, {{LI->getPointerOperand(), DefStmt}}, true); 589 } 590 591 /// Forward a scalar by redirecting the access to an array element that stores 592 /// the same value. 593 /// 594 /// @param TargetStmt The statement the operand tree will be copied to. 595 /// @param Inst The scalar to forward. 596 /// @param UseStmt The statement that uses @p Inst. 597 /// @param UseLoop The loop @p Inst is used in. 598 /// @param DefStmt The statement @p Inst is defined in. 599 /// @param DefLoop The loop which contains @p Inst. 600 /// 601 /// @return A ForwardingAction object describing the feasibility and 602 /// profitability evaluation and the callback carrying-out the value 603 /// forwarding. 604 ForwardingAction reloadKnownContent(ScopStmt *TargetStmt, Instruction *Inst, 605 ScopStmt *UseStmt, Loop *UseLoop, 606 ScopStmt *DefStmt, Loop *DefLoop) { 607 // Cannot do anything without successful known analysis. 608 if (Known.is_null() || Translator.is_null() || 609 MaxOpGuard.hasQuotaExceeded()) 610 return ForwardingAction::notApplicable(); 611 612 // Don't spend too much time analyzing whether it can be reloaded. 613 IslQuotaScope QuotaScope = MaxOpGuard.enter(); 614 615 // { DomainDef[] -> ValInst[] } 616 isl::union_map ExpectedVal = makeNormalizedValInst(Inst, UseStmt, UseLoop); 617 618 // { DomainUse[] -> DomainTarget[] } 619 isl::map UseToTarget = getDefToTarget(UseStmt, TargetStmt); 620 621 // { DomainTarget[] -> ValInst[] } 622 isl::union_map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget); 623 isl::union_map TranslatedExpectedVal = 624 TargetExpectedVal.apply_range(Translator); 625 626 // { DomainTarget[] -> Element[] } 627 isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal); 628 629 isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt)); 630 simplify(SameVal); 631 if (!SameVal) 632 return ForwardingAction::notApplicable(); 633 634 auto ExecAction = [this, TargetStmt, Inst, SameVal]() { 635 MemoryAccess *Access = TargetStmt->lookupInputAccessOf(Inst); 636 if (!Access) 637 Access = TargetStmt->ensureValueRead(Inst); 638 Access->setNewAccessRelation(SameVal); 639 640 TotalReloads++; 641 NumReloads++; 642 return false; 643 }; 644 645 return ForwardingAction::canForward(ExecAction, {}, true); 646 } 647 648 /// Forwards a speculatively executable instruction. 649 /// 650 /// @param TargetStmt The statement the operand tree will be copied to. 651 /// @param UseInst The (possibly speculatable) instruction to forward. 652 /// @param DefStmt The statement @p UseInst is defined in. 653 /// @param DefLoop The loop which contains @p UseInst. 654 /// 655 /// @return A ForwardingAction object describing the feasibility and 656 /// profitability evaluation and the callback carrying-out the value 657 /// forwarding. 658 ForwardingAction forwardSpeculatable(ScopStmt *TargetStmt, 659 Instruction *UseInst, ScopStmt *DefStmt, 660 Loop *DefLoop) { 661 // PHIs, unless synthesizable, are not yet supported. 662 if (isa<PHINode>(UseInst)) 663 return ForwardingAction::notApplicable(); 664 665 // Compatible instructions must satisfy the following conditions: 666 // 1. Idempotent (instruction will be copied, not moved; although its 667 // original instance might be removed by simplification) 668 // 2. Not access memory (There might be memory writes between) 669 // 3. Not cause undefined behaviour (we might copy to a location when the 670 // original instruction was no executed; this is currently not possible 671 // because we do not forward PHINodes) 672 // 4. Not leak memory if executed multiple times (i.e. malloc) 673 // 674 // Instruction::mayHaveSideEffects is not sufficient because it considers 675 // malloc to not have side-effects. llvm::isSafeToSpeculativelyExecute is 676 // not sufficient because it allows memory accesses. 677 if (mayBeMemoryDependent(*UseInst)) 678 return ForwardingAction::notApplicable(); 679 680 SmallVector<ForwardingAction::KeyTy, 4> Depends; 681 Depends.reserve(UseInst->getNumOperands()); 682 for (Value *OpVal : UseInst->operand_values()) { 683 ForwardingDecision OpDecision = 684 forwardTree(TargetStmt, OpVal, DefStmt, DefLoop); 685 switch (OpDecision) { 686 case FD_CannotForward: 687 return ForwardingAction::cannotForward(); 688 689 case FD_CanForwardLeaf: 690 case FD_CanForwardProfitably: 691 Depends.emplace_back(OpVal, DefStmt); 692 break; 693 694 case FD_NotApplicable: 695 case FD_Unknown: 696 llvm_unreachable( 697 "forwardTree should never return FD_NotApplicable/FD_Unknown"); 698 } 699 } 700 701 auto ExecAction = [this, TargetStmt, UseInst]() { 702 // To ensure the right order, prepend this instruction before its 703 // operands. This ensures that its operands are inserted before the 704 // instruction using them. 705 TargetStmt->prependInstruction(UseInst); 706 NumInstructionsCopied++; 707 TotalInstructionsCopied++; 708 return true; 709 }; 710 return ForwardingAction::canForward(ExecAction, Depends, true); 711 } 712 713 /// Determines whether an operand tree can be forwarded and returns 714 /// instructions how to do so in the form of a ForwardingAction object. 715 /// 716 /// @param TargetStmt The statement the operand tree will be copied to. 717 /// @param UseVal The value (usually an instruction) which is root of an 718 /// operand tree. 719 /// @param UseStmt The statement that uses @p UseVal. 720 /// @param UseLoop The loop @p UseVal is used in. 721 /// 722 /// @return A ForwardingAction object describing the feasibility and 723 /// profitability evaluation and the callback carrying-out the value 724 /// forwarding. 725 ForwardingAction forwardTreeImpl(ScopStmt *TargetStmt, Value *UseVal, 726 ScopStmt *UseStmt, Loop *UseLoop) { 727 ScopStmt *DefStmt = nullptr; 728 Loop *DefLoop = nullptr; 729 730 // { DefDomain[] -> TargetDomain[] } 731 isl::map DefToTarget; 732 733 VirtualUse VUse = VirtualUse::create(UseStmt, UseLoop, UseVal, true); 734 switch (VUse.getKind()) { 735 case VirtualUse::Constant: 736 case VirtualUse::Block: 737 case VirtualUse::Hoisted: 738 // These can be used anywhere without special considerations. 739 return ForwardingAction::triviallyForwardable(false); 740 741 case VirtualUse::Synthesizable: { 742 // Check if the value is synthesizable at the new location as well. This 743 // might be possible when leaving a loop for which ScalarEvolution is 744 // unable to derive the exit value for. 745 // TODO: If there is a LCSSA PHI at the loop exit, use that one. 746 // If the SCEV contains a SCEVAddRecExpr, we currently depend on that we 747 // do not forward past its loop header. This would require us to use a 748 // previous loop induction variable instead the current one. We currently 749 // do not allow forwarding PHI nodes, thus this should never occur (the 750 // only exception where no phi is necessary being an unreachable loop 751 // without edge from the outside). 752 VirtualUse TargetUse = VirtualUse::create( 753 S, TargetStmt, TargetStmt->getSurroundingLoop(), UseVal, true); 754 if (TargetUse.getKind() == VirtualUse::Synthesizable) 755 return ForwardingAction::triviallyForwardable(false); 756 757 LLVM_DEBUG( 758 dbgs() << " Synthesizable would not be synthesizable anymore: " 759 << *UseVal << "\n"); 760 return ForwardingAction::cannotForward(); 761 } 762 763 case VirtualUse::ReadOnly: { 764 // If we model read-only scalars, we need to create a MemoryAccess for it. 765 auto ExecAction = [this, TargetStmt, UseVal]() { 766 if (ModelReadOnlyScalars) 767 TargetStmt->ensureValueRead(UseVal); 768 769 NumReadOnlyCopied++; 770 TotalReadOnlyCopied++; 771 772 // Note that we cannot return true here. With a operand tree 773 // depth of 0, UseVal is the use in TargetStmt that we try to replace. 774 // With -polly-analyze-read-only-scalars=true we would ensure the 775 // existence of a MemoryAccess (which already exists for a leaf) and be 776 // removed again by tryForwardTree because it's goal is to remove this 777 // scalar MemoryAccess. It interprets FD_CanForwardTree as the 778 // permission to do so. 779 return false; 780 }; 781 return ForwardingAction::canForward(ExecAction, {}, false); 782 } 783 784 case VirtualUse::Intra: 785 // Knowing that UseStmt and DefStmt are the same statement instance, just 786 // reuse the information about UseStmt for DefStmt 787 DefStmt = UseStmt; 788 789 LLVM_FALLTHROUGH; 790 case VirtualUse::Inter: 791 Instruction *Inst = cast<Instruction>(UseVal); 792 793 if (!DefStmt) { 794 DefStmt = S->getStmtFor(Inst); 795 if (!DefStmt) 796 return ForwardingAction::cannotForward(); 797 } 798 799 DefLoop = LI->getLoopFor(Inst->getParent()); 800 801 ForwardingAction SpeculativeResult = 802 forwardSpeculatable(TargetStmt, Inst, DefStmt, DefLoop); 803 if (SpeculativeResult.Decision != FD_NotApplicable) 804 return SpeculativeResult; 805 806 ForwardingAction KnownResult = forwardKnownLoad( 807 TargetStmt, Inst, UseStmt, UseLoop, DefStmt, DefLoop); 808 if (KnownResult.Decision != FD_NotApplicable) 809 return KnownResult; 810 811 ForwardingAction ReloadResult = reloadKnownContent( 812 TargetStmt, Inst, UseStmt, UseLoop, DefStmt, DefLoop); 813 if (ReloadResult.Decision != FD_NotApplicable) 814 return ReloadResult; 815 816 // When no method is found to forward the operand tree, we effectively 817 // cannot handle it. 818 LLVM_DEBUG(dbgs() << " Cannot forward instruction: " << *Inst << "\n"); 819 return ForwardingAction::cannotForward(); 820 } 821 822 llvm_unreachable("Case unhandled"); 823 } 824 825 /// Determines whether an operand tree can be forwarded. Previous evaluations 826 /// are cached. 827 /// 828 /// @param TargetStmt The statement the operand tree will be copied to. 829 /// @param UseVal The value (usually an instruction) which is root of an 830 /// operand tree. 831 /// @param UseStmt The statement that uses @p UseVal. 832 /// @param UseLoop The loop @p UseVal is used in. 833 /// 834 /// @return FD_CannotForward if @p UseVal cannot be forwarded. 835 /// FD_CanForwardLeaf if @p UseVal is forwardable, but not 836 /// profitable. 837 /// FD_CanForwardProfitably if @p UseVal is forwardable and useful to 838 /// do. 839 ForwardingDecision forwardTree(ScopStmt *TargetStmt, Value *UseVal, 840 ScopStmt *UseStmt, Loop *UseLoop) { 841 // Lookup any cached evaluation. 842 auto It = ForwardingActions.find({UseVal, UseStmt}); 843 if (It != ForwardingActions.end()) 844 return It->second.Decision; 845 846 // Make a new evaluation. 847 ForwardingAction Action = 848 forwardTreeImpl(TargetStmt, UseVal, UseStmt, UseLoop); 849 ForwardingDecision Result = Action.Decision; 850 851 // Remember for the next time. 852 assert(!ForwardingActions.count({UseVal, UseStmt}) && 853 "circular dependency?"); 854 ForwardingActions.insert({{UseVal, UseStmt}, std::move(Action)}); 855 856 return Result; 857 } 858 859 /// Forward an operand tree using cached actions. 860 /// 861 /// @param Stmt Statement the operand tree is moved into. 862 /// @param UseVal Root of the operand tree within @p Stmt. 863 /// @param RA The MemoryAccess for @p UseVal that the forwarding intends 864 /// to remove. 865 void applyForwardingActions(ScopStmt *Stmt, Value *UseVal, MemoryAccess *RA) { 866 using ChildItTy = 867 decltype(std::declval<ForwardingAction>().Depends.begin()); 868 using EdgeTy = std::pair<ForwardingAction *, ChildItTy>; 869 870 DenseSet<ForwardingAction::KeyTy> Visited; 871 SmallVector<EdgeTy, 32> Stack; 872 SmallVector<ForwardingAction *, 32> Ordered; 873 874 // Seed the tree search using the root value. 875 assert(ForwardingActions.count({UseVal, Stmt})); 876 ForwardingAction *RootAction = &ForwardingActions[{UseVal, Stmt}]; 877 Stack.emplace_back(RootAction, RootAction->Depends.begin()); 878 879 // Compute the postorder of the operand tree: all operands of an instruction 880 // must be visited before the instruction itself. As an additional 881 // requirement, the topological ordering must be 'compact': Any subtree node 882 // must not be interleaved with nodes from a non-shared subtree. This is 883 // because the same llvm::Instruction can be materialized multiple times as 884 // used at different ScopStmts which might be different values. Intersecting 885 // these lifetimes may result in miscompilations. 886 // FIXME: Intersecting lifetimes might still be possible for the roots 887 // themselves, since instructions are just prepended to a ScopStmt's 888 // instruction list. 889 while (!Stack.empty()) { 890 EdgeTy &Top = Stack.back(); 891 ForwardingAction *TopAction = Top.first; 892 ChildItTy &TopEdge = Top.second; 893 894 if (TopEdge == TopAction->Depends.end()) { 895 // Postorder sorting 896 Ordered.push_back(TopAction); 897 Stack.pop_back(); 898 continue; 899 } 900 ForwardingAction::KeyTy Key = *TopEdge; 901 902 // Next edge for this level 903 ++TopEdge; 904 905 auto VisitIt = Visited.insert(Key); 906 if (!VisitIt.second) 907 continue; 908 909 assert(ForwardingActions.count(Key) && 910 "Must not insert new actions during execution phase"); 911 ForwardingAction *ChildAction = &ForwardingActions[Key]; 912 Stack.emplace_back(ChildAction, ChildAction->Depends.begin()); 913 } 914 915 // Actually, we need the reverse postorder because actions prepend new 916 // instructions. Therefore, the first one will always be the action for the 917 // operand tree's root. 918 assert(Ordered.back() == RootAction); 919 if (RootAction->Execute()) 920 Stmt->removeSingleMemoryAccess(RA); 921 Ordered.pop_back(); 922 for (auto DepAction : reverse(Ordered)) { 923 assert(DepAction->Decision != FD_Unknown && 924 DepAction->Decision != FD_CannotForward); 925 assert(DepAction != RootAction); 926 DepAction->Execute(); 927 } 928 } 929 930 /// Try to forward an operand tree rooted in @p RA. 931 bool tryForwardTree(MemoryAccess *RA) { 932 assert(RA->isLatestScalarKind()); 933 LLVM_DEBUG(dbgs() << "Trying to forward operand tree " << RA << "...\n"); 934 935 ScopStmt *Stmt = RA->getStatement(); 936 Loop *InLoop = Stmt->getSurroundingLoop(); 937 938 isl::map TargetToUse; 939 if (!Known.is_null()) { 940 isl::space DomSpace = Stmt->getDomainSpace(); 941 TargetToUse = 942 isl::map::identity(DomSpace.map_from_domain_and_range(DomSpace)); 943 } 944 945 ForwardingDecision Assessment = 946 forwardTree(Stmt, RA->getAccessValue(), Stmt, InLoop); 947 948 // If considered feasible and profitable, forward it. 949 bool Changed = false; 950 if (Assessment == FD_CanForwardProfitably) { 951 applyForwardingActions(Stmt, RA->getAccessValue(), RA); 952 Changed = true; 953 } 954 955 ForwardingActions.clear(); 956 return Changed; 957 } 958 959 /// Return which SCoP this instance is processing. 960 Scop *getScop() const { return S; } 961 962 /// Run the algorithm: Use value read accesses as operand tree roots and try 963 /// to forward them into the statement. 964 bool forwardOperandTrees() { 965 for (ScopStmt &Stmt : *S) { 966 bool StmtModified = false; 967 968 // Because we are modifying the MemoryAccess list, collect them first to 969 // avoid iterator invalidation. 970 SmallVector<MemoryAccess *, 16> Accs; 971 for (MemoryAccess *RA : Stmt) { 972 if (!RA->isRead()) 973 continue; 974 if (!RA->isLatestScalarKind()) 975 continue; 976 977 Accs.push_back(RA); 978 } 979 980 for (MemoryAccess *RA : Accs) { 981 if (tryForwardTree(RA)) { 982 Modified = true; 983 StmtModified = true; 984 NumForwardedTrees++; 985 TotalForwardedTrees++; 986 } 987 } 988 989 if (StmtModified) { 990 NumModifiedStmts++; 991 TotalModifiedStmts++; 992 } 993 } 994 995 if (Modified) { 996 ScopsModified++; 997 S->realignParams(); 998 } 999 return Modified; 1000 } 1001 1002 /// Print the pass result, performed transformations and the SCoP after the 1003 /// transformation. 1004 void print(raw_ostream &OS, int Indent = 0) { 1005 printStatistics(OS, Indent); 1006 1007 if (!Modified) { 1008 // This line can easily be checked in regression tests. 1009 OS << "ForwardOpTree executed, but did not modify anything\n"; 1010 return; 1011 } 1012 1013 printStatements(OS, Indent); 1014 } 1015 }; 1016 1017 /// Pass that redirects scalar reads to array elements that are known to contain 1018 /// the same value. 1019 /// 1020 /// This reduces the number of scalar accesses and therefore potentially 1021 /// increases the freedom of the scheduler. In the ideal case, all reads of a 1022 /// scalar definition are redirected (We currently do not care about removing 1023 /// the write in this case). This is also useful for the main DeLICM pass as 1024 /// there are less scalars to be mapped. 1025 class ForwardOpTree : public ScopPass { 1026 private: 1027 /// The pass implementation, also holding per-scop data. 1028 std::unique_ptr<ForwardOpTreeImpl> Impl; 1029 1030 public: 1031 static char ID; 1032 1033 explicit ForwardOpTree() : ScopPass(ID) {} 1034 ForwardOpTree(const ForwardOpTree &) = delete; 1035 ForwardOpTree &operator=(const ForwardOpTree &) = delete; 1036 1037 void getAnalysisUsage(AnalysisUsage &AU) const override { 1038 AU.addRequiredTransitive<ScopInfoRegionPass>(); 1039 AU.addRequired<LoopInfoWrapperPass>(); 1040 AU.setPreservesAll(); 1041 } 1042 1043 bool runOnScop(Scop &S) override { 1044 // Free resources for previous SCoP's computation, if not yet done. 1045 releaseMemory(); 1046 1047 LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 1048 1049 { 1050 IslMaxOperationsGuard MaxOpGuard(S.getIslCtx().get(), MaxOps, false); 1051 Impl = std::make_unique<ForwardOpTreeImpl>(&S, &LI, MaxOpGuard); 1052 1053 if (AnalyzeKnown) { 1054 LLVM_DEBUG(dbgs() << "Prepare forwarders...\n"); 1055 Impl->computeKnownValues(); 1056 } 1057 1058 LLVM_DEBUG(dbgs() << "Forwarding operand trees...\n"); 1059 Impl->forwardOperandTrees(); 1060 1061 if (MaxOpGuard.hasQuotaExceeded()) { 1062 LLVM_DEBUG(dbgs() << "Not all operations completed because of " 1063 "max_operations exceeded\n"); 1064 KnownOutOfQuota++; 1065 } 1066 } 1067 1068 LLVM_DEBUG(dbgs() << "\nFinal Scop:\n"); 1069 LLVM_DEBUG(dbgs() << S); 1070 1071 // Update statistics 1072 auto ScopStats = S.getStatistics(); 1073 NumValueWrites += ScopStats.NumValueWrites; 1074 NumValueWritesInLoops += ScopStats.NumValueWritesInLoops; 1075 NumPHIWrites += ScopStats.NumPHIWrites; 1076 NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops; 1077 NumSingletonWrites += ScopStats.NumSingletonWrites; 1078 NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops; 1079 1080 return false; 1081 } 1082 1083 void printScop(raw_ostream &OS, Scop &S) const override { 1084 if (!Impl) 1085 return; 1086 1087 assert(Impl->getScop() == &S); 1088 Impl->print(OS); 1089 } 1090 1091 void releaseMemory() override { Impl.reset(); } 1092 }; // class ForwardOpTree 1093 1094 char ForwardOpTree::ID; 1095 } // namespace 1096 1097 ScopPass *polly::createForwardOpTreePass() { return new ForwardOpTree(); } 1098 1099 INITIALIZE_PASS_BEGIN(ForwardOpTree, "polly-optree", 1100 "Polly - Forward operand tree", false, false) 1101 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 1102 INITIALIZE_PASS_END(ForwardOpTree, "polly-optree", 1103 "Polly - Forward operand tree", false, false) 1104