1 //===- ForwardOpTree.h ------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Move instructions between statements. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "polly/ForwardOpTree.h" 14 #include "polly/Options.h" 15 #include "polly/ScopBuilder.h" 16 #include "polly/ScopInfo.h" 17 #include "polly/ScopPass.h" 18 #include "polly/Support/GICHelper.h" 19 #include "polly/Support/ISLOStream.h" 20 #include "polly/Support/ISLTools.h" 21 #include "polly/Support/VirtualInstruction.h" 22 #include "polly/ZoneAlgo.h" 23 #include "llvm/ADT/STLExtras.h" 24 #include "llvm/ADT/SmallVector.h" 25 #include "llvm/ADT/Statistic.h" 26 #include "llvm/Analysis/LoopInfo.h" 27 #include "llvm/Analysis/ValueTracking.h" 28 #include "llvm/IR/Instruction.h" 29 #include "llvm/IR/Instructions.h" 30 #include "llvm/IR/Value.h" 31 #include "llvm/InitializePasses.h" 32 #include "llvm/Support/Casting.h" 33 #include "llvm/Support/CommandLine.h" 34 #include "llvm/Support/Compiler.h" 35 #include "llvm/Support/Debug.h" 36 #include "llvm/Support/ErrorHandling.h" 37 #include "llvm/Support/raw_ostream.h" 38 #include "isl/ctx.h" 39 #include "isl/isl-noexceptions.h" 40 #include <cassert> 41 #include <memory> 42 43 #define DEBUG_TYPE "polly-optree" 44 45 using namespace llvm; 46 using namespace polly; 47 48 static cl::opt<bool> 49 AnalyzeKnown("polly-optree-analyze-known", 50 cl::desc("Analyze array contents for load forwarding"), 51 cl::cat(PollyCategory), cl::init(true), cl::Hidden); 52 53 static cl::opt<bool> 54 NormalizePHIs("polly-optree-normalize-phi", 55 cl::desc("Replace PHIs by their incoming values"), 56 cl::cat(PollyCategory), cl::init(false), cl::Hidden); 57 58 static cl::opt<unsigned> 59 MaxOps("polly-optree-max-ops", 60 cl::desc("Maximum number of ISL operations to invest for known " 61 "analysis; 0=no limit"), 62 cl::init(1000000), cl::cat(PollyCategory), cl::Hidden); 63 64 STATISTIC(KnownAnalyzed, "Number of successfully analyzed SCoPs"); 65 STATISTIC(KnownOutOfQuota, 66 "Analyses aborted because max_operations was reached"); 67 68 STATISTIC(TotalInstructionsCopied, "Number of copied instructions"); 69 STATISTIC(TotalKnownLoadsForwarded, 70 "Number of forwarded loads because their value was known"); 71 STATISTIC(TotalReloads, "Number of reloaded values"); 72 STATISTIC(TotalReadOnlyCopied, "Number of copied read-only accesses"); 73 STATISTIC(TotalForwardedTrees, "Number of forwarded operand trees"); 74 STATISTIC(TotalModifiedStmts, 75 "Number of statements with at least one forwarded tree"); 76 77 STATISTIC(ScopsModified, "Number of SCoPs with at least one forwarded tree"); 78 79 STATISTIC(NumValueWrites, "Number of scalar value writes after OpTree"); 80 STATISTIC(NumValueWritesInLoops, 81 "Number of scalar value writes nested in affine loops after OpTree"); 82 STATISTIC(NumPHIWrites, "Number of scalar phi writes after OpTree"); 83 STATISTIC(NumPHIWritesInLoops, 84 "Number of scalar phi writes nested in affine loops after OpTree"); 85 STATISTIC(NumSingletonWrites, "Number of singleton writes after OpTree"); 86 STATISTIC(NumSingletonWritesInLoops, 87 "Number of singleton writes nested in affine loops after OpTree"); 88 89 namespace { 90 91 /// The state of whether an operand tree was/can be forwarded. 92 /// 93 /// The items apply to an instructions and its operand tree with the instruction 94 /// as the root element. If the value in question is not an instruction in the 95 /// SCoP, it can be a leaf of an instruction's operand tree. 96 enum ForwardingDecision { 97 /// An uninitialized value. 98 FD_Unknown, 99 100 /// The root instruction or value cannot be forwarded at all. 101 FD_CannotForward, 102 103 /// The root instruction or value can be forwarded as a leaf of a larger 104 /// operand tree. 105 /// It does not make sense to move the value itself, it would just replace it 106 /// by a use of itself. For instance, a constant "5" used in a statement can 107 /// be forwarded, but it would just replace it by the same constant "5". 108 /// However, it makes sense to move as an operand of 109 /// 110 /// %add = add 5, 5 111 /// 112 /// where "5" is moved as part of a larger operand tree. "5" would be placed 113 /// (disregarding for a moment that literal constants don't have a location 114 /// and can be used anywhere) into the same statement as %add would. 115 FD_CanForwardLeaf, 116 117 /// The root instruction can be forwarded and doing so avoids a scalar 118 /// dependency. 119 /// 120 /// This can be either because the operand tree can be moved to the target 121 /// statement, or a memory access is redirected to read from a different 122 /// location. 123 FD_CanForwardProfitably, 124 125 /// A forwarding method cannot be applied to the operand tree. 126 /// The difference to FD_CannotForward is that there might be other methods 127 /// that can handle it. 128 FD_NotApplicable 129 }; 130 131 /// Represents the evaluation of and action to taken when forwarding a value 132 /// from an operand tree. 133 struct ForwardingAction { 134 using KeyTy = std::pair<Value *, ScopStmt *>; 135 136 /// Evaluation of forwarding a value. 137 ForwardingDecision Decision = FD_Unknown; 138 139 /// Callback to execute the forwarding. 140 /// Returning true allows deleting the polly::MemoryAccess if the value is the 141 /// root of the operand tree (and its elimination the reason why the 142 /// forwarding is done). Return false if the MemoryAccess is reused or there 143 /// might be other users of the read accesses. In the letter case the 144 /// polly::SimplifyPass can remove dead MemoryAccesses. 145 std::function<bool()> Execute = []() -> bool { 146 llvm_unreachable("unspecified how to forward"); 147 }; 148 149 /// Other values that need to be forwarded if this action is executed. Their 150 /// actions are executed after this one. 151 SmallVector<KeyTy, 4> Depends; 152 153 /// Named ctor: The method creating this object does not apply to the kind of 154 /// value, but other methods may. 155 static ForwardingAction notApplicable() { 156 ForwardingAction Result; 157 Result.Decision = FD_NotApplicable; 158 return Result; 159 } 160 161 /// Named ctor: The value cannot be forwarded. 162 static ForwardingAction cannotForward() { 163 ForwardingAction Result; 164 Result.Decision = FD_CannotForward; 165 return Result; 166 } 167 168 /// Named ctor: The value can just be used without any preparation. 169 static ForwardingAction triviallyForwardable(bool IsProfitable, Value *Val) { 170 ForwardingAction Result; 171 Result.Decision = 172 IsProfitable ? FD_CanForwardProfitably : FD_CanForwardLeaf; 173 Result.Execute = [=]() { 174 LLVM_DEBUG(dbgs() << " trivially forwarded: " << *Val << "\n"); 175 return true; 176 }; 177 return Result; 178 } 179 180 /// Name ctor: The value can be forwarded by executing an action. 181 static ForwardingAction canForward(std::function<bool()> Execute, 182 ArrayRef<KeyTy> Depends, 183 bool IsProfitable) { 184 ForwardingAction Result; 185 Result.Decision = 186 IsProfitable ? FD_CanForwardProfitably : FD_CanForwardLeaf; 187 Result.Execute = std::move(Execute); 188 Result.Depends.append(Depends.begin(), Depends.end()); 189 return Result; 190 } 191 }; 192 193 /// Implementation of operand tree forwarding for a specific SCoP. 194 /// 195 /// For a statement that requires a scalar value (through a value read 196 /// MemoryAccess), see if its operand can be moved into the statement. If so, 197 /// the MemoryAccess is removed and the all the operand tree instructions are 198 /// moved into the statement. All original instructions are left in the source 199 /// statements. The simplification pass can clean these up. 200 class ForwardOpTreeImpl : ZoneAlgorithm { 201 private: 202 using MemoizationTy = DenseMap<ForwardingAction::KeyTy, ForwardingAction>; 203 204 /// Scope guard to limit the number of isl operations for this pass. 205 IslMaxOperationsGuard &MaxOpGuard; 206 207 /// How many instructions have been copied to other statements. 208 int NumInstructionsCopied = 0; 209 210 /// Number of loads forwarded because their value was known. 211 int NumKnownLoadsForwarded = 0; 212 213 /// Number of values reloaded from known array elements. 214 int NumReloads = 0; 215 216 /// How many read-only accesses have been copied. 217 int NumReadOnlyCopied = 0; 218 219 /// How many operand trees have been forwarded. 220 int NumForwardedTrees = 0; 221 222 /// Number of statements with at least one forwarded operand tree. 223 int NumModifiedStmts = 0; 224 225 /// Whether we carried out at least one change to the SCoP. 226 bool Modified = false; 227 228 /// Cache of how to forward values. 229 /// The key of this map is the llvm::Value to be forwarded and the 230 /// polly::ScopStmt it is forwarded from. This is because the same llvm::Value 231 /// can evaluate differently depending on where it is evaluate. For instance, 232 /// a synthesizable Scev represents a recurrence with an loop but the loop's 233 /// exit value if evaluated after the loop. 234 /// The cached results are only valid for the current TargetStmt. 235 /// CHECKME: ScalarEvolution::getScevAtScope should take care for getting the 236 /// exit value when instantiated outside of the loop. The primary concern is 237 /// ambiguity when crossing PHI nodes, which currently is not supported. 238 MemoizationTy ForwardingActions; 239 240 /// Contains the zones where array elements are known to contain a specific 241 /// value. 242 /// { [Element[] -> Zone[]] -> ValInst[] } 243 /// @see computeKnown() 244 isl::union_map Known; 245 246 /// Translator for newly introduced ValInsts to already existing ValInsts such 247 /// that new introduced load instructions can reuse the Known analysis of its 248 /// original load. { ValInst[] -> ValInst[] } 249 isl::union_map Translator; 250 251 /// Get list of array elements that do contain the same ValInst[] at Domain[]. 252 /// 253 /// @param ValInst { Domain[] -> ValInst[] } 254 /// The values for which we search for alternative locations, 255 /// per statement instance. 256 /// 257 /// @return { Domain[] -> Element[] } 258 /// For each statement instance, the array elements that contain the 259 /// same ValInst. 260 isl::union_map findSameContentElements(isl::union_map ValInst) { 261 assert(!ValInst.is_single_valued().is_false()); 262 263 // { Domain[] } 264 isl::union_set Domain = ValInst.domain(); 265 266 // { Domain[] -> Scatter[] } 267 isl::union_map Schedule = getScatterFor(Domain); 268 269 // { Element[] -> [Scatter[] -> ValInst[]] } 270 isl::union_map MustKnownCurried = 271 convertZoneToTimepoints(Known, isl::dim::in, false, true).curry(); 272 273 // { [Domain[] -> ValInst[]] -> Scatter[] } 274 isl::union_map DomValSched = ValInst.domain_map().apply_range(Schedule); 275 276 // { [Scatter[] -> ValInst[]] -> [Domain[] -> ValInst[]] } 277 isl::union_map SchedValDomVal = 278 DomValSched.range_product(ValInst.range_map()).reverse(); 279 280 // { Element[] -> [Domain[] -> ValInst[]] } 281 isl::union_map MustKnownInst = MustKnownCurried.apply_range(SchedValDomVal); 282 283 // { Domain[] -> Element[] } 284 isl::union_map MustKnownMap = 285 MustKnownInst.uncurry().domain().unwrap().reverse(); 286 simplify(MustKnownMap); 287 288 return MustKnownMap; 289 } 290 291 /// Find a single array element for each statement instance, within a single 292 /// array. 293 /// 294 /// @param MustKnown { Domain[] -> Element[] } 295 /// Set of candidate array elements. 296 /// @param Domain { Domain[] } 297 /// The statement instance for which we need elements for. 298 /// 299 /// @return { Domain[] -> Element[] } 300 /// For each statement instance, an array element out of @p MustKnown. 301 /// All array elements must be in the same array (Polly does not yet 302 /// support reading from different accesses using the same 303 /// MemoryAccess). If no mapping for all of @p Domain exists, returns 304 /// null. 305 isl::map singleLocation(isl::union_map MustKnown, isl::set Domain) { 306 // { Domain[] -> Element[] } 307 isl::map Result; 308 309 // MemoryAccesses can read only elements from a single array 310 // (i.e. not: { Dom[0] -> A[0]; Dom[1] -> B[1] }). 311 // Look through all spaces until we find one that contains at least the 312 // wanted statement instance.s 313 for (isl::map Map : MustKnown.get_map_list()) { 314 // Get the array this is accessing. 315 isl::id ArrayId = Map.get_tuple_id(isl::dim::out); 316 ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(ArrayId.get_user()); 317 318 // No support for generation of indirect array accesses. 319 if (SAI->getBasePtrOriginSAI()) 320 continue; 321 322 // Determine whether this map contains all wanted values. 323 isl::set MapDom = Map.domain(); 324 if (!Domain.is_subset(MapDom).is_true()) 325 continue; 326 327 // There might be multiple array elements that contain the same value, but 328 // choose only one of them. lexmin is used because it returns a one-value 329 // mapping, we do not care about which one. 330 // TODO: Get the simplest access function. 331 Result = Map.lexmin(); 332 break; 333 } 334 335 return Result; 336 } 337 338 public: 339 ForwardOpTreeImpl(Scop *S, LoopInfo *LI, IslMaxOperationsGuard &MaxOpGuard) 340 : ZoneAlgorithm("polly-optree", S, LI), MaxOpGuard(MaxOpGuard) {} 341 342 /// Compute the zones of known array element contents. 343 /// 344 /// @return True if the computed #Known is usable. 345 bool computeKnownValues() { 346 isl::union_map MustKnown, KnownFromLoad, KnownFromInit; 347 348 // Check that nothing strange occurs. 349 collectCompatibleElts(); 350 351 { 352 IslQuotaScope QuotaScope = MaxOpGuard.enter(); 353 354 computeCommon(); 355 if (NormalizePHIs) 356 computeNormalizedPHIs(); 357 Known = computeKnown(true, true); 358 359 // Preexisting ValInsts use the known content analysis of themselves. 360 Translator = makeIdentityMap(Known.range(), false); 361 } 362 363 if (!Known || !Translator || !NormalizeMap) { 364 assert(isl_ctx_last_error(IslCtx.get()) == isl_error_quota); 365 Known = nullptr; 366 Translator = nullptr; 367 NormalizeMap = nullptr; 368 LLVM_DEBUG(dbgs() << "Known analysis exceeded max_operations\n"); 369 return false; 370 } 371 372 KnownAnalyzed++; 373 LLVM_DEBUG(dbgs() << "All known: " << Known << "\n"); 374 375 return true; 376 } 377 378 void printStatistics(raw_ostream &OS, int Indent = 0) { 379 OS.indent(Indent) << "Statistics {\n"; 380 OS.indent(Indent + 4) << "Instructions copied: " << NumInstructionsCopied 381 << '\n'; 382 OS.indent(Indent + 4) << "Known loads forwarded: " << NumKnownLoadsForwarded 383 << '\n'; 384 OS.indent(Indent + 4) << "Reloads: " << NumReloads << '\n'; 385 OS.indent(Indent + 4) << "Read-only accesses copied: " << NumReadOnlyCopied 386 << '\n'; 387 OS.indent(Indent + 4) << "Operand trees forwarded: " << NumForwardedTrees 388 << '\n'; 389 OS.indent(Indent + 4) << "Statements with forwarded operand trees: " 390 << NumModifiedStmts << '\n'; 391 OS.indent(Indent) << "}\n"; 392 } 393 394 void printStatements(raw_ostream &OS, int Indent = 0) const { 395 OS.indent(Indent) << "After statements {\n"; 396 for (auto &Stmt : *S) { 397 OS.indent(Indent + 4) << Stmt.getBaseName() << "\n"; 398 for (auto *MA : Stmt) 399 MA->print(OS); 400 401 OS.indent(Indent + 12); 402 Stmt.printInstructions(OS); 403 } 404 OS.indent(Indent) << "}\n"; 405 } 406 407 /// Create a new MemoryAccess of type read and MemoryKind::Array. 408 /// 409 /// @param Stmt The statement in which the access occurs. 410 /// @param LI The instruction that does the access. 411 /// @param AccessRelation The array element that each statement instance 412 /// accesses. 413 /// 414 /// @param The newly created access. 415 MemoryAccess *makeReadArrayAccess(ScopStmt *Stmt, LoadInst *LI, 416 isl::map AccessRelation) { 417 isl::id ArrayId = AccessRelation.get_tuple_id(isl::dim::out); 418 ScopArrayInfo *SAI = reinterpret_cast<ScopArrayInfo *>(ArrayId.get_user()); 419 420 // Create a dummy SCEV access, to be replaced anyway. 421 SmallVector<const SCEV *, 4> Sizes; 422 Sizes.reserve(SAI->getNumberOfDimensions()); 423 SmallVector<const SCEV *, 4> Subscripts; 424 Subscripts.reserve(SAI->getNumberOfDimensions()); 425 for (unsigned i = 0; i < SAI->getNumberOfDimensions(); i += 1) { 426 Sizes.push_back(SAI->getDimensionSize(i)); 427 Subscripts.push_back(nullptr); 428 } 429 430 MemoryAccess *Access = 431 new MemoryAccess(Stmt, LI, MemoryAccess::READ, SAI->getBasePtr(), 432 LI->getType(), true, {}, Sizes, LI, MemoryKind::Array); 433 S->addAccessFunction(Access); 434 Stmt->addAccess(Access, true); 435 436 Access->setNewAccessRelation(AccessRelation); 437 438 return Access; 439 } 440 441 /// Forward a load by reading from an array element that contains the same 442 /// value. Typically the location it was loaded from. 443 /// 444 /// @param TargetStmt The statement the operand tree will be copied to. 445 /// @param Inst The (possibly speculatable) instruction to forward. 446 /// @param UseStmt The statement that uses @p Inst. 447 /// @param UseLoop The loop @p Inst is used in. 448 /// @param DefStmt The statement @p Inst is defined in. 449 /// @param DefLoop The loop which contains @p Inst. 450 /// 451 /// @return A ForwardingAction object describing the feasibility and 452 /// profitability evaluation and the callback carrying-out the value 453 /// forwarding. 454 ForwardingAction forwardKnownLoad(ScopStmt *TargetStmt, Instruction *Inst, 455 ScopStmt *UseStmt, Loop *UseLoop, 456 ScopStmt *DefStmt, Loop *DefLoop) { 457 // Cannot do anything without successful known analysis. 458 if (Known.is_null() || Translator.is_null() || 459 MaxOpGuard.hasQuotaExceeded()) 460 return ForwardingAction::notApplicable(); 461 462 LoadInst *LI = dyn_cast<LoadInst>(Inst); 463 if (!LI) 464 return ForwardingAction::notApplicable(); 465 466 ForwardingDecision OpDecision = 467 forwardTree(TargetStmt, LI->getPointerOperand(), DefStmt, DefLoop); 468 switch (OpDecision) { 469 case FD_CanForwardProfitably: 470 case FD_CanForwardLeaf: 471 break; 472 case FD_CannotForward: 473 return ForwardingAction::cannotForward(); 474 default: 475 llvm_unreachable("Shouldn't return this"); 476 } 477 478 MemoryAccess *Access = TargetStmt->getArrayAccessOrNULLFor(LI); 479 if (Access) { 480 // If the load is already in the statement, no forwarding is necessary. 481 // However, it might happen that the LoadInst is already present in the 482 // statement's instruction list. In that case we do as follows: 483 // - For the evaluation, we can trivially forward it as it is 484 // benefit of forwarding an already present instruction. 485 // - For the execution, prepend the instruction (to make it 486 // available to all instructions following in the instruction list), but 487 // do not add another MemoryAccess. 488 auto ExecAction = [this, TargetStmt, LI, Access]() -> bool { 489 TargetStmt->prependInstruction(LI); 490 LLVM_DEBUG( 491 dbgs() << " forwarded known load with preexisting MemoryAccess" 492 << Access << "\n"); 493 (void)Access; 494 495 NumKnownLoadsForwarded++; 496 TotalKnownLoadsForwarded++; 497 return true; 498 }; 499 return ForwardingAction::canForward( 500 ExecAction, {{LI->getPointerOperand(), DefStmt}}, true); 501 } 502 503 // Allow the following Isl calculations (until we return the 504 // ForwardingAction, excluding the code inside the lambda that will be 505 // executed later) to fail. 506 IslQuotaScope QuotaScope = MaxOpGuard.enter(); 507 508 // { DomainDef[] -> ValInst[] } 509 isl::map ExpectedVal = makeValInst(Inst, UseStmt, UseLoop); 510 assert(!isNormalized(ExpectedVal).is_false() && 511 "LoadInsts are always normalized"); 512 513 // { DomainUse[] -> DomainTarget[] } 514 isl::map UseToTarget = getDefToTarget(UseStmt, TargetStmt); 515 516 // { DomainTarget[] -> ValInst[] } 517 isl::map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget); 518 isl::union_map TranslatedExpectedVal = 519 isl::union_map(TargetExpectedVal).apply_range(Translator); 520 521 // { DomainTarget[] -> Element[] } 522 isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal); 523 524 isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt)); 525 if (!SameVal) 526 return ForwardingAction::notApplicable(); 527 528 LLVM_DEBUG(dbgs() << " expected values where " << TargetExpectedVal 529 << "\n"); 530 LLVM_DEBUG(dbgs() << " candidate elements where " << Candidates 531 << "\n"); 532 533 // { ValInst[] } 534 isl::space ValInstSpace = ExpectedVal.get_space().range(); 535 536 // After adding a new load to the SCoP, also update the Known content 537 // about it. The new load will have a known ValInst of 538 // { [DomainTarget[] -> Value[]] } 539 // but which -- because it is a copy of it -- has same value as the 540 // { [DomainDef[] -> Value[]] } 541 // that it replicates. Instead of cloning the known content of 542 // [DomainDef[] -> Value[]] 543 // for DomainTarget[], we add a 'translator' that maps 544 // [DomainTarget[] -> Value[]] to [DomainDef[] -> Value[]] 545 // before comparing to the known content. 546 // TODO: 'Translator' could also be used to map PHINodes to their incoming 547 // ValInsts. 548 isl::map LocalTranslator; 549 if (!ValInstSpace.is_wrapping().is_false()) { 550 // { DefDomain[] -> Value[] } 551 isl::map ValInsts = ExpectedVal.range().unwrap(); 552 553 // { DefDomain[] } 554 isl::set DefDomain = ValInsts.domain(); 555 556 // { Value[] } 557 isl::space ValSpace = ValInstSpace.unwrap().range(); 558 559 // { Value[] -> Value[] } 560 isl::map ValToVal = 561 isl::map::identity(ValSpace.map_from_domain_and_range(ValSpace)); 562 563 // { DomainDef[] -> DomainTarget[] } 564 isl::map DefToTarget = getDefToTarget(DefStmt, TargetStmt); 565 566 // { [TargetDomain[] -> Value[]] -> [DefDomain[] -> Value] } 567 LocalTranslator = DefToTarget.reverse().product(ValToVal); 568 LLVM_DEBUG(dbgs() << " local translator is " << LocalTranslator 569 << "\n"); 570 571 if (!LocalTranslator) 572 return ForwardingAction::notApplicable(); 573 } 574 575 auto ExecAction = [this, TargetStmt, LI, SameVal, 576 LocalTranslator]() -> bool { 577 TargetStmt->prependInstruction(LI); 578 MemoryAccess *Access = makeReadArrayAccess(TargetStmt, LI, SameVal); 579 LLVM_DEBUG(dbgs() << " forwarded known load with new MemoryAccess" 580 << Access << "\n"); 581 (void)Access; 582 583 if (LocalTranslator) 584 Translator = Translator.add_map(LocalTranslator); 585 586 NumKnownLoadsForwarded++; 587 TotalKnownLoadsForwarded++; 588 return true; 589 }; 590 return ForwardingAction::canForward( 591 ExecAction, {{LI->getPointerOperand(), DefStmt}}, true); 592 } 593 594 /// Forward a scalar by redirecting the access to an array element that stores 595 /// the same value. 596 /// 597 /// @param TargetStmt The statement the operand tree will be copied to. 598 /// @param Inst The scalar to forward. 599 /// @param UseStmt The statement that uses @p Inst. 600 /// @param UseLoop The loop @p Inst is used in. 601 /// @param DefStmt The statement @p Inst is defined in. 602 /// @param DefLoop The loop which contains @p Inst. 603 /// 604 /// @return A ForwardingAction object describing the feasibility and 605 /// profitability evaluation and the callback carrying-out the value 606 /// forwarding. 607 ForwardingAction reloadKnownContent(ScopStmt *TargetStmt, Instruction *Inst, 608 ScopStmt *UseStmt, Loop *UseLoop, 609 ScopStmt *DefStmt, Loop *DefLoop) { 610 // Cannot do anything without successful known analysis. 611 if (Known.is_null() || Translator.is_null() || 612 MaxOpGuard.hasQuotaExceeded()) 613 return ForwardingAction::notApplicable(); 614 615 // Don't spend too much time analyzing whether it can be reloaded. 616 IslQuotaScope QuotaScope = MaxOpGuard.enter(); 617 618 // { DomainDef[] -> ValInst[] } 619 isl::union_map ExpectedVal = makeNormalizedValInst(Inst, UseStmt, UseLoop); 620 621 // { DomainUse[] -> DomainTarget[] } 622 isl::map UseToTarget = getDefToTarget(UseStmt, TargetStmt); 623 624 // { DomainTarget[] -> ValInst[] } 625 isl::union_map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget); 626 isl::union_map TranslatedExpectedVal = 627 TargetExpectedVal.apply_range(Translator); 628 629 // { DomainTarget[] -> Element[] } 630 isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal); 631 632 isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt)); 633 simplify(SameVal); 634 if (!SameVal) 635 return ForwardingAction::notApplicable(); 636 637 auto ExecAction = [this, TargetStmt, Inst, SameVal]() { 638 MemoryAccess *Access = TargetStmt->lookupInputAccessOf(Inst); 639 if (!Access) 640 Access = TargetStmt->ensureValueRead(Inst); 641 Access->setNewAccessRelation(SameVal); 642 643 LLVM_DEBUG(dbgs() << " forwarded known content of " << *Inst 644 << " which is " << SameVal << "\n"); 645 TotalReloads++; 646 NumReloads++; 647 return false; 648 }; 649 650 return ForwardingAction::canForward(ExecAction, {}, true); 651 } 652 653 /// Forwards a speculatively executable instruction. 654 /// 655 /// @param TargetStmt The statement the operand tree will be copied to. 656 /// @param UseInst The (possibly speculatable) instruction to forward. 657 /// @param DefStmt The statement @p UseInst is defined in. 658 /// @param DefLoop The loop which contains @p UseInst. 659 /// 660 /// @return A ForwardingAction object describing the feasibility and 661 /// profitability evaluation and the callback carrying-out the value 662 /// forwarding. 663 ForwardingAction forwardSpeculatable(ScopStmt *TargetStmt, 664 Instruction *UseInst, ScopStmt *DefStmt, 665 Loop *DefLoop) { 666 // PHIs, unless synthesizable, are not yet supported. 667 if (isa<PHINode>(UseInst)) 668 return ForwardingAction::notApplicable(); 669 670 // Compatible instructions must satisfy the following conditions: 671 // 1. Idempotent (instruction will be copied, not moved; although its 672 // original instance might be removed by simplification) 673 // 2. Not access memory (There might be memory writes between) 674 // 3. Not cause undefined behaviour (we might copy to a location when the 675 // original instruction was no executed; this is currently not possible 676 // because we do not forward PHINodes) 677 // 4. Not leak memory if executed multiple times (i.e. malloc) 678 // 679 // Instruction::mayHaveSideEffects is not sufficient because it considers 680 // malloc to not have side-effects. llvm::isSafeToSpeculativelyExecute is 681 // not sufficient because it allows memory accesses. 682 if (mayBeMemoryDependent(*UseInst)) 683 return ForwardingAction::notApplicable(); 684 685 SmallVector<ForwardingAction::KeyTy, 4> Depends; 686 Depends.reserve(UseInst->getNumOperands()); 687 for (Value *OpVal : UseInst->operand_values()) { 688 ForwardingDecision OpDecision = 689 forwardTree(TargetStmt, OpVal, DefStmt, DefLoop); 690 switch (OpDecision) { 691 case FD_CannotForward: 692 return ForwardingAction::cannotForward(); 693 694 case FD_CanForwardLeaf: 695 case FD_CanForwardProfitably: 696 Depends.emplace_back(OpVal, DefStmt); 697 break; 698 699 case FD_NotApplicable: 700 case FD_Unknown: 701 llvm_unreachable( 702 "forwardTree should never return FD_NotApplicable/FD_Unknown"); 703 } 704 } 705 706 auto ExecAction = [this, TargetStmt, UseInst]() { 707 // To ensure the right order, prepend this instruction before its 708 // operands. This ensures that its operands are inserted before the 709 // instruction using them. 710 TargetStmt->prependInstruction(UseInst); 711 712 LLVM_DEBUG(dbgs() << " forwarded speculable instruction: " << *UseInst 713 << "\n"); 714 NumInstructionsCopied++; 715 TotalInstructionsCopied++; 716 return true; 717 }; 718 return ForwardingAction::canForward(ExecAction, Depends, true); 719 } 720 721 /// Determines whether an operand tree can be forwarded and returns 722 /// instructions how to do so in the form of a ForwardingAction object. 723 /// 724 /// @param TargetStmt The statement the operand tree will be copied to. 725 /// @param UseVal The value (usually an instruction) which is root of an 726 /// operand tree. 727 /// @param UseStmt The statement that uses @p UseVal. 728 /// @param UseLoop The loop @p UseVal is used in. 729 /// 730 /// @return A ForwardingAction object describing the feasibility and 731 /// profitability evaluation and the callback carrying-out the value 732 /// forwarding. 733 ForwardingAction forwardTreeImpl(ScopStmt *TargetStmt, Value *UseVal, 734 ScopStmt *UseStmt, Loop *UseLoop) { 735 ScopStmt *DefStmt = nullptr; 736 Loop *DefLoop = nullptr; 737 738 // { DefDomain[] -> TargetDomain[] } 739 isl::map DefToTarget; 740 741 VirtualUse VUse = VirtualUse::create(UseStmt, UseLoop, UseVal, true); 742 switch (VUse.getKind()) { 743 case VirtualUse::Constant: 744 case VirtualUse::Block: 745 case VirtualUse::Hoisted: 746 // These can be used anywhere without special considerations. 747 return ForwardingAction::triviallyForwardable(false, UseVal); 748 749 case VirtualUse::Synthesizable: { 750 // Check if the value is synthesizable at the new location as well. This 751 // might be possible when leaving a loop for which ScalarEvolution is 752 // unable to derive the exit value for. 753 // TODO: If there is a LCSSA PHI at the loop exit, use that one. 754 // If the SCEV contains a SCEVAddRecExpr, we currently depend on that we 755 // do not forward past its loop header. This would require us to use a 756 // previous loop induction variable instead the current one. We currently 757 // do not allow forwarding PHI nodes, thus this should never occur (the 758 // only exception where no phi is necessary being an unreachable loop 759 // without edge from the outside). 760 VirtualUse TargetUse = VirtualUse::create( 761 S, TargetStmt, TargetStmt->getSurroundingLoop(), UseVal, true); 762 if (TargetUse.getKind() == VirtualUse::Synthesizable) 763 return ForwardingAction::triviallyForwardable(false, UseVal); 764 765 LLVM_DEBUG( 766 dbgs() << " Synthesizable would not be synthesizable anymore: " 767 << *UseVal << "\n"); 768 return ForwardingAction::cannotForward(); 769 } 770 771 case VirtualUse::ReadOnly: { 772 if (!ModelReadOnlyScalars) 773 return ForwardingAction::triviallyForwardable(false, UseVal); 774 775 // If we model read-only scalars, we need to create a MemoryAccess for it. 776 auto ExecAction = [this, TargetStmt, UseVal]() { 777 TargetStmt->ensureValueRead(UseVal); 778 779 LLVM_DEBUG(dbgs() << " forwarded read-only value " << *UseVal 780 << "\n"); 781 NumReadOnlyCopied++; 782 TotalReadOnlyCopied++; 783 784 // Note that we cannot return true here. With a operand tree 785 // depth of 0, UseVal is the use in TargetStmt that we try to replace. 786 // With -polly-analyze-read-only-scalars=true we would ensure the 787 // existence of a MemoryAccess (which already exists for a leaf) and be 788 // removed again by tryForwardTree because it's goal is to remove this 789 // scalar MemoryAccess. It interprets FD_CanForwardTree as the 790 // permission to do so. 791 return false; 792 }; 793 return ForwardingAction::canForward(ExecAction, {}, false); 794 } 795 796 case VirtualUse::Intra: 797 // Knowing that UseStmt and DefStmt are the same statement instance, just 798 // reuse the information about UseStmt for DefStmt 799 DefStmt = UseStmt; 800 801 LLVM_FALLTHROUGH; 802 case VirtualUse::Inter: 803 Instruction *Inst = cast<Instruction>(UseVal); 804 805 if (!DefStmt) { 806 DefStmt = S->getStmtFor(Inst); 807 if (!DefStmt) 808 return ForwardingAction::cannotForward(); 809 } 810 811 DefLoop = LI->getLoopFor(Inst->getParent()); 812 813 ForwardingAction SpeculativeResult = 814 forwardSpeculatable(TargetStmt, Inst, DefStmt, DefLoop); 815 if (SpeculativeResult.Decision != FD_NotApplicable) 816 return SpeculativeResult; 817 818 ForwardingAction KnownResult = forwardKnownLoad( 819 TargetStmt, Inst, UseStmt, UseLoop, DefStmt, DefLoop); 820 if (KnownResult.Decision != FD_NotApplicable) 821 return KnownResult; 822 823 ForwardingAction ReloadResult = reloadKnownContent( 824 TargetStmt, Inst, UseStmt, UseLoop, DefStmt, DefLoop); 825 if (ReloadResult.Decision != FD_NotApplicable) 826 return ReloadResult; 827 828 // When no method is found to forward the operand tree, we effectively 829 // cannot handle it. 830 LLVM_DEBUG(dbgs() << " Cannot forward instruction: " << *Inst << "\n"); 831 return ForwardingAction::cannotForward(); 832 } 833 834 llvm_unreachable("Case unhandled"); 835 } 836 837 /// Determines whether an operand tree can be forwarded. Previous evaluations 838 /// are cached. 839 /// 840 /// @param TargetStmt The statement the operand tree will be copied to. 841 /// @param UseVal The value (usually an instruction) which is root of an 842 /// operand tree. 843 /// @param UseStmt The statement that uses @p UseVal. 844 /// @param UseLoop The loop @p UseVal is used in. 845 /// 846 /// @return FD_CannotForward if @p UseVal cannot be forwarded. 847 /// FD_CanForwardLeaf if @p UseVal is forwardable, but not 848 /// profitable. 849 /// FD_CanForwardProfitably if @p UseVal is forwardable and useful to 850 /// do. 851 ForwardingDecision forwardTree(ScopStmt *TargetStmt, Value *UseVal, 852 ScopStmt *UseStmt, Loop *UseLoop) { 853 // Lookup any cached evaluation. 854 auto It = ForwardingActions.find({UseVal, UseStmt}); 855 if (It != ForwardingActions.end()) 856 return It->second.Decision; 857 858 // Make a new evaluation. 859 ForwardingAction Action = 860 forwardTreeImpl(TargetStmt, UseVal, UseStmt, UseLoop); 861 ForwardingDecision Result = Action.Decision; 862 863 // Remember for the next time. 864 assert(!ForwardingActions.count({UseVal, UseStmt}) && 865 "circular dependency?"); 866 ForwardingActions.insert({{UseVal, UseStmt}, std::move(Action)}); 867 868 return Result; 869 } 870 871 /// Forward an operand tree using cached actions. 872 /// 873 /// @param Stmt Statement the operand tree is moved into. 874 /// @param UseVal Root of the operand tree within @p Stmt. 875 /// @param RA The MemoryAccess for @p UseVal that the forwarding intends 876 /// to remove. 877 void applyForwardingActions(ScopStmt *Stmt, Value *UseVal, MemoryAccess *RA) { 878 using ChildItTy = 879 decltype(std::declval<ForwardingAction>().Depends.begin()); 880 using EdgeTy = std::pair<ForwardingAction *, ChildItTy>; 881 882 DenseSet<ForwardingAction::KeyTy> Visited; 883 SmallVector<EdgeTy, 32> Stack; 884 SmallVector<ForwardingAction *, 32> Ordered; 885 886 // Seed the tree search using the root value. 887 assert(ForwardingActions.count({UseVal, Stmt})); 888 ForwardingAction *RootAction = &ForwardingActions[{UseVal, Stmt}]; 889 Stack.emplace_back(RootAction, RootAction->Depends.begin()); 890 891 // Compute the postorder of the operand tree: all operands of an instruction 892 // must be visited before the instruction itself. As an additional 893 // requirement, the topological ordering must be 'compact': Any subtree node 894 // must not be interleaved with nodes from a non-shared subtree. This is 895 // because the same llvm::Instruction can be materialized multiple times as 896 // used at different ScopStmts which might be different values. Intersecting 897 // these lifetimes may result in miscompilations. 898 // FIXME: Intersecting lifetimes might still be possible for the roots 899 // themselves, since instructions are just prepended to a ScopStmt's 900 // instruction list. 901 while (!Stack.empty()) { 902 EdgeTy &Top = Stack.back(); 903 ForwardingAction *TopAction = Top.first; 904 ChildItTy &TopEdge = Top.second; 905 906 if (TopEdge == TopAction->Depends.end()) { 907 // Postorder sorting 908 Ordered.push_back(TopAction); 909 Stack.pop_back(); 910 continue; 911 } 912 ForwardingAction::KeyTy Key = *TopEdge; 913 914 // Next edge for this level 915 ++TopEdge; 916 917 auto VisitIt = Visited.insert(Key); 918 if (!VisitIt.second) 919 continue; 920 921 assert(ForwardingActions.count(Key) && 922 "Must not insert new actions during execution phase"); 923 ForwardingAction *ChildAction = &ForwardingActions[Key]; 924 Stack.emplace_back(ChildAction, ChildAction->Depends.begin()); 925 } 926 927 // Actually, we need the reverse postorder because actions prepend new 928 // instructions. Therefore, the first one will always be the action for the 929 // operand tree's root. 930 assert(Ordered.back() == RootAction); 931 if (RootAction->Execute()) 932 Stmt->removeSingleMemoryAccess(RA); 933 Ordered.pop_back(); 934 for (auto DepAction : reverse(Ordered)) { 935 assert(DepAction->Decision != FD_Unknown && 936 DepAction->Decision != FD_CannotForward); 937 assert(DepAction != RootAction); 938 DepAction->Execute(); 939 } 940 } 941 942 /// Try to forward an operand tree rooted in @p RA. 943 bool tryForwardTree(MemoryAccess *RA) { 944 assert(RA->isLatestScalarKind()); 945 LLVM_DEBUG(dbgs() << "Trying to forward operand tree " << RA << "...\n"); 946 947 ScopStmt *Stmt = RA->getStatement(); 948 Loop *InLoop = Stmt->getSurroundingLoop(); 949 950 isl::map TargetToUse; 951 if (!Known.is_null()) { 952 isl::space DomSpace = Stmt->getDomainSpace(); 953 TargetToUse = 954 isl::map::identity(DomSpace.map_from_domain_and_range(DomSpace)); 955 } 956 957 ForwardingDecision Assessment = 958 forwardTree(Stmt, RA->getAccessValue(), Stmt, InLoop); 959 960 // If considered feasible and profitable, forward it. 961 bool Changed = false; 962 if (Assessment == FD_CanForwardProfitably) { 963 applyForwardingActions(Stmt, RA->getAccessValue(), RA); 964 Changed = true; 965 } 966 967 ForwardingActions.clear(); 968 return Changed; 969 } 970 971 /// Return which SCoP this instance is processing. 972 Scop *getScop() const { return S; } 973 974 /// Run the algorithm: Use value read accesses as operand tree roots and try 975 /// to forward them into the statement. 976 bool forwardOperandTrees() { 977 for (ScopStmt &Stmt : *S) { 978 bool StmtModified = false; 979 980 // Because we are modifying the MemoryAccess list, collect them first to 981 // avoid iterator invalidation. 982 SmallVector<MemoryAccess *, 16> Accs(Stmt.begin(), Stmt.end()); 983 984 for (MemoryAccess *RA : Accs) { 985 if (!RA->isRead()) 986 continue; 987 if (!RA->isLatestScalarKind()) 988 continue; 989 990 if (tryForwardTree(RA)) { 991 Modified = true; 992 StmtModified = true; 993 NumForwardedTrees++; 994 TotalForwardedTrees++; 995 } 996 } 997 998 if (StmtModified) { 999 NumModifiedStmts++; 1000 TotalModifiedStmts++; 1001 } 1002 } 1003 1004 if (Modified) { 1005 ScopsModified++; 1006 S->realignParams(); 1007 } 1008 return Modified; 1009 } 1010 1011 /// Print the pass result, performed transformations and the SCoP after the 1012 /// transformation. 1013 void print(raw_ostream &OS, int Indent = 0) { 1014 printStatistics(OS, Indent); 1015 1016 if (!Modified) { 1017 // This line can easily be checked in regression tests. 1018 OS << "ForwardOpTree executed, but did not modify anything\n"; 1019 return; 1020 } 1021 1022 printStatements(OS, Indent); 1023 } 1024 }; 1025 1026 /// Pass that redirects scalar reads to array elements that are known to contain 1027 /// the same value. 1028 /// 1029 /// This reduces the number of scalar accesses and therefore potentially 1030 /// increases the freedom of the scheduler. In the ideal case, all reads of a 1031 /// scalar definition are redirected (We currently do not care about removing 1032 /// the write in this case). This is also useful for the main DeLICM pass as 1033 /// there are less scalars to be mapped. 1034 class ForwardOpTree : public ScopPass { 1035 private: 1036 /// The pass implementation, also holding per-scop data. 1037 std::unique_ptr<ForwardOpTreeImpl> Impl; 1038 1039 public: 1040 static char ID; 1041 1042 explicit ForwardOpTree() : ScopPass(ID) {} 1043 ForwardOpTree(const ForwardOpTree &) = delete; 1044 ForwardOpTree &operator=(const ForwardOpTree &) = delete; 1045 1046 void getAnalysisUsage(AnalysisUsage &AU) const override { 1047 AU.addRequiredTransitive<ScopInfoRegionPass>(); 1048 AU.addRequired<LoopInfoWrapperPass>(); 1049 AU.setPreservesAll(); 1050 } 1051 1052 bool runOnScop(Scop &S) override { 1053 // Free resources for previous SCoP's computation, if not yet done. 1054 releaseMemory(); 1055 1056 LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 1057 1058 { 1059 IslMaxOperationsGuard MaxOpGuard(S.getIslCtx().get(), MaxOps, false); 1060 Impl = std::make_unique<ForwardOpTreeImpl>(&S, &LI, MaxOpGuard); 1061 1062 if (AnalyzeKnown) { 1063 LLVM_DEBUG(dbgs() << "Prepare forwarders...\n"); 1064 Impl->computeKnownValues(); 1065 } 1066 1067 LLVM_DEBUG(dbgs() << "Forwarding operand trees...\n"); 1068 Impl->forwardOperandTrees(); 1069 1070 if (MaxOpGuard.hasQuotaExceeded()) { 1071 LLVM_DEBUG(dbgs() << "Not all operations completed because of " 1072 "max_operations exceeded\n"); 1073 KnownOutOfQuota++; 1074 } 1075 } 1076 1077 LLVM_DEBUG(dbgs() << "\nFinal Scop:\n"); 1078 LLVM_DEBUG(dbgs() << S); 1079 1080 // Update statistics 1081 auto ScopStats = S.getStatistics(); 1082 NumValueWrites += ScopStats.NumValueWrites; 1083 NumValueWritesInLoops += ScopStats.NumValueWritesInLoops; 1084 NumPHIWrites += ScopStats.NumPHIWrites; 1085 NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops; 1086 NumSingletonWrites += ScopStats.NumSingletonWrites; 1087 NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops; 1088 1089 return false; 1090 } 1091 1092 void printScop(raw_ostream &OS, Scop &S) const override { 1093 if (!Impl) 1094 return; 1095 1096 assert(Impl->getScop() == &S); 1097 Impl->print(OS); 1098 } 1099 1100 void releaseMemory() override { Impl.reset(); } 1101 }; // class ForwardOpTree 1102 1103 char ForwardOpTree::ID; 1104 } // namespace 1105 1106 ScopPass *polly::createForwardOpTreePass() { return new ForwardOpTree(); } 1107 1108 INITIALIZE_PASS_BEGIN(ForwardOpTree, "polly-optree", 1109 "Polly - Forward operand tree", false, false) 1110 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 1111 INITIALIZE_PASS_END(ForwardOpTree, "polly-optree", 1112 "Polly - Forward operand tree", false, false) 1113