1 //===- ForwardOpTree.h ------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Move instructions between statements. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "polly/ForwardOpTree.h" 14 #include "polly/Options.h" 15 #include "polly/ScopBuilder.h" 16 #include "polly/ScopInfo.h" 17 #include "polly/ScopPass.h" 18 #include "polly/Support/GICHelper.h" 19 #include "polly/Support/ISLOStream.h" 20 #include "polly/Support/ISLTools.h" 21 #include "polly/Support/VirtualInstruction.h" 22 #include "polly/ZoneAlgo.h" 23 #include "llvm/ADT/STLExtras.h" 24 #include "llvm/ADT/SmallVector.h" 25 #include "llvm/ADT/Statistic.h" 26 #include "llvm/Analysis/LoopInfo.h" 27 #include "llvm/Analysis/ValueTracking.h" 28 #include "llvm/IR/Instruction.h" 29 #include "llvm/IR/Instructions.h" 30 #include "llvm/IR/Value.h" 31 #include "llvm/InitializePasses.h" 32 #include "llvm/Support/Casting.h" 33 #include "llvm/Support/CommandLine.h" 34 #include "llvm/Support/Compiler.h" 35 #include "llvm/Support/Debug.h" 36 #include "llvm/Support/ErrorHandling.h" 37 #include "llvm/Support/raw_ostream.h" 38 #include "isl/ctx.h" 39 #include "isl/isl-noexceptions.h" 40 #include <cassert> 41 #include <memory> 42 43 #define DEBUG_TYPE "polly-optree" 44 45 using namespace llvm; 46 using namespace polly; 47 48 static cl::opt<bool> 49 AnalyzeKnown("polly-optree-analyze-known", 50 cl::desc("Analyze array contents for load forwarding"), 51 cl::cat(PollyCategory), cl::init(true), cl::Hidden); 52 53 static cl::opt<bool> 54 NormalizePHIs("polly-optree-normalize-phi", 55 cl::desc("Replace PHIs by their incoming values"), 56 cl::cat(PollyCategory), cl::init(false), cl::Hidden); 57 58 static cl::opt<unsigned> 59 MaxOps("polly-optree-max-ops", 60 cl::desc("Maximum number of ISL operations to invest for known " 61 "analysis; 0=no limit"), 62 cl::init(1000000), cl::cat(PollyCategory), cl::Hidden); 63 64 STATISTIC(KnownAnalyzed, "Number of successfully analyzed SCoPs"); 65 STATISTIC(KnownOutOfQuota, 66 "Analyses aborted because max_operations was reached"); 67 68 STATISTIC(TotalInstructionsCopied, "Number of copied instructions"); 69 STATISTIC(TotalKnownLoadsForwarded, 70 "Number of forwarded loads because their value was known"); 71 STATISTIC(TotalReloads, "Number of reloaded values"); 72 STATISTIC(TotalReadOnlyCopied, "Number of copied read-only accesses"); 73 STATISTIC(TotalForwardedTrees, "Number of forwarded operand trees"); 74 STATISTIC(TotalModifiedStmts, 75 "Number of statements with at least one forwarded tree"); 76 77 STATISTIC(ScopsModified, "Number of SCoPs with at least one forwarded tree"); 78 79 STATISTIC(NumValueWrites, "Number of scalar value writes after OpTree"); 80 STATISTIC(NumValueWritesInLoops, 81 "Number of scalar value writes nested in affine loops after OpTree"); 82 STATISTIC(NumPHIWrites, "Number of scalar phi writes after OpTree"); 83 STATISTIC(NumPHIWritesInLoops, 84 "Number of scalar phi writes nested in affine loops after OpTree"); 85 STATISTIC(NumSingletonWrites, "Number of singleton writes after OpTree"); 86 STATISTIC(NumSingletonWritesInLoops, 87 "Number of singleton writes nested in affine loops after OpTree"); 88 89 namespace { 90 91 /// The state of whether an operand tree was/can be forwarded. 92 /// 93 /// The items apply to an instructions and its operand tree with the instruction 94 /// as the root element. If the value in question is not an instruction in the 95 /// SCoP, it can be a leaf of an instruction's operand tree. 96 enum ForwardingDecision { 97 /// An uninitialized value. 98 FD_Unknown, 99 100 /// The root instruction or value cannot be forwarded at all. 101 FD_CannotForward, 102 103 /// The root instruction or value can be forwarded as a leaf of a larger 104 /// operand tree. 105 /// It does not make sense to move the value itself, it would just replace it 106 /// by a use of itself. For instance, a constant "5" used in a statement can 107 /// be forwarded, but it would just replace it by the same constant "5". 108 /// However, it makes sense to move as an operand of 109 /// 110 /// %add = add 5, 5 111 /// 112 /// where "5" is moved as part of a larger operand tree. "5" would be placed 113 /// (disregarding for a moment that literal constants don't have a location 114 /// and can be used anywhere) into the same statement as %add would. 115 FD_CanForwardLeaf, 116 117 /// The root instruction can be forwarded and doing so avoids a scalar 118 /// dependency. 119 /// 120 /// This can be either because the operand tree can be moved to the target 121 /// statement, or a memory access is redirected to read from a different 122 /// location. 123 FD_CanForwardProfitably, 124 125 /// A forwarding method cannot be applied to the operand tree. 126 /// The difference to FD_CannotForward is that there might be other methods 127 /// that can handle it. 128 FD_NotApplicable 129 }; 130 131 /// Represents the evaluation of and action to taken when forwarding a value 132 /// from an operand tree. 133 struct ForwardingAction { 134 using KeyTy = std::pair<Value *, ScopStmt *>; 135 136 /// Evaluation of forwarding a value. 137 ForwardingDecision Decision = FD_Unknown; 138 139 /// Callback to execute the forwarding. 140 /// Returning true allows deleting the polly::MemoryAccess if the value is the 141 /// root of the operand tree (and its elimination the reason why the 142 /// forwarding is done). Return false if the MemoryAccess is reused or there 143 /// might be other users of the read accesses. In the letter case the 144 /// polly::SimplifyPass can remove dead MemoryAccesses. 145 std::function<bool()> Execute = []() -> bool { 146 llvm_unreachable("unspecified how to forward"); 147 }; 148 149 /// Other values that need to be forwarded if this action is executed. Their 150 /// actions are executed after this one. 151 SmallVector<KeyTy, 4> Depends; 152 153 /// Named ctor: The method creating this object does not apply to the kind of 154 /// value, but other methods may. 155 static ForwardingAction notApplicable() { 156 ForwardingAction Result; 157 Result.Decision = FD_NotApplicable; 158 return Result; 159 } 160 161 /// Named ctor: The value cannot be forwarded. 162 static ForwardingAction cannotForward() { 163 ForwardingAction Result; 164 Result.Decision = FD_CannotForward; 165 return Result; 166 } 167 168 /// Named ctor: The value can just be used without any preparation. 169 static ForwardingAction triviallyForwardable(bool IsProfitable) { 170 ForwardingAction Result; 171 Result.Decision = 172 IsProfitable ? FD_CanForwardProfitably : FD_CanForwardLeaf; 173 Result.Execute = []() { return true; }; 174 return Result; 175 } 176 177 /// Name ctor: The value can be forwarded by executing an action. 178 static ForwardingAction canForward(std::function<bool()> Execute, 179 ArrayRef<KeyTy> Depends, 180 bool IsProfitable) { 181 ForwardingAction Result; 182 Result.Decision = 183 IsProfitable ? FD_CanForwardProfitably : FD_CanForwardLeaf; 184 Result.Execute = std::move(Execute); 185 Result.Depends.append(Depends.begin(), Depends.end()); 186 return Result; 187 } 188 }; 189 190 /// Implementation of operand tree forwarding for a specific SCoP. 191 /// 192 /// For a statement that requires a scalar value (through a value read 193 /// MemoryAccess), see if its operand can be moved into the statement. If so, 194 /// the MemoryAccess is removed and the all the operand tree instructions are 195 /// moved into the statement. All original instructions are left in the source 196 /// statements. The simplification pass can clean these up. 197 class ForwardOpTreeImpl : ZoneAlgorithm { 198 private: 199 using MemoizationTy = DenseMap<ForwardingAction::KeyTy, ForwardingAction>; 200 201 /// Scope guard to limit the number of isl operations for this pass. 202 IslMaxOperationsGuard &MaxOpGuard; 203 204 /// How many instructions have been copied to other statements. 205 int NumInstructionsCopied = 0; 206 207 /// Number of loads forwarded because their value was known. 208 int NumKnownLoadsForwarded = 0; 209 210 /// Number of values reloaded from known array elements. 211 int NumReloads = 0; 212 213 /// How many read-only accesses have been copied. 214 int NumReadOnlyCopied = 0; 215 216 /// How many operand trees have been forwarded. 217 int NumForwardedTrees = 0; 218 219 /// Number of statements with at least one forwarded operand tree. 220 int NumModifiedStmts = 0; 221 222 /// Whether we carried out at least one change to the SCoP. 223 bool Modified = false; 224 225 /// Cache of how to forward values. 226 /// The key of this map is the llvm::Value to be forwarded and the 227 /// polly::ScopStmt it is forwarded from. This is because the same llvm::Value 228 /// can evaluate differently depending on where it is evaluate. For instance, 229 /// a synthesizable Scev represents a recurrence with an loop but the loop's 230 /// exit value if evaluated after the loop. 231 /// The cached results are only valid for the current TargetStmt. 232 /// CHECKME: ScalarEvolution::getScevAtScope should take care for getting the 233 /// exit value when instantiated outside of the loop. The primary concern is 234 /// ambiguity when crossing PHI nodes, which currently is not supported. 235 MemoizationTy ForwardingActions; 236 237 /// Contains the zones where array elements are known to contain a specific 238 /// value. 239 /// { [Element[] -> Zone[]] -> ValInst[] } 240 /// @see computeKnown() 241 isl::union_map Known; 242 243 /// Translator for newly introduced ValInsts to already existing ValInsts such 244 /// that new introduced load instructions can reuse the Known analysis of its 245 /// original load. { ValInst[] -> ValInst[] } 246 isl::union_map Translator; 247 248 /// Get list of array elements that do contain the same ValInst[] at Domain[]. 249 /// 250 /// @param ValInst { Domain[] -> ValInst[] } 251 /// The values for which we search for alternative locations, 252 /// per statement instance. 253 /// 254 /// @return { Domain[] -> Element[] } 255 /// For each statement instance, the array elements that contain the 256 /// same ValInst. 257 isl::union_map findSameContentElements(isl::union_map ValInst) { 258 assert(!ValInst.is_single_valued().is_false()); 259 260 // { Domain[] } 261 isl::union_set Domain = ValInst.domain(); 262 263 // { Domain[] -> Scatter[] } 264 isl::union_map Schedule = getScatterFor(Domain); 265 266 // { Element[] -> [Scatter[] -> ValInst[]] } 267 isl::union_map MustKnownCurried = 268 convertZoneToTimepoints(Known, isl::dim::in, false, true).curry(); 269 270 // { [Domain[] -> ValInst[]] -> Scatter[] } 271 isl::union_map DomValSched = ValInst.domain_map().apply_range(Schedule); 272 273 // { [Scatter[] -> ValInst[]] -> [Domain[] -> ValInst[]] } 274 isl::union_map SchedValDomVal = 275 DomValSched.range_product(ValInst.range_map()).reverse(); 276 277 // { Element[] -> [Domain[] -> ValInst[]] } 278 isl::union_map MustKnownInst = MustKnownCurried.apply_range(SchedValDomVal); 279 280 // { Domain[] -> Element[] } 281 isl::union_map MustKnownMap = 282 MustKnownInst.uncurry().domain().unwrap().reverse(); 283 simplify(MustKnownMap); 284 285 return MustKnownMap; 286 } 287 288 /// Find a single array element for each statement instance, within a single 289 /// array. 290 /// 291 /// @param MustKnown { Domain[] -> Element[] } 292 /// Set of candidate array elements. 293 /// @param Domain { Domain[] } 294 /// The statement instance for which we need elements for. 295 /// 296 /// @return { Domain[] -> Element[] } 297 /// For each statement instance, an array element out of @p MustKnown. 298 /// All array elements must be in the same array (Polly does not yet 299 /// support reading from different accesses using the same 300 /// MemoryAccess). If no mapping for all of @p Domain exists, returns 301 /// null. 302 isl::map singleLocation(isl::union_map MustKnown, isl::set Domain) { 303 // { Domain[] -> Element[] } 304 isl::map Result; 305 306 // MemoryAccesses can read only elements from a single array 307 // (i.e. not: { Dom[0] -> A[0]; Dom[1] -> B[1] }). 308 // Look through all spaces until we find one that contains at least the 309 // wanted statement instance.s 310 for (isl::map Map : MustKnown.get_map_list()) { 311 // Get the array this is accessing. 312 isl::id ArrayId = Map.get_tuple_id(isl::dim::out); 313 ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(ArrayId.get_user()); 314 315 // No support for generation of indirect array accesses. 316 if (SAI->getBasePtrOriginSAI()) 317 continue; 318 319 // Determine whether this map contains all wanted values. 320 isl::set MapDom = Map.domain(); 321 if (!Domain.is_subset(MapDom).is_true()) 322 continue; 323 324 // There might be multiple array elements that contain the same value, but 325 // choose only one of them. lexmin is used because it returns a one-value 326 // mapping, we do not care about which one. 327 // TODO: Get the simplest access function. 328 Result = Map.lexmin(); 329 break; 330 } 331 332 return Result; 333 } 334 335 public: 336 ForwardOpTreeImpl(Scop *S, LoopInfo *LI, IslMaxOperationsGuard &MaxOpGuard) 337 : ZoneAlgorithm("polly-optree", S, LI), MaxOpGuard(MaxOpGuard) {} 338 339 /// Compute the zones of known array element contents. 340 /// 341 /// @return True if the computed #Known is usable. 342 bool computeKnownValues() { 343 isl::union_map MustKnown, KnownFromLoad, KnownFromInit; 344 345 // Check that nothing strange occurs. 346 collectCompatibleElts(); 347 348 { 349 IslQuotaScope QuotaScope = MaxOpGuard.enter(); 350 351 computeCommon(); 352 if (NormalizePHIs) 353 computeNormalizedPHIs(); 354 Known = computeKnown(true, true); 355 356 // Preexisting ValInsts use the known content analysis of themselves. 357 Translator = makeIdentityMap(Known.range(), false); 358 } 359 360 if (!Known || !Translator || !NormalizeMap) { 361 assert(isl_ctx_last_error(IslCtx.get()) == isl_error_quota); 362 Known = nullptr; 363 Translator = nullptr; 364 NormalizeMap = nullptr; 365 LLVM_DEBUG(dbgs() << "Known analysis exceeded max_operations\n"); 366 return false; 367 } 368 369 KnownAnalyzed++; 370 LLVM_DEBUG(dbgs() << "All known: " << Known << "\n"); 371 372 return true; 373 } 374 375 void printStatistics(raw_ostream &OS, int Indent = 0) { 376 OS.indent(Indent) << "Statistics {\n"; 377 OS.indent(Indent + 4) << "Instructions copied: " << NumInstructionsCopied 378 << '\n'; 379 OS.indent(Indent + 4) << "Known loads forwarded: " << NumKnownLoadsForwarded 380 << '\n'; 381 OS.indent(Indent + 4) << "Reloads: " << NumReloads << '\n'; 382 OS.indent(Indent + 4) << "Read-only accesses copied: " << NumReadOnlyCopied 383 << '\n'; 384 OS.indent(Indent + 4) << "Operand trees forwarded: " << NumForwardedTrees 385 << '\n'; 386 OS.indent(Indent + 4) << "Statements with forwarded operand trees: " 387 << NumModifiedStmts << '\n'; 388 OS.indent(Indent) << "}\n"; 389 } 390 391 void printStatements(raw_ostream &OS, int Indent = 0) const { 392 OS.indent(Indent) << "After statements {\n"; 393 for (auto &Stmt : *S) { 394 OS.indent(Indent + 4) << Stmt.getBaseName() << "\n"; 395 for (auto *MA : Stmt) 396 MA->print(OS); 397 398 OS.indent(Indent + 12); 399 Stmt.printInstructions(OS); 400 } 401 OS.indent(Indent) << "}\n"; 402 } 403 404 /// Create a new MemoryAccess of type read and MemoryKind::Array. 405 /// 406 /// @param Stmt The statement in which the access occurs. 407 /// @param LI The instruction that does the access. 408 /// @param AccessRelation The array element that each statement instance 409 /// accesses. 410 /// 411 /// @param The newly created access. 412 MemoryAccess *makeReadArrayAccess(ScopStmt *Stmt, LoadInst *LI, 413 isl::map AccessRelation) { 414 isl::id ArrayId = AccessRelation.get_tuple_id(isl::dim::out); 415 ScopArrayInfo *SAI = reinterpret_cast<ScopArrayInfo *>(ArrayId.get_user()); 416 417 // Create a dummy SCEV access, to be replaced anyway. 418 SmallVector<const SCEV *, 4> Sizes; 419 Sizes.reserve(SAI->getNumberOfDimensions()); 420 SmallVector<const SCEV *, 4> Subscripts; 421 Subscripts.reserve(SAI->getNumberOfDimensions()); 422 for (unsigned i = 0; i < SAI->getNumberOfDimensions(); i += 1) { 423 Sizes.push_back(SAI->getDimensionSize(i)); 424 Subscripts.push_back(nullptr); 425 } 426 427 MemoryAccess *Access = 428 new MemoryAccess(Stmt, LI, MemoryAccess::READ, SAI->getBasePtr(), 429 LI->getType(), true, {}, Sizes, LI, MemoryKind::Array); 430 S->addAccessFunction(Access); 431 Stmt->addAccess(Access, true); 432 433 Access->setNewAccessRelation(AccessRelation); 434 435 return Access; 436 } 437 438 /// Forward a load by reading from an array element that contains the same 439 /// value. Typically the location it was loaded from. 440 /// 441 /// @param TargetStmt The statement the operand tree will be copied to. 442 /// @param Inst The (possibly speculatable) instruction to forward. 443 /// @param UseStmt The statement that uses @p Inst. 444 /// @param UseLoop The loop @p Inst is used in. 445 /// @param DefStmt The statement @p Inst is defined in. 446 /// @param DefLoop The loop which contains @p Inst. 447 /// 448 /// @return A ForwardingAction object describing the feasibility and 449 /// profitability evaluation and the callback carrying-out the value 450 /// forwarding. 451 ForwardingAction forwardKnownLoad(ScopStmt *TargetStmt, Instruction *Inst, 452 ScopStmt *UseStmt, Loop *UseLoop, 453 ScopStmt *DefStmt, Loop *DefLoop) { 454 // Cannot do anything without successful known analysis. 455 if (Known.is_null() || Translator.is_null() || 456 MaxOpGuard.hasQuotaExceeded()) 457 return ForwardingAction::notApplicable(); 458 459 LoadInst *LI = dyn_cast<LoadInst>(Inst); 460 if (!LI) 461 return ForwardingAction::notApplicable(); 462 463 ForwardingDecision OpDecision = 464 forwardTree(TargetStmt, LI->getPointerOperand(), DefStmt, DefLoop); 465 switch (OpDecision) { 466 case FD_CanForwardProfitably: 467 case FD_CanForwardLeaf: 468 break; 469 case FD_CannotForward: 470 return ForwardingAction::cannotForward(); 471 default: 472 llvm_unreachable("Shouldn't return this"); 473 } 474 475 MemoryAccess *Access = TargetStmt->getArrayAccessOrNULLFor(LI); 476 if (Access) { 477 // If the load is already in the statement, no forwarding is necessary. 478 // However, it might happen that the LoadInst is already present in the 479 // statement's instruction list. In that case we do as follows: 480 // - For the evaluation, we can trivially forward it as it is 481 // benefit of forwarding an already present instruction. 482 // - For the execution, prepend the instruction (to make it 483 // available to all instructions following in the instruction list), but 484 // do not add another MemoryAccess. 485 auto ExecAction = [this, TargetStmt, LI, Access]() -> bool { 486 TargetStmt->prependInstruction(LI); 487 LLVM_DEBUG( 488 dbgs() << " forwarded known load with preexisting MemoryAccess" 489 << Access << "\n"); 490 491 NumKnownLoadsForwarded++; 492 TotalKnownLoadsForwarded++; 493 return true; 494 }; 495 return ForwardingAction::canForward( 496 ExecAction, {{LI->getPointerOperand(), DefStmt}}, true); 497 } 498 499 // Allow the following Isl calculations (until we return the 500 // ForwardingAction, excluding the code inside the lambda that will be 501 // executed later) to fail. 502 IslQuotaScope QuotaScope = MaxOpGuard.enter(); 503 504 // { DomainDef[] -> ValInst[] } 505 isl::map ExpectedVal = makeValInst(Inst, UseStmt, UseLoop); 506 assert(!isNormalized(ExpectedVal).is_false() && 507 "LoadInsts are always normalized"); 508 509 // { DomainUse[] -> DomainTarget[] } 510 isl::map UseToTarget = getDefToTarget(UseStmt, TargetStmt); 511 512 // { DomainTarget[] -> ValInst[] } 513 isl::map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget); 514 isl::union_map TranslatedExpectedVal = 515 isl::union_map(TargetExpectedVal).apply_range(Translator); 516 517 // { DomainTarget[] -> Element[] } 518 isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal); 519 520 isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt)); 521 if (!SameVal) 522 return ForwardingAction::notApplicable(); 523 524 LLVM_DEBUG(dbgs() << " expected values where " << TargetExpectedVal 525 << "\n"); 526 LLVM_DEBUG(dbgs() << " candidate elements where " << Candidates 527 << "\n"); 528 529 // { ValInst[] } 530 isl::space ValInstSpace = ExpectedVal.get_space().range(); 531 532 // After adding a new load to the SCoP, also update the Known content 533 // about it. The new load will have a known ValInst of 534 // { [DomainTarget[] -> Value[]] } 535 // but which -- because it is a copy of it -- has same value as the 536 // { [DomainDef[] -> Value[]] } 537 // that it replicates. Instead of cloning the known content of 538 // [DomainDef[] -> Value[]] 539 // for DomainTarget[], we add a 'translator' that maps 540 // [DomainTarget[] -> Value[]] to [DomainDef[] -> Value[]] 541 // before comparing to the known content. 542 // TODO: 'Translator' could also be used to map PHINodes to their incoming 543 // ValInsts. 544 isl::map LocalTranslator; 545 if (!ValInstSpace.is_wrapping().is_false()) { 546 // { DefDomain[] -> Value[] } 547 isl::map ValInsts = ExpectedVal.range().unwrap(); 548 549 // { DefDomain[] } 550 isl::set DefDomain = ValInsts.domain(); 551 552 // { Value[] } 553 isl::space ValSpace = ValInstSpace.unwrap().range(); 554 555 // { Value[] -> Value[] } 556 isl::map ValToVal = 557 isl::map::identity(ValSpace.map_from_domain_and_range(ValSpace)); 558 559 // { DomainDef[] -> DomainTarget[] } 560 isl::map DefToTarget = getDefToTarget(DefStmt, TargetStmt); 561 562 // { [TargetDomain[] -> Value[]] -> [DefDomain[] -> Value] } 563 LocalTranslator = DefToTarget.reverse().product(ValToVal); 564 LLVM_DEBUG(dbgs() << " local translator is " << LocalTranslator 565 << "\n"); 566 567 if (!LocalTranslator) 568 return ForwardingAction::notApplicable(); 569 } 570 571 auto ExecAction = [this, TargetStmt, LI, SameVal, 572 LocalTranslator]() -> bool { 573 TargetStmt->prependInstruction(LI); 574 MemoryAccess *Access = makeReadArrayAccess(TargetStmt, LI, SameVal); 575 LLVM_DEBUG(dbgs() << " forwarded known load with new MemoryAccess" 576 << Access << "\n"); 577 578 if (LocalTranslator) 579 Translator = Translator.add_map(LocalTranslator); 580 581 NumKnownLoadsForwarded++; 582 TotalKnownLoadsForwarded++; 583 return true; 584 }; 585 return ForwardingAction::canForward( 586 ExecAction, {{LI->getPointerOperand(), DefStmt}}, true); 587 } 588 589 /// Forward a scalar by redirecting the access to an array element that stores 590 /// the same value. 591 /// 592 /// @param TargetStmt The statement the operand tree will be copied to. 593 /// @param Inst The scalar to forward. 594 /// @param UseStmt The statement that uses @p Inst. 595 /// @param UseLoop The loop @p Inst is used in. 596 /// @param DefStmt The statement @p Inst is defined in. 597 /// @param DefLoop The loop which contains @p Inst. 598 /// 599 /// @return A ForwardingAction object describing the feasibility and 600 /// profitability evaluation and the callback carrying-out the value 601 /// forwarding. 602 ForwardingAction reloadKnownContent(ScopStmt *TargetStmt, Instruction *Inst, 603 ScopStmt *UseStmt, Loop *UseLoop, 604 ScopStmt *DefStmt, Loop *DefLoop) { 605 // Cannot do anything without successful known analysis. 606 if (Known.is_null() || Translator.is_null() || 607 MaxOpGuard.hasQuotaExceeded()) 608 return ForwardingAction::notApplicable(); 609 610 // Don't spend too much time analyzing whether it can be reloaded. 611 IslQuotaScope QuotaScope = MaxOpGuard.enter(); 612 613 // { DomainDef[] -> ValInst[] } 614 isl::union_map ExpectedVal = makeNormalizedValInst(Inst, UseStmt, UseLoop); 615 616 // { DomainUse[] -> DomainTarget[] } 617 isl::map UseToTarget = getDefToTarget(UseStmt, TargetStmt); 618 619 // { DomainTarget[] -> ValInst[] } 620 isl::union_map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget); 621 isl::union_map TranslatedExpectedVal = 622 TargetExpectedVal.apply_range(Translator); 623 624 // { DomainTarget[] -> Element[] } 625 isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal); 626 627 isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt)); 628 simplify(SameVal); 629 if (!SameVal) 630 return ForwardingAction::notApplicable(); 631 632 auto ExecAction = [this, TargetStmt, Inst, SameVal]() { 633 MemoryAccess *Access = TargetStmt->lookupInputAccessOf(Inst); 634 if (!Access) 635 Access = TargetStmt->ensureValueRead(Inst); 636 Access->setNewAccessRelation(SameVal); 637 638 TotalReloads++; 639 NumReloads++; 640 return false; 641 }; 642 643 return ForwardingAction::canForward(ExecAction, {}, true); 644 } 645 646 /// Forwards a speculatively executable instruction. 647 /// 648 /// @param TargetStmt The statement the operand tree will be copied to. 649 /// @param UseInst The (possibly speculatable) instruction to forward. 650 /// @param DefStmt The statement @p UseInst is defined in. 651 /// @param DefLoop The loop which contains @p UseInst. 652 /// 653 /// @return A ForwardingAction object describing the feasibility and 654 /// profitability evaluation and the callback carrying-out the value 655 /// forwarding. 656 ForwardingAction forwardSpeculatable(ScopStmt *TargetStmt, 657 Instruction *UseInst, ScopStmt *DefStmt, 658 Loop *DefLoop) { 659 // PHIs, unless synthesizable, are not yet supported. 660 if (isa<PHINode>(UseInst)) 661 return ForwardingAction::notApplicable(); 662 663 // Compatible instructions must satisfy the following conditions: 664 // 1. Idempotent (instruction will be copied, not moved; although its 665 // original instance might be removed by simplification) 666 // 2. Not access memory (There might be memory writes between) 667 // 3. Not cause undefined behaviour (we might copy to a location when the 668 // original instruction was no executed; this is currently not possible 669 // because we do not forward PHINodes) 670 // 4. Not leak memory if executed multiple times (i.e. malloc) 671 // 672 // Instruction::mayHaveSideEffects is not sufficient because it considers 673 // malloc to not have side-effects. llvm::isSafeToSpeculativelyExecute is 674 // not sufficient because it allows memory accesses. 675 if (mayBeMemoryDependent(*UseInst)) 676 return ForwardingAction::notApplicable(); 677 678 SmallVector<ForwardingAction::KeyTy, 4> Depends; 679 Depends.reserve(UseInst->getNumOperands()); 680 for (Value *OpVal : UseInst->operand_values()) { 681 ForwardingDecision OpDecision = 682 forwardTree(TargetStmt, OpVal, DefStmt, DefLoop); 683 switch (OpDecision) { 684 case FD_CannotForward: 685 return ForwardingAction::cannotForward(); 686 687 case FD_CanForwardLeaf: 688 case FD_CanForwardProfitably: 689 Depends.emplace_back(OpVal, DefStmt); 690 break; 691 692 case FD_NotApplicable: 693 case FD_Unknown: 694 llvm_unreachable( 695 "forwardTree should never return FD_NotApplicable/FD_Unknown"); 696 } 697 } 698 699 auto ExecAction = [this, TargetStmt, UseInst]() { 700 // To ensure the right order, prepend this instruction before its 701 // operands. This ensures that its operands are inserted before the 702 // instruction using them. 703 TargetStmt->prependInstruction(UseInst); 704 NumInstructionsCopied++; 705 TotalInstructionsCopied++; 706 return true; 707 }; 708 return ForwardingAction::canForward(ExecAction, Depends, true); 709 } 710 711 /// Determines whether an operand tree can be forwarded and returns 712 /// instructions how to do so in the form of a ForwardingAction object. 713 /// 714 /// @param TargetStmt The statement the operand tree will be copied to. 715 /// @param UseVal The value (usually an instruction) which is root of an 716 /// operand tree. 717 /// @param UseStmt The statement that uses @p UseVal. 718 /// @param UseLoop The loop @p UseVal is used in. 719 /// 720 /// @return A ForwardingAction object describing the feasibility and 721 /// profitability evaluation and the callback carrying-out the value 722 /// forwarding. 723 ForwardingAction forwardTreeImpl(ScopStmt *TargetStmt, Value *UseVal, 724 ScopStmt *UseStmt, Loop *UseLoop) { 725 ScopStmt *DefStmt = nullptr; 726 Loop *DefLoop = nullptr; 727 728 // { DefDomain[] -> TargetDomain[] } 729 isl::map DefToTarget; 730 731 VirtualUse VUse = VirtualUse::create(UseStmt, UseLoop, UseVal, true); 732 switch (VUse.getKind()) { 733 case VirtualUse::Constant: 734 case VirtualUse::Block: 735 case VirtualUse::Hoisted: 736 // These can be used anywhere without special considerations. 737 return ForwardingAction::triviallyForwardable(false); 738 739 case VirtualUse::Synthesizable: { 740 // Check if the value is synthesizable at the new location as well. This 741 // might be possible when leaving a loop for which ScalarEvolution is 742 // unable to derive the exit value for. 743 // TODO: If there is a LCSSA PHI at the loop exit, use that one. 744 // If the SCEV contains a SCEVAddRecExpr, we currently depend on that we 745 // do not forward past its loop header. This would require us to use a 746 // previous loop induction variable instead the current one. We currently 747 // do not allow forwarding PHI nodes, thus this should never occur (the 748 // only exception where no phi is necessary being an unreachable loop 749 // without edge from the outside). 750 VirtualUse TargetUse = VirtualUse::create( 751 S, TargetStmt, TargetStmt->getSurroundingLoop(), UseVal, true); 752 if (TargetUse.getKind() == VirtualUse::Synthesizable) 753 return ForwardingAction::triviallyForwardable(false); 754 755 LLVM_DEBUG( 756 dbgs() << " Synthesizable would not be synthesizable anymore: " 757 << *UseVal << "\n"); 758 return ForwardingAction::cannotForward(); 759 } 760 761 case VirtualUse::ReadOnly: { 762 // If we model read-only scalars, we need to create a MemoryAccess for it. 763 auto ExecAction = [this, TargetStmt, UseVal]() { 764 if (ModelReadOnlyScalars) 765 TargetStmt->ensureValueRead(UseVal); 766 767 NumReadOnlyCopied++; 768 TotalReadOnlyCopied++; 769 770 // Note that we cannot return true here. With a operand tree 771 // depth of 0, UseVal is the use in TargetStmt that we try to replace. 772 // With -polly-analyze-read-only-scalars=true we would ensure the 773 // existence of a MemoryAccess (which already exists for a leaf) and be 774 // removed again by tryForwardTree because it's goal is to remove this 775 // scalar MemoryAccess. It interprets FD_CanForwardTree as the 776 // permission to do so. 777 return false; 778 }; 779 return ForwardingAction::canForward(ExecAction, {}, false); 780 } 781 782 case VirtualUse::Intra: 783 // Knowing that UseStmt and DefStmt are the same statement instance, just 784 // reuse the information about UseStmt for DefStmt 785 DefStmt = UseStmt; 786 787 LLVM_FALLTHROUGH; 788 case VirtualUse::Inter: 789 Instruction *Inst = cast<Instruction>(UseVal); 790 791 if (!DefStmt) { 792 DefStmt = S->getStmtFor(Inst); 793 if (!DefStmt) 794 return ForwardingAction::cannotForward(); 795 } 796 797 DefLoop = LI->getLoopFor(Inst->getParent()); 798 799 ForwardingAction SpeculativeResult = 800 forwardSpeculatable(TargetStmt, Inst, DefStmt, DefLoop); 801 if (SpeculativeResult.Decision != FD_NotApplicable) 802 return SpeculativeResult; 803 804 ForwardingAction KnownResult = forwardKnownLoad( 805 TargetStmt, Inst, UseStmt, UseLoop, DefStmt, DefLoop); 806 if (KnownResult.Decision != FD_NotApplicable) 807 return KnownResult; 808 809 ForwardingAction ReloadResult = reloadKnownContent( 810 TargetStmt, Inst, UseStmt, UseLoop, DefStmt, DefLoop); 811 if (ReloadResult.Decision != FD_NotApplicable) 812 return ReloadResult; 813 814 // When no method is found to forward the operand tree, we effectively 815 // cannot handle it. 816 LLVM_DEBUG(dbgs() << " Cannot forward instruction: " << *Inst << "\n"); 817 return ForwardingAction::cannotForward(); 818 } 819 820 llvm_unreachable("Case unhandled"); 821 } 822 823 /// Determines whether an operand tree can be forwarded. Previous evaluations 824 /// are cached. 825 /// 826 /// @param TargetStmt The statement the operand tree will be copied to. 827 /// @param UseVal The value (usually an instruction) which is root of an 828 /// operand tree. 829 /// @param UseStmt The statement that uses @p UseVal. 830 /// @param UseLoop The loop @p UseVal is used in. 831 /// 832 /// @return FD_CannotForward if @p UseVal cannot be forwarded. 833 /// FD_CanForwardLeaf if @p UseVal is forwardable, but not 834 /// profitable. 835 /// FD_CanForwardProfitably if @p UseVal is forwardable and useful to 836 /// do. 837 ForwardingDecision forwardTree(ScopStmt *TargetStmt, Value *UseVal, 838 ScopStmt *UseStmt, Loop *UseLoop) { 839 // Lookup any cached evaluation. 840 auto It = ForwardingActions.find({UseVal, UseStmt}); 841 if (It != ForwardingActions.end()) 842 return It->second.Decision; 843 844 // Make a new evaluation. 845 ForwardingAction Action = 846 forwardTreeImpl(TargetStmt, UseVal, UseStmt, UseLoop); 847 ForwardingDecision Result = Action.Decision; 848 849 // Remember for the next time. 850 assert(!ForwardingActions.count({UseVal, UseStmt}) && 851 "circular dependency?"); 852 ForwardingActions.insert({{UseVal, UseStmt}, std::move(Action)}); 853 854 return Result; 855 } 856 857 /// Forward an operand tree using cached actions. 858 /// 859 /// @param Stmt Statement the operand tree is moved into. 860 /// @param UseVal Root of the operand tree within @p Stmt. 861 /// @param RA The MemoryAccess for @p UseVal that the forwarding intends 862 /// to remove. 863 void applyForwardingActions(ScopStmt *Stmt, Value *UseVal, MemoryAccess *RA) { 864 using ChildItTy = 865 decltype(std::declval<ForwardingAction>().Depends.begin()); 866 using EdgeTy = std::pair<ForwardingAction *, ChildItTy>; 867 868 DenseSet<ForwardingAction::KeyTy> Visited; 869 SmallVector<EdgeTy, 32> Stack; 870 SmallVector<ForwardingAction *, 32> Ordered; 871 872 // Seed the tree search using the root value. 873 assert(ForwardingActions.count({UseVal, Stmt})); 874 ForwardingAction *RootAction = &ForwardingActions[{UseVal, Stmt}]; 875 Stack.emplace_back(RootAction, RootAction->Depends.begin()); 876 877 // Compute the postorder of the operand tree: all operands of an instruction 878 // must be visited before the instruction itself. As an additional 879 // requirement, the topological ordering must be 'compact': Any subtree node 880 // must not be interleaved with nodes from a non-shared subtree. This is 881 // because the same llvm::Instruction can be materialized multiple times as 882 // used at different ScopStmts which might be different values. Intersecting 883 // these lifetimes may result in miscompilations. 884 // FIXME: Intersecting lifetimes might still be possible for the roots 885 // themselves, since instructions are just prepended to a ScopStmt's 886 // instruction list. 887 while (!Stack.empty()) { 888 EdgeTy &Top = Stack.back(); 889 ForwardingAction *TopAction = Top.first; 890 ChildItTy &TopEdge = Top.second; 891 892 if (TopEdge == TopAction->Depends.end()) { 893 // Postorder sorting 894 Ordered.push_back(TopAction); 895 Stack.pop_back(); 896 continue; 897 } 898 ForwardingAction::KeyTy Key = *TopEdge; 899 900 // Next edge for this level 901 ++TopEdge; 902 903 auto VisitIt = Visited.insert(Key); 904 if (!VisitIt.second) 905 continue; 906 907 assert(ForwardingActions.count(Key) && 908 "Must not insert new actions during execution phase"); 909 ForwardingAction *ChildAction = &ForwardingActions[Key]; 910 Stack.emplace_back(ChildAction, ChildAction->Depends.begin()); 911 } 912 913 // Actually, we need the reverse postorder because actions prepend new 914 // instructions. Therefore, the first one will always be the action for the 915 // operand tree's root. 916 assert(Ordered.back() == RootAction); 917 if (RootAction->Execute()) 918 Stmt->removeSingleMemoryAccess(RA); 919 Ordered.pop_back(); 920 for (auto DepAction : reverse(Ordered)) { 921 assert(DepAction->Decision != FD_Unknown && 922 DepAction->Decision != FD_CannotForward); 923 assert(DepAction != RootAction); 924 DepAction->Execute(); 925 } 926 } 927 928 /// Try to forward an operand tree rooted in @p RA. 929 bool tryForwardTree(MemoryAccess *RA) { 930 assert(RA->isLatestScalarKind()); 931 LLVM_DEBUG(dbgs() << "Trying to forward operand tree " << RA << "...\n"); 932 933 ScopStmt *Stmt = RA->getStatement(); 934 Loop *InLoop = Stmt->getSurroundingLoop(); 935 936 isl::map TargetToUse; 937 if (!Known.is_null()) { 938 isl::space DomSpace = Stmt->getDomainSpace(); 939 TargetToUse = 940 isl::map::identity(DomSpace.map_from_domain_and_range(DomSpace)); 941 } 942 943 ForwardingDecision Assessment = 944 forwardTree(Stmt, RA->getAccessValue(), Stmt, InLoop); 945 946 // If considered feasible and profitable, forward it. 947 bool Changed = false; 948 if (Assessment == FD_CanForwardProfitably) { 949 applyForwardingActions(Stmt, RA->getAccessValue(), RA); 950 Changed = true; 951 } 952 953 ForwardingActions.clear(); 954 return Changed; 955 } 956 957 /// Return which SCoP this instance is processing. 958 Scop *getScop() const { return S; } 959 960 /// Run the algorithm: Use value read accesses as operand tree roots and try 961 /// to forward them into the statement. 962 bool forwardOperandTrees() { 963 for (ScopStmt &Stmt : *S) { 964 bool StmtModified = false; 965 966 // Because we are modifying the MemoryAccess list, collect them first to 967 // avoid iterator invalidation. 968 SmallVector<MemoryAccess *, 16> Accs; 969 for (MemoryAccess *RA : Stmt) { 970 if (!RA->isRead()) 971 continue; 972 if (!RA->isLatestScalarKind()) 973 continue; 974 975 Accs.push_back(RA); 976 } 977 978 for (MemoryAccess *RA : Accs) { 979 if (tryForwardTree(RA)) { 980 Modified = true; 981 StmtModified = true; 982 NumForwardedTrees++; 983 TotalForwardedTrees++; 984 } 985 } 986 987 if (StmtModified) { 988 NumModifiedStmts++; 989 TotalModifiedStmts++; 990 } 991 } 992 993 if (Modified) { 994 ScopsModified++; 995 S->realignParams(); 996 } 997 return Modified; 998 } 999 1000 /// Print the pass result, performed transformations and the SCoP after the 1001 /// transformation. 1002 void print(raw_ostream &OS, int Indent = 0) { 1003 printStatistics(OS, Indent); 1004 1005 if (!Modified) { 1006 // This line can easily be checked in regression tests. 1007 OS << "ForwardOpTree executed, but did not modify anything\n"; 1008 return; 1009 } 1010 1011 printStatements(OS, Indent); 1012 } 1013 }; 1014 1015 /// Pass that redirects scalar reads to array elements that are known to contain 1016 /// the same value. 1017 /// 1018 /// This reduces the number of scalar accesses and therefore potentially 1019 /// increases the freedom of the scheduler. In the ideal case, all reads of a 1020 /// scalar definition are redirected (We currently do not care about removing 1021 /// the write in this case). This is also useful for the main DeLICM pass as 1022 /// there are less scalars to be mapped. 1023 class ForwardOpTree : public ScopPass { 1024 private: 1025 /// The pass implementation, also holding per-scop data. 1026 std::unique_ptr<ForwardOpTreeImpl> Impl; 1027 1028 public: 1029 static char ID; 1030 1031 explicit ForwardOpTree() : ScopPass(ID) {} 1032 ForwardOpTree(const ForwardOpTree &) = delete; 1033 ForwardOpTree &operator=(const ForwardOpTree &) = delete; 1034 1035 void getAnalysisUsage(AnalysisUsage &AU) const override { 1036 AU.addRequiredTransitive<ScopInfoRegionPass>(); 1037 AU.addRequired<LoopInfoWrapperPass>(); 1038 AU.setPreservesAll(); 1039 } 1040 1041 bool runOnScop(Scop &S) override { 1042 // Free resources for previous SCoP's computation, if not yet done. 1043 releaseMemory(); 1044 1045 LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 1046 1047 { 1048 IslMaxOperationsGuard MaxOpGuard(S.getIslCtx().get(), MaxOps, false); 1049 Impl = std::make_unique<ForwardOpTreeImpl>(&S, &LI, MaxOpGuard); 1050 1051 if (AnalyzeKnown) { 1052 LLVM_DEBUG(dbgs() << "Prepare forwarders...\n"); 1053 Impl->computeKnownValues(); 1054 } 1055 1056 LLVM_DEBUG(dbgs() << "Forwarding operand trees...\n"); 1057 Impl->forwardOperandTrees(); 1058 1059 if (MaxOpGuard.hasQuotaExceeded()) { 1060 LLVM_DEBUG(dbgs() << "Not all operations completed because of " 1061 "max_operations exceeded\n"); 1062 KnownOutOfQuota++; 1063 } 1064 } 1065 1066 LLVM_DEBUG(dbgs() << "\nFinal Scop:\n"); 1067 LLVM_DEBUG(dbgs() << S); 1068 1069 // Update statistics 1070 auto ScopStats = S.getStatistics(); 1071 NumValueWrites += ScopStats.NumValueWrites; 1072 NumValueWritesInLoops += ScopStats.NumValueWritesInLoops; 1073 NumPHIWrites += ScopStats.NumPHIWrites; 1074 NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops; 1075 NumSingletonWrites += ScopStats.NumSingletonWrites; 1076 NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops; 1077 1078 return false; 1079 } 1080 1081 void printScop(raw_ostream &OS, Scop &S) const override { 1082 if (!Impl) 1083 return; 1084 1085 assert(Impl->getScop() == &S); 1086 Impl->print(OS); 1087 } 1088 1089 void releaseMemory() override { Impl.reset(); } 1090 }; // class ForwardOpTree 1091 1092 char ForwardOpTree::ID; 1093 } // namespace 1094 1095 ScopPass *polly::createForwardOpTreePass() { return new ForwardOpTree(); } 1096 1097 INITIALIZE_PASS_BEGIN(ForwardOpTree, "polly-optree", 1098 "Polly - Forward operand tree", false, false) 1099 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 1100 INITIALIZE_PASS_END(ForwardOpTree, "polly-optree", 1101 "Polly - Forward operand tree", false, false) 1102