1 //===- ForwardOpTree.h ------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Move instructions between statements. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "polly/ForwardOpTree.h" 14 #include "polly/Options.h" 15 #include "polly/ScopBuilder.h" 16 #include "polly/ScopInfo.h" 17 #include "polly/ScopPass.h" 18 #include "polly/Support/GICHelper.h" 19 #include "polly/Support/ISLOStream.h" 20 #include "polly/Support/ISLTools.h" 21 #include "polly/Support/VirtualInstruction.h" 22 #include "polly/ZoneAlgo.h" 23 #include "llvm/ADT/STLExtras.h" 24 #include "llvm/ADT/SmallVector.h" 25 #include "llvm/ADT/Statistic.h" 26 #include "llvm/Analysis/LoopInfo.h" 27 #include "llvm/Analysis/ValueTracking.h" 28 #include "llvm/IR/Instruction.h" 29 #include "llvm/IR/Instructions.h" 30 #include "llvm/IR/Value.h" 31 #include "llvm/Support/Casting.h" 32 #include "llvm/Support/CommandLine.h" 33 #include "llvm/Support/Compiler.h" 34 #include "llvm/Support/Debug.h" 35 #include "llvm/Support/ErrorHandling.h" 36 #include "llvm/Support/raw_ostream.h" 37 #include "isl/ctx.h" 38 #include "isl/isl-noexceptions.h" 39 #include <cassert> 40 #include <memory> 41 42 #define DEBUG_TYPE "polly-optree" 43 44 using namespace llvm; 45 using namespace polly; 46 47 static cl::opt<bool> 48 AnalyzeKnown("polly-optree-analyze-known", 49 cl::desc("Analyze array contents for load forwarding"), 50 cl::cat(PollyCategory), cl::init(true), cl::Hidden); 51 52 static cl::opt<bool> 53 NormalizePHIs("polly-optree-normalize-phi", 54 cl::desc("Replace PHIs by their incoming values"), 55 cl::cat(PollyCategory), cl::init(false), cl::Hidden); 56 57 static cl::opt<unsigned> 58 MaxOps("polly-optree-max-ops", 59 cl::desc("Maximum number of ISL operations to invest for known " 60 "analysis; 0=no limit"), 61 cl::init(1000000), cl::cat(PollyCategory), cl::Hidden); 62 63 STATISTIC(KnownAnalyzed, "Number of successfully analyzed SCoPs"); 64 STATISTIC(KnownOutOfQuota, 65 "Analyses aborted because max_operations was reached"); 66 67 STATISTIC(TotalInstructionsCopied, "Number of copied instructions"); 68 STATISTIC(TotalKnownLoadsForwarded, 69 "Number of forwarded loads because their value was known"); 70 STATISTIC(TotalReloads, "Number of reloaded values"); 71 STATISTIC(TotalReadOnlyCopied, "Number of copied read-only accesses"); 72 STATISTIC(TotalForwardedTrees, "Number of forwarded operand trees"); 73 STATISTIC(TotalModifiedStmts, 74 "Number of statements with at least one forwarded tree"); 75 76 STATISTIC(ScopsModified, "Number of SCoPs with at least one forwarded tree"); 77 78 STATISTIC(NumValueWrites, "Number of scalar value writes after OpTree"); 79 STATISTIC(NumValueWritesInLoops, 80 "Number of scalar value writes nested in affine loops after OpTree"); 81 STATISTIC(NumPHIWrites, "Number of scalar phi writes after OpTree"); 82 STATISTIC(NumPHIWritesInLoops, 83 "Number of scalar phi writes nested in affine loops after OpTree"); 84 STATISTIC(NumSingletonWrites, "Number of singleton writes after OpTree"); 85 STATISTIC(NumSingletonWritesInLoops, 86 "Number of singleton writes nested in affine loops after OpTree"); 87 88 namespace { 89 90 /// The state of whether an operand tree was/can be forwarded. 91 /// 92 /// The items apply to an instructions and its operand tree with the instruction 93 /// as the root element. If the value in question is not an instruction in the 94 /// SCoP, it can be a leaf of an instruction's operand tree. 95 enum ForwardingDecision { 96 /// The root instruction or value cannot be forwarded at all. 97 FD_CannotForward, 98 99 /// The root instruction or value can be forwarded as a leaf of a larger 100 /// operand tree. 101 /// It does not make sense to move the value itself, it would just replace it 102 /// by a use of itself. For instance, a constant "5" used in a statement can 103 /// be forwarded, but it would just replace it by the same constant "5". 104 /// However, it makes sense to move as an operand of 105 /// 106 /// %add = add 5, 5 107 /// 108 /// where "5" is moved as part of a larger operand tree. "5" would be placed 109 /// (disregarding for a moment that literal constants don't have a location 110 /// and can be used anywhere) into the same statement as %add would. 111 FD_CanForwardLeaf, 112 113 /// The root instruction can be forwarded and doing so avoids a scalar 114 /// dependency. 115 /// 116 /// This can be either because the operand tree can be moved to the target 117 /// statement, or a memory access is redirected to read from a different 118 /// location. 119 FD_CanForwardProfitably, 120 121 /// Used to indicate that a forwarding has be carried out successfully, and 122 /// the forwarded memory access can be deleted. 123 FD_DidForwardTree, 124 125 /// Used to indicate that a forwarding has be carried out successfully, and 126 /// the forwarded memory access is being reused. 127 FD_DidForwardLeaf, 128 129 /// A forwarding method cannot be applied to the operand tree. 130 /// The difference to FD_CannotForward is that there might be other methods 131 /// that can handle it. 132 /// The conditions that make an operand tree applicable must be checked even 133 /// with DoIt==true because a method following the one that returned 134 /// FD_NotApplicable might have returned FD_CanForwardTree. 135 FD_NotApplicable 136 }; 137 138 /// Implementation of operand tree forwarding for a specific SCoP. 139 /// 140 /// For a statement that requires a scalar value (through a value read 141 /// MemoryAccess), see if its operand can be moved into the statement. If so, 142 /// the MemoryAccess is removed and the all the operand tree instructions are 143 /// moved into the statement. All original instructions are left in the source 144 /// statements. The simplification pass can clean these up. 145 class ForwardOpTreeImpl : ZoneAlgorithm { 146 private: 147 /// Scope guard to limit the number of isl operations for this pass. 148 IslMaxOperationsGuard &MaxOpGuard; 149 150 /// How many instructions have been copied to other statements. 151 int NumInstructionsCopied = 0; 152 153 /// Number of loads forwarded because their value was known. 154 int NumKnownLoadsForwarded = 0; 155 156 /// Number of values reloaded from known array elements. 157 int NumReloads = 0; 158 159 /// How many read-only accesses have been copied. 160 int NumReadOnlyCopied = 0; 161 162 /// How many operand trees have been forwarded. 163 int NumForwardedTrees = 0; 164 165 /// Number of statements with at least one forwarded operand tree. 166 int NumModifiedStmts = 0; 167 168 /// Whether we carried out at least one change to the SCoP. 169 bool Modified = false; 170 171 /// Contains the zones where array elements are known to contain a specific 172 /// value. 173 /// { [Element[] -> Zone[]] -> ValInst[] } 174 /// @see computeKnown() 175 isl::union_map Known; 176 177 /// Translator for newly introduced ValInsts to already existing ValInsts such 178 /// that new introduced load instructions can reuse the Known analysis of its 179 /// original load. { ValInst[] -> ValInst[] } 180 isl::union_map Translator; 181 182 /// Get list of array elements that do contain the same ValInst[] at Domain[]. 183 /// 184 /// @param ValInst { Domain[] -> ValInst[] } 185 /// The values for which we search for alternative locations, 186 /// per statement instance. 187 /// 188 /// @return { Domain[] -> Element[] } 189 /// For each statement instance, the array elements that contain the 190 /// same ValInst. 191 isl::union_map findSameContentElements(isl::union_map ValInst) { 192 assert(!ValInst.is_single_valued().is_false()); 193 194 // { Domain[] } 195 isl::union_set Domain = ValInst.domain(); 196 197 // { Domain[] -> Scatter[] } 198 isl::union_map Schedule = getScatterFor(Domain); 199 200 // { Element[] -> [Scatter[] -> ValInst[]] } 201 isl::union_map MustKnownCurried = 202 convertZoneToTimepoints(Known, isl::dim::in, false, true).curry(); 203 204 // { [Domain[] -> ValInst[]] -> Scatter[] } 205 isl::union_map DomValSched = ValInst.domain_map().apply_range(Schedule); 206 207 // { [Scatter[] -> ValInst[]] -> [Domain[] -> ValInst[]] } 208 isl::union_map SchedValDomVal = 209 DomValSched.range_product(ValInst.range_map()).reverse(); 210 211 // { Element[] -> [Domain[] -> ValInst[]] } 212 isl::union_map MustKnownInst = MustKnownCurried.apply_range(SchedValDomVal); 213 214 // { Domain[] -> Element[] } 215 isl::union_map MustKnownMap = 216 MustKnownInst.uncurry().domain().unwrap().reverse(); 217 simplify(MustKnownMap); 218 219 return MustKnownMap; 220 } 221 222 /// Find a single array element for each statement instance, within a single 223 /// array. 224 /// 225 /// @param MustKnown { Domain[] -> Element[] } 226 /// Set of candidate array elements. 227 /// @param Domain { Domain[] } 228 /// The statement instance for which we need elements for. 229 /// 230 /// @return { Domain[] -> Element[] } 231 /// For each statement instance, an array element out of @p MustKnown. 232 /// All array elements must be in the same array (Polly does not yet 233 /// support reading from different accesses using the same 234 /// MemoryAccess). If no mapping for all of @p Domain exists, returns 235 /// null. 236 isl::map singleLocation(isl::union_map MustKnown, isl::set Domain) { 237 // { Domain[] -> Element[] } 238 isl::map Result; 239 240 // MemoryAccesses can read only elements from a single array 241 // (i.e. not: { Dom[0] -> A[0]; Dom[1] -> B[1] }). 242 // Look through all spaces until we find one that contains at least the 243 // wanted statement instance.s 244 for (isl::map Map : MustKnown.get_map_list()) { 245 // Get the array this is accessing. 246 isl::id ArrayId = Map.get_tuple_id(isl::dim::out); 247 ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(ArrayId.get_user()); 248 249 // No support for generation of indirect array accesses. 250 if (SAI->getBasePtrOriginSAI()) 251 continue; 252 253 // Determine whether this map contains all wanted values. 254 isl::set MapDom = Map.domain(); 255 if (!Domain.is_subset(MapDom).is_true()) 256 continue; 257 258 // There might be multiple array elements that contain the same value, but 259 // choose only one of them. lexmin is used because it returns a one-value 260 // mapping, we do not care about which one. 261 // TODO: Get the simplest access function. 262 Result = Map.lexmin(); 263 break; 264 } 265 266 return Result; 267 } 268 269 public: 270 ForwardOpTreeImpl(Scop *S, LoopInfo *LI, IslMaxOperationsGuard &MaxOpGuard) 271 : ZoneAlgorithm("polly-optree", S, LI), MaxOpGuard(MaxOpGuard) {} 272 273 /// Compute the zones of known array element contents. 274 /// 275 /// @return True if the computed #Known is usable. 276 bool computeKnownValues() { 277 isl::union_map MustKnown, KnownFromLoad, KnownFromInit; 278 279 // Check that nothing strange occurs. 280 collectCompatibleElts(); 281 282 { 283 IslQuotaScope QuotaScope = MaxOpGuard.enter(); 284 285 computeCommon(); 286 if (NormalizePHIs) 287 computeNormalizedPHIs(); 288 Known = computeKnown(true, true); 289 290 // Preexisting ValInsts use the known content analysis of themselves. 291 Translator = makeIdentityMap(Known.range(), false); 292 } 293 294 if (!Known || !Translator || !NormalizeMap) { 295 assert(isl_ctx_last_error(IslCtx.get()) == isl_error_quota); 296 Known = nullptr; 297 Translator = nullptr; 298 NormalizeMap = nullptr; 299 LLVM_DEBUG(dbgs() << "Known analysis exceeded max_operations\n"); 300 return false; 301 } 302 303 KnownAnalyzed++; 304 LLVM_DEBUG(dbgs() << "All known: " << Known << "\n"); 305 306 return true; 307 } 308 309 void printStatistics(raw_ostream &OS, int Indent = 0) { 310 OS.indent(Indent) << "Statistics {\n"; 311 OS.indent(Indent + 4) << "Instructions copied: " << NumInstructionsCopied 312 << '\n'; 313 OS.indent(Indent + 4) << "Known loads forwarded: " << NumKnownLoadsForwarded 314 << '\n'; 315 OS.indent(Indent + 4) << "Reloads: " << NumReloads << '\n'; 316 OS.indent(Indent + 4) << "Read-only accesses copied: " << NumReadOnlyCopied 317 << '\n'; 318 OS.indent(Indent + 4) << "Operand trees forwarded: " << NumForwardedTrees 319 << '\n'; 320 OS.indent(Indent + 4) << "Statements with forwarded operand trees: " 321 << NumModifiedStmts << '\n'; 322 OS.indent(Indent) << "}\n"; 323 } 324 325 void printStatements(raw_ostream &OS, int Indent = 0) const { 326 OS.indent(Indent) << "After statements {\n"; 327 for (auto &Stmt : *S) { 328 OS.indent(Indent + 4) << Stmt.getBaseName() << "\n"; 329 for (auto *MA : Stmt) 330 MA->print(OS); 331 332 OS.indent(Indent + 12); 333 Stmt.printInstructions(OS); 334 } 335 OS.indent(Indent) << "}\n"; 336 } 337 338 /// Create a new MemoryAccess of type read and MemoryKind::Array. 339 /// 340 /// @param Stmt The statement in which the access occurs. 341 /// @param LI The instruction that does the access. 342 /// @param AccessRelation The array element that each statement instance 343 /// accesses. 344 /// 345 /// @param The newly created access. 346 MemoryAccess *makeReadArrayAccess(ScopStmt *Stmt, LoadInst *LI, 347 isl::map AccessRelation) { 348 isl::id ArrayId = AccessRelation.get_tuple_id(isl::dim::out); 349 ScopArrayInfo *SAI = reinterpret_cast<ScopArrayInfo *>(ArrayId.get_user()); 350 351 // Create a dummy SCEV access, to be replaced anyway. 352 SmallVector<const SCEV *, 4> Sizes; 353 Sizes.reserve(SAI->getNumberOfDimensions()); 354 SmallVector<const SCEV *, 4> Subscripts; 355 Subscripts.reserve(SAI->getNumberOfDimensions()); 356 for (unsigned i = 0; i < SAI->getNumberOfDimensions(); i += 1) { 357 Sizes.push_back(SAI->getDimensionSize(i)); 358 Subscripts.push_back(nullptr); 359 } 360 361 MemoryAccess *Access = 362 new MemoryAccess(Stmt, LI, MemoryAccess::READ, SAI->getBasePtr(), 363 LI->getType(), true, {}, Sizes, LI, MemoryKind::Array); 364 S->addAccessFunction(Access); 365 Stmt->addAccess(Access, true); 366 367 Access->setNewAccessRelation(AccessRelation); 368 369 return Access; 370 } 371 372 /// Forward a load by reading from an array element that contains the same 373 /// value. Typically the location it was loaded from. 374 /// 375 /// @param TargetStmt The statement the operand tree will be copied to. 376 /// @param Inst The (possibly speculatable) instruction to forward. 377 /// @param UseStmt The statement that uses @p Inst. 378 /// @param UseLoop The loop @p Inst is used in. 379 /// @param DefStmt The statement @p Inst is defined in. 380 /// @param DefLoop The loop which contains @p Inst. 381 /// @param DoIt If false, only determine whether an operand tree can be 382 /// forwarded. If true, carry out the forwarding. Do not 383 /// use DoIt==true if an operand tree is not known to be 384 /// forwardable. 385 /// 386 /// @return FD_NotApplicable if @p Inst cannot be forwarded by creating a new 387 /// load. 388 /// FD_CannotForward if the pointer operand cannot be forwarded. 389 /// FD_CanForwardProfitably if @p Inst is forwardable. 390 /// FD_DidForwardTree if @p DoIt was true. 391 ForwardingDecision forwardKnownLoad(ScopStmt *TargetStmt, Instruction *Inst, 392 ScopStmt *UseStmt, Loop *UseLoop, 393 ScopStmt *DefStmt, Loop *DefLoop, 394 bool DoIt) { 395 // Cannot do anything without successful known analysis. 396 if (Known.is_null() || Translator.is_null() || 397 MaxOpGuard.hasQuotaExceeded()) 398 return FD_NotApplicable; 399 400 LoadInst *LI = dyn_cast<LoadInst>(Inst); 401 if (!LI) 402 return FD_NotApplicable; 403 404 // If the load is already in the statement, no forwarding is necessary. 405 // However, it might happen that the LoadInst is already present in the 406 // statement's instruction list. In that case we do as follows: 407 // - For the evaluation (DoIt==false), we can trivially forward it as it is 408 // benefit of forwarding an already present instruction. 409 // - For the execution (DoIt==true), prepend the instruction (to make it 410 // available to all instructions following in the instruction list), but 411 // do not add another MemoryAccess. 412 MemoryAccess *Access = TargetStmt->getArrayAccessOrNULLFor(LI); 413 if (Access && !DoIt) 414 return FD_CanForwardProfitably; 415 416 ForwardingDecision OpDecision = forwardTree( 417 TargetStmt, LI->getPointerOperand(), DefStmt, DefLoop, DoIt); 418 switch (OpDecision) { 419 case FD_CannotForward: 420 assert(!DoIt); 421 return OpDecision; 422 423 case FD_CanForwardLeaf: 424 case FD_CanForwardProfitably: 425 assert(!DoIt); 426 break; 427 428 case FD_DidForwardLeaf: 429 case FD_DidForwardTree: 430 assert(DoIt); 431 break; 432 433 default: 434 llvm_unreachable("Shouldn't return this"); 435 } 436 437 IslQuotaScope QuotaScope = MaxOpGuard.enter(!DoIt); 438 439 // { DomainDef[] -> ValInst[] } 440 isl::map ExpectedVal = makeValInst(Inst, UseStmt, UseLoop); 441 assert(!isNormalized(ExpectedVal).is_false() && 442 "LoadInsts are always normalized"); 443 444 // { DomainUse[] -> DomainTarget[] } 445 isl::map UseToTarget = getDefToTarget(UseStmt, TargetStmt); 446 447 // { DomainTarget[] -> ValInst[] } 448 isl::map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget); 449 isl::union_map TranslatedExpectedVal = 450 isl::union_map(TargetExpectedVal).apply_range(Translator); 451 452 // { DomainTarget[] -> Element[] } 453 isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal); 454 455 isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt)); 456 if (!SameVal) 457 return FD_NotApplicable; 458 459 if (DoIt) 460 TargetStmt->prependInstruction(LI); 461 462 if (!DoIt) 463 return FD_CanForwardProfitably; 464 465 if (Access) { 466 LLVM_DEBUG( 467 dbgs() << " forwarded known load with preexisting MemoryAccess" 468 << Access << "\n"); 469 } else { 470 Access = makeReadArrayAccess(TargetStmt, LI, SameVal); 471 LLVM_DEBUG(dbgs() << " forwarded known load with new MemoryAccess" 472 << Access << "\n"); 473 474 // { ValInst[] } 475 isl::space ValInstSpace = ExpectedVal.get_space().range(); 476 477 // After adding a new load to the SCoP, also update the Known content 478 // about it. The new load will have a known ValInst of 479 // { [DomainTarget[] -> Value[]] } 480 // but which -- because it is a copy of it -- has same value as the 481 // { [DomainDef[] -> Value[]] } 482 // that it replicates. Instead of cloning the known content of 483 // [DomainDef[] -> Value[]] 484 // for DomainTarget[], we add a 'translator' that maps 485 // [DomainTarget[] -> Value[]] to [DomainDef[] -> Value[]] 486 // before comparing to the known content. 487 // TODO: 'Translator' could also be used to map PHINodes to their incoming 488 // ValInsts. 489 if (ValInstSpace.is_wrapping()) { 490 // { DefDomain[] -> Value[] } 491 isl::map ValInsts = ExpectedVal.range().unwrap(); 492 493 // { DefDomain[] } 494 isl::set DefDomain = ValInsts.domain(); 495 496 // { Value[] } 497 isl::space ValSpace = ValInstSpace.unwrap().range(); 498 499 // { Value[] -> Value[] } 500 isl::map ValToVal = 501 isl::map::identity(ValSpace.map_from_domain_and_range(ValSpace)); 502 503 // { DomainDef[] -> DomainTarget[] } 504 isl::map DefToTarget = getDefToTarget(DefStmt, TargetStmt); 505 506 // { [TargetDomain[] -> Value[]] -> [DefDomain[] -> Value] } 507 isl::map LocalTranslator = DefToTarget.reverse().product(ValToVal); 508 509 Translator = Translator.add_map(LocalTranslator); 510 LLVM_DEBUG(dbgs() << " local translator is " << LocalTranslator 511 << "\n"); 512 } 513 } 514 LLVM_DEBUG(dbgs() << " expected values where " << TargetExpectedVal 515 << "\n"); 516 LLVM_DEBUG(dbgs() << " candidate elements where " << Candidates 517 << "\n"); 518 assert(Access); 519 520 NumKnownLoadsForwarded++; 521 TotalKnownLoadsForwarded++; 522 return FD_DidForwardTree; 523 } 524 525 /// Forward a scalar by redirecting the access to an array element that stores 526 /// the same value. 527 /// 528 /// @param TargetStmt The statement the operand tree will be copied to. 529 /// @param Inst The scalar to forward. 530 /// @param UseStmt The statement that uses @p Inst. 531 /// @param UseLoop The loop @p Inst is used in. 532 /// @param DefStmt The statement @p Inst is defined in. 533 /// @param DefLoop The loop which contains @p Inst. 534 /// @param DoIt If false, only determine whether an operand tree can be 535 /// forwarded. If true, carry out the forwarding. Do not 536 /// use DoIt==true if an operand tree is not known to be 537 /// forwardable. 538 /// 539 /// @return FD_NotApplicable if @p Inst cannot be reloaded. 540 /// FD_CanForwardLeaf if @p Inst can be reloaded. 541 /// FD_CanForwardProfitably if @p Inst has been reloaded. 542 /// FD_DidForwardLeaf if @p DoIt was true. 543 ForwardingDecision reloadKnownContent(ScopStmt *TargetStmt, Instruction *Inst, 544 ScopStmt *UseStmt, Loop *UseLoop, 545 ScopStmt *DefStmt, Loop *DefLoop, 546 bool DoIt) { 547 // Cannot do anything without successful known analysis. 548 if (Known.is_null() || Translator.is_null() || 549 MaxOpGuard.hasQuotaExceeded()) 550 return FD_NotApplicable; 551 552 MemoryAccess *Access = TargetStmt->lookupInputAccessOf(Inst); 553 if (Access && Access->isLatestArrayKind()) { 554 if (DoIt) 555 return FD_DidForwardLeaf; 556 return FD_CanForwardLeaf; 557 } 558 559 // Don't spend too much time analyzing whether it can be reloaded. When 560 // carrying-out the forwarding, we cannot bail-out in the middle of the 561 // transformation. It also shouldn't take as long because some results are 562 // cached. 563 IslQuotaScope QuotaScope = MaxOpGuard.enter(!DoIt); 564 565 // { DomainDef[] -> ValInst[] } 566 isl::union_map ExpectedVal = makeNormalizedValInst(Inst, UseStmt, UseLoop); 567 568 // { DomainUse[] -> DomainTarget[] } 569 isl::map UseToTarget = getDefToTarget(UseStmt, TargetStmt); 570 571 // { DomainTarget[] -> ValInst[] } 572 isl::union_map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget); 573 isl::union_map TranslatedExpectedVal = 574 TargetExpectedVal.apply_range(Translator); 575 576 // { DomainTarget[] -> Element[] } 577 isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal); 578 579 isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt)); 580 if (!SameVal) 581 return FD_NotApplicable; 582 583 if (!DoIt) 584 return FD_CanForwardProfitably; 585 586 if (!Access) 587 Access = TargetStmt->ensureValueRead(Inst); 588 589 simplify(SameVal); 590 Access->setNewAccessRelation(SameVal); 591 592 TotalReloads++; 593 NumReloads++; 594 return FD_DidForwardLeaf; 595 } 596 597 /// Forwards a speculatively executable instruction. 598 /// 599 /// @param TargetStmt The statement the operand tree will be copied to. 600 /// @param UseInst The (possibly speculatable) instruction to forward. 601 /// @param DefStmt The statement @p UseInst is defined in. 602 /// @param DefLoop The loop which contains @p UseInst. 603 /// @param DoIt If false, only determine whether an operand tree can be 604 /// forwarded. If true, carry out the forwarding. Do not 605 /// use DoIt==true if an operand tree is not known to be 606 /// forwardable. 607 /// 608 /// @return FD_NotApplicable if @p UseInst is not speculatable. 609 /// FD_CannotForward if one of @p UseInst's operands is not 610 /// forwardable. 611 /// FD_CanForwardTree if @p UseInst is forwardable. 612 /// FD_DidForward if @p DoIt was true. 613 ForwardingDecision forwardSpeculatable(ScopStmt *TargetStmt, 614 Instruction *UseInst, 615 ScopStmt *DefStmt, Loop *DefLoop, 616 bool DoIt) { 617 // PHIs, unless synthesizable, are not yet supported. 618 if (isa<PHINode>(UseInst)) 619 return FD_NotApplicable; 620 621 // Compatible instructions must satisfy the following conditions: 622 // 1. Idempotent (instruction will be copied, not moved; although its 623 // original instance might be removed by simplification) 624 // 2. Not access memory (There might be memory writes between) 625 // 3. Not cause undefined behaviour (we might copy to a location when the 626 // original instruction was no executed; this is currently not possible 627 // because we do not forward PHINodes) 628 // 4. Not leak memory if executed multiple times (i.e. malloc) 629 // 630 // Instruction::mayHaveSideEffects is not sufficient because it considers 631 // malloc to not have side-effects. llvm::isSafeToSpeculativelyExecute is 632 // not sufficient because it allows memory accesses. 633 if (mayBeMemoryDependent(*UseInst)) 634 return FD_NotApplicable; 635 636 if (DoIt) { 637 // To ensure the right order, prepend this instruction before its 638 // operands. This ensures that its operands are inserted before the 639 // instruction using them. 640 // TODO: The operand tree is not really a tree, but a DAG. We should be 641 // able to handle DAGs without duplication. 642 TargetStmt->prependInstruction(UseInst); 643 NumInstructionsCopied++; 644 TotalInstructionsCopied++; 645 } 646 647 for (Value *OpVal : UseInst->operand_values()) { 648 ForwardingDecision OpDecision = 649 forwardTree(TargetStmt, OpVal, DefStmt, DefLoop, DoIt); 650 switch (OpDecision) { 651 case FD_CannotForward: 652 assert(!DoIt); 653 return FD_CannotForward; 654 655 case FD_CanForwardLeaf: 656 case FD_CanForwardProfitably: 657 assert(!DoIt); 658 break; 659 660 case FD_DidForwardLeaf: 661 case FD_DidForwardTree: 662 assert(DoIt); 663 break; 664 665 case FD_NotApplicable: 666 llvm_unreachable("forwardTree should never return FD_NotApplicable"); 667 } 668 } 669 670 if (DoIt) 671 return FD_DidForwardTree; 672 return FD_CanForwardProfitably; 673 } 674 675 /// Determines whether an operand tree can be forwarded or carries out a 676 /// forwarding, depending on the @p DoIt flag. 677 /// 678 /// @param TargetStmt The statement the operand tree will be copied to. 679 /// @param UseVal The value (usually an instruction) which is root of an 680 /// operand tree. 681 /// @param UseStmt The statement that uses @p UseVal. 682 /// @param UseLoop The loop @p UseVal is used in. 683 /// @param DoIt If false, only determine whether an operand tree can be 684 /// forwarded. If true, carry out the forwarding. Do not 685 /// use DoIt==true if an operand tree is not known to be 686 /// forwardable. 687 /// 688 /// @return If DoIt==false, return whether the operand tree can be forwarded. 689 /// If DoIt==true, return FD_DidForward. 690 ForwardingDecision forwardTree(ScopStmt *TargetStmt, Value *UseVal, 691 ScopStmt *UseStmt, Loop *UseLoop, bool DoIt) { 692 ScopStmt *DefStmt = nullptr; 693 Loop *DefLoop = nullptr; 694 695 // { DefDomain[] -> TargetDomain[] } 696 isl::map DefToTarget; 697 698 VirtualUse VUse = VirtualUse::create(UseStmt, UseLoop, UseVal, true); 699 switch (VUse.getKind()) { 700 case VirtualUse::Constant: 701 case VirtualUse::Block: 702 case VirtualUse::Hoisted: 703 // These can be used anywhere without special considerations. 704 if (DoIt) 705 return FD_DidForwardTree; 706 return FD_CanForwardLeaf; 707 708 case VirtualUse::Synthesizable: { 709 // ScopExpander will take care for of generating the code at the new 710 // location. 711 if (DoIt) 712 return FD_DidForwardTree; 713 714 // Check if the value is synthesizable at the new location as well. This 715 // might be possible when leaving a loop for which ScalarEvolution is 716 // unable to derive the exit value for. 717 // TODO: If there is a LCSSA PHI at the loop exit, use that one. 718 // If the SCEV contains a SCEVAddRecExpr, we currently depend on that we 719 // do not forward past its loop header. This would require us to use a 720 // previous loop induction variable instead the current one. We currently 721 // do not allow forwarding PHI nodes, thus this should never occur (the 722 // only exception where no phi is necessary being an unreachable loop 723 // without edge from the outside). 724 VirtualUse TargetUse = VirtualUse::create( 725 S, TargetStmt, TargetStmt->getSurroundingLoop(), UseVal, true); 726 if (TargetUse.getKind() == VirtualUse::Synthesizable) 727 return FD_CanForwardLeaf; 728 729 LLVM_DEBUG( 730 dbgs() << " Synthesizable would not be synthesizable anymore: " 731 << *UseVal << "\n"); 732 return FD_CannotForward; 733 } 734 735 case VirtualUse::ReadOnly: 736 // Note that we cannot return FD_CanForwardTree here. With a operand tree 737 // depth of 0, UseVal is the use in TargetStmt that we try to replace. 738 // With -polly-analyze-read-only-scalars=true we would ensure the 739 // existence of a MemoryAccess (which already exists for a leaf) and be 740 // removed again by tryForwardTree because it's goal is to remove this 741 // scalar MemoryAccess. It interprets FD_CanForwardTree as the permission 742 // to do so. 743 if (!DoIt) 744 return FD_CanForwardLeaf; 745 746 // If we model read-only scalars, we need to create a MemoryAccess for it. 747 if (ModelReadOnlyScalars) 748 TargetStmt->ensureValueRead(UseVal); 749 750 NumReadOnlyCopied++; 751 TotalReadOnlyCopied++; 752 return FD_DidForwardLeaf; 753 754 case VirtualUse::Intra: 755 // Knowing that UseStmt and DefStmt are the same statement instance, just 756 // reuse the information about UseStmt for DefStmt 757 DefStmt = UseStmt; 758 759 LLVM_FALLTHROUGH; 760 case VirtualUse::Inter: 761 Instruction *Inst = cast<Instruction>(UseVal); 762 763 if (!DefStmt) { 764 DefStmt = S->getStmtFor(Inst); 765 if (!DefStmt) 766 return FD_CannotForward; 767 } 768 769 DefLoop = LI->getLoopFor(Inst->getParent()); 770 771 ForwardingDecision SpeculativeResult = 772 forwardSpeculatable(TargetStmt, Inst, DefStmt, DefLoop, DoIt); 773 if (SpeculativeResult != FD_NotApplicable) 774 return SpeculativeResult; 775 776 ForwardingDecision KnownResult = forwardKnownLoad( 777 TargetStmt, Inst, UseStmt, UseLoop, DefStmt, DefLoop, DoIt); 778 if (KnownResult != FD_NotApplicable) 779 return KnownResult; 780 781 ForwardingDecision ReloadResult = reloadKnownContent( 782 TargetStmt, Inst, UseStmt, UseLoop, DefStmt, DefLoop, DoIt); 783 if (ReloadResult != FD_NotApplicable) 784 return ReloadResult; 785 786 // When no method is found to forward the operand tree, we effectively 787 // cannot handle it. 788 LLVM_DEBUG(dbgs() << " Cannot forward instruction: " << *Inst << "\n"); 789 return FD_CannotForward; 790 } 791 792 llvm_unreachable("Case unhandled"); 793 } 794 795 /// Try to forward an operand tree rooted in @p RA. 796 bool tryForwardTree(MemoryAccess *RA) { 797 assert(RA->isLatestScalarKind()); 798 LLVM_DEBUG(dbgs() << "Trying to forward operand tree " << RA << "...\n"); 799 800 ScopStmt *Stmt = RA->getStatement(); 801 Loop *InLoop = Stmt->getSurroundingLoop(); 802 803 isl::map TargetToUse; 804 if (!Known.is_null()) { 805 isl::space DomSpace = Stmt->getDomainSpace(); 806 TargetToUse = 807 isl::map::identity(DomSpace.map_from_domain_and_range(DomSpace)); 808 } 809 810 ForwardingDecision Assessment = 811 forwardTree(Stmt, RA->getAccessValue(), Stmt, InLoop, false); 812 assert(Assessment != FD_DidForwardTree && Assessment != FD_DidForwardLeaf); 813 if (Assessment != FD_CanForwardProfitably) 814 return false; 815 816 ForwardingDecision Execution = 817 forwardTree(Stmt, RA->getAccessValue(), Stmt, InLoop, true); 818 assert(((Execution == FD_DidForwardTree) || 819 (Execution == FD_DidForwardLeaf)) && 820 "A previous positive assessment must also be executable"); 821 822 if (Execution == FD_DidForwardTree) 823 Stmt->removeSingleMemoryAccess(RA); 824 return true; 825 } 826 827 /// Return which SCoP this instance is processing. 828 Scop *getScop() const { return S; } 829 830 /// Run the algorithm: Use value read accesses as operand tree roots and try 831 /// to forward them into the statement. 832 bool forwardOperandTrees() { 833 for (ScopStmt &Stmt : *S) { 834 bool StmtModified = false; 835 836 // Because we are modifying the MemoryAccess list, collect them first to 837 // avoid iterator invalidation. 838 SmallVector<MemoryAccess *, 16> Accs; 839 for (MemoryAccess *RA : Stmt) { 840 if (!RA->isRead()) 841 continue; 842 if (!RA->isLatestScalarKind()) 843 continue; 844 845 Accs.push_back(RA); 846 } 847 848 for (MemoryAccess *RA : Accs) { 849 if (tryForwardTree(RA)) { 850 Modified = true; 851 StmtModified = true; 852 NumForwardedTrees++; 853 TotalForwardedTrees++; 854 } 855 } 856 857 if (StmtModified) { 858 NumModifiedStmts++; 859 TotalModifiedStmts++; 860 } 861 } 862 863 if (Modified) 864 ScopsModified++; 865 return Modified; 866 } 867 868 /// Print the pass result, performed transformations and the SCoP after the 869 /// transformation. 870 void print(raw_ostream &OS, int Indent = 0) { 871 printStatistics(OS, Indent); 872 873 if (!Modified) { 874 // This line can easily be checked in regression tests. 875 OS << "ForwardOpTree executed, but did not modify anything\n"; 876 return; 877 } 878 879 printStatements(OS, Indent); 880 } 881 }; 882 883 /// Pass that redirects scalar reads to array elements that are known to contain 884 /// the same value. 885 /// 886 /// This reduces the number of scalar accesses and therefore potentially 887 /// increases the freedom of the scheduler. In the ideal case, all reads of a 888 /// scalar definition are redirected (We currently do not care about removing 889 /// the write in this case). This is also useful for the main DeLICM pass as 890 /// there are less scalars to be mapped. 891 class ForwardOpTree : public ScopPass { 892 private: 893 /// The pass implementation, also holding per-scop data. 894 std::unique_ptr<ForwardOpTreeImpl> Impl; 895 896 public: 897 static char ID; 898 899 explicit ForwardOpTree() : ScopPass(ID) {} 900 ForwardOpTree(const ForwardOpTree &) = delete; 901 ForwardOpTree &operator=(const ForwardOpTree &) = delete; 902 903 void getAnalysisUsage(AnalysisUsage &AU) const override { 904 AU.addRequiredTransitive<ScopInfoRegionPass>(); 905 AU.addRequired<LoopInfoWrapperPass>(); 906 AU.setPreservesAll(); 907 } 908 909 bool runOnScop(Scop &S) override { 910 // Free resources for previous SCoP's computation, if not yet done. 911 releaseMemory(); 912 913 LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 914 915 { 916 IslMaxOperationsGuard MaxOpGuard(S.getIslCtx().get(), MaxOps, false); 917 Impl = llvm::make_unique<ForwardOpTreeImpl>(&S, &LI, MaxOpGuard); 918 919 if (AnalyzeKnown) { 920 LLVM_DEBUG(dbgs() << "Prepare forwarders...\n"); 921 Impl->computeKnownValues(); 922 } 923 924 LLVM_DEBUG(dbgs() << "Forwarding operand trees...\n"); 925 Impl->forwardOperandTrees(); 926 927 if (MaxOpGuard.hasQuotaExceeded()) { 928 LLVM_DEBUG(dbgs() << "Not all operations completed because of " 929 "max_operations exceeded\n"); 930 KnownOutOfQuota++; 931 } 932 } 933 934 LLVM_DEBUG(dbgs() << "\nFinal Scop:\n"); 935 LLVM_DEBUG(dbgs() << S); 936 937 // Update statistics 938 auto ScopStats = S.getStatistics(); 939 NumValueWrites += ScopStats.NumValueWrites; 940 NumValueWritesInLoops += ScopStats.NumValueWritesInLoops; 941 NumPHIWrites += ScopStats.NumPHIWrites; 942 NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops; 943 NumSingletonWrites += ScopStats.NumSingletonWrites; 944 NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops; 945 946 return false; 947 } 948 949 void printScop(raw_ostream &OS, Scop &S) const override { 950 if (!Impl) 951 return; 952 953 assert(Impl->getScop() == &S); 954 Impl->print(OS); 955 } 956 957 void releaseMemory() override { Impl.reset(); } 958 }; // class ForwardOpTree 959 960 char ForwardOpTree::ID; 961 } // namespace 962 963 ScopPass *polly::createForwardOpTreePass() { return new ForwardOpTree(); } 964 965 INITIALIZE_PASS_BEGIN(ForwardOpTree, "polly-optree", 966 "Polly - Forward operand tree", false, false) 967 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 968 INITIALIZE_PASS_END(ForwardOpTree, "polly-optree", 969 "Polly - Forward operand tree", false, false) 970