1 //===- ForwardOpTree.h ------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Move instructions between statements. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "polly/ForwardOpTree.h" 14 #include "polly/Options.h" 15 #include "polly/ScopBuilder.h" 16 #include "polly/ScopInfo.h" 17 #include "polly/ScopPass.h" 18 #include "polly/Support/GICHelper.h" 19 #include "polly/Support/ISLOStream.h" 20 #include "polly/Support/ISLTools.h" 21 #include "polly/Support/VirtualInstruction.h" 22 #include "polly/ZoneAlgo.h" 23 #include "llvm/ADT/STLExtras.h" 24 #include "llvm/ADT/SmallVector.h" 25 #include "llvm/ADT/Statistic.h" 26 #include "llvm/Analysis/LoopInfo.h" 27 #include "llvm/Analysis/ValueTracking.h" 28 #include "llvm/IR/Instruction.h" 29 #include "llvm/IR/Instructions.h" 30 #include "llvm/IR/Value.h" 31 #include "llvm/InitializePasses.h" 32 #include "llvm/Support/Casting.h" 33 #include "llvm/Support/CommandLine.h" 34 #include "llvm/Support/Compiler.h" 35 #include "llvm/Support/Debug.h" 36 #include "llvm/Support/ErrorHandling.h" 37 #include "llvm/Support/raw_ostream.h" 38 #include "isl/ctx.h" 39 #include "isl/isl-noexceptions.h" 40 #include <cassert> 41 #include <memory> 42 43 #define DEBUG_TYPE "polly-optree" 44 45 using namespace llvm; 46 using namespace polly; 47 48 static cl::opt<bool> 49 AnalyzeKnown("polly-optree-analyze-known", 50 cl::desc("Analyze array contents for load forwarding"), 51 cl::cat(PollyCategory), cl::init(true), cl::Hidden); 52 53 static cl::opt<bool> 54 NormalizePHIs("polly-optree-normalize-phi", 55 cl::desc("Replace PHIs by their incoming values"), 56 cl::cat(PollyCategory), cl::init(false), cl::Hidden); 57 58 static cl::opt<unsigned> 59 MaxOps("polly-optree-max-ops", 60 cl::desc("Maximum number of ISL operations to invest for known " 61 "analysis; 0=no limit"), 62 cl::init(1000000), cl::cat(PollyCategory), cl::Hidden); 63 64 STATISTIC(KnownAnalyzed, "Number of successfully analyzed SCoPs"); 65 STATISTIC(KnownOutOfQuota, 66 "Analyses aborted because max_operations was reached"); 67 68 STATISTIC(TotalInstructionsCopied, "Number of copied instructions"); 69 STATISTIC(TotalKnownLoadsForwarded, 70 "Number of forwarded loads because their value was known"); 71 STATISTIC(TotalReloads, "Number of reloaded values"); 72 STATISTIC(TotalReadOnlyCopied, "Number of copied read-only accesses"); 73 STATISTIC(TotalForwardedTrees, "Number of forwarded operand trees"); 74 STATISTIC(TotalModifiedStmts, 75 "Number of statements with at least one forwarded tree"); 76 77 STATISTIC(ScopsModified, "Number of SCoPs with at least one forwarded tree"); 78 79 STATISTIC(NumValueWrites, "Number of scalar value writes after OpTree"); 80 STATISTIC(NumValueWritesInLoops, 81 "Number of scalar value writes nested in affine loops after OpTree"); 82 STATISTIC(NumPHIWrites, "Number of scalar phi writes after OpTree"); 83 STATISTIC(NumPHIWritesInLoops, 84 "Number of scalar phi writes nested in affine loops after OpTree"); 85 STATISTIC(NumSingletonWrites, "Number of singleton writes after OpTree"); 86 STATISTIC(NumSingletonWritesInLoops, 87 "Number of singleton writes nested in affine loops after OpTree"); 88 89 namespace { 90 91 /// The state of whether an operand tree was/can be forwarded. 92 /// 93 /// The items apply to an instructions and its operand tree with the instruction 94 /// as the root element. If the value in question is not an instruction in the 95 /// SCoP, it can be a leaf of an instruction's operand tree. 96 enum ForwardingDecision { 97 /// The root instruction or value cannot be forwarded at all. 98 FD_CannotForward, 99 100 /// The root instruction or value can be forwarded as a leaf of a larger 101 /// operand tree. 102 /// It does not make sense to move the value itself, it would just replace it 103 /// by a use of itself. For instance, a constant "5" used in a statement can 104 /// be forwarded, but it would just replace it by the same constant "5". 105 /// However, it makes sense to move as an operand of 106 /// 107 /// %add = add 5, 5 108 /// 109 /// where "5" is moved as part of a larger operand tree. "5" would be placed 110 /// (disregarding for a moment that literal constants don't have a location 111 /// and can be used anywhere) into the same statement as %add would. 112 FD_CanForwardLeaf, 113 114 /// The root instruction can be forwarded and doing so avoids a scalar 115 /// dependency. 116 /// 117 /// This can be either because the operand tree can be moved to the target 118 /// statement, or a memory access is redirected to read from a different 119 /// location. 120 FD_CanForwardProfitably, 121 122 /// Used to indicate that a forwarding has be carried out successfully, and 123 /// the forwarded memory access can be deleted. 124 FD_DidForwardTree, 125 126 /// Used to indicate that a forwarding has be carried out successfully, and 127 /// the forwarded memory access is being reused. 128 FD_DidForwardLeaf, 129 130 /// A forwarding method cannot be applied to the operand tree. 131 /// The difference to FD_CannotForward is that there might be other methods 132 /// that can handle it. 133 /// The conditions that make an operand tree applicable must be checked even 134 /// with DoIt==true because a method following the one that returned 135 /// FD_NotApplicable might have returned FD_CanForwardTree. 136 FD_NotApplicable 137 }; 138 139 /// Implementation of operand tree forwarding for a specific SCoP. 140 /// 141 /// For a statement that requires a scalar value (through a value read 142 /// MemoryAccess), see if its operand can be moved into the statement. If so, 143 /// the MemoryAccess is removed and the all the operand tree instructions are 144 /// moved into the statement. All original instructions are left in the source 145 /// statements. The simplification pass can clean these up. 146 class ForwardOpTreeImpl : ZoneAlgorithm { 147 private: 148 /// Scope guard to limit the number of isl operations for this pass. 149 IslMaxOperationsGuard &MaxOpGuard; 150 151 /// How many instructions have been copied to other statements. 152 int NumInstructionsCopied = 0; 153 154 /// Number of loads forwarded because their value was known. 155 int NumKnownLoadsForwarded = 0; 156 157 /// Number of values reloaded from known array elements. 158 int NumReloads = 0; 159 160 /// How many read-only accesses have been copied. 161 int NumReadOnlyCopied = 0; 162 163 /// How many operand trees have been forwarded. 164 int NumForwardedTrees = 0; 165 166 /// Number of statements with at least one forwarded operand tree. 167 int NumModifiedStmts = 0; 168 169 /// Whether we carried out at least one change to the SCoP. 170 bool Modified = false; 171 172 /// Contains the zones where array elements are known to contain a specific 173 /// value. 174 /// { [Element[] -> Zone[]] -> ValInst[] } 175 /// @see computeKnown() 176 isl::union_map Known; 177 178 /// Translator for newly introduced ValInsts to already existing ValInsts such 179 /// that new introduced load instructions can reuse the Known analysis of its 180 /// original load. { ValInst[] -> ValInst[] } 181 isl::union_map Translator; 182 183 /// Get list of array elements that do contain the same ValInst[] at Domain[]. 184 /// 185 /// @param ValInst { Domain[] -> ValInst[] } 186 /// The values for which we search for alternative locations, 187 /// per statement instance. 188 /// 189 /// @return { Domain[] -> Element[] } 190 /// For each statement instance, the array elements that contain the 191 /// same ValInst. 192 isl::union_map findSameContentElements(isl::union_map ValInst) { 193 assert(!ValInst.is_single_valued().is_false()); 194 195 // { Domain[] } 196 isl::union_set Domain = ValInst.domain(); 197 198 // { Domain[] -> Scatter[] } 199 isl::union_map Schedule = getScatterFor(Domain); 200 201 // { Element[] -> [Scatter[] -> ValInst[]] } 202 isl::union_map MustKnownCurried = 203 convertZoneToTimepoints(Known, isl::dim::in, false, true).curry(); 204 205 // { [Domain[] -> ValInst[]] -> Scatter[] } 206 isl::union_map DomValSched = ValInst.domain_map().apply_range(Schedule); 207 208 // { [Scatter[] -> ValInst[]] -> [Domain[] -> ValInst[]] } 209 isl::union_map SchedValDomVal = 210 DomValSched.range_product(ValInst.range_map()).reverse(); 211 212 // { Element[] -> [Domain[] -> ValInst[]] } 213 isl::union_map MustKnownInst = MustKnownCurried.apply_range(SchedValDomVal); 214 215 // { Domain[] -> Element[] } 216 isl::union_map MustKnownMap = 217 MustKnownInst.uncurry().domain().unwrap().reverse(); 218 simplify(MustKnownMap); 219 220 return MustKnownMap; 221 } 222 223 /// Find a single array element for each statement instance, within a single 224 /// array. 225 /// 226 /// @param MustKnown { Domain[] -> Element[] } 227 /// Set of candidate array elements. 228 /// @param Domain { Domain[] } 229 /// The statement instance for which we need elements for. 230 /// 231 /// @return { Domain[] -> Element[] } 232 /// For each statement instance, an array element out of @p MustKnown. 233 /// All array elements must be in the same array (Polly does not yet 234 /// support reading from different accesses using the same 235 /// MemoryAccess). If no mapping for all of @p Domain exists, returns 236 /// null. 237 isl::map singleLocation(isl::union_map MustKnown, isl::set Domain) { 238 // { Domain[] -> Element[] } 239 isl::map Result; 240 241 // MemoryAccesses can read only elements from a single array 242 // (i.e. not: { Dom[0] -> A[0]; Dom[1] -> B[1] }). 243 // Look through all spaces until we find one that contains at least the 244 // wanted statement instance.s 245 for (isl::map Map : MustKnown.get_map_list()) { 246 // Get the array this is accessing. 247 isl::id ArrayId = Map.get_tuple_id(isl::dim::out); 248 ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(ArrayId.get_user()); 249 250 // No support for generation of indirect array accesses. 251 if (SAI->getBasePtrOriginSAI()) 252 continue; 253 254 // Determine whether this map contains all wanted values. 255 isl::set MapDom = Map.domain(); 256 if (!Domain.is_subset(MapDom).is_true()) 257 continue; 258 259 // There might be multiple array elements that contain the same value, but 260 // choose only one of them. lexmin is used because it returns a one-value 261 // mapping, we do not care about which one. 262 // TODO: Get the simplest access function. 263 Result = Map.lexmin(); 264 break; 265 } 266 267 return Result; 268 } 269 270 public: 271 ForwardOpTreeImpl(Scop *S, LoopInfo *LI, IslMaxOperationsGuard &MaxOpGuard) 272 : ZoneAlgorithm("polly-optree", S, LI), MaxOpGuard(MaxOpGuard) {} 273 274 /// Compute the zones of known array element contents. 275 /// 276 /// @return True if the computed #Known is usable. 277 bool computeKnownValues() { 278 isl::union_map MustKnown, KnownFromLoad, KnownFromInit; 279 280 // Check that nothing strange occurs. 281 collectCompatibleElts(); 282 283 { 284 IslQuotaScope QuotaScope = MaxOpGuard.enter(); 285 286 computeCommon(); 287 if (NormalizePHIs) 288 computeNormalizedPHIs(); 289 Known = computeKnown(true, true); 290 291 // Preexisting ValInsts use the known content analysis of themselves. 292 Translator = makeIdentityMap(Known.range(), false); 293 } 294 295 if (!Known || !Translator || !NormalizeMap) { 296 assert(isl_ctx_last_error(IslCtx.get()) == isl_error_quota); 297 Known = nullptr; 298 Translator = nullptr; 299 NormalizeMap = nullptr; 300 LLVM_DEBUG(dbgs() << "Known analysis exceeded max_operations\n"); 301 return false; 302 } 303 304 KnownAnalyzed++; 305 LLVM_DEBUG(dbgs() << "All known: " << Known << "\n"); 306 307 return true; 308 } 309 310 void printStatistics(raw_ostream &OS, int Indent = 0) { 311 OS.indent(Indent) << "Statistics {\n"; 312 OS.indent(Indent + 4) << "Instructions copied: " << NumInstructionsCopied 313 << '\n'; 314 OS.indent(Indent + 4) << "Known loads forwarded: " << NumKnownLoadsForwarded 315 << '\n'; 316 OS.indent(Indent + 4) << "Reloads: " << NumReloads << '\n'; 317 OS.indent(Indent + 4) << "Read-only accesses copied: " << NumReadOnlyCopied 318 << '\n'; 319 OS.indent(Indent + 4) << "Operand trees forwarded: " << NumForwardedTrees 320 << '\n'; 321 OS.indent(Indent + 4) << "Statements with forwarded operand trees: " 322 << NumModifiedStmts << '\n'; 323 OS.indent(Indent) << "}\n"; 324 } 325 326 void printStatements(raw_ostream &OS, int Indent = 0) const { 327 OS.indent(Indent) << "After statements {\n"; 328 for (auto &Stmt : *S) { 329 OS.indent(Indent + 4) << Stmt.getBaseName() << "\n"; 330 for (auto *MA : Stmt) 331 MA->print(OS); 332 333 OS.indent(Indent + 12); 334 Stmt.printInstructions(OS); 335 } 336 OS.indent(Indent) << "}\n"; 337 } 338 339 /// Create a new MemoryAccess of type read and MemoryKind::Array. 340 /// 341 /// @param Stmt The statement in which the access occurs. 342 /// @param LI The instruction that does the access. 343 /// @param AccessRelation The array element that each statement instance 344 /// accesses. 345 /// 346 /// @param The newly created access. 347 MemoryAccess *makeReadArrayAccess(ScopStmt *Stmt, LoadInst *LI, 348 isl::map AccessRelation) { 349 isl::id ArrayId = AccessRelation.get_tuple_id(isl::dim::out); 350 ScopArrayInfo *SAI = reinterpret_cast<ScopArrayInfo *>(ArrayId.get_user()); 351 352 // Create a dummy SCEV access, to be replaced anyway. 353 SmallVector<const SCEV *, 4> Sizes; 354 Sizes.reserve(SAI->getNumberOfDimensions()); 355 SmallVector<const SCEV *, 4> Subscripts; 356 Subscripts.reserve(SAI->getNumberOfDimensions()); 357 for (unsigned i = 0; i < SAI->getNumberOfDimensions(); i += 1) { 358 Sizes.push_back(SAI->getDimensionSize(i)); 359 Subscripts.push_back(nullptr); 360 } 361 362 MemoryAccess *Access = 363 new MemoryAccess(Stmt, LI, MemoryAccess::READ, SAI->getBasePtr(), 364 LI->getType(), true, {}, Sizes, LI, MemoryKind::Array); 365 S->addAccessFunction(Access); 366 Stmt->addAccess(Access, true); 367 368 Access->setNewAccessRelation(AccessRelation); 369 370 return Access; 371 } 372 373 /// Forward a load by reading from an array element that contains the same 374 /// value. Typically the location it was loaded from. 375 /// 376 /// @param TargetStmt The statement the operand tree will be copied to. 377 /// @param Inst The (possibly speculatable) instruction to forward. 378 /// @param UseStmt The statement that uses @p Inst. 379 /// @param UseLoop The loop @p Inst is used in. 380 /// @param DefStmt The statement @p Inst is defined in. 381 /// @param DefLoop The loop which contains @p Inst. 382 /// @param DoIt If false, only determine whether an operand tree can be 383 /// forwarded. If true, carry out the forwarding. Do not 384 /// use DoIt==true if an operand tree is not known to be 385 /// forwardable. 386 /// 387 /// @return FD_NotApplicable if @p Inst cannot be forwarded by creating a new 388 /// load. 389 /// FD_CannotForward if the pointer operand cannot be forwarded. 390 /// FD_CanForwardProfitably if @p Inst is forwardable. 391 /// FD_DidForwardTree if @p DoIt was true. 392 ForwardingDecision forwardKnownLoad(ScopStmt *TargetStmt, Instruction *Inst, 393 ScopStmt *UseStmt, Loop *UseLoop, 394 ScopStmt *DefStmt, Loop *DefLoop, 395 bool DoIt) { 396 // Cannot do anything without successful known analysis. 397 if (Known.is_null() || Translator.is_null() || 398 MaxOpGuard.hasQuotaExceeded()) 399 return FD_NotApplicable; 400 401 LoadInst *LI = dyn_cast<LoadInst>(Inst); 402 if (!LI) 403 return FD_NotApplicable; 404 405 // If the load is already in the statement, no forwarding is necessary. 406 // However, it might happen that the LoadInst is already present in the 407 // statement's instruction list. In that case we do as follows: 408 // - For the evaluation (DoIt==false), we can trivially forward it as it is 409 // benefit of forwarding an already present instruction. 410 // - For the execution (DoIt==true), prepend the instruction (to make it 411 // available to all instructions following in the instruction list), but 412 // do not add another MemoryAccess. 413 MemoryAccess *Access = TargetStmt->getArrayAccessOrNULLFor(LI); 414 if (Access && !DoIt) 415 return FD_CanForwardProfitably; 416 417 ForwardingDecision OpDecision = forwardTree( 418 TargetStmt, LI->getPointerOperand(), DefStmt, DefLoop, DoIt); 419 switch (OpDecision) { 420 case FD_CannotForward: 421 assert(!DoIt); 422 return OpDecision; 423 424 case FD_CanForwardLeaf: 425 case FD_CanForwardProfitably: 426 assert(!DoIt); 427 break; 428 429 case FD_DidForwardLeaf: 430 case FD_DidForwardTree: 431 assert(DoIt); 432 break; 433 434 default: 435 llvm_unreachable("Shouldn't return this"); 436 } 437 438 IslQuotaScope QuotaScope = MaxOpGuard.enter(!DoIt); 439 440 // { DomainDef[] -> ValInst[] } 441 isl::map ExpectedVal = makeValInst(Inst, UseStmt, UseLoop); 442 assert(!isNormalized(ExpectedVal).is_false() && 443 "LoadInsts are always normalized"); 444 445 // { DomainUse[] -> DomainTarget[] } 446 isl::map UseToTarget = getDefToTarget(UseStmt, TargetStmt); 447 448 // { DomainTarget[] -> ValInst[] } 449 isl::map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget); 450 isl::union_map TranslatedExpectedVal = 451 isl::union_map(TargetExpectedVal).apply_range(Translator); 452 453 // { DomainTarget[] -> Element[] } 454 isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal); 455 456 isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt)); 457 if (!SameVal) 458 return FD_NotApplicable; 459 460 if (DoIt) 461 TargetStmt->prependInstruction(LI); 462 463 if (!DoIt) 464 return FD_CanForwardProfitably; 465 466 if (Access) { 467 LLVM_DEBUG( 468 dbgs() << " forwarded known load with preexisting MemoryAccess" 469 << Access << "\n"); 470 } else { 471 Access = makeReadArrayAccess(TargetStmt, LI, SameVal); 472 LLVM_DEBUG(dbgs() << " forwarded known load with new MemoryAccess" 473 << Access << "\n"); 474 475 // { ValInst[] } 476 isl::space ValInstSpace = ExpectedVal.get_space().range(); 477 478 // After adding a new load to the SCoP, also update the Known content 479 // about it. The new load will have a known ValInst of 480 // { [DomainTarget[] -> Value[]] } 481 // but which -- because it is a copy of it -- has same value as the 482 // { [DomainDef[] -> Value[]] } 483 // that it replicates. Instead of cloning the known content of 484 // [DomainDef[] -> Value[]] 485 // for DomainTarget[], we add a 'translator' that maps 486 // [DomainTarget[] -> Value[]] to [DomainDef[] -> Value[]] 487 // before comparing to the known content. 488 // TODO: 'Translator' could also be used to map PHINodes to their incoming 489 // ValInsts. 490 if (ValInstSpace.is_wrapping()) { 491 // { DefDomain[] -> Value[] } 492 isl::map ValInsts = ExpectedVal.range().unwrap(); 493 494 // { DefDomain[] } 495 isl::set DefDomain = ValInsts.domain(); 496 497 // { Value[] } 498 isl::space ValSpace = ValInstSpace.unwrap().range(); 499 500 // { Value[] -> Value[] } 501 isl::map ValToVal = 502 isl::map::identity(ValSpace.map_from_domain_and_range(ValSpace)); 503 504 // { DomainDef[] -> DomainTarget[] } 505 isl::map DefToTarget = getDefToTarget(DefStmt, TargetStmt); 506 507 // { [TargetDomain[] -> Value[]] -> [DefDomain[] -> Value] } 508 isl::map LocalTranslator = DefToTarget.reverse().product(ValToVal); 509 510 Translator = Translator.add_map(LocalTranslator); 511 LLVM_DEBUG(dbgs() << " local translator is " << LocalTranslator 512 << "\n"); 513 } 514 } 515 LLVM_DEBUG(dbgs() << " expected values where " << TargetExpectedVal 516 << "\n"); 517 LLVM_DEBUG(dbgs() << " candidate elements where " << Candidates 518 << "\n"); 519 assert(Access); 520 521 NumKnownLoadsForwarded++; 522 TotalKnownLoadsForwarded++; 523 return FD_DidForwardTree; 524 } 525 526 /// Forward a scalar by redirecting the access to an array element that stores 527 /// the same value. 528 /// 529 /// @param TargetStmt The statement the operand tree will be copied to. 530 /// @param Inst The scalar to forward. 531 /// @param UseStmt The statement that uses @p Inst. 532 /// @param UseLoop The loop @p Inst is used in. 533 /// @param DefStmt The statement @p Inst is defined in. 534 /// @param DefLoop The loop which contains @p Inst. 535 /// @param DoIt If false, only determine whether an operand tree can be 536 /// forwarded. If true, carry out the forwarding. Do not 537 /// use DoIt==true if an operand tree is not known to be 538 /// forwardable. 539 /// 540 /// @return FD_NotApplicable if @p Inst cannot be reloaded. 541 /// FD_CanForwardLeaf if @p Inst can be reloaded. 542 /// FD_CanForwardProfitably if @p Inst has been reloaded. 543 /// FD_DidForwardLeaf if @p DoIt was true. 544 ForwardingDecision reloadKnownContent(ScopStmt *TargetStmt, Instruction *Inst, 545 ScopStmt *UseStmt, Loop *UseLoop, 546 ScopStmt *DefStmt, Loop *DefLoop, 547 bool DoIt) { 548 // Cannot do anything without successful known analysis. 549 if (Known.is_null() || Translator.is_null() || 550 MaxOpGuard.hasQuotaExceeded()) 551 return FD_NotApplicable; 552 553 MemoryAccess *Access = TargetStmt->lookupInputAccessOf(Inst); 554 if (Access && Access->isLatestArrayKind()) { 555 if (DoIt) 556 return FD_DidForwardLeaf; 557 return FD_CanForwardLeaf; 558 } 559 560 // Don't spend too much time analyzing whether it can be reloaded. When 561 // carrying-out the forwarding, we cannot bail-out in the middle of the 562 // transformation. It also shouldn't take as long because some results are 563 // cached. 564 IslQuotaScope QuotaScope = MaxOpGuard.enter(!DoIt); 565 566 // { DomainDef[] -> ValInst[] } 567 isl::union_map ExpectedVal = makeNormalizedValInst(Inst, UseStmt, UseLoop); 568 569 // { DomainUse[] -> DomainTarget[] } 570 isl::map UseToTarget = getDefToTarget(UseStmt, TargetStmt); 571 572 // { DomainTarget[] -> ValInst[] } 573 isl::union_map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget); 574 isl::union_map TranslatedExpectedVal = 575 TargetExpectedVal.apply_range(Translator); 576 577 // { DomainTarget[] -> Element[] } 578 isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal); 579 580 isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt)); 581 if (!SameVal) 582 return FD_NotApplicable; 583 584 if (!DoIt) 585 return FD_CanForwardProfitably; 586 587 if (!Access) 588 Access = TargetStmt->ensureValueRead(Inst); 589 590 simplify(SameVal); 591 Access->setNewAccessRelation(SameVal); 592 593 TotalReloads++; 594 NumReloads++; 595 return FD_DidForwardLeaf; 596 } 597 598 /// Forwards a speculatively executable instruction. 599 /// 600 /// @param TargetStmt The statement the operand tree will be copied to. 601 /// @param UseInst The (possibly speculatable) instruction to forward. 602 /// @param DefStmt The statement @p UseInst is defined in. 603 /// @param DefLoop The loop which contains @p UseInst. 604 /// @param DoIt If false, only determine whether an operand tree can be 605 /// forwarded. If true, carry out the forwarding. Do not 606 /// use DoIt==true if an operand tree is not known to be 607 /// forwardable. 608 /// 609 /// @return FD_NotApplicable if @p UseInst is not speculatable. 610 /// FD_CannotForward if one of @p UseInst's operands is not 611 /// forwardable. 612 /// FD_CanForwardTree if @p UseInst is forwardable. 613 /// FD_DidForward if @p DoIt was true. 614 ForwardingDecision forwardSpeculatable(ScopStmt *TargetStmt, 615 Instruction *UseInst, 616 ScopStmt *DefStmt, Loop *DefLoop, 617 bool DoIt) { 618 // PHIs, unless synthesizable, are not yet supported. 619 if (isa<PHINode>(UseInst)) 620 return FD_NotApplicable; 621 622 // Compatible instructions must satisfy the following conditions: 623 // 1. Idempotent (instruction will be copied, not moved; although its 624 // original instance might be removed by simplification) 625 // 2. Not access memory (There might be memory writes between) 626 // 3. Not cause undefined behaviour (we might copy to a location when the 627 // original instruction was no executed; this is currently not possible 628 // because we do not forward PHINodes) 629 // 4. Not leak memory if executed multiple times (i.e. malloc) 630 // 631 // Instruction::mayHaveSideEffects is not sufficient because it considers 632 // malloc to not have side-effects. llvm::isSafeToSpeculativelyExecute is 633 // not sufficient because it allows memory accesses. 634 if (mayBeMemoryDependent(*UseInst)) 635 return FD_NotApplicable; 636 637 if (DoIt) { 638 // To ensure the right order, prepend this instruction before its 639 // operands. This ensures that its operands are inserted before the 640 // instruction using them. 641 // TODO: The operand tree is not really a tree, but a DAG. We should be 642 // able to handle DAGs without duplication. 643 TargetStmt->prependInstruction(UseInst); 644 NumInstructionsCopied++; 645 TotalInstructionsCopied++; 646 } 647 648 for (Value *OpVal : UseInst->operand_values()) { 649 ForwardingDecision OpDecision = 650 forwardTree(TargetStmt, OpVal, DefStmt, DefLoop, DoIt); 651 switch (OpDecision) { 652 case FD_CannotForward: 653 assert(!DoIt); 654 return FD_CannotForward; 655 656 case FD_CanForwardLeaf: 657 case FD_CanForwardProfitably: 658 assert(!DoIt); 659 break; 660 661 case FD_DidForwardLeaf: 662 case FD_DidForwardTree: 663 assert(DoIt); 664 break; 665 666 case FD_NotApplicable: 667 llvm_unreachable("forwardTree should never return FD_NotApplicable"); 668 } 669 } 670 671 if (DoIt) 672 return FD_DidForwardTree; 673 return FD_CanForwardProfitably; 674 } 675 676 /// Determines whether an operand tree can be forwarded or carries out a 677 /// forwarding, depending on the @p DoIt flag. 678 /// 679 /// @param TargetStmt The statement the operand tree will be copied to. 680 /// @param UseVal The value (usually an instruction) which is root of an 681 /// operand tree. 682 /// @param UseStmt The statement that uses @p UseVal. 683 /// @param UseLoop The loop @p UseVal is used in. 684 /// @param DoIt If false, only determine whether an operand tree can be 685 /// forwarded. If true, carry out the forwarding. Do not 686 /// use DoIt==true if an operand tree is not known to be 687 /// forwardable. 688 /// 689 /// @return If DoIt==false, return whether the operand tree can be forwarded. 690 /// If DoIt==true, return FD_DidForward. 691 ForwardingDecision forwardTree(ScopStmt *TargetStmt, Value *UseVal, 692 ScopStmt *UseStmt, Loop *UseLoop, bool DoIt) { 693 ScopStmt *DefStmt = nullptr; 694 Loop *DefLoop = nullptr; 695 696 // { DefDomain[] -> TargetDomain[] } 697 isl::map DefToTarget; 698 699 VirtualUse VUse = VirtualUse::create(UseStmt, UseLoop, UseVal, true); 700 switch (VUse.getKind()) { 701 case VirtualUse::Constant: 702 case VirtualUse::Block: 703 case VirtualUse::Hoisted: 704 // These can be used anywhere without special considerations. 705 if (DoIt) 706 return FD_DidForwardTree; 707 return FD_CanForwardLeaf; 708 709 case VirtualUse::Synthesizable: { 710 // ScopExpander will take care for of generating the code at the new 711 // location. 712 if (DoIt) 713 return FD_DidForwardTree; 714 715 // Check if the value is synthesizable at the new location as well. This 716 // might be possible when leaving a loop for which ScalarEvolution is 717 // unable to derive the exit value for. 718 // TODO: If there is a LCSSA PHI at the loop exit, use that one. 719 // If the SCEV contains a SCEVAddRecExpr, we currently depend on that we 720 // do not forward past its loop header. This would require us to use a 721 // previous loop induction variable instead the current one. We currently 722 // do not allow forwarding PHI nodes, thus this should never occur (the 723 // only exception where no phi is necessary being an unreachable loop 724 // without edge from the outside). 725 VirtualUse TargetUse = VirtualUse::create( 726 S, TargetStmt, TargetStmt->getSurroundingLoop(), UseVal, true); 727 if (TargetUse.getKind() == VirtualUse::Synthesizable) 728 return FD_CanForwardLeaf; 729 730 LLVM_DEBUG( 731 dbgs() << " Synthesizable would not be synthesizable anymore: " 732 << *UseVal << "\n"); 733 return FD_CannotForward; 734 } 735 736 case VirtualUse::ReadOnly: 737 // Note that we cannot return FD_CanForwardTree here. With a operand tree 738 // depth of 0, UseVal is the use in TargetStmt that we try to replace. 739 // With -polly-analyze-read-only-scalars=true we would ensure the 740 // existence of a MemoryAccess (which already exists for a leaf) and be 741 // removed again by tryForwardTree because it's goal is to remove this 742 // scalar MemoryAccess. It interprets FD_CanForwardTree as the permission 743 // to do so. 744 if (!DoIt) 745 return FD_CanForwardLeaf; 746 747 // If we model read-only scalars, we need to create a MemoryAccess for it. 748 if (ModelReadOnlyScalars) 749 TargetStmt->ensureValueRead(UseVal); 750 751 NumReadOnlyCopied++; 752 TotalReadOnlyCopied++; 753 return FD_DidForwardLeaf; 754 755 case VirtualUse::Intra: 756 // Knowing that UseStmt and DefStmt are the same statement instance, just 757 // reuse the information about UseStmt for DefStmt 758 DefStmt = UseStmt; 759 760 LLVM_FALLTHROUGH; 761 case VirtualUse::Inter: 762 Instruction *Inst = cast<Instruction>(UseVal); 763 764 if (!DefStmt) { 765 DefStmt = S->getStmtFor(Inst); 766 if (!DefStmt) 767 return FD_CannotForward; 768 } 769 770 DefLoop = LI->getLoopFor(Inst->getParent()); 771 772 ForwardingDecision SpeculativeResult = 773 forwardSpeculatable(TargetStmt, Inst, DefStmt, DefLoop, DoIt); 774 if (SpeculativeResult != FD_NotApplicable) 775 return SpeculativeResult; 776 777 ForwardingDecision KnownResult = forwardKnownLoad( 778 TargetStmt, Inst, UseStmt, UseLoop, DefStmt, DefLoop, DoIt); 779 if (KnownResult != FD_NotApplicable) 780 return KnownResult; 781 782 ForwardingDecision ReloadResult = reloadKnownContent( 783 TargetStmt, Inst, UseStmt, UseLoop, DefStmt, DefLoop, DoIt); 784 if (ReloadResult != FD_NotApplicable) 785 return ReloadResult; 786 787 // When no method is found to forward the operand tree, we effectively 788 // cannot handle it. 789 LLVM_DEBUG(dbgs() << " Cannot forward instruction: " << *Inst << "\n"); 790 return FD_CannotForward; 791 } 792 793 llvm_unreachable("Case unhandled"); 794 } 795 796 /// Try to forward an operand tree rooted in @p RA. 797 bool tryForwardTree(MemoryAccess *RA) { 798 assert(RA->isLatestScalarKind()); 799 LLVM_DEBUG(dbgs() << "Trying to forward operand tree " << RA << "...\n"); 800 801 ScopStmt *Stmt = RA->getStatement(); 802 Loop *InLoop = Stmt->getSurroundingLoop(); 803 804 isl::map TargetToUse; 805 if (!Known.is_null()) { 806 isl::space DomSpace = Stmt->getDomainSpace(); 807 TargetToUse = 808 isl::map::identity(DomSpace.map_from_domain_and_range(DomSpace)); 809 } 810 811 ForwardingDecision Assessment = 812 forwardTree(Stmt, RA->getAccessValue(), Stmt, InLoop, false); 813 assert(Assessment != FD_DidForwardTree && Assessment != FD_DidForwardLeaf); 814 if (Assessment != FD_CanForwardProfitably) 815 return false; 816 817 ForwardingDecision Execution = 818 forwardTree(Stmt, RA->getAccessValue(), Stmt, InLoop, true); 819 assert(((Execution == FD_DidForwardTree) || 820 (Execution == FD_DidForwardLeaf)) && 821 "A previous positive assessment must also be executable"); 822 823 if (Execution == FD_DidForwardTree) 824 Stmt->removeSingleMemoryAccess(RA); 825 return true; 826 } 827 828 /// Return which SCoP this instance is processing. 829 Scop *getScop() const { return S; } 830 831 /// Run the algorithm: Use value read accesses as operand tree roots and try 832 /// to forward them into the statement. 833 bool forwardOperandTrees() { 834 for (ScopStmt &Stmt : *S) { 835 bool StmtModified = false; 836 837 // Because we are modifying the MemoryAccess list, collect them first to 838 // avoid iterator invalidation. 839 SmallVector<MemoryAccess *, 16> Accs; 840 for (MemoryAccess *RA : Stmt) { 841 if (!RA->isRead()) 842 continue; 843 if (!RA->isLatestScalarKind()) 844 continue; 845 846 Accs.push_back(RA); 847 } 848 849 for (MemoryAccess *RA : Accs) { 850 if (tryForwardTree(RA)) { 851 Modified = true; 852 StmtModified = true; 853 NumForwardedTrees++; 854 TotalForwardedTrees++; 855 } 856 } 857 858 if (StmtModified) { 859 NumModifiedStmts++; 860 TotalModifiedStmts++; 861 } 862 } 863 864 if (Modified) 865 ScopsModified++; 866 return Modified; 867 } 868 869 /// Print the pass result, performed transformations and the SCoP after the 870 /// transformation. 871 void print(raw_ostream &OS, int Indent = 0) { 872 printStatistics(OS, Indent); 873 874 if (!Modified) { 875 // This line can easily be checked in regression tests. 876 OS << "ForwardOpTree executed, but did not modify anything\n"; 877 return; 878 } 879 880 printStatements(OS, Indent); 881 } 882 }; 883 884 /// Pass that redirects scalar reads to array elements that are known to contain 885 /// the same value. 886 /// 887 /// This reduces the number of scalar accesses and therefore potentially 888 /// increases the freedom of the scheduler. In the ideal case, all reads of a 889 /// scalar definition are redirected (We currently do not care about removing 890 /// the write in this case). This is also useful for the main DeLICM pass as 891 /// there are less scalars to be mapped. 892 class ForwardOpTree : public ScopPass { 893 private: 894 /// The pass implementation, also holding per-scop data. 895 std::unique_ptr<ForwardOpTreeImpl> Impl; 896 897 public: 898 static char ID; 899 900 explicit ForwardOpTree() : ScopPass(ID) {} 901 ForwardOpTree(const ForwardOpTree &) = delete; 902 ForwardOpTree &operator=(const ForwardOpTree &) = delete; 903 904 void getAnalysisUsage(AnalysisUsage &AU) const override { 905 AU.addRequiredTransitive<ScopInfoRegionPass>(); 906 AU.addRequired<LoopInfoWrapperPass>(); 907 AU.setPreservesAll(); 908 } 909 910 bool runOnScop(Scop &S) override { 911 // Free resources for previous SCoP's computation, if not yet done. 912 releaseMemory(); 913 914 LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 915 916 { 917 IslMaxOperationsGuard MaxOpGuard(S.getIslCtx().get(), MaxOps, false); 918 Impl = std::make_unique<ForwardOpTreeImpl>(&S, &LI, MaxOpGuard); 919 920 if (AnalyzeKnown) { 921 LLVM_DEBUG(dbgs() << "Prepare forwarders...\n"); 922 Impl->computeKnownValues(); 923 } 924 925 LLVM_DEBUG(dbgs() << "Forwarding operand trees...\n"); 926 Impl->forwardOperandTrees(); 927 928 if (MaxOpGuard.hasQuotaExceeded()) { 929 LLVM_DEBUG(dbgs() << "Not all operations completed because of " 930 "max_operations exceeded\n"); 931 KnownOutOfQuota++; 932 } 933 } 934 935 LLVM_DEBUG(dbgs() << "\nFinal Scop:\n"); 936 LLVM_DEBUG(dbgs() << S); 937 938 // Update statistics 939 auto ScopStats = S.getStatistics(); 940 NumValueWrites += ScopStats.NumValueWrites; 941 NumValueWritesInLoops += ScopStats.NumValueWritesInLoops; 942 NumPHIWrites += ScopStats.NumPHIWrites; 943 NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops; 944 NumSingletonWrites += ScopStats.NumSingletonWrites; 945 NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops; 946 947 return false; 948 } 949 950 void printScop(raw_ostream &OS, Scop &S) const override { 951 if (!Impl) 952 return; 953 954 assert(Impl->getScop() == &S); 955 Impl->print(OS); 956 } 957 958 void releaseMemory() override { Impl.reset(); } 959 }; // class ForwardOpTree 960 961 char ForwardOpTree::ID; 962 } // namespace 963 964 ScopPass *polly::createForwardOpTreePass() { return new ForwardOpTree(); } 965 966 INITIALIZE_PASS_BEGIN(ForwardOpTree, "polly-optree", 967 "Polly - Forward operand tree", false, false) 968 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 969 INITIALIZE_PASS_END(ForwardOpTree, "polly-optree", 970 "Polly - Forward operand tree", false, false) 971