1 //===------ ForwardOpTree.h -------------------------------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Move instructions between statements. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "polly/ForwardOpTree.h" 15 16 #include "polly/Options.h" 17 #include "polly/RegisterPasses.h" 18 #include "polly/ScopBuilder.h" 19 #include "polly/ScopInfo.h" 20 #include "polly/ScopPass.h" 21 #include "polly/Support/GICHelper.h" 22 #include "polly/Support/ISLOStream.h" 23 #include "polly/Support/ISLTools.h" 24 #include "polly/Support/VirtualInstruction.h" 25 #include "polly/ZoneAlgo.h" 26 #include "llvm/Analysis/ValueTracking.h" 27 28 #define DEBUG_TYPE "polly-optree" 29 30 using namespace polly; 31 using namespace llvm; 32 33 static cl::opt<bool> 34 AnalyzeKnown("polly-optree-analyze-known", 35 cl::desc("Analyze array contents for load forwarding"), 36 cl::cat(PollyCategory), cl::init(true), cl::Hidden); 37 38 static cl::opt<unsigned long> 39 MaxOps("polly-optree-max-ops", 40 cl::desc("Maximum number of ISL operations to invest for known " 41 "analysis; 0=no limit"), 42 cl::init(1000000), cl::cat(PollyCategory), cl::Hidden); 43 44 STATISTIC(KnownAnalyzed, "Number of successfully analyzed SCoPs"); 45 STATISTIC(KnownOutOfQuota, 46 "Analyses aborted because max_operations was reached"); 47 STATISTIC(KnownIncompatible, "Number of SCoPs incompatible for analysis"); 48 49 STATISTIC(TotalInstructionsCopied, "Number of copied instructions"); 50 STATISTIC(TotalKnownLoadsForwarded, 51 "Number of forwarded loads because their value was known"); 52 STATISTIC(TotalReadOnlyCopied, "Number of copied read-only accesses"); 53 STATISTIC(TotalForwardedTrees, "Number of forwarded operand trees"); 54 STATISTIC(TotalModifiedStmts, 55 "Number of statements with at least one forwarded tree"); 56 57 STATISTIC(ScopsModified, "Number of SCoPs with at least one forwarded tree"); 58 59 namespace { 60 61 /// The state of whether an operand tree was/can be forwarded. 62 /// 63 /// The items apply to an instructions and its operand tree with the instruction 64 /// as the root element. If the value in question is not an instruction in the 65 /// SCoP, it can be a leaf of an instruction's operand tree. 66 enum ForwardingDecision { 67 /// The root instruction or value cannot be forwarded at all. 68 FD_CannotForward, 69 70 /// The root instruction or value can be forwarded as a leaf of a larger 71 /// operand tree. 72 /// It does not make sense to move the value itself, it would just replace it 73 /// by a use of itself. For instance, a constant "5" used in a statement can 74 /// be forwarded, but it would just replace it by the same constant "5". 75 /// However, it makes sense to move as an operand of 76 /// 77 /// %add = add 5, 5 78 /// 79 /// where "5" is moved as part of a larger operand tree. "5" would be placed 80 /// (disregarding for a moment that literal constants don't have a location 81 /// and can be used anywhere) into the same statement as %add would. 82 FD_CanForwardLeaf, 83 84 /// The root instruction can be forwarded in a non-trivial way. This requires 85 /// the operand tree root to be an instruction in some statement. 86 FD_CanForwardTree, 87 88 /// Used to indicate that a forwarding has be carried out successfully. 89 FD_DidForward, 90 91 /// A forwarding method cannot be applied to the operand tree. 92 /// The difference to FD_CannotForward is that there might be other methods 93 /// that can handle it. 94 /// The conditions that make an operand tree applicable must be checked even 95 /// with DoIt==true because a method following the one that returned 96 /// FD_NotApplicable might have returned FD_CanForwardTree. 97 FD_NotApplicable 98 }; 99 100 /// Implementation of operand tree forwarding for a specific SCoP. 101 /// 102 /// For a statement that requires a scalar value (through a value read 103 /// MemoryAccess), see if its operand can be moved into the statement. If so, 104 /// the MemoryAccess is removed and the all the operand tree instructions are 105 /// moved into the statement. All original instructions are left in the source 106 /// statements. The simplification pass can clean these up. 107 class ForwardOpTreeImpl : ZoneAlgorithm { 108 private: 109 /// How many instructions have been copied to other statements. 110 int NumInstructionsCopied = 0; 111 112 /// Number of loads forwarded because their value was known. 113 int NumKnownLoadsForwarded = 0; 114 115 /// How many read-only accesses have been copied. 116 int NumReadOnlyCopied = 0; 117 118 /// How many operand trees have been forwarded. 119 int NumForwardedTrees = 0; 120 121 /// Number of statements with at least one forwarded operand tree. 122 int NumModifiedStmts = 0; 123 124 /// Whether we carried out at least one change to the SCoP. 125 bool Modified = false; 126 127 /// Contains the zones where array elements are known to contain a specific 128 /// value. 129 /// { [Element[] -> Zone[]] -> ValInst[] } 130 /// @see computeKnown() 131 isl::union_map Known; 132 133 /// Translator for newly introduced ValInsts to already existing ValInsts such 134 /// that new introduced load instructions can reuse the Known analysis of its 135 /// original load. { ValInst[] -> ValInst[] } 136 isl::union_map Translator; 137 138 /// Get list of array elements that do contain the same ValInst[] at Domain[]. 139 /// 140 /// @param ValInst { Domain[] -> ValInst[] } 141 /// The values for which we search for alternative locations, 142 /// per statement instance. 143 /// 144 /// @return { Domain[] -> Element[] } 145 /// For each statement instance, the array elements that contain the 146 /// same ValInst. 147 isl::union_map findSameContentElements(isl::union_map ValInst) { 148 assert(ValInst.is_single_valued().is_true()); 149 150 // { Domain[] } 151 isl::union_set Domain = ValInst.domain(); 152 153 // { Domain[] -> Scatter[] } 154 isl::union_map Schedule = getScatterFor(Domain); 155 156 // { Element[] -> [Scatter[] -> ValInst[]] } 157 isl::union_map MustKnownCurried = 158 convertZoneToTimepoints(Known, isl::dim::in, false, true).curry(); 159 160 // { [Domain[] -> ValInst[]] -> Scatter[] } 161 isl::union_map DomValSched = ValInst.domain_map().apply_range(Schedule); 162 163 // { [Scatter[] -> ValInst[]] -> [Domain[] -> ValInst[]] } 164 isl::union_map SchedValDomVal = 165 DomValSched.range_product(ValInst.range_map()).reverse(); 166 167 // { Element[] -> [Domain[] -> ValInst[]] } 168 isl::union_map MustKnownInst = MustKnownCurried.apply_range(SchedValDomVal); 169 170 // { Domain[] -> Element[] } 171 isl::union_map MustKnownMap = 172 MustKnownInst.uncurry().domain().unwrap().reverse(); 173 simplify(MustKnownMap); 174 175 return MustKnownMap; 176 } 177 178 /// Find a single array element for each statement instance, within a single 179 /// array. 180 /// 181 /// @param MustKnown { Domain[] -> Element[] } 182 /// Set of candidate array elements. 183 /// @param Domain { Domain[] } 184 /// The statement instance for which we need elements for. 185 /// 186 /// @return { Domain[] -> Element[] } 187 /// For each statement instance, an array element out of @p MustKnown. 188 /// All array elements must be in the same array (Polly does not yet 189 /// support reading from different accesses using the same 190 /// MemoryAccess). If no mapping for all of @p Domain exists, returns 191 /// null. 192 isl::map singleLocation(isl::union_map MustKnown, isl::set Domain) { 193 // { Domain[] -> Element[] } 194 isl::map Result; 195 196 // MemoryAccesses can read only elements from a single array 197 // (i.e. not: { Dom[0] -> A[0]; Dom[1] -> B[1] }). 198 // Look through all spaces until we find one that contains at least the 199 // wanted statement instance.s 200 MustKnown.foreach_map([&](isl::map Map) -> isl::stat { 201 // Get the array this is accessing. 202 isl::id ArrayId = Map.get_tuple_id(isl::dim::out); 203 ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(ArrayId.get_user()); 204 205 // No support for generation of indirect array accesses. 206 if (SAI->getBasePtrOriginSAI()) 207 return isl::stat::ok; // continue 208 209 // Determine whether this map contains all wanted values. 210 isl::set MapDom = Map.domain(); 211 if (!Domain.is_subset(MapDom).is_true()) 212 return isl::stat::ok; // continue 213 214 // There might be multiple array elements that contain the same value, but 215 // choose only one of them. lexmin is used because it returns a one-value 216 // mapping, we do not care about which one. 217 // TODO: Get the simplest access function. 218 Result = Map.lexmin(); 219 return isl::stat::error; // break 220 }); 221 222 return Result; 223 } 224 225 public: 226 /// Compute the zones of known array element contents. 227 /// 228 /// @return True if the computed #Known is usable. 229 bool computeKnownValues() { 230 isl::union_map MustKnown, KnownFromLoad, KnownFromInit; 231 232 // Check that nothing strange occurs. 233 if (!isCompatibleScop()) { 234 KnownIncompatible++; 235 return false; 236 } 237 238 isl_ctx_reset_error(IslCtx.get()); 239 { 240 IslMaxOperationsGuard MaxOpGuard(IslCtx.get(), MaxOps); 241 242 computeCommon(); 243 Known = computeKnown(true, true); 244 simplify(Known); 245 246 // Preexisting ValInsts use the known content analysis of themselves. 247 Translator = makeIdentityMap(Known.range(), false); 248 } 249 250 if (!Known || !Translator) { 251 assert(isl_ctx_last_error(IslCtx.get()) == isl_error_quota); 252 KnownOutOfQuota++; 253 Known = nullptr; 254 Translator = nullptr; 255 DEBUG(dbgs() << "Known analysis exceeded max_operations\n"); 256 return false; 257 } 258 259 KnownAnalyzed++; 260 DEBUG(dbgs() << "All known: " << Known << "\n"); 261 262 return true; 263 } 264 265 void printStatistics(raw_ostream &OS, int Indent = 0) { 266 OS.indent(Indent) << "Statistics {\n"; 267 OS.indent(Indent + 4) << "Instructions copied: " << NumInstructionsCopied 268 << '\n'; 269 OS.indent(Indent + 4) << "Known loads forwarded: " << NumKnownLoadsForwarded 270 << '\n'; 271 OS.indent(Indent + 4) << "Read-only accesses copied: " << NumReadOnlyCopied 272 << '\n'; 273 OS.indent(Indent + 4) << "Operand trees forwarded: " << NumForwardedTrees 274 << '\n'; 275 OS.indent(Indent + 4) << "Statements with forwarded operand trees: " 276 << NumModifiedStmts << '\n'; 277 OS.indent(Indent) << "}\n"; 278 } 279 280 void printStatements(llvm::raw_ostream &OS, int Indent = 0) const { 281 OS.indent(Indent) << "After statements {\n"; 282 for (auto &Stmt : *S) { 283 OS.indent(Indent + 4) << Stmt.getBaseName() << "\n"; 284 for (auto *MA : Stmt) 285 MA->print(OS); 286 287 OS.indent(Indent + 12); 288 Stmt.printInstructions(OS); 289 } 290 OS.indent(Indent) << "}\n"; 291 } 292 293 /// Create a new MemoryAccess of type read and MemoryKind::Array. 294 /// 295 /// @param Stmt The statement in which the access occurs. 296 /// @param LI The instruction that does the access. 297 /// @param AccessRelation The array element that each statement instance 298 /// accesses. 299 /// 300 /// @param The newly created access. 301 MemoryAccess *makeReadArrayAccess(ScopStmt *Stmt, LoadInst *LI, 302 isl::map AccessRelation) { 303 isl::id ArrayId = AccessRelation.get_tuple_id(isl::dim::out); 304 ScopArrayInfo *SAI = reinterpret_cast<ScopArrayInfo *>(ArrayId.get_user()); 305 306 // Create a dummy SCEV access, to be replaced anyway. 307 SmallVector<const SCEV *, 4> Sizes; 308 Sizes.reserve(SAI->getNumberOfDimensions()); 309 SmallVector<const SCEV *, 4> Subscripts; 310 Subscripts.reserve(SAI->getNumberOfDimensions()); 311 for (unsigned i = 0; i < SAI->getNumberOfDimensions(); i += 1) { 312 Sizes.push_back(SAI->getDimensionSize(i)); 313 Subscripts.push_back(nullptr); 314 } 315 316 MemoryAccess *Access = 317 new MemoryAccess(Stmt, LI, MemoryAccess::READ, SAI->getBasePtr(), 318 LI->getType(), true, {}, Sizes, LI, MemoryKind::Array); 319 S->addAccessFunction(Access); 320 Stmt->addAccess(Access, true); 321 322 Access->setNewAccessRelation(AccessRelation); 323 324 return Access; 325 } 326 327 /// For an llvm::Value defined in @p DefStmt, compute the RAW dependency for a 328 /// use in every instance of @p UseStmt. 329 /// 330 /// @param UseStmt Statement a scalar is used in. 331 /// @param DefStmt Statement a scalar is defined in. 332 /// 333 /// @return { DomainUse[] -> DomainDef[] } 334 isl::map computeUseToDefFlowDependency(ScopStmt *UseStmt, ScopStmt *DefStmt) { 335 // { DomainUse[] -> Scatter[] } 336 isl::map UseScatter = getScatterFor(UseStmt); 337 338 // { Zone[] -> DomainDef[] } 339 isl::map ReachDefZone = getScalarReachingDefinition(DefStmt); 340 341 // { Scatter[] -> DomainDef[] } 342 isl::map ReachDefTimepoints = 343 convertZoneToTimepoints(ReachDefZone, isl::dim::in, false, true); 344 345 // { DomainUse[] -> DomainDef[] } 346 return UseScatter.apply_range(ReachDefTimepoints); 347 } 348 349 /// Forward a load by reading from an array element that contains the same 350 /// value. Typically the location it was loaded from. 351 /// 352 /// @param TargetStmt The statement the operand tree will be copied to. 353 /// @param Inst The (possibly speculatable) instruction to forward. 354 /// @param UseStmt The statement that uses @p Inst. 355 /// @param UseLoop The loop @p Inst is used in. 356 /// @param UseToTarget { DomainUse[] -> DomainTarget[] } 357 /// A mapping from the statement instance @p Inst is used 358 /// to the statement instance it is forwarded to. 359 /// @param DefStmt The statement @p Inst is defined in. 360 /// @param DefLoop The loop which contains @p Inst. 361 /// @param DefToTarget { DomainDef[] -> DomainTarget[] } 362 /// A mapping from the statement instance @p Inst is 363 /// defined to the statement instance it is forwarded to. 364 /// @param DoIt If false, only determine whether an operand tree can be 365 /// forwarded. If true, carry out the forwarding. Do not 366 /// use DoIt==true if an operand tree is not known to be 367 /// forwardable. 368 /// 369 /// @return FD_NotApplicable if @p Inst is not a LoadInst. 370 /// FD_CannotForward if no array element to load from was found. 371 /// FD_CanForwardLeaf if the load is already in the target statement 372 /// instance. 373 /// FD_CanForwardTree if @p Inst is forwardable. 374 /// FD_DidForward if @p DoIt was true. 375 ForwardingDecision forwardKnownLoad(ScopStmt *TargetStmt, Instruction *Inst, 376 ScopStmt *UseStmt, Loop *UseLoop, 377 isl::map UseToTarget, ScopStmt *DefStmt, 378 Loop *DefLoop, isl::map DefToTarget, 379 bool DoIt) { 380 // Cannot do anything without successful known analysis. 381 if (Known.is_null()) 382 return FD_NotApplicable; 383 384 LoadInst *LI = dyn_cast<LoadInst>(Inst); 385 if (!LI) 386 return FD_NotApplicable; 387 388 // If the load is already in the statement, not forwarding is necessary. 389 // However, it might happen that the LoadInst is already present in the 390 // statement's instruction list. In that case we do as follows: 391 // - For the evaluation (DoIt==false), we can trivially forward it as it is 392 // benefit of forwarding an already present instruction. 393 // - For the execution (DoIt==false), prepend the instruction (to make it 394 // available to all instructions following in the instruction list), but 395 // do not add another MemoryAccess. 396 MemoryAccess *Access = TargetStmt->getArrayAccessOrNULLFor(LI); 397 if (Access && !DoIt) 398 return FD_CanForwardLeaf; 399 400 if (DoIt) 401 TargetStmt->prependInstruction(LI); 402 403 ForwardingDecision OpDecision = 404 forwardTree(TargetStmt, LI->getPointerOperand(), DefStmt, DefLoop, 405 DefToTarget, DoIt); 406 switch (OpDecision) { 407 case FD_CannotForward: 408 assert(!DoIt); 409 return OpDecision; 410 411 case FD_CanForwardLeaf: 412 case FD_CanForwardTree: 413 assert(!DoIt); 414 break; 415 416 case FD_DidForward: 417 assert(DoIt); 418 break; 419 420 default: 421 llvm_unreachable("Shouldn't return this"); 422 } 423 424 // { DomainDef[] -> ValInst[] } 425 isl::map ExpectedVal = makeValInst(Inst, UseStmt, UseLoop); 426 427 // { DomainTarget[] -> ValInst[] } 428 isl::map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget); 429 isl::union_map TranslatedExpectedVal = 430 isl::union_map(TargetExpectedVal).apply_range(Translator); 431 432 // { DomainTarget[] -> Element[] } 433 isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal); 434 435 isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt)); 436 if (!SameVal) 437 return FD_CannotForward; 438 439 if (!DoIt) 440 return FD_CanForwardTree; 441 442 if (Access) { 443 DEBUG(dbgs() << " forwarded known load with preexisting MemoryAccess" 444 << Access << "\n"); 445 } else { 446 Access = makeReadArrayAccess(TargetStmt, LI, SameVal); 447 DEBUG(dbgs() << " forwarded known load with new MemoryAccess" << Access 448 << "\n"); 449 450 // { ValInst[] } 451 isl::space ValInstSpace = ExpectedVal.get_space().range(); 452 453 // After adding a new load to the SCoP, also update the Known content 454 // about it. The new load will have a known ValInst of 455 // { [DomainTarget[] -> Value[]] } 456 // but which -- because it is a copy of it -- has same value as the 457 // { [DomainDef[] -> Value[]] } 458 // that it replicates. Instead of cloning the known content of 459 // [DomainDef[] -> Value[]] 460 // for DomainTarget[], we add a 'translator' that maps 461 // [DomainTarget[] -> Value[]] to [DomainDef[] -> Value[]] 462 // before comparing to the known content. 463 // TODO: 'Translator' could also be used to map PHINodes to their incoming 464 // ValInsts. 465 if (ValInstSpace.is_wrapping()) { 466 // { DefDomain[] -> Value[] } 467 isl::map ValInsts = ExpectedVal.range().unwrap(); 468 469 // { DefDomain[] } 470 isl::set DefDomain = ValInsts.domain(); 471 472 // { Value[] } 473 isl::space ValSpace = ValInstSpace.unwrap().range(); 474 475 // { Value[] -> Value[] } 476 isl::map ValToVal = 477 isl::map::identity(ValSpace.map_from_domain_and_range(ValSpace)); 478 479 // { [TargetDomain[] -> Value[]] -> [DefDomain[] -> Value] } 480 isl::map LocalTranslator = DefToTarget.reverse().product(ValToVal); 481 482 Translator = Translator.add_map(LocalTranslator); 483 DEBUG(dbgs() << " local translator is " << LocalTranslator 484 << "\n"); 485 } 486 } 487 DEBUG(dbgs() << " expected values where " << TargetExpectedVal 488 << "\n"); 489 DEBUG(dbgs() << " candidate elements where " << Candidates << "\n"); 490 assert(Access); 491 492 NumKnownLoadsForwarded++; 493 TotalKnownLoadsForwarded++; 494 return FD_DidForward; 495 } 496 497 /// Forwards a speculatively executable instruction. 498 /// 499 /// @param TargetStmt The statement the operand tree will be copied to. 500 /// @param UseInst The (possibly speculatable) instruction to forward. 501 /// @param DefStmt The statement @p UseInst is defined in. 502 /// @param DefLoop The loop which contains @p UseInst. 503 /// @param DefToTarget { DomainDef[] -> DomainTarget[] } 504 /// A mapping from the statement instance @p UseInst is 505 /// defined to the statement instance it is forwarded to. 506 /// @param DoIt If false, only determine whether an operand tree can be 507 /// forwarded. If true, carry out the forwarding. Do not 508 /// use DoIt==true if an operand tree is not known to be 509 /// forwardable. 510 /// 511 /// @return FD_NotApplicable if @p UseInst is not speculatable. 512 /// FD_CannotForward if one of @p UseInst's operands is not 513 /// forwardable. 514 /// FD_CanForwardTree if @p UseInst is forwardable. 515 /// FD_DidForward if @p DoIt was true. 516 ForwardingDecision forwardSpeculatable(ScopStmt *TargetStmt, 517 Instruction *UseInst, 518 ScopStmt *DefStmt, Loop *DefLoop, 519 isl::map DefToTarget, bool DoIt) { 520 // PHIs, unless synthesizable, are not yet supported. 521 if (isa<PHINode>(UseInst)) 522 return FD_NotApplicable; 523 524 // Compatible instructions must satisfy the following conditions: 525 // 1. Idempotent (instruction will be copied, not moved; although its 526 // original instance might be removed by simplification) 527 // 2. Not access memory (There might be memory writes between) 528 // 3. Not cause undefined behaviour (we might copy to a location when the 529 // original instruction was no executed; this is currently not possible 530 // because we do not forward PHINodes) 531 // 4. Not leak memory if executed multiple times (i.e. malloc) 532 // 533 // Instruction::mayHaveSideEffects is not sufficient because it considers 534 // malloc to not have side-effects. llvm::isSafeToSpeculativelyExecute is 535 // not sufficient because it allows memory accesses. 536 if (mayBeMemoryDependent(*UseInst)) 537 return FD_NotApplicable; 538 539 if (DoIt) { 540 // To ensure the right order, prepend this instruction before its 541 // operands. This ensures that its operands are inserted before the 542 // instruction using them. 543 // TODO: The operand tree is not really a tree, but a DAG. We should be 544 // able to handle DAGs without duplication. 545 TargetStmt->prependInstruction(UseInst); 546 NumInstructionsCopied++; 547 TotalInstructionsCopied++; 548 } 549 550 for (Value *OpVal : UseInst->operand_values()) { 551 ForwardingDecision OpDecision = 552 forwardTree(TargetStmt, OpVal, DefStmt, DefLoop, DefToTarget, DoIt); 553 switch (OpDecision) { 554 case FD_CannotForward: 555 assert(!DoIt); 556 return FD_CannotForward; 557 558 case FD_CanForwardLeaf: 559 case FD_CanForwardTree: 560 assert(!DoIt); 561 break; 562 563 case FD_DidForward: 564 assert(DoIt); 565 break; 566 567 case FD_NotApplicable: 568 llvm_unreachable("forwardTree should never return FD_NotApplicable"); 569 } 570 } 571 572 if (DoIt) 573 return FD_DidForward; 574 return FD_CanForwardTree; 575 } 576 577 /// Determines whether an operand tree can be forwarded or carries out a 578 /// forwarding, depending on the @p DoIt flag. 579 /// 580 /// @param TargetStmt The statement the operand tree will be copied to. 581 /// @param UseVal The value (usually an instruction) which is root of an 582 /// operand tree. 583 /// @param UseStmt The statement that uses @p UseVal. 584 /// @param UseLoop The loop @p UseVal is used in. 585 /// @param UseToTarget { DomainUse[] -> DomainTarget[] } 586 /// A mapping from the statement instance @p UseVal is used 587 /// to the statement instance it is forwarded to. 588 /// @param DoIt If false, only determine whether an operand tree can be 589 /// forwarded. If true, carry out the forwarding. Do not 590 /// use DoIt==true if an operand tree is not known to be 591 /// forwardable. 592 /// 593 /// @return If DoIt==false, return whether the operand tree can be forwarded. 594 /// If DoIt==true, return FD_DidForward. 595 ForwardingDecision forwardTree(ScopStmt *TargetStmt, llvm::Value *UseVal, 596 ScopStmt *UseStmt, llvm::Loop *UseLoop, 597 isl::map UseToTarget, bool DoIt) { 598 ScopStmt *DefStmt = nullptr; 599 Loop *DefLoop = nullptr; 600 601 // { DefDomain[] -> TargetDomain[] } 602 isl::map DefToTarget; 603 604 VirtualUse VUse = VirtualUse::create(UseStmt, UseLoop, UseVal, true); 605 switch (VUse.getKind()) { 606 case VirtualUse::Constant: 607 case VirtualUse::Block: 608 case VirtualUse::Hoisted: 609 // These can be used anywhere without special considerations. 610 if (DoIt) 611 return FD_DidForward; 612 return FD_CanForwardLeaf; 613 614 case VirtualUse::Synthesizable: { 615 // ScopExpander will take care for of generating the code at the new 616 // location. 617 if (DoIt) 618 return FD_DidForward; 619 620 // Check if the value is synthesizable at the new location as well. This 621 // might be possible when leaving a loop for which ScalarEvolution is 622 // unable to derive the exit value for. 623 // TODO: If there is a LCSSA PHI at the loop exit, use that one. 624 // If the SCEV contains a SCEVAddRecExpr, we currently depend on that we 625 // do not forward past its loop header. This would require us to use a 626 // previous loop induction variable instead the current one. We currently 627 // do not allow forwarding PHI nodes, thus this should never occur (the 628 // only exception where no phi is necessary being an unreachable loop 629 // without edge from the outside). 630 VirtualUse TargetUse = VirtualUse::create( 631 S, TargetStmt, TargetStmt->getSurroundingLoop(), UseVal, true); 632 if (TargetUse.getKind() == VirtualUse::Synthesizable) 633 return FD_CanForwardLeaf; 634 635 DEBUG(dbgs() << " Synthesizable would not be synthesizable anymore: " 636 << *UseVal << "\n"); 637 return FD_CannotForward; 638 } 639 640 case VirtualUse::ReadOnly: 641 // Note that we cannot return FD_CanForwardTree here. With a operand tree 642 // depth of 0, UseVal is the use in TargetStmt that we try to replace. 643 // With -polly-analyze-read-only-scalars=true we would ensure the 644 // existence of a MemoryAccess (which already exists for a leaf) and be 645 // removed again by tryForwardTree because it's goal is to remove this 646 // scalar MemoryAccess. It interprets FD_CanForwardTree as the permission 647 // to do so. 648 if (!DoIt) 649 return FD_CanForwardLeaf; 650 651 // If we model read-only scalars, we need to create a MemoryAccess for it. 652 if (ModelReadOnlyScalars) 653 TargetStmt->ensureValueRead(UseVal); 654 655 NumReadOnlyCopied++; 656 TotalReadOnlyCopied++; 657 return FD_DidForward; 658 659 case VirtualUse::Intra: 660 // Knowing that UseStmt and DefStmt are the same statement instance, just 661 // reuse the information about UseStmt for DefStmt 662 DefStmt = UseStmt; 663 DefToTarget = UseToTarget; 664 665 LLVM_FALLTHROUGH; 666 case VirtualUse::Inter: 667 Instruction *Inst = cast<Instruction>(UseVal); 668 669 if (!DefStmt) { 670 DefStmt = S->getStmtFor(Inst); 671 if (!DefStmt) 672 return FD_CannotForward; 673 } 674 675 DefLoop = LI->getLoopFor(Inst->getParent()); 676 677 if (DefToTarget.is_null() && !Known.is_null()) { 678 // { UseDomain[] -> DefDomain[] } 679 isl::map UseToDef = computeUseToDefFlowDependency(UseStmt, DefStmt); 680 681 // { DefDomain[] -> UseDomain[] -> TargetDomain[] } shortened to 682 // { DefDomain[] -> TargetDomain[] } 683 DefToTarget = UseToTarget.apply_domain(UseToDef); 684 simplify(DefToTarget); 685 } 686 687 ForwardingDecision SpeculativeResult = forwardSpeculatable( 688 TargetStmt, Inst, DefStmt, DefLoop, DefToTarget, DoIt); 689 if (SpeculativeResult != FD_NotApplicable) 690 return SpeculativeResult; 691 692 ForwardingDecision KnownResult = 693 forwardKnownLoad(TargetStmt, Inst, UseStmt, UseLoop, UseToTarget, 694 DefStmt, DefLoop, DefToTarget, DoIt); 695 if (KnownResult != FD_NotApplicable) 696 return KnownResult; 697 698 // When no method is found to forward the operand tree, we effectively 699 // cannot handle it. 700 DEBUG(dbgs() << " Cannot forward instruction: " << *Inst << "\n"); 701 return FD_CannotForward; 702 } 703 704 llvm_unreachable("Case unhandled"); 705 } 706 707 /// Try to forward an operand tree rooted in @p RA. 708 bool tryForwardTree(MemoryAccess *RA) { 709 assert(RA->isLatestScalarKind()); 710 DEBUG(dbgs() << "Trying to forward operand tree " << RA << "...\n"); 711 712 ScopStmt *Stmt = RA->getStatement(); 713 Loop *InLoop = Stmt->getSurroundingLoop(); 714 715 isl::map TargetToUse; 716 if (!Known.is_null()) { 717 isl::space DomSpace = Stmt->getDomainSpace(); 718 TargetToUse = 719 isl::map::identity(DomSpace.map_from_domain_and_range(DomSpace)); 720 } 721 722 ForwardingDecision Assessment = forwardTree( 723 Stmt, RA->getAccessValue(), Stmt, InLoop, TargetToUse, false); 724 assert(Assessment != FD_DidForward); 725 if (Assessment != FD_CanForwardTree) 726 return false; 727 728 ForwardingDecision Execution = forwardTree(Stmt, RA->getAccessValue(), Stmt, 729 InLoop, TargetToUse, true); 730 assert(Execution == FD_DidForward && 731 "A previous positive assessment must also be executable"); 732 (void)Execution; 733 734 Stmt->removeSingleMemoryAccess(RA); 735 return true; 736 } 737 738 public: 739 ForwardOpTreeImpl(Scop *S, LoopInfo *LI) 740 : ZoneAlgorithm("polly-optree", S, LI) {} 741 742 /// Return which SCoP this instance is processing. 743 Scop *getScop() const { return S; } 744 745 /// Run the algorithm: Use value read accesses as operand tree roots and try 746 /// to forward them into the statement. 747 bool forwardOperandTrees() { 748 for (ScopStmt &Stmt : *S) { 749 // Currently we cannot modify the instruction list of region statements. 750 if (!Stmt.isBlockStmt()) 751 continue; 752 753 bool StmtModified = false; 754 755 // Because we are modifying the MemoryAccess list, collect them first to 756 // avoid iterator invalidation. 757 SmallVector<MemoryAccess *, 16> Accs; 758 for (MemoryAccess *RA : Stmt) { 759 if (!RA->isRead()) 760 continue; 761 if (!RA->isLatestScalarKind()) 762 continue; 763 764 Accs.push_back(RA); 765 } 766 767 for (MemoryAccess *RA : Accs) { 768 if (tryForwardTree(RA)) { 769 Modified = true; 770 StmtModified = true; 771 NumForwardedTrees++; 772 TotalForwardedTrees++; 773 } 774 } 775 776 if (StmtModified) { 777 NumModifiedStmts++; 778 TotalModifiedStmts++; 779 } 780 } 781 782 if (Modified) 783 ScopsModified++; 784 return Modified; 785 } 786 787 /// Print the pass result, performed transformations and the SCoP after the 788 /// transformation. 789 void print(llvm::raw_ostream &OS, int Indent = 0) { 790 printStatistics(OS, Indent); 791 792 if (!Modified) { 793 // This line can easily be checked in regression tests. 794 OS << "ForwardOpTree executed, but did not modify anything\n"; 795 return; 796 } 797 798 printStatements(OS, Indent); 799 } 800 }; 801 802 /// Pass that redirects scalar reads to array elements that are known to contain 803 /// the same value. 804 /// 805 /// This reduces the number of scalar accesses and therefore potentially 806 /// increases the freedom of the scheduler. In the ideal case, all reads of a 807 /// scalar definition are redirected (We currently do not care about removing 808 /// the write in this case). This is also useful for the main DeLICM pass as 809 /// there are less scalars to be mapped. 810 class ForwardOpTree : public ScopPass { 811 private: 812 ForwardOpTree(const ForwardOpTree &) = delete; 813 const ForwardOpTree &operator=(const ForwardOpTree &) = delete; 814 815 /// The pass implementation, also holding per-scop data. 816 std::unique_ptr<ForwardOpTreeImpl> Impl; 817 818 public: 819 static char ID; 820 821 explicit ForwardOpTree() : ScopPass(ID) {} 822 823 virtual void getAnalysisUsage(AnalysisUsage &AU) const override { 824 AU.addRequiredTransitive<ScopInfoRegionPass>(); 825 AU.addRequired<LoopInfoWrapperPass>(); 826 AU.setPreservesAll(); 827 } 828 829 virtual bool runOnScop(Scop &S) override { 830 // Free resources for previous SCoP's computation, if not yet done. 831 releaseMemory(); 832 833 LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 834 Impl = make_unique<ForwardOpTreeImpl>(&S, &LI); 835 836 if (AnalyzeKnown) { 837 DEBUG(dbgs() << "Prepare forwarders...\n"); 838 Impl->computeKnownValues(); 839 } 840 841 DEBUG(dbgs() << "Forwarding operand trees...\n"); 842 Impl->forwardOperandTrees(); 843 844 DEBUG(dbgs() << "\nFinal Scop:\n"); 845 DEBUG(dbgs() << S); 846 847 return false; 848 } 849 850 virtual void printScop(raw_ostream &OS, Scop &S) const override { 851 if (!Impl) 852 return; 853 854 assert(Impl->getScop() == &S); 855 Impl->print(OS); 856 } 857 858 virtual void releaseMemory() override { Impl.reset(); } 859 860 }; // class ForwardOpTree 861 862 char ForwardOpTree::ID; 863 } // anonymous namespace 864 865 ScopPass *polly::createForwardOpTreePass() { return new ForwardOpTree(); } 866 867 INITIALIZE_PASS_BEGIN(ForwardOpTree, "polly-optree", 868 "Polly - Forward operand tree", false, false) 869 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 870 INITIALIZE_PASS_END(ForwardOpTree, "polly-optree", 871 "Polly - Forward operand tree", false, false) 872