1 //===- ForwardOpTree.h ------------------------------------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Move instructions between statements. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "polly/ForwardOpTree.h" 15 #include "polly/Options.h" 16 #include "polly/ScopBuilder.h" 17 #include "polly/ScopInfo.h" 18 #include "polly/ScopPass.h" 19 #include "polly/Support/GICHelper.h" 20 #include "polly/Support/ISLOStream.h" 21 #include "polly/Support/ISLTools.h" 22 #include "polly/Support/VirtualInstruction.h" 23 #include "polly/ZoneAlgo.h" 24 #include "llvm/ADT/STLExtras.h" 25 #include "llvm/ADT/SmallVector.h" 26 #include "llvm/ADT/Statistic.h" 27 #include "llvm/Analysis/LoopInfo.h" 28 #include "llvm/Analysis/ValueTracking.h" 29 #include "llvm/IR/Instruction.h" 30 #include "llvm/IR/Instructions.h" 31 #include "llvm/IR/Value.h" 32 #include "llvm/Pass.h" 33 #include "llvm/Support/Casting.h" 34 #include "llvm/Support/CommandLine.h" 35 #include "llvm/Support/Compiler.h" 36 #include "llvm/Support/Debug.h" 37 #include "llvm/Support/ErrorHandling.h" 38 #include "llvm/Support/raw_ostream.h" 39 #include "isl/ctx.h" 40 #include "isl/isl-noexceptions.h" 41 #include <cassert> 42 #include <memory> 43 44 #define DEBUG_TYPE "polly-optree" 45 46 using namespace llvm; 47 using namespace polly; 48 49 static cl::opt<bool> 50 AnalyzeKnown("polly-optree-analyze-known", 51 cl::desc("Analyze array contents for load forwarding"), 52 cl::cat(PollyCategory), cl::init(true), cl::Hidden); 53 54 static cl::opt<unsigned long> 55 MaxOps("polly-optree-max-ops", 56 cl::desc("Maximum number of ISL operations to invest for known " 57 "analysis; 0=no limit"), 58 cl::init(1000000), cl::cat(PollyCategory), cl::Hidden); 59 60 STATISTIC(KnownAnalyzed, "Number of successfully analyzed SCoPs"); 61 STATISTIC(KnownOutOfQuota, 62 "Analyses aborted because max_operations was reached"); 63 STATISTIC(KnownIncompatible, "Number of SCoPs incompatible for analysis"); 64 65 STATISTIC(TotalInstructionsCopied, "Number of copied instructions"); 66 STATISTIC(TotalKnownLoadsForwarded, 67 "Number of forwarded loads because their value was known"); 68 STATISTIC(TotalReadOnlyCopied, "Number of copied read-only accesses"); 69 STATISTIC(TotalForwardedTrees, "Number of forwarded operand trees"); 70 STATISTIC(TotalModifiedStmts, 71 "Number of statements with at least one forwarded tree"); 72 73 STATISTIC(ScopsModified, "Number of SCoPs with at least one forwarded tree"); 74 75 STATISTIC(NumValueWrites, "Number of scalar value writes after OpTree"); 76 STATISTIC(NumValueWritesInLoops, 77 "Number of scalar value writes nested in affine loops after OpTree"); 78 STATISTIC(NumPHIWrites, "Number of scalar phi writes after OpTree"); 79 STATISTIC(NumPHIWritesInLoops, 80 "Number of scalar phi writes nested in affine loops after OpTree"); 81 STATISTIC(NumSingletonWrites, "Number of singleton writes after OpTree"); 82 STATISTIC(NumSingletonWritesInLoops, 83 "Number of singleton writes nested in affine loops after OpTree"); 84 85 namespace { 86 87 /// The state of whether an operand tree was/can be forwarded. 88 /// 89 /// The items apply to an instructions and its operand tree with the instruction 90 /// as the root element. If the value in question is not an instruction in the 91 /// SCoP, it can be a leaf of an instruction's operand tree. 92 enum ForwardingDecision { 93 /// The root instruction or value cannot be forwarded at all. 94 FD_CannotForward, 95 96 /// The root instruction or value can be forwarded as a leaf of a larger 97 /// operand tree. 98 /// It does not make sense to move the value itself, it would just replace it 99 /// by a use of itself. For instance, a constant "5" used in a statement can 100 /// be forwarded, but it would just replace it by the same constant "5". 101 /// However, it makes sense to move as an operand of 102 /// 103 /// %add = add 5, 5 104 /// 105 /// where "5" is moved as part of a larger operand tree. "5" would be placed 106 /// (disregarding for a moment that literal constants don't have a location 107 /// and can be used anywhere) into the same statement as %add would. 108 FD_CanForwardLeaf, 109 110 /// The root instruction can be forwarded in a non-trivial way. This requires 111 /// the operand tree root to be an instruction in some statement. 112 FD_CanForwardTree, 113 114 /// Used to indicate that a forwarding has be carried out successfully. 115 FD_DidForward, 116 117 /// A forwarding method cannot be applied to the operand tree. 118 /// The difference to FD_CannotForward is that there might be other methods 119 /// that can handle it. 120 /// The conditions that make an operand tree applicable must be checked even 121 /// with DoIt==true because a method following the one that returned 122 /// FD_NotApplicable might have returned FD_CanForwardTree. 123 FD_NotApplicable 124 }; 125 126 /// Implementation of operand tree forwarding for a specific SCoP. 127 /// 128 /// For a statement that requires a scalar value (through a value read 129 /// MemoryAccess), see if its operand can be moved into the statement. If so, 130 /// the MemoryAccess is removed and the all the operand tree instructions are 131 /// moved into the statement. All original instructions are left in the source 132 /// statements. The simplification pass can clean these up. 133 class ForwardOpTreeImpl : ZoneAlgorithm { 134 private: 135 /// How many instructions have been copied to other statements. 136 int NumInstructionsCopied = 0; 137 138 /// Number of loads forwarded because their value was known. 139 int NumKnownLoadsForwarded = 0; 140 141 /// How many read-only accesses have been copied. 142 int NumReadOnlyCopied = 0; 143 144 /// How many operand trees have been forwarded. 145 int NumForwardedTrees = 0; 146 147 /// Number of statements with at least one forwarded operand tree. 148 int NumModifiedStmts = 0; 149 150 /// Whether we carried out at least one change to the SCoP. 151 bool Modified = false; 152 153 /// Contains the zones where array elements are known to contain a specific 154 /// value. 155 /// { [Element[] -> Zone[]] -> ValInst[] } 156 /// @see computeKnown() 157 isl::union_map Known; 158 159 /// Translator for newly introduced ValInsts to already existing ValInsts such 160 /// that new introduced load instructions can reuse the Known analysis of its 161 /// original load. { ValInst[] -> ValInst[] } 162 isl::union_map Translator; 163 164 /// Get list of array elements that do contain the same ValInst[] at Domain[]. 165 /// 166 /// @param ValInst { Domain[] -> ValInst[] } 167 /// The values for which we search for alternative locations, 168 /// per statement instance. 169 /// 170 /// @return { Domain[] -> Element[] } 171 /// For each statement instance, the array elements that contain the 172 /// same ValInst. 173 isl::union_map findSameContentElements(isl::union_map ValInst) { 174 assert(ValInst.is_single_valued().is_true()); 175 176 // { Domain[] } 177 isl::union_set Domain = ValInst.domain(); 178 179 // { Domain[] -> Scatter[] } 180 isl::union_map Schedule = getScatterFor(Domain); 181 182 // { Element[] -> [Scatter[] -> ValInst[]] } 183 isl::union_map MustKnownCurried = 184 convertZoneToTimepoints(Known, isl::dim::in, false, true).curry(); 185 186 // { [Domain[] -> ValInst[]] -> Scatter[] } 187 isl::union_map DomValSched = ValInst.domain_map().apply_range(Schedule); 188 189 // { [Scatter[] -> ValInst[]] -> [Domain[] -> ValInst[]] } 190 isl::union_map SchedValDomVal = 191 DomValSched.range_product(ValInst.range_map()).reverse(); 192 193 // { Element[] -> [Domain[] -> ValInst[]] } 194 isl::union_map MustKnownInst = MustKnownCurried.apply_range(SchedValDomVal); 195 196 // { Domain[] -> Element[] } 197 isl::union_map MustKnownMap = 198 MustKnownInst.uncurry().domain().unwrap().reverse(); 199 simplify(MustKnownMap); 200 201 return MustKnownMap; 202 } 203 204 /// Find a single array element for each statement instance, within a single 205 /// array. 206 /// 207 /// @param MustKnown { Domain[] -> Element[] } 208 /// Set of candidate array elements. 209 /// @param Domain { Domain[] } 210 /// The statement instance for which we need elements for. 211 /// 212 /// @return { Domain[] -> Element[] } 213 /// For each statement instance, an array element out of @p MustKnown. 214 /// All array elements must be in the same array (Polly does not yet 215 /// support reading from different accesses using the same 216 /// MemoryAccess). If no mapping for all of @p Domain exists, returns 217 /// null. 218 isl::map singleLocation(isl::union_map MustKnown, isl::set Domain) { 219 // { Domain[] -> Element[] } 220 isl::map Result; 221 222 // MemoryAccesses can read only elements from a single array 223 // (i.e. not: { Dom[0] -> A[0]; Dom[1] -> B[1] }). 224 // Look through all spaces until we find one that contains at least the 225 // wanted statement instance.s 226 MustKnown.foreach_map([&](isl::map Map) -> isl::stat { 227 // Get the array this is accessing. 228 isl::id ArrayId = Map.get_tuple_id(isl::dim::out); 229 ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(ArrayId.get_user()); 230 231 // No support for generation of indirect array accesses. 232 if (SAI->getBasePtrOriginSAI()) 233 return isl::stat::ok; // continue 234 235 // Determine whether this map contains all wanted values. 236 isl::set MapDom = Map.domain(); 237 if (!Domain.is_subset(MapDom).is_true()) 238 return isl::stat::ok; // continue 239 240 // There might be multiple array elements that contain the same value, but 241 // choose only one of them. lexmin is used because it returns a one-value 242 // mapping, we do not care about which one. 243 // TODO: Get the simplest access function. 244 Result = Map.lexmin(); 245 return isl::stat::error; // break 246 }); 247 248 return Result; 249 } 250 251 public: 252 ForwardOpTreeImpl(Scop *S, LoopInfo *LI) 253 : ZoneAlgorithm("polly-optree", S, LI) {} 254 255 /// Compute the zones of known array element contents. 256 /// 257 /// @return True if the computed #Known is usable. 258 bool computeKnownValues() { 259 isl::union_map MustKnown, KnownFromLoad, KnownFromInit; 260 261 // Check that nothing strange occurs. 262 if (!isCompatibleScop()) { 263 KnownIncompatible++; 264 return false; 265 } 266 267 isl_ctx_reset_error(IslCtx.get()); 268 { 269 IslMaxOperationsGuard MaxOpGuard(IslCtx.get(), MaxOps); 270 271 computeCommon(); 272 Known = computeKnown(true, true); 273 simplify(Known); 274 275 // Preexisting ValInsts use the known content analysis of themselves. 276 Translator = makeIdentityMap(Known.range(), false); 277 } 278 279 if (!Known || !Translator) { 280 assert(isl_ctx_last_error(IslCtx.get()) == isl_error_quota); 281 KnownOutOfQuota++; 282 Known = nullptr; 283 Translator = nullptr; 284 DEBUG(dbgs() << "Known analysis exceeded max_operations\n"); 285 return false; 286 } 287 288 KnownAnalyzed++; 289 DEBUG(dbgs() << "All known: " << Known << "\n"); 290 291 return true; 292 } 293 294 void printStatistics(raw_ostream &OS, int Indent = 0) { 295 OS.indent(Indent) << "Statistics {\n"; 296 OS.indent(Indent + 4) << "Instructions copied: " << NumInstructionsCopied 297 << '\n'; 298 OS.indent(Indent + 4) << "Known loads forwarded: " << NumKnownLoadsForwarded 299 << '\n'; 300 OS.indent(Indent + 4) << "Read-only accesses copied: " << NumReadOnlyCopied 301 << '\n'; 302 OS.indent(Indent + 4) << "Operand trees forwarded: " << NumForwardedTrees 303 << '\n'; 304 OS.indent(Indent + 4) << "Statements with forwarded operand trees: " 305 << NumModifiedStmts << '\n'; 306 OS.indent(Indent) << "}\n"; 307 } 308 309 void printStatements(raw_ostream &OS, int Indent = 0) const { 310 OS.indent(Indent) << "After statements {\n"; 311 for (auto &Stmt : *S) { 312 OS.indent(Indent + 4) << Stmt.getBaseName() << "\n"; 313 for (auto *MA : Stmt) 314 MA->print(OS); 315 316 OS.indent(Indent + 12); 317 Stmt.printInstructions(OS); 318 } 319 OS.indent(Indent) << "}\n"; 320 } 321 322 /// Create a new MemoryAccess of type read and MemoryKind::Array. 323 /// 324 /// @param Stmt The statement in which the access occurs. 325 /// @param LI The instruction that does the access. 326 /// @param AccessRelation The array element that each statement instance 327 /// accesses. 328 /// 329 /// @param The newly created access. 330 MemoryAccess *makeReadArrayAccess(ScopStmt *Stmt, LoadInst *LI, 331 isl::map AccessRelation) { 332 isl::id ArrayId = AccessRelation.get_tuple_id(isl::dim::out); 333 ScopArrayInfo *SAI = reinterpret_cast<ScopArrayInfo *>(ArrayId.get_user()); 334 335 // Create a dummy SCEV access, to be replaced anyway. 336 SmallVector<const SCEV *, 4> Sizes; 337 Sizes.reserve(SAI->getNumberOfDimensions()); 338 SmallVector<const SCEV *, 4> Subscripts; 339 Subscripts.reserve(SAI->getNumberOfDimensions()); 340 for (unsigned i = 0; i < SAI->getNumberOfDimensions(); i += 1) { 341 Sizes.push_back(SAI->getDimensionSize(i)); 342 Subscripts.push_back(nullptr); 343 } 344 345 MemoryAccess *Access = 346 new MemoryAccess(Stmt, LI, MemoryAccess::READ, SAI->getBasePtr(), 347 LI->getType(), true, {}, Sizes, LI, MemoryKind::Array); 348 S->addAccessFunction(Access); 349 Stmt->addAccess(Access, true); 350 351 Access->setNewAccessRelation(AccessRelation); 352 353 return Access; 354 } 355 356 /// For an llvm::Value defined in @p DefStmt, compute the RAW dependency for a 357 /// use in every instance of @p UseStmt. 358 /// 359 /// @param UseStmt Statement a scalar is used in. 360 /// @param DefStmt Statement a scalar is defined in. 361 /// 362 /// @return { DomainUse[] -> DomainDef[] } 363 isl::map computeUseToDefFlowDependency(ScopStmt *UseStmt, ScopStmt *DefStmt) { 364 // { DomainUse[] -> Scatter[] } 365 isl::map UseScatter = getScatterFor(UseStmt); 366 367 // { Zone[] -> DomainDef[] } 368 isl::map ReachDefZone = getScalarReachingDefinition(DefStmt); 369 370 // { Scatter[] -> DomainDef[] } 371 isl::map ReachDefTimepoints = 372 convertZoneToTimepoints(ReachDefZone, isl::dim::in, false, true); 373 374 // { DomainUse[] -> DomainDef[] } 375 return UseScatter.apply_range(ReachDefTimepoints); 376 } 377 378 /// Forward a load by reading from an array element that contains the same 379 /// value. Typically the location it was loaded from. 380 /// 381 /// @param TargetStmt The statement the operand tree will be copied to. 382 /// @param Inst The (possibly speculatable) instruction to forward. 383 /// @param UseStmt The statement that uses @p Inst. 384 /// @param UseLoop The loop @p Inst is used in. 385 /// @param UseToTarget { DomainUse[] -> DomainTarget[] } 386 /// A mapping from the statement instance @p Inst is used 387 /// to the statement instance it is forwarded to. 388 /// @param DefStmt The statement @p Inst is defined in. 389 /// @param DefLoop The loop which contains @p Inst. 390 /// @param DefToTarget { DomainDef[] -> DomainTarget[] } 391 /// A mapping from the statement instance @p Inst is 392 /// defined to the statement instance it is forwarded to. 393 /// @param DoIt If false, only determine whether an operand tree can be 394 /// forwarded. If true, carry out the forwarding. Do not 395 /// use DoIt==true if an operand tree is not known to be 396 /// forwardable. 397 /// 398 /// @return FD_NotApplicable if @p Inst is not a LoadInst. 399 /// FD_CannotForward if no array element to load from was found. 400 /// FD_CanForwardLeaf if the load is already in the target statement 401 /// instance. 402 /// FD_CanForwardTree if @p Inst is forwardable. 403 /// FD_DidForward if @p DoIt was true. 404 ForwardingDecision forwardKnownLoad(ScopStmt *TargetStmt, Instruction *Inst, 405 ScopStmt *UseStmt, Loop *UseLoop, 406 isl::map UseToTarget, ScopStmt *DefStmt, 407 Loop *DefLoop, isl::map DefToTarget, 408 bool DoIt) { 409 // Cannot do anything without successful known analysis. 410 if (Known.is_null()) 411 return FD_NotApplicable; 412 413 LoadInst *LI = dyn_cast<LoadInst>(Inst); 414 if (!LI) 415 return FD_NotApplicable; 416 417 // If the load is already in the statement, not forwarding is necessary. 418 // However, it might happen that the LoadInst is already present in the 419 // statement's instruction list. In that case we do as follows: 420 // - For the evaluation (DoIt==false), we can trivially forward it as it is 421 // benefit of forwarding an already present instruction. 422 // - For the execution (DoIt==false), prepend the instruction (to make it 423 // available to all instructions following in the instruction list), but 424 // do not add another MemoryAccess. 425 MemoryAccess *Access = TargetStmt->getArrayAccessOrNULLFor(LI); 426 if (Access && !DoIt) 427 return FD_CanForwardLeaf; 428 429 if (DoIt) 430 TargetStmt->prependInstruction(LI); 431 432 ForwardingDecision OpDecision = 433 forwardTree(TargetStmt, LI->getPointerOperand(), DefStmt, DefLoop, 434 DefToTarget, DoIt); 435 switch (OpDecision) { 436 case FD_CannotForward: 437 assert(!DoIt); 438 return OpDecision; 439 440 case FD_CanForwardLeaf: 441 case FD_CanForwardTree: 442 assert(!DoIt); 443 break; 444 445 case FD_DidForward: 446 assert(DoIt); 447 break; 448 449 default: 450 llvm_unreachable("Shouldn't return this"); 451 } 452 453 // { DomainDef[] -> ValInst[] } 454 isl::map ExpectedVal = makeValInst(Inst, UseStmt, UseLoop); 455 456 // { DomainTarget[] -> ValInst[] } 457 isl::map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget); 458 isl::union_map TranslatedExpectedVal = 459 isl::union_map(TargetExpectedVal).apply_range(Translator); 460 461 // { DomainTarget[] -> Element[] } 462 isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal); 463 464 isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt)); 465 if (!SameVal) 466 return FD_CannotForward; 467 468 if (!DoIt) 469 return FD_CanForwardTree; 470 471 if (Access) { 472 DEBUG(dbgs() << " forwarded known load with preexisting MemoryAccess" 473 << Access << "\n"); 474 } else { 475 Access = makeReadArrayAccess(TargetStmt, LI, SameVal); 476 DEBUG(dbgs() << " forwarded known load with new MemoryAccess" << Access 477 << "\n"); 478 479 // { ValInst[] } 480 isl::space ValInstSpace = ExpectedVal.get_space().range(); 481 482 // After adding a new load to the SCoP, also update the Known content 483 // about it. The new load will have a known ValInst of 484 // { [DomainTarget[] -> Value[]] } 485 // but which -- because it is a copy of it -- has same value as the 486 // { [DomainDef[] -> Value[]] } 487 // that it replicates. Instead of cloning the known content of 488 // [DomainDef[] -> Value[]] 489 // for DomainTarget[], we add a 'translator' that maps 490 // [DomainTarget[] -> Value[]] to [DomainDef[] -> Value[]] 491 // before comparing to the known content. 492 // TODO: 'Translator' could also be used to map PHINodes to their incoming 493 // ValInsts. 494 if (ValInstSpace.is_wrapping()) { 495 // { DefDomain[] -> Value[] } 496 isl::map ValInsts = ExpectedVal.range().unwrap(); 497 498 // { DefDomain[] } 499 isl::set DefDomain = ValInsts.domain(); 500 501 // { Value[] } 502 isl::space ValSpace = ValInstSpace.unwrap().range(); 503 504 // { Value[] -> Value[] } 505 isl::map ValToVal = 506 isl::map::identity(ValSpace.map_from_domain_and_range(ValSpace)); 507 508 // { [TargetDomain[] -> Value[]] -> [DefDomain[] -> Value] } 509 isl::map LocalTranslator = DefToTarget.reverse().product(ValToVal); 510 511 Translator = Translator.add_map(LocalTranslator); 512 DEBUG(dbgs() << " local translator is " << LocalTranslator 513 << "\n"); 514 } 515 } 516 DEBUG(dbgs() << " expected values where " << TargetExpectedVal 517 << "\n"); 518 DEBUG(dbgs() << " candidate elements where " << Candidates << "\n"); 519 assert(Access); 520 521 NumKnownLoadsForwarded++; 522 TotalKnownLoadsForwarded++; 523 return FD_DidForward; 524 } 525 526 /// Forwards a speculatively executable instruction. 527 /// 528 /// @param TargetStmt The statement the operand tree will be copied to. 529 /// @param UseInst The (possibly speculatable) instruction to forward. 530 /// @param DefStmt The statement @p UseInst is defined in. 531 /// @param DefLoop The loop which contains @p UseInst. 532 /// @param DefToTarget { DomainDef[] -> DomainTarget[] } 533 /// A mapping from the statement instance @p UseInst is 534 /// defined to the statement instance it is forwarded to. 535 /// @param DoIt If false, only determine whether an operand tree can be 536 /// forwarded. If true, carry out the forwarding. Do not 537 /// use DoIt==true if an operand tree is not known to be 538 /// forwardable. 539 /// 540 /// @return FD_NotApplicable if @p UseInst is not speculatable. 541 /// FD_CannotForward if one of @p UseInst's operands is not 542 /// forwardable. 543 /// FD_CanForwardTree if @p UseInst is forwardable. 544 /// FD_DidForward if @p DoIt was true. 545 ForwardingDecision forwardSpeculatable(ScopStmt *TargetStmt, 546 Instruction *UseInst, 547 ScopStmt *DefStmt, Loop *DefLoop, 548 isl::map DefToTarget, bool DoIt) { 549 // PHIs, unless synthesizable, are not yet supported. 550 if (isa<PHINode>(UseInst)) 551 return FD_NotApplicable; 552 553 // Compatible instructions must satisfy the following conditions: 554 // 1. Idempotent (instruction will be copied, not moved; although its 555 // original instance might be removed by simplification) 556 // 2. Not access memory (There might be memory writes between) 557 // 3. Not cause undefined behaviour (we might copy to a location when the 558 // original instruction was no executed; this is currently not possible 559 // because we do not forward PHINodes) 560 // 4. Not leak memory if executed multiple times (i.e. malloc) 561 // 562 // Instruction::mayHaveSideEffects is not sufficient because it considers 563 // malloc to not have side-effects. llvm::isSafeToSpeculativelyExecute is 564 // not sufficient because it allows memory accesses. 565 if (mayBeMemoryDependent(*UseInst)) 566 return FD_NotApplicable; 567 568 if (DoIt) { 569 // To ensure the right order, prepend this instruction before its 570 // operands. This ensures that its operands are inserted before the 571 // instruction using them. 572 // TODO: The operand tree is not really a tree, but a DAG. We should be 573 // able to handle DAGs without duplication. 574 TargetStmt->prependInstruction(UseInst); 575 NumInstructionsCopied++; 576 TotalInstructionsCopied++; 577 } 578 579 for (Value *OpVal : UseInst->operand_values()) { 580 ForwardingDecision OpDecision = 581 forwardTree(TargetStmt, OpVal, DefStmt, DefLoop, DefToTarget, DoIt); 582 switch (OpDecision) { 583 case FD_CannotForward: 584 assert(!DoIt); 585 return FD_CannotForward; 586 587 case FD_CanForwardLeaf: 588 case FD_CanForwardTree: 589 assert(!DoIt); 590 break; 591 592 case FD_DidForward: 593 assert(DoIt); 594 break; 595 596 case FD_NotApplicable: 597 llvm_unreachable("forwardTree should never return FD_NotApplicable"); 598 } 599 } 600 601 if (DoIt) 602 return FD_DidForward; 603 return FD_CanForwardTree; 604 } 605 606 /// Determines whether an operand tree can be forwarded or carries out a 607 /// forwarding, depending on the @p DoIt flag. 608 /// 609 /// @param TargetStmt The statement the operand tree will be copied to. 610 /// @param UseVal The value (usually an instruction) which is root of an 611 /// operand tree. 612 /// @param UseStmt The statement that uses @p UseVal. 613 /// @param UseLoop The loop @p UseVal is used in. 614 /// @param UseToTarget { DomainUse[] -> DomainTarget[] } 615 /// A mapping from the statement instance @p UseVal is used 616 /// to the statement instance it is forwarded to. 617 /// @param DoIt If false, only determine whether an operand tree can be 618 /// forwarded. If true, carry out the forwarding. Do not 619 /// use DoIt==true if an operand tree is not known to be 620 /// forwardable. 621 /// 622 /// @return If DoIt==false, return whether the operand tree can be forwarded. 623 /// If DoIt==true, return FD_DidForward. 624 ForwardingDecision forwardTree(ScopStmt *TargetStmt, Value *UseVal, 625 ScopStmt *UseStmt, Loop *UseLoop, 626 isl::map UseToTarget, bool DoIt) { 627 ScopStmt *DefStmt = nullptr; 628 Loop *DefLoop = nullptr; 629 630 // { DefDomain[] -> TargetDomain[] } 631 isl::map DefToTarget; 632 633 VirtualUse VUse = VirtualUse::create(UseStmt, UseLoop, UseVal, true); 634 switch (VUse.getKind()) { 635 case VirtualUse::Constant: 636 case VirtualUse::Block: 637 case VirtualUse::Hoisted: 638 // These can be used anywhere without special considerations. 639 if (DoIt) 640 return FD_DidForward; 641 return FD_CanForwardLeaf; 642 643 case VirtualUse::Synthesizable: { 644 // ScopExpander will take care for of generating the code at the new 645 // location. 646 if (DoIt) 647 return FD_DidForward; 648 649 // Check if the value is synthesizable at the new location as well. This 650 // might be possible when leaving a loop for which ScalarEvolution is 651 // unable to derive the exit value for. 652 // TODO: If there is a LCSSA PHI at the loop exit, use that one. 653 // If the SCEV contains a SCEVAddRecExpr, we currently depend on that we 654 // do not forward past its loop header. This would require us to use a 655 // previous loop induction variable instead the current one. We currently 656 // do not allow forwarding PHI nodes, thus this should never occur (the 657 // only exception where no phi is necessary being an unreachable loop 658 // without edge from the outside). 659 VirtualUse TargetUse = VirtualUse::create( 660 S, TargetStmt, TargetStmt->getSurroundingLoop(), UseVal, true); 661 if (TargetUse.getKind() == VirtualUse::Synthesizable) 662 return FD_CanForwardLeaf; 663 664 DEBUG(dbgs() << " Synthesizable would not be synthesizable anymore: " 665 << *UseVal << "\n"); 666 return FD_CannotForward; 667 } 668 669 case VirtualUse::ReadOnly: 670 // Note that we cannot return FD_CanForwardTree here. With a operand tree 671 // depth of 0, UseVal is the use in TargetStmt that we try to replace. 672 // With -polly-analyze-read-only-scalars=true we would ensure the 673 // existence of a MemoryAccess (which already exists for a leaf) and be 674 // removed again by tryForwardTree because it's goal is to remove this 675 // scalar MemoryAccess. It interprets FD_CanForwardTree as the permission 676 // to do so. 677 if (!DoIt) 678 return FD_CanForwardLeaf; 679 680 // If we model read-only scalars, we need to create a MemoryAccess for it. 681 if (ModelReadOnlyScalars) 682 TargetStmt->ensureValueRead(UseVal); 683 684 NumReadOnlyCopied++; 685 TotalReadOnlyCopied++; 686 return FD_DidForward; 687 688 case VirtualUse::Intra: 689 // Knowing that UseStmt and DefStmt are the same statement instance, just 690 // reuse the information about UseStmt for DefStmt 691 DefStmt = UseStmt; 692 DefToTarget = UseToTarget; 693 694 LLVM_FALLTHROUGH; 695 case VirtualUse::Inter: 696 Instruction *Inst = cast<Instruction>(UseVal); 697 698 if (!DefStmt) { 699 DefStmt = S->getStmtFor(Inst); 700 if (!DefStmt) 701 return FD_CannotForward; 702 } 703 704 DefLoop = LI->getLoopFor(Inst->getParent()); 705 706 if (DefToTarget.is_null() && !Known.is_null()) { 707 // { UseDomain[] -> DefDomain[] } 708 isl::map UseToDef = computeUseToDefFlowDependency(UseStmt, DefStmt); 709 710 // { DefDomain[] -> UseDomain[] -> TargetDomain[] } shortened to 711 // { DefDomain[] -> TargetDomain[] } 712 DefToTarget = UseToTarget.apply_domain(UseToDef); 713 simplify(DefToTarget); 714 } 715 716 ForwardingDecision SpeculativeResult = forwardSpeculatable( 717 TargetStmt, Inst, DefStmt, DefLoop, DefToTarget, DoIt); 718 if (SpeculativeResult != FD_NotApplicable) 719 return SpeculativeResult; 720 721 ForwardingDecision KnownResult = 722 forwardKnownLoad(TargetStmt, Inst, UseStmt, UseLoop, UseToTarget, 723 DefStmt, DefLoop, DefToTarget, DoIt); 724 if (KnownResult != FD_NotApplicable) 725 return KnownResult; 726 727 // When no method is found to forward the operand tree, we effectively 728 // cannot handle it. 729 DEBUG(dbgs() << " Cannot forward instruction: " << *Inst << "\n"); 730 return FD_CannotForward; 731 } 732 733 llvm_unreachable("Case unhandled"); 734 } 735 736 /// Try to forward an operand tree rooted in @p RA. 737 bool tryForwardTree(MemoryAccess *RA) { 738 assert(RA->isLatestScalarKind()); 739 DEBUG(dbgs() << "Trying to forward operand tree " << RA << "...\n"); 740 741 ScopStmt *Stmt = RA->getStatement(); 742 Loop *InLoop = Stmt->getSurroundingLoop(); 743 744 isl::map TargetToUse; 745 if (!Known.is_null()) { 746 isl::space DomSpace = Stmt->getDomainSpace(); 747 TargetToUse = 748 isl::map::identity(DomSpace.map_from_domain_and_range(DomSpace)); 749 } 750 751 ForwardingDecision Assessment = forwardTree( 752 Stmt, RA->getAccessValue(), Stmt, InLoop, TargetToUse, false); 753 assert(Assessment != FD_DidForward); 754 if (Assessment != FD_CanForwardTree) 755 return false; 756 757 ForwardingDecision Execution = forwardTree(Stmt, RA->getAccessValue(), Stmt, 758 InLoop, TargetToUse, true); 759 assert(Execution == FD_DidForward && 760 "A previous positive assessment must also be executable"); 761 (void)Execution; 762 763 Stmt->removeSingleMemoryAccess(RA); 764 return true; 765 } 766 767 /// Return which SCoP this instance is processing. 768 Scop *getScop() const { return S; } 769 770 /// Run the algorithm: Use value read accesses as operand tree roots and try 771 /// to forward them into the statement. 772 bool forwardOperandTrees() { 773 for (ScopStmt &Stmt : *S) { 774 // Currently we cannot modify the instruction list of region statements. 775 if (!Stmt.isBlockStmt()) 776 continue; 777 778 bool StmtModified = false; 779 780 // Because we are modifying the MemoryAccess list, collect them first to 781 // avoid iterator invalidation. 782 SmallVector<MemoryAccess *, 16> Accs; 783 for (MemoryAccess *RA : Stmt) { 784 if (!RA->isRead()) 785 continue; 786 if (!RA->isLatestScalarKind()) 787 continue; 788 789 Accs.push_back(RA); 790 } 791 792 for (MemoryAccess *RA : Accs) { 793 if (tryForwardTree(RA)) { 794 Modified = true; 795 StmtModified = true; 796 NumForwardedTrees++; 797 TotalForwardedTrees++; 798 } 799 } 800 801 if (StmtModified) { 802 NumModifiedStmts++; 803 TotalModifiedStmts++; 804 } 805 } 806 807 if (Modified) 808 ScopsModified++; 809 return Modified; 810 } 811 812 /// Print the pass result, performed transformations and the SCoP after the 813 /// transformation. 814 void print(raw_ostream &OS, int Indent = 0) { 815 printStatistics(OS, Indent); 816 817 if (!Modified) { 818 // This line can easily be checked in regression tests. 819 OS << "ForwardOpTree executed, but did not modify anything\n"; 820 return; 821 } 822 823 printStatements(OS, Indent); 824 } 825 }; 826 827 /// Pass that redirects scalar reads to array elements that are known to contain 828 /// the same value. 829 /// 830 /// This reduces the number of scalar accesses and therefore potentially 831 /// increases the freedom of the scheduler. In the ideal case, all reads of a 832 /// scalar definition are redirected (We currently do not care about removing 833 /// the write in this case). This is also useful for the main DeLICM pass as 834 /// there are less scalars to be mapped. 835 class ForwardOpTree : public ScopPass { 836 private: 837 /// The pass implementation, also holding per-scop data. 838 std::unique_ptr<ForwardOpTreeImpl> Impl; 839 840 public: 841 static char ID; 842 843 explicit ForwardOpTree() : ScopPass(ID) {} 844 ForwardOpTree(const ForwardOpTree &) = delete; 845 ForwardOpTree &operator=(const ForwardOpTree &) = delete; 846 847 void getAnalysisUsage(AnalysisUsage &AU) const override { 848 AU.addRequiredTransitive<ScopInfoRegionPass>(); 849 AU.addRequired<LoopInfoWrapperPass>(); 850 AU.setPreservesAll(); 851 } 852 853 bool runOnScop(Scop &S) override { 854 // Free resources for previous SCoP's computation, if not yet done. 855 releaseMemory(); 856 857 LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 858 Impl = llvm::make_unique<ForwardOpTreeImpl>(&S, &LI); 859 860 if (AnalyzeKnown) { 861 DEBUG(dbgs() << "Prepare forwarders...\n"); 862 Impl->computeKnownValues(); 863 } 864 865 DEBUG(dbgs() << "Forwarding operand trees...\n"); 866 Impl->forwardOperandTrees(); 867 868 DEBUG(dbgs() << "\nFinal Scop:\n"); 869 DEBUG(dbgs() << S); 870 871 // Update statistics 872 auto ScopStats = S.getStatistics(); 873 NumValueWrites += ScopStats.NumValueWrites; 874 NumValueWritesInLoops += ScopStats.NumValueWritesInLoops; 875 NumPHIWrites += ScopStats.NumPHIWrites; 876 NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops; 877 NumSingletonWrites += ScopStats.NumSingletonWrites; 878 NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops; 879 880 return false; 881 } 882 883 void printScop(raw_ostream &OS, Scop &S) const override { 884 if (!Impl) 885 return; 886 887 assert(Impl->getScop() == &S); 888 Impl->print(OS); 889 } 890 891 void releaseMemory() override { Impl.reset(); } 892 }; // class ForwardOpTree 893 894 char ForwardOpTree::ID; 895 896 } // namespace 897 898 ScopPass *polly::createForwardOpTreePass() { return new ForwardOpTree(); } 899 900 INITIALIZE_PASS_BEGIN(ForwardOpTree, "polly-optree", 901 "Polly - Forward operand tree", false, false) 902 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 903 INITIALIZE_PASS_END(ForwardOpTree, "polly-optree", 904 "Polly - Forward operand tree", false, false) 905