1 //===- ForwardOpTree.h ------------------------------------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Move instructions between statements. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "polly/ForwardOpTree.h" 15 #include "polly/Options.h" 16 #include "polly/ScopBuilder.h" 17 #include "polly/ScopInfo.h" 18 #include "polly/ScopPass.h" 19 #include "polly/Support/GICHelper.h" 20 #include "polly/Support/ISLOStream.h" 21 #include "polly/Support/ISLTools.h" 22 #include "polly/Support/VirtualInstruction.h" 23 #include "polly/ZoneAlgo.h" 24 #include "llvm/ADT/STLExtras.h" 25 #include "llvm/ADT/SmallVector.h" 26 #include "llvm/ADT/Statistic.h" 27 #include "llvm/Analysis/LoopInfo.h" 28 #include "llvm/Analysis/ValueTracking.h" 29 #include "llvm/IR/Instruction.h" 30 #include "llvm/IR/Instructions.h" 31 #include "llvm/IR/Value.h" 32 #include "llvm/Pass.h" 33 #include "llvm/Support/Casting.h" 34 #include "llvm/Support/CommandLine.h" 35 #include "llvm/Support/Compiler.h" 36 #include "llvm/Support/Debug.h" 37 #include "llvm/Support/ErrorHandling.h" 38 #include "llvm/Support/raw_ostream.h" 39 #include "isl/ctx.h" 40 #include "isl/isl-noexceptions.h" 41 #include <cassert> 42 #include <memory> 43 44 #define DEBUG_TYPE "polly-optree" 45 46 using namespace llvm; 47 using namespace polly; 48 49 static cl::opt<bool> 50 AnalyzeKnown("polly-optree-analyze-known", 51 cl::desc("Analyze array contents for load forwarding"), 52 cl::cat(PollyCategory), cl::init(true), cl::Hidden); 53 54 static cl::opt<unsigned> 55 MaxOps("polly-optree-max-ops", 56 cl::desc("Maximum number of ISL operations to invest for known " 57 "analysis; 0=no limit"), 58 cl::init(1000000), cl::cat(PollyCategory), cl::Hidden); 59 60 STATISTIC(KnownAnalyzed, "Number of successfully analyzed SCoPs"); 61 STATISTIC(KnownOutOfQuota, 62 "Analyses aborted because max_operations was reached"); 63 64 STATISTIC(TotalInstructionsCopied, "Number of copied instructions"); 65 STATISTIC(TotalKnownLoadsForwarded, 66 "Number of forwarded loads because their value was known"); 67 STATISTIC(TotalReadOnlyCopied, "Number of copied read-only accesses"); 68 STATISTIC(TotalForwardedTrees, "Number of forwarded operand trees"); 69 STATISTIC(TotalModifiedStmts, 70 "Number of statements with at least one forwarded tree"); 71 72 STATISTIC(ScopsModified, "Number of SCoPs with at least one forwarded tree"); 73 74 STATISTIC(NumValueWrites, "Number of scalar value writes after OpTree"); 75 STATISTIC(NumValueWritesInLoops, 76 "Number of scalar value writes nested in affine loops after OpTree"); 77 STATISTIC(NumPHIWrites, "Number of scalar phi writes after OpTree"); 78 STATISTIC(NumPHIWritesInLoops, 79 "Number of scalar phi writes nested in affine loops after OpTree"); 80 STATISTIC(NumSingletonWrites, "Number of singleton writes after OpTree"); 81 STATISTIC(NumSingletonWritesInLoops, 82 "Number of singleton writes nested in affine loops after OpTree"); 83 84 namespace { 85 86 /// The state of whether an operand tree was/can be forwarded. 87 /// 88 /// The items apply to an instructions and its operand tree with the instruction 89 /// as the root element. If the value in question is not an instruction in the 90 /// SCoP, it can be a leaf of an instruction's operand tree. 91 enum ForwardingDecision { 92 /// The root instruction or value cannot be forwarded at all. 93 FD_CannotForward, 94 95 /// The root instruction or value can be forwarded as a leaf of a larger 96 /// operand tree. 97 /// It does not make sense to move the value itself, it would just replace it 98 /// by a use of itself. For instance, a constant "5" used in a statement can 99 /// be forwarded, but it would just replace it by the same constant "5". 100 /// However, it makes sense to move as an operand of 101 /// 102 /// %add = add 5, 5 103 /// 104 /// where "5" is moved as part of a larger operand tree. "5" would be placed 105 /// (disregarding for a moment that literal constants don't have a location 106 /// and can be used anywhere) into the same statement as %add would. 107 FD_CanForwardLeaf, 108 109 /// The root instruction can be forwarded in a non-trivial way. This requires 110 /// the operand tree root to be an instruction in some statement. 111 FD_CanForwardTree, 112 113 /// Used to indicate that a forwarding has be carried out successfully. 114 FD_DidForward, 115 116 /// A forwarding method cannot be applied to the operand tree. 117 /// The difference to FD_CannotForward is that there might be other methods 118 /// that can handle it. 119 /// The conditions that make an operand tree applicable must be checked even 120 /// with DoIt==true because a method following the one that returned 121 /// FD_NotApplicable might have returned FD_CanForwardTree. 122 FD_NotApplicable 123 }; 124 125 /// Implementation of operand tree forwarding for a specific SCoP. 126 /// 127 /// For a statement that requires a scalar value (through a value read 128 /// MemoryAccess), see if its operand can be moved into the statement. If so, 129 /// the MemoryAccess is removed and the all the operand tree instructions are 130 /// moved into the statement. All original instructions are left in the source 131 /// statements. The simplification pass can clean these up. 132 class ForwardOpTreeImpl : ZoneAlgorithm { 133 private: 134 /// Scope guard to limit the number of isl operations for this pass. 135 IslMaxOperationsGuard &MaxOpGuard; 136 137 /// How many instructions have been copied to other statements. 138 int NumInstructionsCopied = 0; 139 140 /// Number of loads forwarded because their value was known. 141 int NumKnownLoadsForwarded = 0; 142 143 /// How many read-only accesses have been copied. 144 int NumReadOnlyCopied = 0; 145 146 /// How many operand trees have been forwarded. 147 int NumForwardedTrees = 0; 148 149 /// Number of statements with at least one forwarded operand tree. 150 int NumModifiedStmts = 0; 151 152 /// Whether we carried out at least one change to the SCoP. 153 bool Modified = false; 154 155 /// Contains the zones where array elements are known to contain a specific 156 /// value. 157 /// { [Element[] -> Zone[]] -> ValInst[] } 158 /// @see computeKnown() 159 isl::union_map Known; 160 161 /// Translator for newly introduced ValInsts to already existing ValInsts such 162 /// that new introduced load instructions can reuse the Known analysis of its 163 /// original load. { ValInst[] -> ValInst[] } 164 isl::union_map Translator; 165 166 /// Get list of array elements that do contain the same ValInst[] at Domain[]. 167 /// 168 /// @param ValInst { Domain[] -> ValInst[] } 169 /// The values for which we search for alternative locations, 170 /// per statement instance. 171 /// 172 /// @return { Domain[] -> Element[] } 173 /// For each statement instance, the array elements that contain the 174 /// same ValInst. 175 isl::union_map findSameContentElements(isl::union_map ValInst) { 176 assert(ValInst.is_single_valued().is_true()); 177 178 // { Domain[] } 179 isl::union_set Domain = ValInst.domain(); 180 181 // { Domain[] -> Scatter[] } 182 isl::union_map Schedule = getScatterFor(Domain); 183 184 // { Element[] -> [Scatter[] -> ValInst[]] } 185 isl::union_map MustKnownCurried = 186 convertZoneToTimepoints(Known, isl::dim::in, false, true).curry(); 187 188 // { [Domain[] -> ValInst[]] -> Scatter[] } 189 isl::union_map DomValSched = ValInst.domain_map().apply_range(Schedule); 190 191 // { [Scatter[] -> ValInst[]] -> [Domain[] -> ValInst[]] } 192 isl::union_map SchedValDomVal = 193 DomValSched.range_product(ValInst.range_map()).reverse(); 194 195 // { Element[] -> [Domain[] -> ValInst[]] } 196 isl::union_map MustKnownInst = MustKnownCurried.apply_range(SchedValDomVal); 197 198 // { Domain[] -> Element[] } 199 isl::union_map MustKnownMap = 200 MustKnownInst.uncurry().domain().unwrap().reverse(); 201 simplify(MustKnownMap); 202 203 return MustKnownMap; 204 } 205 206 /// Find a single array element for each statement instance, within a single 207 /// array. 208 /// 209 /// @param MustKnown { Domain[] -> Element[] } 210 /// Set of candidate array elements. 211 /// @param Domain { Domain[] } 212 /// The statement instance for which we need elements for. 213 /// 214 /// @return { Domain[] -> Element[] } 215 /// For each statement instance, an array element out of @p MustKnown. 216 /// All array elements must be in the same array (Polly does not yet 217 /// support reading from different accesses using the same 218 /// MemoryAccess). If no mapping for all of @p Domain exists, returns 219 /// null. 220 isl::map singleLocation(isl::union_map MustKnown, isl::set Domain) { 221 // { Domain[] -> Element[] } 222 isl::map Result; 223 224 // MemoryAccesses can read only elements from a single array 225 // (i.e. not: { Dom[0] -> A[0]; Dom[1] -> B[1] }). 226 // Look through all spaces until we find one that contains at least the 227 // wanted statement instance.s 228 MustKnown.foreach_map([&](isl::map Map) -> isl::stat { 229 // Get the array this is accessing. 230 isl::id ArrayId = Map.get_tuple_id(isl::dim::out); 231 ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(ArrayId.get_user()); 232 233 // No support for generation of indirect array accesses. 234 if (SAI->getBasePtrOriginSAI()) 235 return isl::stat::ok; // continue 236 237 // Determine whether this map contains all wanted values. 238 isl::set MapDom = Map.domain(); 239 if (!Domain.is_subset(MapDom).is_true()) 240 return isl::stat::ok; // continue 241 242 // There might be multiple array elements that contain the same value, but 243 // choose only one of them. lexmin is used because it returns a one-value 244 // mapping, we do not care about which one. 245 // TODO: Get the simplest access function. 246 Result = Map.lexmin(); 247 return isl::stat::error; // break 248 }); 249 250 return Result; 251 } 252 253 public: 254 ForwardOpTreeImpl(Scop *S, LoopInfo *LI, IslMaxOperationsGuard &MaxOpGuard) 255 : ZoneAlgorithm("polly-optree", S, LI), MaxOpGuard(MaxOpGuard) {} 256 257 /// Compute the zones of known array element contents. 258 /// 259 /// @return True if the computed #Known is usable. 260 bool computeKnownValues() { 261 isl::union_map MustKnown, KnownFromLoad, KnownFromInit; 262 263 // Check that nothing strange occurs. 264 collectCompatibleElts(); 265 266 { 267 IslQuotaScope QuotaScope = MaxOpGuard.enter(); 268 269 computeCommon(); 270 Known = computeKnown(true, true); 271 272 // Preexisting ValInsts use the known content analysis of themselves. 273 Translator = makeIdentityMap(Known.range(), false); 274 } 275 276 if (!Known || !Translator) { 277 assert(isl_ctx_last_error(IslCtx.get()) == isl_error_quota); 278 Known = nullptr; 279 Translator = nullptr; 280 DEBUG(dbgs() << "Known analysis exceeded max_operations\n"); 281 return false; 282 } 283 284 KnownAnalyzed++; 285 DEBUG(dbgs() << "All known: " << Known << "\n"); 286 287 return true; 288 } 289 290 void printStatistics(raw_ostream &OS, int Indent = 0) { 291 OS.indent(Indent) << "Statistics {\n"; 292 OS.indent(Indent + 4) << "Instructions copied: " << NumInstructionsCopied 293 << '\n'; 294 OS.indent(Indent + 4) << "Known loads forwarded: " << NumKnownLoadsForwarded 295 << '\n'; 296 OS.indent(Indent + 4) << "Read-only accesses copied: " << NumReadOnlyCopied 297 << '\n'; 298 OS.indent(Indent + 4) << "Operand trees forwarded: " << NumForwardedTrees 299 << '\n'; 300 OS.indent(Indent + 4) << "Statements with forwarded operand trees: " 301 << NumModifiedStmts << '\n'; 302 OS.indent(Indent) << "}\n"; 303 } 304 305 void printStatements(raw_ostream &OS, int Indent = 0) const { 306 OS.indent(Indent) << "After statements {\n"; 307 for (auto &Stmt : *S) { 308 OS.indent(Indent + 4) << Stmt.getBaseName() << "\n"; 309 for (auto *MA : Stmt) 310 MA->print(OS); 311 312 OS.indent(Indent + 12); 313 Stmt.printInstructions(OS); 314 } 315 OS.indent(Indent) << "}\n"; 316 } 317 318 /// Create a new MemoryAccess of type read and MemoryKind::Array. 319 /// 320 /// @param Stmt The statement in which the access occurs. 321 /// @param LI The instruction that does the access. 322 /// @param AccessRelation The array element that each statement instance 323 /// accesses. 324 /// 325 /// @param The newly created access. 326 MemoryAccess *makeReadArrayAccess(ScopStmt *Stmt, LoadInst *LI, 327 isl::map AccessRelation) { 328 isl::id ArrayId = AccessRelation.get_tuple_id(isl::dim::out); 329 ScopArrayInfo *SAI = reinterpret_cast<ScopArrayInfo *>(ArrayId.get_user()); 330 331 // Create a dummy SCEV access, to be replaced anyway. 332 SmallVector<const SCEV *, 4> Sizes; 333 Sizes.reserve(SAI->getNumberOfDimensions()); 334 SmallVector<const SCEV *, 4> Subscripts; 335 Subscripts.reserve(SAI->getNumberOfDimensions()); 336 for (unsigned i = 0; i < SAI->getNumberOfDimensions(); i += 1) { 337 Sizes.push_back(SAI->getDimensionSize(i)); 338 Subscripts.push_back(nullptr); 339 } 340 341 MemoryAccess *Access = 342 new MemoryAccess(Stmt, LI, MemoryAccess::READ, SAI->getBasePtr(), 343 LI->getType(), true, {}, Sizes, LI, MemoryKind::Array); 344 S->addAccessFunction(Access); 345 Stmt->addAccess(Access, true); 346 347 Access->setNewAccessRelation(AccessRelation); 348 349 return Access; 350 } 351 352 /// For an llvm::Value defined in @p DefStmt, compute the RAW dependency for a 353 /// use in every instance of @p UseStmt. 354 /// 355 /// @param UseStmt Statement a scalar is used in. 356 /// @param DefStmt Statement a scalar is defined in. 357 /// 358 /// @return { DomainUse[] -> DomainDef[] } 359 isl::map computeUseToDefFlowDependency(ScopStmt *UseStmt, ScopStmt *DefStmt) { 360 // { DomainUse[] -> Scatter[] } 361 isl::map UseScatter = getScatterFor(UseStmt); 362 363 // { Zone[] -> DomainDef[] } 364 isl::map ReachDefZone = getScalarReachingDefinition(DefStmt); 365 366 // { Scatter[] -> DomainDef[] } 367 isl::map ReachDefTimepoints = 368 convertZoneToTimepoints(ReachDefZone, isl::dim::in, false, true); 369 370 // { DomainUse[] -> DomainDef[] } 371 return UseScatter.apply_range(ReachDefTimepoints); 372 } 373 374 /// Forward a load by reading from an array element that contains the same 375 /// value. Typically the location it was loaded from. 376 /// 377 /// @param TargetStmt The statement the operand tree will be copied to. 378 /// @param Inst The (possibly speculatable) instruction to forward. 379 /// @param UseStmt The statement that uses @p Inst. 380 /// @param UseLoop The loop @p Inst is used in. 381 /// @param UseToTarget { DomainUse[] -> DomainTarget[] } 382 /// A mapping from the statement instance @p Inst is used 383 /// to the statement instance it is forwarded to. 384 /// @param DefStmt The statement @p Inst is defined in. 385 /// @param DefLoop The loop which contains @p Inst. 386 /// @param DefToTarget { DomainDef[] -> DomainTarget[] } 387 /// A mapping from the statement instance @p Inst is 388 /// defined to the statement instance it is forwarded to. 389 /// @param DoIt If false, only determine whether an operand tree can be 390 /// forwarded. If true, carry out the forwarding. Do not 391 /// use DoIt==true if an operand tree is not known to be 392 /// forwardable. 393 /// 394 /// @return FD_NotApplicable if @p Inst is not a LoadInst. 395 /// FD_CannotForward if no array element to load from was found. 396 /// FD_CanForwardLeaf if the load is already in the target statement 397 /// instance. 398 /// FD_CanForwardTree if @p Inst is forwardable. 399 /// FD_DidForward if @p DoIt was true. 400 ForwardingDecision forwardKnownLoad(ScopStmt *TargetStmt, Instruction *Inst, 401 ScopStmt *UseStmt, Loop *UseLoop, 402 isl::map UseToTarget, ScopStmt *DefStmt, 403 Loop *DefLoop, isl::map DefToTarget, 404 bool DoIt) { 405 // Cannot do anything without successful known analysis. 406 if (Known.is_null() || Translator.is_null() || UseToTarget.is_null() || 407 DefToTarget.is_null() || MaxOpGuard.hasQuotaExceeded()) 408 return FD_NotApplicable; 409 410 LoadInst *LI = dyn_cast<LoadInst>(Inst); 411 if (!LI) 412 return FD_NotApplicable; 413 414 // If the load is already in the statement, no forwarding is necessary. 415 // However, it might happen that the LoadInst is already present in the 416 // statement's instruction list. In that case we do as follows: 417 // - For the evaluation (DoIt==false), we can trivially forward it as it is 418 // benefit of forwarding an already present instruction. 419 // - For the execution (DoIt==true), prepend the instruction (to make it 420 // available to all instructions following in the instruction list), but 421 // do not add another MemoryAccess. 422 MemoryAccess *Access = TargetStmt->getArrayAccessOrNULLFor(LI); 423 if (Access && !DoIt) 424 return FD_CanForwardTree; 425 426 if (DoIt) 427 TargetStmt->prependInstruction(LI); 428 429 ForwardingDecision OpDecision = 430 forwardTree(TargetStmt, LI->getPointerOperand(), DefStmt, DefLoop, 431 DefToTarget, DoIt); 432 switch (OpDecision) { 433 case FD_CannotForward: 434 assert(!DoIt); 435 return OpDecision; 436 437 case FD_CanForwardLeaf: 438 case FD_CanForwardTree: 439 assert(!DoIt); 440 break; 441 442 case FD_DidForward: 443 assert(DoIt); 444 break; 445 446 default: 447 llvm_unreachable("Shouldn't return this"); 448 } 449 450 IslQuotaScope QuotaScope = MaxOpGuard.enter(!DoIt); 451 452 // { DomainDef[] -> ValInst[] } 453 isl::map ExpectedVal = makeValInst(Inst, UseStmt, UseLoop); 454 455 // { DomainTarget[] -> ValInst[] } 456 isl::map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget); 457 isl::union_map TranslatedExpectedVal = 458 isl::union_map(TargetExpectedVal).apply_range(Translator); 459 460 // { DomainTarget[] -> Element[] } 461 isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal); 462 463 isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt)); 464 if (!SameVal) 465 return FD_CannotForward; 466 467 if (!DoIt) 468 return FD_CanForwardTree; 469 470 if (Access) { 471 DEBUG(dbgs() << " forwarded known load with preexisting MemoryAccess" 472 << Access << "\n"); 473 } else { 474 Access = makeReadArrayAccess(TargetStmt, LI, SameVal); 475 DEBUG(dbgs() << " forwarded known load with new MemoryAccess" << Access 476 << "\n"); 477 478 // { ValInst[] } 479 isl::space ValInstSpace = ExpectedVal.get_space().range(); 480 481 // After adding a new load to the SCoP, also update the Known content 482 // about it. The new load will have a known ValInst of 483 // { [DomainTarget[] -> Value[]] } 484 // but which -- because it is a copy of it -- has same value as the 485 // { [DomainDef[] -> Value[]] } 486 // that it replicates. Instead of cloning the known content of 487 // [DomainDef[] -> Value[]] 488 // for DomainTarget[], we add a 'translator' that maps 489 // [DomainTarget[] -> Value[]] to [DomainDef[] -> Value[]] 490 // before comparing to the known content. 491 // TODO: 'Translator' could also be used to map PHINodes to their incoming 492 // ValInsts. 493 if (ValInstSpace.is_wrapping()) { 494 // { DefDomain[] -> Value[] } 495 isl::map ValInsts = ExpectedVal.range().unwrap(); 496 497 // { DefDomain[] } 498 isl::set DefDomain = ValInsts.domain(); 499 500 // { Value[] } 501 isl::space ValSpace = ValInstSpace.unwrap().range(); 502 503 // { Value[] -> Value[] } 504 isl::map ValToVal = 505 isl::map::identity(ValSpace.map_from_domain_and_range(ValSpace)); 506 507 // { [TargetDomain[] -> Value[]] -> [DefDomain[] -> Value] } 508 isl::map LocalTranslator = DefToTarget.reverse().product(ValToVal); 509 510 Translator = Translator.add_map(LocalTranslator); 511 DEBUG(dbgs() << " local translator is " << LocalTranslator 512 << "\n"); 513 } 514 } 515 DEBUG(dbgs() << " expected values where " << TargetExpectedVal 516 << "\n"); 517 DEBUG(dbgs() << " candidate elements where " << Candidates << "\n"); 518 assert(Access); 519 520 NumKnownLoadsForwarded++; 521 TotalKnownLoadsForwarded++; 522 return FD_DidForward; 523 } 524 525 /// Forwards a speculatively executable instruction. 526 /// 527 /// @param TargetStmt The statement the operand tree will be copied to. 528 /// @param UseInst The (possibly speculatable) instruction to forward. 529 /// @param DefStmt The statement @p UseInst is defined in. 530 /// @param DefLoop The loop which contains @p UseInst. 531 /// @param DefToTarget { DomainDef[] -> DomainTarget[] } 532 /// A mapping from the statement instance @p UseInst is 533 /// defined to the statement instance it is forwarded to. 534 /// @param DoIt If false, only determine whether an operand tree can be 535 /// forwarded. If true, carry out the forwarding. Do not 536 /// use DoIt==true if an operand tree is not known to be 537 /// forwardable. 538 /// 539 /// @return FD_NotApplicable if @p UseInst is not speculatable. 540 /// FD_CannotForward if one of @p UseInst's operands is not 541 /// forwardable. 542 /// FD_CanForwardTree if @p UseInst is forwardable. 543 /// FD_DidForward if @p DoIt was true. 544 ForwardingDecision forwardSpeculatable(ScopStmt *TargetStmt, 545 Instruction *UseInst, 546 ScopStmt *DefStmt, Loop *DefLoop, 547 isl::map DefToTarget, bool DoIt) { 548 // PHIs, unless synthesizable, are not yet supported. 549 if (isa<PHINode>(UseInst)) 550 return FD_NotApplicable; 551 552 // Compatible instructions must satisfy the following conditions: 553 // 1. Idempotent (instruction will be copied, not moved; although its 554 // original instance might be removed by simplification) 555 // 2. Not access memory (There might be memory writes between) 556 // 3. Not cause undefined behaviour (we might copy to a location when the 557 // original instruction was no executed; this is currently not possible 558 // because we do not forward PHINodes) 559 // 4. Not leak memory if executed multiple times (i.e. malloc) 560 // 561 // Instruction::mayHaveSideEffects is not sufficient because it considers 562 // malloc to not have side-effects. llvm::isSafeToSpeculativelyExecute is 563 // not sufficient because it allows memory accesses. 564 if (mayBeMemoryDependent(*UseInst)) 565 return FD_NotApplicable; 566 567 if (DoIt) { 568 // To ensure the right order, prepend this instruction before its 569 // operands. This ensures that its operands are inserted before the 570 // instruction using them. 571 // TODO: The operand tree is not really a tree, but a DAG. We should be 572 // able to handle DAGs without duplication. 573 TargetStmt->prependInstruction(UseInst); 574 NumInstructionsCopied++; 575 TotalInstructionsCopied++; 576 } 577 578 for (Value *OpVal : UseInst->operand_values()) { 579 ForwardingDecision OpDecision = 580 forwardTree(TargetStmt, OpVal, DefStmt, DefLoop, DefToTarget, DoIt); 581 switch (OpDecision) { 582 case FD_CannotForward: 583 assert(!DoIt); 584 return FD_CannotForward; 585 586 case FD_CanForwardLeaf: 587 case FD_CanForwardTree: 588 assert(!DoIt); 589 break; 590 591 case FD_DidForward: 592 assert(DoIt); 593 break; 594 595 case FD_NotApplicable: 596 llvm_unreachable("forwardTree should never return FD_NotApplicable"); 597 } 598 } 599 600 if (DoIt) 601 return FD_DidForward; 602 return FD_CanForwardTree; 603 } 604 605 /// Determines whether an operand tree can be forwarded or carries out a 606 /// forwarding, depending on the @p DoIt flag. 607 /// 608 /// @param TargetStmt The statement the operand tree will be copied to. 609 /// @param UseVal The value (usually an instruction) which is root of an 610 /// operand tree. 611 /// @param UseStmt The statement that uses @p UseVal. 612 /// @param UseLoop The loop @p UseVal is used in. 613 /// @param UseToTarget { DomainUse[] -> DomainTarget[] } 614 /// A mapping from the statement instance @p UseVal is used 615 /// to the statement instance it is forwarded to. 616 /// @param DoIt If false, only determine whether an operand tree can be 617 /// forwarded. If true, carry out the forwarding. Do not 618 /// use DoIt==true if an operand tree is not known to be 619 /// forwardable. 620 /// 621 /// @return If DoIt==false, return whether the operand tree can be forwarded. 622 /// If DoIt==true, return FD_DidForward. 623 ForwardingDecision forwardTree(ScopStmt *TargetStmt, Value *UseVal, 624 ScopStmt *UseStmt, Loop *UseLoop, 625 isl::map UseToTarget, bool DoIt) { 626 ScopStmt *DefStmt = nullptr; 627 Loop *DefLoop = nullptr; 628 629 // { DefDomain[] -> TargetDomain[] } 630 isl::map DefToTarget; 631 632 VirtualUse VUse = VirtualUse::create(UseStmt, UseLoop, UseVal, true); 633 switch (VUse.getKind()) { 634 case VirtualUse::Constant: 635 case VirtualUse::Block: 636 case VirtualUse::Hoisted: 637 // These can be used anywhere without special considerations. 638 if (DoIt) 639 return FD_DidForward; 640 return FD_CanForwardLeaf; 641 642 case VirtualUse::Synthesizable: { 643 // ScopExpander will take care for of generating the code at the new 644 // location. 645 if (DoIt) 646 return FD_DidForward; 647 648 // Check if the value is synthesizable at the new location as well. This 649 // might be possible when leaving a loop for which ScalarEvolution is 650 // unable to derive the exit value for. 651 // TODO: If there is a LCSSA PHI at the loop exit, use that one. 652 // If the SCEV contains a SCEVAddRecExpr, we currently depend on that we 653 // do not forward past its loop header. This would require us to use a 654 // previous loop induction variable instead the current one. We currently 655 // do not allow forwarding PHI nodes, thus this should never occur (the 656 // only exception where no phi is necessary being an unreachable loop 657 // without edge from the outside). 658 VirtualUse TargetUse = VirtualUse::create( 659 S, TargetStmt, TargetStmt->getSurroundingLoop(), UseVal, true); 660 if (TargetUse.getKind() == VirtualUse::Synthesizable) 661 return FD_CanForwardLeaf; 662 663 DEBUG(dbgs() << " Synthesizable would not be synthesizable anymore: " 664 << *UseVal << "\n"); 665 return FD_CannotForward; 666 } 667 668 case VirtualUse::ReadOnly: 669 // Note that we cannot return FD_CanForwardTree here. With a operand tree 670 // depth of 0, UseVal is the use in TargetStmt that we try to replace. 671 // With -polly-analyze-read-only-scalars=true we would ensure the 672 // existence of a MemoryAccess (which already exists for a leaf) and be 673 // removed again by tryForwardTree because it's goal is to remove this 674 // scalar MemoryAccess. It interprets FD_CanForwardTree as the permission 675 // to do so. 676 if (!DoIt) 677 return FD_CanForwardLeaf; 678 679 // If we model read-only scalars, we need to create a MemoryAccess for it. 680 if (ModelReadOnlyScalars) 681 TargetStmt->ensureValueRead(UseVal); 682 683 NumReadOnlyCopied++; 684 TotalReadOnlyCopied++; 685 return FD_DidForward; 686 687 case VirtualUse::Intra: 688 // Knowing that UseStmt and DefStmt are the same statement instance, just 689 // reuse the information about UseStmt for DefStmt 690 DefStmt = UseStmt; 691 DefToTarget = UseToTarget; 692 693 LLVM_FALLTHROUGH; 694 case VirtualUse::Inter: 695 Instruction *Inst = cast<Instruction>(UseVal); 696 697 if (!DefStmt) { 698 DefStmt = S->getStmtFor(Inst); 699 if (!DefStmt) 700 return FD_CannotForward; 701 } 702 703 DefLoop = LI->getLoopFor(Inst->getParent()); 704 705 if (DefToTarget.is_null() && !Known.is_null()) { 706 IslQuotaScope QuotaScope = MaxOpGuard.enter(!DoIt); 707 708 // { UseDomain[] -> DefDomain[] } 709 isl::map UseToDef = computeUseToDefFlowDependency(UseStmt, DefStmt); 710 711 // { DefDomain[] -> UseDomain[] -> TargetDomain[] } shortened to 712 // { DefDomain[] -> TargetDomain[] } 713 DefToTarget = UseToTarget.apply_domain(UseToDef); 714 simplify(DefToTarget); 715 } 716 717 ForwardingDecision SpeculativeResult = forwardSpeculatable( 718 TargetStmt, Inst, DefStmt, DefLoop, DefToTarget, DoIt); 719 if (SpeculativeResult != FD_NotApplicable) 720 return SpeculativeResult; 721 722 ForwardingDecision KnownResult = 723 forwardKnownLoad(TargetStmt, Inst, UseStmt, UseLoop, UseToTarget, 724 DefStmt, DefLoop, DefToTarget, DoIt); 725 if (KnownResult != FD_NotApplicable) 726 return KnownResult; 727 728 // When no method is found to forward the operand tree, we effectively 729 // cannot handle it. 730 DEBUG(dbgs() << " Cannot forward instruction: " << *Inst << "\n"); 731 return FD_CannotForward; 732 } 733 734 llvm_unreachable("Case unhandled"); 735 } 736 737 /// Try to forward an operand tree rooted in @p RA. 738 bool tryForwardTree(MemoryAccess *RA) { 739 assert(RA->isLatestScalarKind()); 740 DEBUG(dbgs() << "Trying to forward operand tree " << RA << "...\n"); 741 742 ScopStmt *Stmt = RA->getStatement(); 743 Loop *InLoop = Stmt->getSurroundingLoop(); 744 745 isl::map TargetToUse; 746 if (!Known.is_null()) { 747 isl::space DomSpace = Stmt->getDomainSpace(); 748 TargetToUse = 749 isl::map::identity(DomSpace.map_from_domain_and_range(DomSpace)); 750 } 751 752 ForwardingDecision Assessment = forwardTree( 753 Stmt, RA->getAccessValue(), Stmt, InLoop, TargetToUse, false); 754 assert(Assessment != FD_DidForward); 755 if (Assessment != FD_CanForwardTree) 756 return false; 757 758 ForwardingDecision Execution = forwardTree(Stmt, RA->getAccessValue(), Stmt, 759 InLoop, TargetToUse, true); 760 assert(Execution == FD_DidForward && 761 "A previous positive assessment must also be executable"); 762 (void)Execution; 763 764 Stmt->removeSingleMemoryAccess(RA); 765 return true; 766 } 767 768 /// Return which SCoP this instance is processing. 769 Scop *getScop() const { return S; } 770 771 /// Run the algorithm: Use value read accesses as operand tree roots and try 772 /// to forward them into the statement. 773 bool forwardOperandTrees() { 774 for (ScopStmt &Stmt : *S) { 775 bool StmtModified = false; 776 777 // Because we are modifying the MemoryAccess list, collect them first to 778 // avoid iterator invalidation. 779 SmallVector<MemoryAccess *, 16> Accs; 780 for (MemoryAccess *RA : Stmt) { 781 if (!RA->isRead()) 782 continue; 783 if (!RA->isLatestScalarKind()) 784 continue; 785 786 Accs.push_back(RA); 787 } 788 789 for (MemoryAccess *RA : Accs) { 790 if (tryForwardTree(RA)) { 791 Modified = true; 792 StmtModified = true; 793 NumForwardedTrees++; 794 TotalForwardedTrees++; 795 } 796 } 797 798 if (StmtModified) { 799 NumModifiedStmts++; 800 TotalModifiedStmts++; 801 } 802 } 803 804 if (Modified) 805 ScopsModified++; 806 return Modified; 807 } 808 809 /// Print the pass result, performed transformations and the SCoP after the 810 /// transformation. 811 void print(raw_ostream &OS, int Indent = 0) { 812 printStatistics(OS, Indent); 813 814 if (!Modified) { 815 // This line can easily be checked in regression tests. 816 OS << "ForwardOpTree executed, but did not modify anything\n"; 817 return; 818 } 819 820 printStatements(OS, Indent); 821 } 822 }; 823 824 /// Pass that redirects scalar reads to array elements that are known to contain 825 /// the same value. 826 /// 827 /// This reduces the number of scalar accesses and therefore potentially 828 /// increases the freedom of the scheduler. In the ideal case, all reads of a 829 /// scalar definition are redirected (We currently do not care about removing 830 /// the write in this case). This is also useful for the main DeLICM pass as 831 /// there are less scalars to be mapped. 832 class ForwardOpTree : public ScopPass { 833 private: 834 /// The pass implementation, also holding per-scop data. 835 std::unique_ptr<ForwardOpTreeImpl> Impl; 836 837 public: 838 static char ID; 839 840 explicit ForwardOpTree() : ScopPass(ID) {} 841 ForwardOpTree(const ForwardOpTree &) = delete; 842 ForwardOpTree &operator=(const ForwardOpTree &) = delete; 843 844 void getAnalysisUsage(AnalysisUsage &AU) const override { 845 AU.addRequiredTransitive<ScopInfoRegionPass>(); 846 AU.addRequired<LoopInfoWrapperPass>(); 847 AU.setPreservesAll(); 848 } 849 850 bool runOnScop(Scop &S) override { 851 // Free resources for previous SCoP's computation, if not yet done. 852 releaseMemory(); 853 854 LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 855 856 { 857 IslMaxOperationsGuard MaxOpGuard(S.getIslCtx(), MaxOps, false); 858 Impl = llvm::make_unique<ForwardOpTreeImpl>(&S, &LI, MaxOpGuard); 859 860 if (AnalyzeKnown) { 861 DEBUG(dbgs() << "Prepare forwarders...\n"); 862 Impl->computeKnownValues(); 863 } 864 865 DEBUG(dbgs() << "Forwarding operand trees...\n"); 866 Impl->forwardOperandTrees(); 867 868 if (MaxOpGuard.hasQuotaExceeded()) { 869 DEBUG(dbgs() << "Not all operations completed because of " 870 "max_operations exceeded\n"); 871 KnownOutOfQuota++; 872 } 873 } 874 875 DEBUG(dbgs() << "\nFinal Scop:\n"); 876 DEBUG(dbgs() << S); 877 878 // Update statistics 879 auto ScopStats = S.getStatistics(); 880 NumValueWrites += ScopStats.NumValueWrites; 881 NumValueWritesInLoops += ScopStats.NumValueWritesInLoops; 882 NumPHIWrites += ScopStats.NumPHIWrites; 883 NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops; 884 NumSingletonWrites += ScopStats.NumSingletonWrites; 885 NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops; 886 887 return false; 888 } 889 890 void printScop(raw_ostream &OS, Scop &S) const override { 891 if (!Impl) 892 return; 893 894 assert(Impl->getScop() == &S); 895 Impl->print(OS); 896 } 897 898 void releaseMemory() override { Impl.reset(); } 899 }; // class ForwardOpTree 900 901 char ForwardOpTree::ID; 902 903 } // namespace 904 905 ScopPass *polly::createForwardOpTreePass() { return new ForwardOpTree(); } 906 907 INITIALIZE_PASS_BEGIN(ForwardOpTree, "polly-optree", 908 "Polly - Forward operand tree", false, false) 909 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 910 INITIALIZE_PASS_END(ForwardOpTree, "polly-optree", 911 "Polly - Forward operand tree", false, false) 912