1 //===- ForwardOpTree.h ------------------------------------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Move instructions between statements. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "polly/ForwardOpTree.h" 15 #include "polly/Options.h" 16 #include "polly/ScopBuilder.h" 17 #include "polly/ScopInfo.h" 18 #include "polly/ScopPass.h" 19 #include "polly/Support/GICHelper.h" 20 #include "polly/Support/ISLOStream.h" 21 #include "polly/Support/ISLTools.h" 22 #include "polly/Support/VirtualInstruction.h" 23 #include "polly/ZoneAlgo.h" 24 #include "llvm/ADT/STLExtras.h" 25 #include "llvm/ADT/SmallVector.h" 26 #include "llvm/ADT/Statistic.h" 27 #include "llvm/Analysis/LoopInfo.h" 28 #include "llvm/Analysis/ValueTracking.h" 29 #include "llvm/IR/Instruction.h" 30 #include "llvm/IR/Instructions.h" 31 #include "llvm/IR/Value.h" 32 #include "llvm/Pass.h" 33 #include "llvm/Support/Casting.h" 34 #include "llvm/Support/CommandLine.h" 35 #include "llvm/Support/Compiler.h" 36 #include "llvm/Support/Debug.h" 37 #include "llvm/Support/ErrorHandling.h" 38 #include "llvm/Support/raw_ostream.h" 39 #include "isl/ctx.h" 40 #include "isl/isl-noexceptions.h" 41 #include <cassert> 42 #include <memory> 43 44 #define DEBUG_TYPE "polly-optree" 45 46 using namespace llvm; 47 using namespace polly; 48 49 static cl::opt<bool> 50 AnalyzeKnown("polly-optree-analyze-known", 51 cl::desc("Analyze array contents for load forwarding"), 52 cl::cat(PollyCategory), cl::init(true), cl::Hidden); 53 54 static cl::opt<bool> 55 NormalizePHIs("polly-optree-normalize-phi", 56 cl::desc("Replace PHIs by their incoming values"), 57 cl::cat(PollyCategory), cl::init(false), cl::Hidden); 58 59 static cl::opt<unsigned> 60 MaxOps("polly-optree-max-ops", 61 cl::desc("Maximum number of ISL operations to invest for known " 62 "analysis; 0=no limit"), 63 cl::init(1000000), cl::cat(PollyCategory), cl::Hidden); 64 65 STATISTIC(KnownAnalyzed, "Number of successfully analyzed SCoPs"); 66 STATISTIC(KnownOutOfQuota, 67 "Analyses aborted because max_operations was reached"); 68 69 STATISTIC(TotalInstructionsCopied, "Number of copied instructions"); 70 STATISTIC(TotalKnownLoadsForwarded, 71 "Number of forwarded loads because their value was known"); 72 STATISTIC(TotalReloads, "Number of reloaded values"); 73 STATISTIC(TotalReadOnlyCopied, "Number of copied read-only accesses"); 74 STATISTIC(TotalForwardedTrees, "Number of forwarded operand trees"); 75 STATISTIC(TotalModifiedStmts, 76 "Number of statements with at least one forwarded tree"); 77 78 STATISTIC(ScopsModified, "Number of SCoPs with at least one forwarded tree"); 79 80 STATISTIC(NumValueWrites, "Number of scalar value writes after OpTree"); 81 STATISTIC(NumValueWritesInLoops, 82 "Number of scalar value writes nested in affine loops after OpTree"); 83 STATISTIC(NumPHIWrites, "Number of scalar phi writes after OpTree"); 84 STATISTIC(NumPHIWritesInLoops, 85 "Number of scalar phi writes nested in affine loops after OpTree"); 86 STATISTIC(NumSingletonWrites, "Number of singleton writes after OpTree"); 87 STATISTIC(NumSingletonWritesInLoops, 88 "Number of singleton writes nested in affine loops after OpTree"); 89 90 namespace { 91 92 /// The state of whether an operand tree was/can be forwarded. 93 /// 94 /// The items apply to an instructions and its operand tree with the instruction 95 /// as the root element. If the value in question is not an instruction in the 96 /// SCoP, it can be a leaf of an instruction's operand tree. 97 enum ForwardingDecision { 98 /// The root instruction or value cannot be forwarded at all. 99 FD_CannotForward, 100 101 /// The root instruction or value can be forwarded as a leaf of a larger 102 /// operand tree. 103 /// It does not make sense to move the value itself, it would just replace it 104 /// by a use of itself. For instance, a constant "5" used in a statement can 105 /// be forwarded, but it would just replace it by the same constant "5". 106 /// However, it makes sense to move as an operand of 107 /// 108 /// %add = add 5, 5 109 /// 110 /// where "5" is moved as part of a larger operand tree. "5" would be placed 111 /// (disregarding for a moment that literal constants don't have a location 112 /// and can be used anywhere) into the same statement as %add would. 113 FD_CanForwardLeaf, 114 115 /// The root instruction can be forwarded and doing so avoids a scalar 116 /// dependency. 117 /// 118 /// This can be either because the operand tree can be moved to the target 119 /// statement, or a memory access is redirected to read from a different 120 /// location. 121 FD_CanForwardProfitably, 122 123 /// Used to indicate that a forwarding has be carried out successfully, and 124 /// the forwarded memory access can be deleted. 125 FD_DidForwardTree, 126 127 /// Used to indicate that a forwarding has be carried out successfully, and 128 /// the forwarded memory access is being reused. 129 FD_DidForwardLeaf, 130 131 /// A forwarding method cannot be applied to the operand tree. 132 /// The difference to FD_CannotForward is that there might be other methods 133 /// that can handle it. 134 /// The conditions that make an operand tree applicable must be checked even 135 /// with DoIt==true because a method following the one that returned 136 /// FD_NotApplicable might have returned FD_CanForwardTree. 137 FD_NotApplicable 138 }; 139 140 /// Implementation of operand tree forwarding for a specific SCoP. 141 /// 142 /// For a statement that requires a scalar value (through a value read 143 /// MemoryAccess), see if its operand can be moved into the statement. If so, 144 /// the MemoryAccess is removed and the all the operand tree instructions are 145 /// moved into the statement. All original instructions are left in the source 146 /// statements. The simplification pass can clean these up. 147 class ForwardOpTreeImpl : ZoneAlgorithm { 148 private: 149 /// Scope guard to limit the number of isl operations for this pass. 150 IslMaxOperationsGuard &MaxOpGuard; 151 152 /// How many instructions have been copied to other statements. 153 int NumInstructionsCopied = 0; 154 155 /// Number of loads forwarded because their value was known. 156 int NumKnownLoadsForwarded = 0; 157 158 /// Number of values reloaded from known array elements. 159 int NumReloads = 0; 160 161 /// How many read-only accesses have been copied. 162 int NumReadOnlyCopied = 0; 163 164 /// How many operand trees have been forwarded. 165 int NumForwardedTrees = 0; 166 167 /// Number of statements with at least one forwarded operand tree. 168 int NumModifiedStmts = 0; 169 170 /// Whether we carried out at least one change to the SCoP. 171 bool Modified = false; 172 173 /// Contains the zones where array elements are known to contain a specific 174 /// value. 175 /// { [Element[] -> Zone[]] -> ValInst[] } 176 /// @see computeKnown() 177 isl::union_map Known; 178 179 /// Translator for newly introduced ValInsts to already existing ValInsts such 180 /// that new introduced load instructions can reuse the Known analysis of its 181 /// original load. { ValInst[] -> ValInst[] } 182 isl::union_map Translator; 183 184 /// A cache for getDefToTarget(). 185 DenseMap<std::pair<ScopStmt *, ScopStmt *>, isl::map> DefToTargetCache; 186 187 /// Get list of array elements that do contain the same ValInst[] at Domain[]. 188 /// 189 /// @param ValInst { Domain[] -> ValInst[] } 190 /// The values for which we search for alternative locations, 191 /// per statement instance. 192 /// 193 /// @return { Domain[] -> Element[] } 194 /// For each statement instance, the array elements that contain the 195 /// same ValInst. 196 isl::union_map findSameContentElements(isl::union_map ValInst) { 197 assert(!ValInst.is_single_valued().is_false()); 198 199 // { Domain[] } 200 isl::union_set Domain = ValInst.domain(); 201 202 // { Domain[] -> Scatter[] } 203 isl::union_map Schedule = getScatterFor(Domain); 204 205 // { Element[] -> [Scatter[] -> ValInst[]] } 206 isl::union_map MustKnownCurried = 207 convertZoneToTimepoints(Known, isl::dim::in, false, true).curry(); 208 209 // { [Domain[] -> ValInst[]] -> Scatter[] } 210 isl::union_map DomValSched = ValInst.domain_map().apply_range(Schedule); 211 212 // { [Scatter[] -> ValInst[]] -> [Domain[] -> ValInst[]] } 213 isl::union_map SchedValDomVal = 214 DomValSched.range_product(ValInst.range_map()).reverse(); 215 216 // { Element[] -> [Domain[] -> ValInst[]] } 217 isl::union_map MustKnownInst = MustKnownCurried.apply_range(SchedValDomVal); 218 219 // { Domain[] -> Element[] } 220 isl::union_map MustKnownMap = 221 MustKnownInst.uncurry().domain().unwrap().reverse(); 222 simplify(MustKnownMap); 223 224 return MustKnownMap; 225 } 226 227 /// Find a single array element for each statement instance, within a single 228 /// array. 229 /// 230 /// @param MustKnown { Domain[] -> Element[] } 231 /// Set of candidate array elements. 232 /// @param Domain { Domain[] } 233 /// The statement instance for which we need elements for. 234 /// 235 /// @return { Domain[] -> Element[] } 236 /// For each statement instance, an array element out of @p MustKnown. 237 /// All array elements must be in the same array (Polly does not yet 238 /// support reading from different accesses using the same 239 /// MemoryAccess). If no mapping for all of @p Domain exists, returns 240 /// null. 241 isl::map singleLocation(isl::union_map MustKnown, isl::set Domain) { 242 // { Domain[] -> Element[] } 243 isl::map Result; 244 245 // MemoryAccesses can read only elements from a single array 246 // (i.e. not: { Dom[0] -> A[0]; Dom[1] -> B[1] }). 247 // Look through all spaces until we find one that contains at least the 248 // wanted statement instance.s 249 MustKnown.foreach_map([&](isl::map Map) -> isl::stat { 250 // Get the array this is accessing. 251 isl::id ArrayId = Map.get_tuple_id(isl::dim::out); 252 ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(ArrayId.get_user()); 253 254 // No support for generation of indirect array accesses. 255 if (SAI->getBasePtrOriginSAI()) 256 return isl::stat::ok; // continue 257 258 // Determine whether this map contains all wanted values. 259 isl::set MapDom = Map.domain(); 260 if (!Domain.is_subset(MapDom).is_true()) 261 return isl::stat::ok; // continue 262 263 // There might be multiple array elements that contain the same value, but 264 // choose only one of them. lexmin is used because it returns a one-value 265 // mapping, we do not care about which one. 266 // TODO: Get the simplest access function. 267 Result = Map.lexmin(); 268 return isl::stat::error; // break 269 }); 270 271 return Result; 272 } 273 274 public: 275 ForwardOpTreeImpl(Scop *S, LoopInfo *LI, IslMaxOperationsGuard &MaxOpGuard) 276 : ZoneAlgorithm("polly-optree", S, LI), MaxOpGuard(MaxOpGuard) {} 277 278 /// Compute the zones of known array element contents. 279 /// 280 /// @return True if the computed #Known is usable. 281 bool computeKnownValues() { 282 isl::union_map MustKnown, KnownFromLoad, KnownFromInit; 283 284 // Check that nothing strange occurs. 285 collectCompatibleElts(); 286 287 { 288 IslQuotaScope QuotaScope = MaxOpGuard.enter(); 289 290 computeCommon(); 291 if (NormalizePHIs) 292 computeNormalizedPHIs(); 293 Known = computeKnown(true, true); 294 295 // Preexisting ValInsts use the known content analysis of themselves. 296 Translator = makeIdentityMap(Known.range(), false); 297 } 298 299 if (!Known || !Translator || !NormalizeMap) { 300 assert(isl_ctx_last_error(IslCtx.get()) == isl_error_quota); 301 Known = nullptr; 302 Translator = nullptr; 303 NormalizeMap = nullptr; 304 LLVM_DEBUG(dbgs() << "Known analysis exceeded max_operations\n"); 305 return false; 306 } 307 308 KnownAnalyzed++; 309 LLVM_DEBUG(dbgs() << "All known: " << Known << "\n"); 310 311 return true; 312 } 313 314 void printStatistics(raw_ostream &OS, int Indent = 0) { 315 OS.indent(Indent) << "Statistics {\n"; 316 OS.indent(Indent + 4) << "Instructions copied: " << NumInstructionsCopied 317 << '\n'; 318 OS.indent(Indent + 4) << "Known loads forwarded: " << NumKnownLoadsForwarded 319 << '\n'; 320 OS.indent(Indent + 4) << "Reloads: " << NumReloads << '\n'; 321 OS.indent(Indent + 4) << "Read-only accesses copied: " << NumReadOnlyCopied 322 << '\n'; 323 OS.indent(Indent + 4) << "Operand trees forwarded: " << NumForwardedTrees 324 << '\n'; 325 OS.indent(Indent + 4) << "Statements with forwarded operand trees: " 326 << NumModifiedStmts << '\n'; 327 OS.indent(Indent) << "}\n"; 328 } 329 330 void printStatements(raw_ostream &OS, int Indent = 0) const { 331 OS.indent(Indent) << "After statements {\n"; 332 for (auto &Stmt : *S) { 333 OS.indent(Indent + 4) << Stmt.getBaseName() << "\n"; 334 for (auto *MA : Stmt) 335 MA->print(OS); 336 337 OS.indent(Indent + 12); 338 Stmt.printInstructions(OS); 339 } 340 OS.indent(Indent) << "}\n"; 341 } 342 343 /// Create a new MemoryAccess of type read and MemoryKind::Array. 344 /// 345 /// @param Stmt The statement in which the access occurs. 346 /// @param LI The instruction that does the access. 347 /// @param AccessRelation The array element that each statement instance 348 /// accesses. 349 /// 350 /// @param The newly created access. 351 MemoryAccess *makeReadArrayAccess(ScopStmt *Stmt, LoadInst *LI, 352 isl::map AccessRelation) { 353 isl::id ArrayId = AccessRelation.get_tuple_id(isl::dim::out); 354 ScopArrayInfo *SAI = reinterpret_cast<ScopArrayInfo *>(ArrayId.get_user()); 355 356 // Create a dummy SCEV access, to be replaced anyway. 357 SmallVector<const SCEV *, 4> Sizes; 358 Sizes.reserve(SAI->getNumberOfDimensions()); 359 SmallVector<const SCEV *, 4> Subscripts; 360 Subscripts.reserve(SAI->getNumberOfDimensions()); 361 for (unsigned i = 0; i < SAI->getNumberOfDimensions(); i += 1) { 362 Sizes.push_back(SAI->getDimensionSize(i)); 363 Subscripts.push_back(nullptr); 364 } 365 366 MemoryAccess *Access = 367 new MemoryAccess(Stmt, LI, MemoryAccess::READ, SAI->getBasePtr(), 368 LI->getType(), true, {}, Sizes, LI, MemoryKind::Array); 369 S->addAccessFunction(Access); 370 Stmt->addAccess(Access, true); 371 372 Access->setNewAccessRelation(AccessRelation); 373 374 return Access; 375 } 376 377 /// For an llvm::Value defined in @p DefStmt, compute the RAW dependency for a 378 /// use in every instance of @p UseStmt. 379 /// 380 /// @param UseStmt Statement a scalar is used in. 381 /// @param DefStmt Statement a scalar is defined in. 382 /// 383 /// @return { DomainUse[] -> DomainDef[] } 384 isl::map computeUseToDefFlowDependency(ScopStmt *UseStmt, ScopStmt *DefStmt) { 385 // { DomainUse[] -> Scatter[] } 386 isl::map UseScatter = getScatterFor(UseStmt); 387 388 // { Zone[] -> DomainDef[] } 389 isl::map ReachDefZone = getScalarReachingDefinition(DefStmt); 390 391 // { Scatter[] -> DomainDef[] } 392 isl::map ReachDefTimepoints = 393 convertZoneToTimepoints(ReachDefZone, isl::dim::in, false, true); 394 395 // { DomainUse[] -> DomainDef[] } 396 return UseScatter.apply_range(ReachDefTimepoints); 397 } 398 399 /// Is @p InnerLoop nested inside @p OuterLoop? 400 static bool isInsideLoop(Loop *OuterLoop, Loop *InnerLoop) { 401 // If OuterLoop is nullptr, we cannot call its contains() method. In this 402 // case OuterLoop represents the 'top level' and therefore contains all 403 // loop. 404 return !OuterLoop || OuterLoop->contains(InnerLoop); 405 } 406 407 /// Get a domain translation map from a (scalar) definition to the statement 408 /// where the definition is being moved to. 409 /// 410 /// @p TargetStmt can also be seen an llvm::Use of an llvm::Value in 411 /// @p DefStmt. In addition, we allow transitive uses: 412 /// 413 /// DefStmt -> MiddleStmt -> TargetStmt 414 /// 415 /// where an operand tree of instructions in DefStmt and MiddleStmt are to be 416 /// moved to TargetStmt. To be generally correct, we also need to know all the 417 /// intermediate statements. However, we make use of the fact that we 418 /// currently do not support a move from a loop body across its header such 419 /// that only the first definition and the target statement are relevant. 420 /// 421 /// @param DefStmt Statement from where a definition might be moved from. 422 /// @param TargetStmt Statement where the definition is potentially being 423 /// moved to (should contain a use of that definition). 424 /// 425 /// @return { DomainDef[] -> DomainTarget[] } 426 isl::map getDefToTarget(ScopStmt *DefStmt, ScopStmt *TargetStmt) { 427 // No translation required if the definition is already at the target. 428 if (TargetStmt == DefStmt) 429 return isl::map::identity( 430 getDomainFor(TargetStmt).get_space().map_from_set()); 431 432 isl::map &Result = DefToTargetCache[std::make_pair(TargetStmt, DefStmt)]; 433 434 // This is a shortcut in case the schedule is still the original and 435 // TargetStmt is in the same or nested inside DefStmt's loop. With the 436 // additional assumption that operand trees do not cross DefStmt's loop 437 // header, then TargetStmt's instance shared coordinated are the same as 438 // DefStmt's coordinated. All TargetStmt instances with this prefix share 439 // the same DefStmt instance. 440 // Model: 441 // 442 // for (int i < 0; i < N; i+=1) { 443 // DefStmt: 444 // D = ...; 445 // for (int j < 0; j < N; j+=1) { 446 // TargetStmt: 447 // use(D); 448 // } 449 // } 450 // 451 // Here, the value used in TargetStmt is defined in the corresponding 452 // DefStmt, i.e. 453 // 454 // { DefStmt[i] -> TargetStmt[i,j] } 455 // 456 // In practice, this should cover the majority of cases. 457 if (S->isOriginalSchedule() && 458 isInsideLoop(DefStmt->getSurroundingLoop(), 459 TargetStmt->getSurroundingLoop())) { 460 isl::set DefDomain = getDomainFor(DefStmt); 461 isl::set TargetDomain = getDomainFor(TargetStmt); 462 assert(DefDomain.dim(isl::dim::set) <= TargetDomain.dim(isl::dim::set)); 463 464 Result = isl::map::from_domain_and_range(DefDomain, TargetDomain); 465 for (unsigned i = 0, DefDims = DefDomain.dim(isl::dim::set); i < DefDims; 466 i += 1) 467 Result = Result.equate(isl::dim::in, i, isl::dim::out, i); 468 } 469 470 if (!Result) { 471 // { DomainDef[] -> DomainTarget[] } 472 Result = computeUseToDefFlowDependency(TargetStmt, DefStmt).reverse(); 473 simplify(Result); 474 } 475 476 return Result; 477 } 478 479 /// Forward a load by reading from an array element that contains the same 480 /// value. Typically the location it was loaded from. 481 /// 482 /// @param TargetStmt The statement the operand tree will be copied to. 483 /// @param Inst The (possibly speculatable) instruction to forward. 484 /// @param UseStmt The statement that uses @p Inst. 485 /// @param UseLoop The loop @p Inst is used in. 486 /// @param DefStmt The statement @p Inst is defined in. 487 /// @param DefLoop The loop which contains @p Inst. 488 /// @param DoIt If false, only determine whether an operand tree can be 489 /// forwarded. If true, carry out the forwarding. Do not 490 /// use DoIt==true if an operand tree is not known to be 491 /// forwardable. 492 /// 493 /// @return FD_NotApplicable if @p Inst cannot be forwarded by creating a new 494 /// load. 495 /// FD_CannotForward if the pointer operand cannot be forwarded. 496 /// FD_CanForwardProfitably if @p Inst is forwardable. 497 /// FD_DidForwardTree if @p DoIt was true. 498 ForwardingDecision forwardKnownLoad(ScopStmt *TargetStmt, Instruction *Inst, 499 ScopStmt *UseStmt, Loop *UseLoop, 500 ScopStmt *DefStmt, Loop *DefLoop, 501 bool DoIt) { 502 // Cannot do anything without successful known analysis. 503 if (Known.is_null() || Translator.is_null() || 504 MaxOpGuard.hasQuotaExceeded()) 505 return FD_NotApplicable; 506 507 LoadInst *LI = dyn_cast<LoadInst>(Inst); 508 if (!LI) 509 return FD_NotApplicable; 510 511 // If the load is already in the statement, no forwarding is necessary. 512 // However, it might happen that the LoadInst is already present in the 513 // statement's instruction list. In that case we do as follows: 514 // - For the evaluation (DoIt==false), we can trivially forward it as it is 515 // benefit of forwarding an already present instruction. 516 // - For the execution (DoIt==true), prepend the instruction (to make it 517 // available to all instructions following in the instruction list), but 518 // do not add another MemoryAccess. 519 MemoryAccess *Access = TargetStmt->getArrayAccessOrNULLFor(LI); 520 if (Access && !DoIt) 521 return FD_CanForwardProfitably; 522 523 ForwardingDecision OpDecision = forwardTree( 524 TargetStmt, LI->getPointerOperand(), DefStmt, DefLoop, DoIt); 525 switch (OpDecision) { 526 case FD_CannotForward: 527 assert(!DoIt); 528 return OpDecision; 529 530 case FD_CanForwardLeaf: 531 case FD_CanForwardProfitably: 532 assert(!DoIt); 533 break; 534 535 case FD_DidForwardLeaf: 536 case FD_DidForwardTree: 537 assert(DoIt); 538 break; 539 540 default: 541 llvm_unreachable("Shouldn't return this"); 542 } 543 544 IslQuotaScope QuotaScope = MaxOpGuard.enter(!DoIt); 545 546 // { DomainDef[] -> ValInst[] } 547 isl::map ExpectedVal = makeValInst(Inst, UseStmt, UseLoop); 548 assert(!isNormalized(ExpectedVal).is_false() && 549 "LoadInsts are always normalized"); 550 551 // { DomainUse[] -> DomainTarget[] } 552 isl::map UseToTarget = getDefToTarget(UseStmt, TargetStmt); 553 554 // { DomainTarget[] -> ValInst[] } 555 isl::map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget); 556 isl::union_map TranslatedExpectedVal = 557 isl::union_map(TargetExpectedVal).apply_range(Translator); 558 559 // { DomainTarget[] -> Element[] } 560 isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal); 561 562 isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt)); 563 if (!SameVal) 564 return FD_NotApplicable; 565 566 if (DoIt) 567 TargetStmt->prependInstruction(LI); 568 569 if (!DoIt) 570 return FD_CanForwardProfitably; 571 572 if (Access) { 573 LLVM_DEBUG( 574 dbgs() << " forwarded known load with preexisting MemoryAccess" 575 << Access << "\n"); 576 } else { 577 Access = makeReadArrayAccess(TargetStmt, LI, SameVal); 578 LLVM_DEBUG(dbgs() << " forwarded known load with new MemoryAccess" 579 << Access << "\n"); 580 581 // { ValInst[] } 582 isl::space ValInstSpace = ExpectedVal.get_space().range(); 583 584 // After adding a new load to the SCoP, also update the Known content 585 // about it. The new load will have a known ValInst of 586 // { [DomainTarget[] -> Value[]] } 587 // but which -- because it is a copy of it -- has same value as the 588 // { [DomainDef[] -> Value[]] } 589 // that it replicates. Instead of cloning the known content of 590 // [DomainDef[] -> Value[]] 591 // for DomainTarget[], we add a 'translator' that maps 592 // [DomainTarget[] -> Value[]] to [DomainDef[] -> Value[]] 593 // before comparing to the known content. 594 // TODO: 'Translator' could also be used to map PHINodes to their incoming 595 // ValInsts. 596 if (ValInstSpace.is_wrapping()) { 597 // { DefDomain[] -> Value[] } 598 isl::map ValInsts = ExpectedVal.range().unwrap(); 599 600 // { DefDomain[] } 601 isl::set DefDomain = ValInsts.domain(); 602 603 // { Value[] } 604 isl::space ValSpace = ValInstSpace.unwrap().range(); 605 606 // { Value[] -> Value[] } 607 isl::map ValToVal = 608 isl::map::identity(ValSpace.map_from_domain_and_range(ValSpace)); 609 610 // { DomainDef[] -> DomainTarget[] } 611 isl::map DefToTarget = getDefToTarget(DefStmt, TargetStmt); 612 613 // { [TargetDomain[] -> Value[]] -> [DefDomain[] -> Value] } 614 isl::map LocalTranslator = DefToTarget.reverse().product(ValToVal); 615 616 Translator = Translator.add_map(LocalTranslator); 617 LLVM_DEBUG(dbgs() << " local translator is " << LocalTranslator 618 << "\n"); 619 } 620 } 621 LLVM_DEBUG(dbgs() << " expected values where " << TargetExpectedVal 622 << "\n"); 623 LLVM_DEBUG(dbgs() << " candidate elements where " << Candidates 624 << "\n"); 625 assert(Access); 626 627 NumKnownLoadsForwarded++; 628 TotalKnownLoadsForwarded++; 629 return FD_DidForwardTree; 630 } 631 632 /// Forward a scalar by redirecting the access to an array element that stores 633 /// the same value. 634 /// 635 /// @param TargetStmt The statement the operand tree will be copied to. 636 /// @param Inst The scalar to forward. 637 /// @param UseStmt The statement that uses @p Inst. 638 /// @param UseLoop The loop @p Inst is used in. 639 /// @param DefStmt The statement @p Inst is defined in. 640 /// @param DefLoop The loop which contains @p Inst. 641 /// @param DoIt If false, only determine whether an operand tree can be 642 /// forwarded. If true, carry out the forwarding. Do not 643 /// use DoIt==true if an operand tree is not known to be 644 /// forwardable. 645 /// 646 /// @return FD_NotApplicable if @p Inst cannot be reloaded. 647 /// FD_CanForwardLeaf if @p Inst can be reloaded. 648 /// FD_CanForwardProfitably if @p Inst has been reloaded. 649 /// FD_DidForwardLeaf if @p DoIt was true. 650 ForwardingDecision reloadKnownContent(ScopStmt *TargetStmt, Instruction *Inst, 651 ScopStmt *UseStmt, Loop *UseLoop, 652 ScopStmt *DefStmt, Loop *DefLoop, 653 bool DoIt) { 654 // Cannot do anything without successful known analysis. 655 if (Known.is_null() || Translator.is_null() || 656 MaxOpGuard.hasQuotaExceeded()) 657 return FD_NotApplicable; 658 659 MemoryAccess *Access = TargetStmt->lookupInputAccessOf(Inst); 660 if (Access && Access->isLatestArrayKind()) { 661 if (DoIt) 662 return FD_DidForwardLeaf; 663 return FD_CanForwardLeaf; 664 } 665 666 // Don't spend too much time analyzing whether it can be reloaded. When 667 // carrying-out the forwarding, we cannot bail-out in the middle of the 668 // transformation. It also shouldn't take as long because some results are 669 // cached. 670 IslQuotaScope QuotaScope = MaxOpGuard.enter(!DoIt); 671 672 // { DomainDef[] -> ValInst[] } 673 isl::union_map ExpectedVal = makeNormalizedValInst(Inst, UseStmt, UseLoop); 674 675 // { DomainUse[] -> DomainTarget[] } 676 isl::map UseToTarget = getDefToTarget(UseStmt, TargetStmt); 677 678 // { DomainTarget[] -> ValInst[] } 679 isl::union_map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget); 680 isl::union_map TranslatedExpectedVal = 681 TargetExpectedVal.apply_range(Translator); 682 683 // { DomainTarget[] -> Element[] } 684 isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal); 685 686 isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt)); 687 if (!SameVal) 688 return FD_NotApplicable; 689 690 if (!DoIt) 691 return FD_CanForwardProfitably; 692 693 if (!Access) 694 Access = TargetStmt->ensureValueRead(Inst); 695 696 simplify(SameVal); 697 Access->setNewAccessRelation(SameVal); 698 699 TotalReloads++; 700 NumReloads++; 701 return FD_DidForwardLeaf; 702 } 703 704 /// Forwards a speculatively executable instruction. 705 /// 706 /// @param TargetStmt The statement the operand tree will be copied to. 707 /// @param UseInst The (possibly speculatable) instruction to forward. 708 /// @param DefStmt The statement @p UseInst is defined in. 709 /// @param DefLoop The loop which contains @p UseInst. 710 /// @param DoIt If false, only determine whether an operand tree can be 711 /// forwarded. If true, carry out the forwarding. Do not 712 /// use DoIt==true if an operand tree is not known to be 713 /// forwardable. 714 /// 715 /// @return FD_NotApplicable if @p UseInst is not speculatable. 716 /// FD_CannotForward if one of @p UseInst's operands is not 717 /// forwardable. 718 /// FD_CanForwardTree if @p UseInst is forwardable. 719 /// FD_DidForward if @p DoIt was true. 720 ForwardingDecision forwardSpeculatable(ScopStmt *TargetStmt, 721 Instruction *UseInst, 722 ScopStmt *DefStmt, Loop *DefLoop, 723 bool DoIt) { 724 // PHIs, unless synthesizable, are not yet supported. 725 if (isa<PHINode>(UseInst)) 726 return FD_NotApplicable; 727 728 // Compatible instructions must satisfy the following conditions: 729 // 1. Idempotent (instruction will be copied, not moved; although its 730 // original instance might be removed by simplification) 731 // 2. Not access memory (There might be memory writes between) 732 // 3. Not cause undefined behaviour (we might copy to a location when the 733 // original instruction was no executed; this is currently not possible 734 // because we do not forward PHINodes) 735 // 4. Not leak memory if executed multiple times (i.e. malloc) 736 // 737 // Instruction::mayHaveSideEffects is not sufficient because it considers 738 // malloc to not have side-effects. llvm::isSafeToSpeculativelyExecute is 739 // not sufficient because it allows memory accesses. 740 if (mayBeMemoryDependent(*UseInst)) 741 return FD_NotApplicable; 742 743 if (DoIt) { 744 // To ensure the right order, prepend this instruction before its 745 // operands. This ensures that its operands are inserted before the 746 // instruction using them. 747 // TODO: The operand tree is not really a tree, but a DAG. We should be 748 // able to handle DAGs without duplication. 749 TargetStmt->prependInstruction(UseInst); 750 NumInstructionsCopied++; 751 TotalInstructionsCopied++; 752 } 753 754 for (Value *OpVal : UseInst->operand_values()) { 755 ForwardingDecision OpDecision = 756 forwardTree(TargetStmt, OpVal, DefStmt, DefLoop, DoIt); 757 switch (OpDecision) { 758 case FD_CannotForward: 759 assert(!DoIt); 760 return FD_CannotForward; 761 762 case FD_CanForwardLeaf: 763 case FD_CanForwardProfitably: 764 assert(!DoIt); 765 break; 766 767 case FD_DidForwardLeaf: 768 case FD_DidForwardTree: 769 assert(DoIt); 770 break; 771 772 case FD_NotApplicable: 773 llvm_unreachable("forwardTree should never return FD_NotApplicable"); 774 } 775 } 776 777 if (DoIt) 778 return FD_DidForwardTree; 779 return FD_CanForwardProfitably; 780 } 781 782 /// Determines whether an operand tree can be forwarded or carries out a 783 /// forwarding, depending on the @p DoIt flag. 784 /// 785 /// @param TargetStmt The statement the operand tree will be copied to. 786 /// @param UseVal The value (usually an instruction) which is root of an 787 /// operand tree. 788 /// @param UseStmt The statement that uses @p UseVal. 789 /// @param UseLoop The loop @p UseVal is used in. 790 /// @param DoIt If false, only determine whether an operand tree can be 791 /// forwarded. If true, carry out the forwarding. Do not 792 /// use DoIt==true if an operand tree is not known to be 793 /// forwardable. 794 /// 795 /// @return If DoIt==false, return whether the operand tree can be forwarded. 796 /// If DoIt==true, return FD_DidForward. 797 ForwardingDecision forwardTree(ScopStmt *TargetStmt, Value *UseVal, 798 ScopStmt *UseStmt, Loop *UseLoop, bool DoIt) { 799 ScopStmt *DefStmt = nullptr; 800 Loop *DefLoop = nullptr; 801 802 // { DefDomain[] -> TargetDomain[] } 803 isl::map DefToTarget; 804 805 VirtualUse VUse = VirtualUse::create(UseStmt, UseLoop, UseVal, true); 806 switch (VUse.getKind()) { 807 case VirtualUse::Constant: 808 case VirtualUse::Block: 809 case VirtualUse::Hoisted: 810 // These can be used anywhere without special considerations. 811 if (DoIt) 812 return FD_DidForwardTree; 813 return FD_CanForwardLeaf; 814 815 case VirtualUse::Synthesizable: { 816 // ScopExpander will take care for of generating the code at the new 817 // location. 818 if (DoIt) 819 return FD_DidForwardTree; 820 821 // Check if the value is synthesizable at the new location as well. This 822 // might be possible when leaving a loop for which ScalarEvolution is 823 // unable to derive the exit value for. 824 // TODO: If there is a LCSSA PHI at the loop exit, use that one. 825 // If the SCEV contains a SCEVAddRecExpr, we currently depend on that we 826 // do not forward past its loop header. This would require us to use a 827 // previous loop induction variable instead the current one. We currently 828 // do not allow forwarding PHI nodes, thus this should never occur (the 829 // only exception where no phi is necessary being an unreachable loop 830 // without edge from the outside). 831 VirtualUse TargetUse = VirtualUse::create( 832 S, TargetStmt, TargetStmt->getSurroundingLoop(), UseVal, true); 833 if (TargetUse.getKind() == VirtualUse::Synthesizable) 834 return FD_CanForwardLeaf; 835 836 LLVM_DEBUG( 837 dbgs() << " Synthesizable would not be synthesizable anymore: " 838 << *UseVal << "\n"); 839 return FD_CannotForward; 840 } 841 842 case VirtualUse::ReadOnly: 843 // Note that we cannot return FD_CanForwardTree here. With a operand tree 844 // depth of 0, UseVal is the use in TargetStmt that we try to replace. 845 // With -polly-analyze-read-only-scalars=true we would ensure the 846 // existence of a MemoryAccess (which already exists for a leaf) and be 847 // removed again by tryForwardTree because it's goal is to remove this 848 // scalar MemoryAccess. It interprets FD_CanForwardTree as the permission 849 // to do so. 850 if (!DoIt) 851 return FD_CanForwardLeaf; 852 853 // If we model read-only scalars, we need to create a MemoryAccess for it. 854 if (ModelReadOnlyScalars) 855 TargetStmt->ensureValueRead(UseVal); 856 857 NumReadOnlyCopied++; 858 TotalReadOnlyCopied++; 859 return FD_DidForwardLeaf; 860 861 case VirtualUse::Intra: 862 // Knowing that UseStmt and DefStmt are the same statement instance, just 863 // reuse the information about UseStmt for DefStmt 864 DefStmt = UseStmt; 865 866 LLVM_FALLTHROUGH; 867 case VirtualUse::Inter: 868 Instruction *Inst = cast<Instruction>(UseVal); 869 870 if (!DefStmt) { 871 DefStmt = S->getStmtFor(Inst); 872 if (!DefStmt) 873 return FD_CannotForward; 874 } 875 876 DefLoop = LI->getLoopFor(Inst->getParent()); 877 878 ForwardingDecision SpeculativeResult = 879 forwardSpeculatable(TargetStmt, Inst, DefStmt, DefLoop, DoIt); 880 if (SpeculativeResult != FD_NotApplicable) 881 return SpeculativeResult; 882 883 ForwardingDecision KnownResult = forwardKnownLoad( 884 TargetStmt, Inst, UseStmt, UseLoop, DefStmt, DefLoop, DoIt); 885 if (KnownResult != FD_NotApplicable) 886 return KnownResult; 887 888 ForwardingDecision ReloadResult = reloadKnownContent( 889 TargetStmt, Inst, UseStmt, UseLoop, DefStmt, DefLoop, DoIt); 890 if (ReloadResult != FD_NotApplicable) 891 return ReloadResult; 892 893 // When no method is found to forward the operand tree, we effectively 894 // cannot handle it. 895 LLVM_DEBUG(dbgs() << " Cannot forward instruction: " << *Inst << "\n"); 896 return FD_CannotForward; 897 } 898 899 llvm_unreachable("Case unhandled"); 900 } 901 902 /// Try to forward an operand tree rooted in @p RA. 903 bool tryForwardTree(MemoryAccess *RA) { 904 assert(RA->isLatestScalarKind()); 905 LLVM_DEBUG(dbgs() << "Trying to forward operand tree " << RA << "...\n"); 906 907 ScopStmt *Stmt = RA->getStatement(); 908 Loop *InLoop = Stmt->getSurroundingLoop(); 909 910 isl::map TargetToUse; 911 if (!Known.is_null()) { 912 isl::space DomSpace = Stmt->getDomainSpace(); 913 TargetToUse = 914 isl::map::identity(DomSpace.map_from_domain_and_range(DomSpace)); 915 } 916 917 ForwardingDecision Assessment = 918 forwardTree(Stmt, RA->getAccessValue(), Stmt, InLoop, false); 919 assert(Assessment != FD_DidForwardTree && Assessment != FD_DidForwardLeaf); 920 if (Assessment != FD_CanForwardProfitably) 921 return false; 922 923 ForwardingDecision Execution = 924 forwardTree(Stmt, RA->getAccessValue(), Stmt, InLoop, true); 925 assert(((Execution == FD_DidForwardTree) || 926 (Execution == FD_DidForwardLeaf)) && 927 "A previous positive assessment must also be executable"); 928 929 if (Execution == FD_DidForwardTree) 930 Stmt->removeSingleMemoryAccess(RA); 931 return true; 932 } 933 934 /// Return which SCoP this instance is processing. 935 Scop *getScop() const { return S; } 936 937 /// Run the algorithm: Use value read accesses as operand tree roots and try 938 /// to forward them into the statement. 939 bool forwardOperandTrees() { 940 for (ScopStmt &Stmt : *S) { 941 bool StmtModified = false; 942 943 // Because we are modifying the MemoryAccess list, collect them first to 944 // avoid iterator invalidation. 945 SmallVector<MemoryAccess *, 16> Accs; 946 for (MemoryAccess *RA : Stmt) { 947 if (!RA->isRead()) 948 continue; 949 if (!RA->isLatestScalarKind()) 950 continue; 951 952 Accs.push_back(RA); 953 } 954 955 for (MemoryAccess *RA : Accs) { 956 if (tryForwardTree(RA)) { 957 Modified = true; 958 StmtModified = true; 959 NumForwardedTrees++; 960 TotalForwardedTrees++; 961 } 962 } 963 964 if (StmtModified) { 965 NumModifiedStmts++; 966 TotalModifiedStmts++; 967 } 968 } 969 970 if (Modified) 971 ScopsModified++; 972 return Modified; 973 } 974 975 /// Print the pass result, performed transformations and the SCoP after the 976 /// transformation. 977 void print(raw_ostream &OS, int Indent = 0) { 978 printStatistics(OS, Indent); 979 980 if (!Modified) { 981 // This line can easily be checked in regression tests. 982 OS << "ForwardOpTree executed, but did not modify anything\n"; 983 return; 984 } 985 986 printStatements(OS, Indent); 987 } 988 }; 989 990 /// Pass that redirects scalar reads to array elements that are known to contain 991 /// the same value. 992 /// 993 /// This reduces the number of scalar accesses and therefore potentially 994 /// increases the freedom of the scheduler. In the ideal case, all reads of a 995 /// scalar definition are redirected (We currently do not care about removing 996 /// the write in this case). This is also useful for the main DeLICM pass as 997 /// there are less scalars to be mapped. 998 class ForwardOpTree : public ScopPass { 999 private: 1000 /// The pass implementation, also holding per-scop data. 1001 std::unique_ptr<ForwardOpTreeImpl> Impl; 1002 1003 public: 1004 static char ID; 1005 1006 explicit ForwardOpTree() : ScopPass(ID) {} 1007 ForwardOpTree(const ForwardOpTree &) = delete; 1008 ForwardOpTree &operator=(const ForwardOpTree &) = delete; 1009 1010 void getAnalysisUsage(AnalysisUsage &AU) const override { 1011 AU.addRequiredTransitive<ScopInfoRegionPass>(); 1012 AU.addRequired<LoopInfoWrapperPass>(); 1013 AU.setPreservesAll(); 1014 } 1015 1016 bool runOnScop(Scop &S) override { 1017 // Free resources for previous SCoP's computation, if not yet done. 1018 releaseMemory(); 1019 1020 LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 1021 1022 { 1023 IslMaxOperationsGuard MaxOpGuard(S.getIslCtx().get(), MaxOps, false); 1024 Impl = llvm::make_unique<ForwardOpTreeImpl>(&S, &LI, MaxOpGuard); 1025 1026 if (AnalyzeKnown) { 1027 LLVM_DEBUG(dbgs() << "Prepare forwarders...\n"); 1028 Impl->computeKnownValues(); 1029 } 1030 1031 LLVM_DEBUG(dbgs() << "Forwarding operand trees...\n"); 1032 Impl->forwardOperandTrees(); 1033 1034 if (MaxOpGuard.hasQuotaExceeded()) { 1035 LLVM_DEBUG(dbgs() << "Not all operations completed because of " 1036 "max_operations exceeded\n"); 1037 KnownOutOfQuota++; 1038 } 1039 } 1040 1041 LLVM_DEBUG(dbgs() << "\nFinal Scop:\n"); 1042 LLVM_DEBUG(dbgs() << S); 1043 1044 // Update statistics 1045 auto ScopStats = S.getStatistics(); 1046 NumValueWrites += ScopStats.NumValueWrites; 1047 NumValueWritesInLoops += ScopStats.NumValueWritesInLoops; 1048 NumPHIWrites += ScopStats.NumPHIWrites; 1049 NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops; 1050 NumSingletonWrites += ScopStats.NumSingletonWrites; 1051 NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops; 1052 1053 return false; 1054 } 1055 1056 void printScop(raw_ostream &OS, Scop &S) const override { 1057 if (!Impl) 1058 return; 1059 1060 assert(Impl->getScop() == &S); 1061 Impl->print(OS); 1062 } 1063 1064 void releaseMemory() override { Impl.reset(); } 1065 }; // class ForwardOpTree 1066 1067 char ForwardOpTree::ID; 1068 } // namespace 1069 1070 ScopPass *polly::createForwardOpTreePass() { return new ForwardOpTree(); } 1071 1072 INITIALIZE_PASS_BEGIN(ForwardOpTree, "polly-optree", 1073 "Polly - Forward operand tree", false, false) 1074 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 1075 INITIALIZE_PASS_END(ForwardOpTree, "polly-optree", 1076 "Polly - Forward operand tree", false, false) 1077