1 //===- ForwardOpTree.h ------------------------------------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Move instructions between statements. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "polly/ForwardOpTree.h" 15 #include "polly/Options.h" 16 #include "polly/ScopBuilder.h" 17 #include "polly/ScopInfo.h" 18 #include "polly/ScopPass.h" 19 #include "polly/Support/GICHelper.h" 20 #include "polly/Support/ISLOStream.h" 21 #include "polly/Support/ISLTools.h" 22 #include "polly/Support/VirtualInstruction.h" 23 #include "polly/ZoneAlgo.h" 24 #include "llvm/ADT/STLExtras.h" 25 #include "llvm/ADT/SmallVector.h" 26 #include "llvm/ADT/Statistic.h" 27 #include "llvm/Analysis/LoopInfo.h" 28 #include "llvm/Analysis/ValueTracking.h" 29 #include "llvm/IR/Instruction.h" 30 #include "llvm/IR/Instructions.h" 31 #include "llvm/IR/Value.h" 32 #include "llvm/Pass.h" 33 #include "llvm/Support/Casting.h" 34 #include "llvm/Support/CommandLine.h" 35 #include "llvm/Support/Compiler.h" 36 #include "llvm/Support/Debug.h" 37 #include "llvm/Support/ErrorHandling.h" 38 #include "llvm/Support/raw_ostream.h" 39 #include "isl/ctx.h" 40 #include "isl/isl-noexceptions.h" 41 #include <cassert> 42 #include <memory> 43 44 #define DEBUG_TYPE "polly-optree" 45 46 using namespace llvm; 47 using namespace polly; 48 49 static cl::opt<bool> 50 AnalyzeKnown("polly-optree-analyze-known", 51 cl::desc("Analyze array contents for load forwarding"), 52 cl::cat(PollyCategory), cl::init(true), cl::Hidden); 53 54 static cl::opt<bool> 55 NormalizePHIs("polly-optree-normalize-phi", 56 cl::desc("Replace PHIs by their incoming values"), 57 cl::cat(PollyCategory), cl::init(false), cl::Hidden); 58 59 static cl::opt<unsigned> 60 MaxOps("polly-optree-max-ops", 61 cl::desc("Maximum number of ISL operations to invest for known " 62 "analysis; 0=no limit"), 63 cl::init(1000000), cl::cat(PollyCategory), cl::Hidden); 64 65 STATISTIC(KnownAnalyzed, "Number of successfully analyzed SCoPs"); 66 STATISTIC(KnownOutOfQuota, 67 "Analyses aborted because max_operations was reached"); 68 69 STATISTIC(TotalInstructionsCopied, "Number of copied instructions"); 70 STATISTIC(TotalKnownLoadsForwarded, 71 "Number of forwarded loads because their value was known"); 72 STATISTIC(TotalReloads, "Number of reloaded values"); 73 STATISTIC(TotalReadOnlyCopied, "Number of copied read-only accesses"); 74 STATISTIC(TotalForwardedTrees, "Number of forwarded operand trees"); 75 STATISTIC(TotalModifiedStmts, 76 "Number of statements with at least one forwarded tree"); 77 78 STATISTIC(ScopsModified, "Number of SCoPs with at least one forwarded tree"); 79 80 STATISTIC(NumValueWrites, "Number of scalar value writes after OpTree"); 81 STATISTIC(NumValueWritesInLoops, 82 "Number of scalar value writes nested in affine loops after OpTree"); 83 STATISTIC(NumPHIWrites, "Number of scalar phi writes after OpTree"); 84 STATISTIC(NumPHIWritesInLoops, 85 "Number of scalar phi writes nested in affine loops after OpTree"); 86 STATISTIC(NumSingletonWrites, "Number of singleton writes after OpTree"); 87 STATISTIC(NumSingletonWritesInLoops, 88 "Number of singleton writes nested in affine loops after OpTree"); 89 90 namespace { 91 92 /// The state of whether an operand tree was/can be forwarded. 93 /// 94 /// The items apply to an instructions and its operand tree with the instruction 95 /// as the root element. If the value in question is not an instruction in the 96 /// SCoP, it can be a leaf of an instruction's operand tree. 97 enum ForwardingDecision { 98 /// The root instruction or value cannot be forwarded at all. 99 FD_CannotForward, 100 101 /// The root instruction or value can be forwarded as a leaf of a larger 102 /// operand tree. 103 /// It does not make sense to move the value itself, it would just replace it 104 /// by a use of itself. For instance, a constant "5" used in a statement can 105 /// be forwarded, but it would just replace it by the same constant "5". 106 /// However, it makes sense to move as an operand of 107 /// 108 /// %add = add 5, 5 109 /// 110 /// where "5" is moved as part of a larger operand tree. "5" would be placed 111 /// (disregarding for a moment that literal constants don't have a location 112 /// and can be used anywhere) into the same statement as %add would. 113 FD_CanForwardLeaf, 114 115 /// The root instruction can be forwarded and doing so avoids a scalar 116 /// dependency. 117 /// 118 /// This can be either because the operand tree can be moved to the target 119 /// statement, or a memory access is redirected to read from a different 120 /// location. 121 FD_CanForwardProfitably, 122 123 /// Used to indicate that a forwarding has be carried out successfully, and 124 /// the forwarded memory access can be deleted. 125 FD_DidForwardTree, 126 127 /// Used to indicate that a forwarding has be carried out successfully, and 128 /// the forwarded memory access is being reused. 129 FD_DidForwardLeaf, 130 131 /// A forwarding method cannot be applied to the operand tree. 132 /// The difference to FD_CannotForward is that there might be other methods 133 /// that can handle it. 134 /// The conditions that make an operand tree applicable must be checked even 135 /// with DoIt==true because a method following the one that returned 136 /// FD_NotApplicable might have returned FD_CanForwardTree. 137 FD_NotApplicable 138 }; 139 140 /// Implementation of operand tree forwarding for a specific SCoP. 141 /// 142 /// For a statement that requires a scalar value (through a value read 143 /// MemoryAccess), see if its operand can be moved into the statement. If so, 144 /// the MemoryAccess is removed and the all the operand tree instructions are 145 /// moved into the statement. All original instructions are left in the source 146 /// statements. The simplification pass can clean these up. 147 class ForwardOpTreeImpl : ZoneAlgorithm { 148 private: 149 /// Scope guard to limit the number of isl operations for this pass. 150 IslMaxOperationsGuard &MaxOpGuard; 151 152 /// How many instructions have been copied to other statements. 153 int NumInstructionsCopied = 0; 154 155 /// Number of loads forwarded because their value was known. 156 int NumKnownLoadsForwarded = 0; 157 158 /// Number of values reloaded from known array elements. 159 int NumReloads = 0; 160 161 /// How many read-only accesses have been copied. 162 int NumReadOnlyCopied = 0; 163 164 /// How many operand trees have been forwarded. 165 int NumForwardedTrees = 0; 166 167 /// Number of statements with at least one forwarded operand tree. 168 int NumModifiedStmts = 0; 169 170 /// Whether we carried out at least one change to the SCoP. 171 bool Modified = false; 172 173 /// Contains the zones where array elements are known to contain a specific 174 /// value. 175 /// { [Element[] -> Zone[]] -> ValInst[] } 176 /// @see computeKnown() 177 isl::union_map Known; 178 179 /// Translator for newly introduced ValInsts to already existing ValInsts such 180 /// that new introduced load instructions can reuse the Known analysis of its 181 /// original load. { ValInst[] -> ValInst[] } 182 isl::union_map Translator; 183 184 /// Get list of array elements that do contain the same ValInst[] at Domain[]. 185 /// 186 /// @param ValInst { Domain[] -> ValInst[] } 187 /// The values for which we search for alternative locations, 188 /// per statement instance. 189 /// 190 /// @return { Domain[] -> Element[] } 191 /// For each statement instance, the array elements that contain the 192 /// same ValInst. 193 isl::union_map findSameContentElements(isl::union_map ValInst) { 194 assert(!ValInst.is_single_valued().is_false()); 195 196 // { Domain[] } 197 isl::union_set Domain = ValInst.domain(); 198 199 // { Domain[] -> Scatter[] } 200 isl::union_map Schedule = getScatterFor(Domain); 201 202 // { Element[] -> [Scatter[] -> ValInst[]] } 203 isl::union_map MustKnownCurried = 204 convertZoneToTimepoints(Known, isl::dim::in, false, true).curry(); 205 206 // { [Domain[] -> ValInst[]] -> Scatter[] } 207 isl::union_map DomValSched = ValInst.domain_map().apply_range(Schedule); 208 209 // { [Scatter[] -> ValInst[]] -> [Domain[] -> ValInst[]] } 210 isl::union_map SchedValDomVal = 211 DomValSched.range_product(ValInst.range_map()).reverse(); 212 213 // { Element[] -> [Domain[] -> ValInst[]] } 214 isl::union_map MustKnownInst = MustKnownCurried.apply_range(SchedValDomVal); 215 216 // { Domain[] -> Element[] } 217 isl::union_map MustKnownMap = 218 MustKnownInst.uncurry().domain().unwrap().reverse(); 219 simplify(MustKnownMap); 220 221 return MustKnownMap; 222 } 223 224 /// Find a single array element for each statement instance, within a single 225 /// array. 226 /// 227 /// @param MustKnown { Domain[] -> Element[] } 228 /// Set of candidate array elements. 229 /// @param Domain { Domain[] } 230 /// The statement instance for which we need elements for. 231 /// 232 /// @return { Domain[] -> Element[] } 233 /// For each statement instance, an array element out of @p MustKnown. 234 /// All array elements must be in the same array (Polly does not yet 235 /// support reading from different accesses using the same 236 /// MemoryAccess). If no mapping for all of @p Domain exists, returns 237 /// null. 238 isl::map singleLocation(isl::union_map MustKnown, isl::set Domain) { 239 // { Domain[] -> Element[] } 240 isl::map Result; 241 242 // MemoryAccesses can read only elements from a single array 243 // (i.e. not: { Dom[0] -> A[0]; Dom[1] -> B[1] }). 244 // Look through all spaces until we find one that contains at least the 245 // wanted statement instance.s 246 MustKnown.foreach_map([&](isl::map Map) -> isl::stat { 247 // Get the array this is accessing. 248 isl::id ArrayId = Map.get_tuple_id(isl::dim::out); 249 ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(ArrayId.get_user()); 250 251 // No support for generation of indirect array accesses. 252 if (SAI->getBasePtrOriginSAI()) 253 return isl::stat::ok; // continue 254 255 // Determine whether this map contains all wanted values. 256 isl::set MapDom = Map.domain(); 257 if (!Domain.is_subset(MapDom).is_true()) 258 return isl::stat::ok; // continue 259 260 // There might be multiple array elements that contain the same value, but 261 // choose only one of them. lexmin is used because it returns a one-value 262 // mapping, we do not care about which one. 263 // TODO: Get the simplest access function. 264 Result = Map.lexmin(); 265 return isl::stat::error; // break 266 }); 267 268 return Result; 269 } 270 271 public: 272 ForwardOpTreeImpl(Scop *S, LoopInfo *LI, IslMaxOperationsGuard &MaxOpGuard) 273 : ZoneAlgorithm("polly-optree", S, LI), MaxOpGuard(MaxOpGuard) {} 274 275 /// Compute the zones of known array element contents. 276 /// 277 /// @return True if the computed #Known is usable. 278 bool computeKnownValues() { 279 isl::union_map MustKnown, KnownFromLoad, KnownFromInit; 280 281 // Check that nothing strange occurs. 282 collectCompatibleElts(); 283 284 { 285 IslQuotaScope QuotaScope = MaxOpGuard.enter(); 286 287 computeCommon(); 288 if (NormalizePHIs) 289 computeNormalizedPHIs(); 290 Known = computeKnown(true, true); 291 292 // Preexisting ValInsts use the known content analysis of themselves. 293 Translator = makeIdentityMap(Known.range(), false); 294 } 295 296 if (!Known || !Translator || !NormalizeMap) { 297 assert(isl_ctx_last_error(IslCtx.get()) == isl_error_quota); 298 Known = nullptr; 299 Translator = nullptr; 300 NormalizeMap = nullptr; 301 LLVM_DEBUG(dbgs() << "Known analysis exceeded max_operations\n"); 302 return false; 303 } 304 305 KnownAnalyzed++; 306 LLVM_DEBUG(dbgs() << "All known: " << Known << "\n"); 307 308 return true; 309 } 310 311 void printStatistics(raw_ostream &OS, int Indent = 0) { 312 OS.indent(Indent) << "Statistics {\n"; 313 OS.indent(Indent + 4) << "Instructions copied: " << NumInstructionsCopied 314 << '\n'; 315 OS.indent(Indent + 4) << "Known loads forwarded: " << NumKnownLoadsForwarded 316 << '\n'; 317 OS.indent(Indent + 4) << "Reloads: " << NumReloads << '\n'; 318 OS.indent(Indent + 4) << "Read-only accesses copied: " << NumReadOnlyCopied 319 << '\n'; 320 OS.indent(Indent + 4) << "Operand trees forwarded: " << NumForwardedTrees 321 << '\n'; 322 OS.indent(Indent + 4) << "Statements with forwarded operand trees: " 323 << NumModifiedStmts << '\n'; 324 OS.indent(Indent) << "}\n"; 325 } 326 327 void printStatements(raw_ostream &OS, int Indent = 0) const { 328 OS.indent(Indent) << "After statements {\n"; 329 for (auto &Stmt : *S) { 330 OS.indent(Indent + 4) << Stmt.getBaseName() << "\n"; 331 for (auto *MA : Stmt) 332 MA->print(OS); 333 334 OS.indent(Indent + 12); 335 Stmt.printInstructions(OS); 336 } 337 OS.indent(Indent) << "}\n"; 338 } 339 340 /// Create a new MemoryAccess of type read and MemoryKind::Array. 341 /// 342 /// @param Stmt The statement in which the access occurs. 343 /// @param LI The instruction that does the access. 344 /// @param AccessRelation The array element that each statement instance 345 /// accesses. 346 /// 347 /// @param The newly created access. 348 MemoryAccess *makeReadArrayAccess(ScopStmt *Stmt, LoadInst *LI, 349 isl::map AccessRelation) { 350 isl::id ArrayId = AccessRelation.get_tuple_id(isl::dim::out); 351 ScopArrayInfo *SAI = reinterpret_cast<ScopArrayInfo *>(ArrayId.get_user()); 352 353 // Create a dummy SCEV access, to be replaced anyway. 354 SmallVector<const SCEV *, 4> Sizes; 355 Sizes.reserve(SAI->getNumberOfDimensions()); 356 SmallVector<const SCEV *, 4> Subscripts; 357 Subscripts.reserve(SAI->getNumberOfDimensions()); 358 for (unsigned i = 0; i < SAI->getNumberOfDimensions(); i += 1) { 359 Sizes.push_back(SAI->getDimensionSize(i)); 360 Subscripts.push_back(nullptr); 361 } 362 363 MemoryAccess *Access = 364 new MemoryAccess(Stmt, LI, MemoryAccess::READ, SAI->getBasePtr(), 365 LI->getType(), true, {}, Sizes, LI, MemoryKind::Array); 366 S->addAccessFunction(Access); 367 Stmt->addAccess(Access, true); 368 369 Access->setNewAccessRelation(AccessRelation); 370 371 return Access; 372 } 373 374 /// For an llvm::Value defined in @p DefStmt, compute the RAW dependency for a 375 /// use in every instance of @p UseStmt. 376 /// 377 /// @param UseStmt Statement a scalar is used in. 378 /// @param DefStmt Statement a scalar is defined in. 379 /// 380 /// @return { DomainUse[] -> DomainDef[] } 381 isl::map computeUseToDefFlowDependency(ScopStmt *UseStmt, ScopStmt *DefStmt) { 382 // { DomainUse[] -> Scatter[] } 383 isl::map UseScatter = getScatterFor(UseStmt); 384 385 // { Zone[] -> DomainDef[] } 386 isl::map ReachDefZone = getScalarReachingDefinition(DefStmt); 387 388 // { Scatter[] -> DomainDef[] } 389 isl::map ReachDefTimepoints = 390 convertZoneToTimepoints(ReachDefZone, isl::dim::in, false, true); 391 392 // { DomainUse[] -> DomainDef[] } 393 return UseScatter.apply_range(ReachDefTimepoints); 394 } 395 396 /// Forward a load by reading from an array element that contains the same 397 /// value. Typically the location it was loaded from. 398 /// 399 /// @param TargetStmt The statement the operand tree will be copied to. 400 /// @param Inst The (possibly speculatable) instruction to forward. 401 /// @param UseStmt The statement that uses @p Inst. 402 /// @param UseLoop The loop @p Inst is used in. 403 /// @param UseToTarget { DomainUse[] -> DomainTarget[] } 404 /// A mapping from the statement instance @p Inst is used 405 /// to the statement instance it is forwarded to. 406 /// @param DefStmt The statement @p Inst is defined in. 407 /// @param DefLoop The loop which contains @p Inst. 408 /// @param DefToTarget { DomainDef[] -> DomainTarget[] } 409 /// A mapping from the statement instance @p Inst is 410 /// defined to the statement instance it is forwarded to. 411 /// @param DoIt If false, only determine whether an operand tree can be 412 /// forwarded. If true, carry out the forwarding. Do not 413 /// use DoIt==true if an operand tree is not known to be 414 /// forwardable. 415 /// 416 /// @return FD_NotApplicable if @p Inst cannot be forwarded by creating a new 417 /// load. 418 /// FD_CannotForward if the pointer operand cannot be forwarded. 419 /// FD_CanForwardProfitably if @p Inst is forwardable. 420 /// FD_DidForwardTree if @p DoIt was true. 421 ForwardingDecision forwardKnownLoad(ScopStmt *TargetStmt, Instruction *Inst, 422 ScopStmt *UseStmt, Loop *UseLoop, 423 isl::map UseToTarget, ScopStmt *DefStmt, 424 Loop *DefLoop, isl::map DefToTarget, 425 bool DoIt) { 426 // Cannot do anything without successful known analysis. 427 if (Known.is_null() || Translator.is_null() || UseToTarget.is_null() || 428 DefToTarget.is_null() || MaxOpGuard.hasQuotaExceeded()) 429 return FD_NotApplicable; 430 431 LoadInst *LI = dyn_cast<LoadInst>(Inst); 432 if (!LI) 433 return FD_NotApplicable; 434 435 // If the load is already in the statement, no forwarding is necessary. 436 // However, it might happen that the LoadInst is already present in the 437 // statement's instruction list. In that case we do as follows: 438 // - For the evaluation (DoIt==false), we can trivially forward it as it is 439 // benefit of forwarding an already present instruction. 440 // - For the execution (DoIt==true), prepend the instruction (to make it 441 // available to all instructions following in the instruction list), but 442 // do not add another MemoryAccess. 443 MemoryAccess *Access = TargetStmt->getArrayAccessOrNULLFor(LI); 444 if (Access && !DoIt) 445 return FD_CanForwardProfitably; 446 447 ForwardingDecision OpDecision = 448 forwardTree(TargetStmt, LI->getPointerOperand(), DefStmt, DefLoop, 449 DefToTarget, DoIt); 450 switch (OpDecision) { 451 case FD_CannotForward: 452 assert(!DoIt); 453 return OpDecision; 454 455 case FD_CanForwardLeaf: 456 case FD_CanForwardProfitably: 457 assert(!DoIt); 458 break; 459 460 case FD_DidForwardLeaf: 461 case FD_DidForwardTree: 462 assert(DoIt); 463 break; 464 465 default: 466 llvm_unreachable("Shouldn't return this"); 467 } 468 469 IslQuotaScope QuotaScope = MaxOpGuard.enter(!DoIt); 470 471 // { DomainDef[] -> ValInst[] } 472 isl::map ExpectedVal = makeValInst(Inst, UseStmt, UseLoop); 473 assert(isNormalized(ExpectedVal) && "LoadInsts are always normalized"); 474 475 // { DomainTarget[] -> ValInst[] } 476 isl::map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget); 477 isl::union_map TranslatedExpectedVal = 478 isl::union_map(TargetExpectedVal).apply_range(Translator); 479 480 // { DomainTarget[] -> Element[] } 481 isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal); 482 483 isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt)); 484 if (!SameVal) 485 return FD_NotApplicable; 486 487 if (DoIt) 488 TargetStmt->prependInstruction(LI); 489 490 if (!DoIt) 491 return FD_CanForwardProfitably; 492 493 if (Access) { 494 LLVM_DEBUG( 495 dbgs() << " forwarded known load with preexisting MemoryAccess" 496 << Access << "\n"); 497 } else { 498 Access = makeReadArrayAccess(TargetStmt, LI, SameVal); 499 LLVM_DEBUG(dbgs() << " forwarded known load with new MemoryAccess" 500 << Access << "\n"); 501 502 // { ValInst[] } 503 isl::space ValInstSpace = ExpectedVal.get_space().range(); 504 505 // After adding a new load to the SCoP, also update the Known content 506 // about it. The new load will have a known ValInst of 507 // { [DomainTarget[] -> Value[]] } 508 // but which -- because it is a copy of it -- has same value as the 509 // { [DomainDef[] -> Value[]] } 510 // that it replicates. Instead of cloning the known content of 511 // [DomainDef[] -> Value[]] 512 // for DomainTarget[], we add a 'translator' that maps 513 // [DomainTarget[] -> Value[]] to [DomainDef[] -> Value[]] 514 // before comparing to the known content. 515 // TODO: 'Translator' could also be used to map PHINodes to their incoming 516 // ValInsts. 517 if (ValInstSpace.is_wrapping()) { 518 // { DefDomain[] -> Value[] } 519 isl::map ValInsts = ExpectedVal.range().unwrap(); 520 521 // { DefDomain[] } 522 isl::set DefDomain = ValInsts.domain(); 523 524 // { Value[] } 525 isl::space ValSpace = ValInstSpace.unwrap().range(); 526 527 // { Value[] -> Value[] } 528 isl::map ValToVal = 529 isl::map::identity(ValSpace.map_from_domain_and_range(ValSpace)); 530 531 // { [TargetDomain[] -> Value[]] -> [DefDomain[] -> Value] } 532 isl::map LocalTranslator = DefToTarget.reverse().product(ValToVal); 533 534 Translator = Translator.add_map(LocalTranslator); 535 LLVM_DEBUG(dbgs() << " local translator is " << LocalTranslator 536 << "\n"); 537 } 538 } 539 LLVM_DEBUG(dbgs() << " expected values where " << TargetExpectedVal 540 << "\n"); 541 LLVM_DEBUG(dbgs() << " candidate elements where " << Candidates 542 << "\n"); 543 assert(Access); 544 545 NumKnownLoadsForwarded++; 546 TotalKnownLoadsForwarded++; 547 return FD_DidForwardTree; 548 } 549 550 /// Forward a scalar by redirecting the access to an array element that stores 551 /// the same value. 552 /// 553 /// @param TargetStmt The statement the operand tree will be copied to. 554 /// @param Inst The scalar to forward. 555 /// @param UseStmt The statement that uses @p Inst. 556 /// @param UseLoop The loop @p Inst is used in. 557 /// @param UseToTarget { DomainUse[] -> DomainTarget[] } 558 /// A mapping from the statement instance @p Inst is used 559 /// in, to the statement instance it is forwarded to. 560 /// @param DefStmt The statement @p Inst is defined in. 561 /// @param DefLoop The loop which contains @p Inst. 562 /// @param DefToTarget { DomainDef[] -> DomainTarget[] } 563 /// A mapping from the statement instance @p Inst is 564 /// defined in, to the statement instance it is forwarded 565 /// to. 566 /// @param DoIt If false, only determine whether an operand tree can be 567 /// forwarded. If true, carry out the forwarding. Do not 568 /// use DoIt==true if an operand tree is not known to be 569 /// forwardable. 570 /// 571 /// @return FD_NotApplicable if @p Inst cannot be reloaded. 572 /// FD_CanForwardLeaf if @p Inst can be reloaded. 573 /// FD_CanForwardProfitably if @p Inst has been reloaded. 574 /// FD_DidForwardLeaf if @p DoIt was true. 575 ForwardingDecision reloadKnownContent(ScopStmt *TargetStmt, Instruction *Inst, 576 ScopStmt *UseStmt, Loop *UseLoop, 577 isl::map UseToTarget, ScopStmt *DefStmt, 578 Loop *DefLoop, isl::map DefToTarget, 579 bool DoIt) { 580 // Cannot do anything without successful known analysis. 581 if (Known.is_null() || Translator.is_null() || UseToTarget.is_null() || 582 DefToTarget.is_null() || MaxOpGuard.hasQuotaExceeded()) 583 return FD_NotApplicable; 584 585 MemoryAccess *Access = TargetStmt->lookupInputAccessOf(Inst); 586 if (Access && Access->isLatestArrayKind()) { 587 if (DoIt) 588 return FD_DidForwardLeaf; 589 return FD_CanForwardLeaf; 590 } 591 592 // Don't spend too much time analyzing whether it can be reloaded. When 593 // carrying-out the forwarding, we cannot bail-out in the middle of the 594 // transformation. It also shouldn't take as long because some results are 595 // cached. 596 IslQuotaScope QuotaScope = MaxOpGuard.enter(!DoIt); 597 598 // { DomainDef[] -> ValInst[] } 599 isl::union_map ExpectedVal = makeNormalizedValInst(Inst, UseStmt, UseLoop); 600 601 // { DomainTarget[] -> ValInst[] } 602 isl::union_map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget); 603 isl::union_map TranslatedExpectedVal = 604 TargetExpectedVal.apply_range(Translator); 605 606 // { DomainTarget[] -> Element[] } 607 isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal); 608 609 isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt)); 610 if (!SameVal) 611 return FD_NotApplicable; 612 613 if (!DoIt) 614 return FD_CanForwardProfitably; 615 616 if (!Access) 617 Access = TargetStmt->ensureValueRead(Inst); 618 619 simplify(SameVal); 620 Access->setNewAccessRelation(SameVal); 621 622 TotalReloads++; 623 NumReloads++; 624 return FD_DidForwardLeaf; 625 } 626 627 /// Forwards a speculatively executable instruction. 628 /// 629 /// @param TargetStmt The statement the operand tree will be copied to. 630 /// @param UseInst The (possibly speculatable) instruction to forward. 631 /// @param DefStmt The statement @p UseInst is defined in. 632 /// @param DefLoop The loop which contains @p UseInst. 633 /// @param DefToTarget { DomainDef[] -> DomainTarget[] } 634 /// A mapping from the statement instance @p UseInst is 635 /// defined to the statement instance it is forwarded to. 636 /// @param DoIt If false, only determine whether an operand tree can be 637 /// forwarded. If true, carry out the forwarding. Do not 638 /// use DoIt==true if an operand tree is not known to be 639 /// forwardable. 640 /// 641 /// @return FD_NotApplicable if @p UseInst is not speculatable. 642 /// FD_CannotForward if one of @p UseInst's operands is not 643 /// forwardable. 644 /// FD_CanForwardTree if @p UseInst is forwardable. 645 /// FD_DidForward if @p DoIt was true. 646 ForwardingDecision forwardSpeculatable(ScopStmt *TargetStmt, 647 Instruction *UseInst, 648 ScopStmt *DefStmt, Loop *DefLoop, 649 isl::map DefToTarget, bool DoIt) { 650 // PHIs, unless synthesizable, are not yet supported. 651 if (isa<PHINode>(UseInst)) 652 return FD_NotApplicable; 653 654 // Compatible instructions must satisfy the following conditions: 655 // 1. Idempotent (instruction will be copied, not moved; although its 656 // original instance might be removed by simplification) 657 // 2. Not access memory (There might be memory writes between) 658 // 3. Not cause undefined behaviour (we might copy to a location when the 659 // original instruction was no executed; this is currently not possible 660 // because we do not forward PHINodes) 661 // 4. Not leak memory if executed multiple times (i.e. malloc) 662 // 663 // Instruction::mayHaveSideEffects is not sufficient because it considers 664 // malloc to not have side-effects. llvm::isSafeToSpeculativelyExecute is 665 // not sufficient because it allows memory accesses. 666 if (mayBeMemoryDependent(*UseInst)) 667 return FD_NotApplicable; 668 669 if (DoIt) { 670 // To ensure the right order, prepend this instruction before its 671 // operands. This ensures that its operands are inserted before the 672 // instruction using them. 673 // TODO: The operand tree is not really a tree, but a DAG. We should be 674 // able to handle DAGs without duplication. 675 TargetStmt->prependInstruction(UseInst); 676 NumInstructionsCopied++; 677 TotalInstructionsCopied++; 678 } 679 680 for (Value *OpVal : UseInst->operand_values()) { 681 ForwardingDecision OpDecision = 682 forwardTree(TargetStmt, OpVal, DefStmt, DefLoop, DefToTarget, DoIt); 683 switch (OpDecision) { 684 case FD_CannotForward: 685 assert(!DoIt); 686 return FD_CannotForward; 687 688 case FD_CanForwardLeaf: 689 case FD_CanForwardProfitably: 690 assert(!DoIt); 691 break; 692 693 case FD_DidForwardLeaf: 694 case FD_DidForwardTree: 695 assert(DoIt); 696 break; 697 698 case FD_NotApplicable: 699 llvm_unreachable("forwardTree should never return FD_NotApplicable"); 700 } 701 } 702 703 if (DoIt) 704 return FD_DidForwardTree; 705 return FD_CanForwardProfitably; 706 } 707 708 /// Determines whether an operand tree can be forwarded or carries out a 709 /// forwarding, depending on the @p DoIt flag. 710 /// 711 /// @param TargetStmt The statement the operand tree will be copied to. 712 /// @param UseVal The value (usually an instruction) which is root of an 713 /// operand tree. 714 /// @param UseStmt The statement that uses @p UseVal. 715 /// @param UseLoop The loop @p UseVal is used in. 716 /// @param UseToTarget { DomainUse[] -> DomainTarget[] } 717 /// A mapping from the statement instance @p UseVal is used 718 /// to the statement instance it is forwarded to. 719 /// @param DoIt If false, only determine whether an operand tree can be 720 /// forwarded. If true, carry out the forwarding. Do not 721 /// use DoIt==true if an operand tree is not known to be 722 /// forwardable. 723 /// 724 /// @return If DoIt==false, return whether the operand tree can be forwarded. 725 /// If DoIt==true, return FD_DidForward. 726 ForwardingDecision forwardTree(ScopStmt *TargetStmt, Value *UseVal, 727 ScopStmt *UseStmt, Loop *UseLoop, 728 isl::map UseToTarget, bool DoIt) { 729 ScopStmt *DefStmt = nullptr; 730 Loop *DefLoop = nullptr; 731 732 // { DefDomain[] -> TargetDomain[] } 733 isl::map DefToTarget; 734 735 VirtualUse VUse = VirtualUse::create(UseStmt, UseLoop, UseVal, true); 736 switch (VUse.getKind()) { 737 case VirtualUse::Constant: 738 case VirtualUse::Block: 739 case VirtualUse::Hoisted: 740 // These can be used anywhere without special considerations. 741 if (DoIt) 742 return FD_DidForwardTree; 743 return FD_CanForwardLeaf; 744 745 case VirtualUse::Synthesizable: { 746 // ScopExpander will take care for of generating the code at the new 747 // location. 748 if (DoIt) 749 return FD_DidForwardTree; 750 751 // Check if the value is synthesizable at the new location as well. This 752 // might be possible when leaving a loop for which ScalarEvolution is 753 // unable to derive the exit value for. 754 // TODO: If there is a LCSSA PHI at the loop exit, use that one. 755 // If the SCEV contains a SCEVAddRecExpr, we currently depend on that we 756 // do not forward past its loop header. This would require us to use a 757 // previous loop induction variable instead the current one. We currently 758 // do not allow forwarding PHI nodes, thus this should never occur (the 759 // only exception where no phi is necessary being an unreachable loop 760 // without edge from the outside). 761 VirtualUse TargetUse = VirtualUse::create( 762 S, TargetStmt, TargetStmt->getSurroundingLoop(), UseVal, true); 763 if (TargetUse.getKind() == VirtualUse::Synthesizable) 764 return FD_CanForwardLeaf; 765 766 LLVM_DEBUG( 767 dbgs() << " Synthesizable would not be synthesizable anymore: " 768 << *UseVal << "\n"); 769 return FD_CannotForward; 770 } 771 772 case VirtualUse::ReadOnly: 773 // Note that we cannot return FD_CanForwardTree here. With a operand tree 774 // depth of 0, UseVal is the use in TargetStmt that we try to replace. 775 // With -polly-analyze-read-only-scalars=true we would ensure the 776 // existence of a MemoryAccess (which already exists for a leaf) and be 777 // removed again by tryForwardTree because it's goal is to remove this 778 // scalar MemoryAccess. It interprets FD_CanForwardTree as the permission 779 // to do so. 780 if (!DoIt) 781 return FD_CanForwardLeaf; 782 783 // If we model read-only scalars, we need to create a MemoryAccess for it. 784 if (ModelReadOnlyScalars) 785 TargetStmt->ensureValueRead(UseVal); 786 787 NumReadOnlyCopied++; 788 TotalReadOnlyCopied++; 789 return FD_DidForwardLeaf; 790 791 case VirtualUse::Intra: 792 // Knowing that UseStmt and DefStmt are the same statement instance, just 793 // reuse the information about UseStmt for DefStmt 794 DefStmt = UseStmt; 795 DefToTarget = UseToTarget; 796 797 LLVM_FALLTHROUGH; 798 case VirtualUse::Inter: 799 Instruction *Inst = cast<Instruction>(UseVal); 800 801 if (!DefStmt) { 802 DefStmt = S->getStmtFor(Inst); 803 if (!DefStmt) 804 return FD_CannotForward; 805 } 806 807 DefLoop = LI->getLoopFor(Inst->getParent()); 808 809 if (DefToTarget.is_null() && !Known.is_null()) { 810 IslQuotaScope QuotaScope = MaxOpGuard.enter(!DoIt); 811 812 // { UseDomain[] -> DefDomain[] } 813 isl::map UseToDef = computeUseToDefFlowDependency(UseStmt, DefStmt); 814 815 // { DefDomain[] -> UseDomain[] -> TargetDomain[] } shortened to 816 // { DefDomain[] -> TargetDomain[] } 817 DefToTarget = UseToTarget.apply_domain(UseToDef); 818 simplify(DefToTarget); 819 } 820 821 ForwardingDecision SpeculativeResult = forwardSpeculatable( 822 TargetStmt, Inst, DefStmt, DefLoop, DefToTarget, DoIt); 823 if (SpeculativeResult != FD_NotApplicable) 824 return SpeculativeResult; 825 826 ForwardingDecision KnownResult = 827 forwardKnownLoad(TargetStmt, Inst, UseStmt, UseLoop, UseToTarget, 828 DefStmt, DefLoop, DefToTarget, DoIt); 829 if (KnownResult != FD_NotApplicable) 830 return KnownResult; 831 832 ForwardingDecision ReloadResult = 833 reloadKnownContent(TargetStmt, Inst, UseStmt, UseLoop, UseToTarget, 834 DefStmt, DefLoop, DefToTarget, DoIt); 835 if (ReloadResult != FD_NotApplicable) 836 return ReloadResult; 837 838 // When no method is found to forward the operand tree, we effectively 839 // cannot handle it. 840 LLVM_DEBUG(dbgs() << " Cannot forward instruction: " << *Inst << "\n"); 841 return FD_CannotForward; 842 } 843 844 llvm_unreachable("Case unhandled"); 845 } 846 847 /// Try to forward an operand tree rooted in @p RA. 848 bool tryForwardTree(MemoryAccess *RA) { 849 assert(RA->isLatestScalarKind()); 850 LLVM_DEBUG(dbgs() << "Trying to forward operand tree " << RA << "...\n"); 851 852 ScopStmt *Stmt = RA->getStatement(); 853 Loop *InLoop = Stmt->getSurroundingLoop(); 854 855 isl::map TargetToUse; 856 if (!Known.is_null()) { 857 isl::space DomSpace = Stmt->getDomainSpace(); 858 TargetToUse = 859 isl::map::identity(DomSpace.map_from_domain_and_range(DomSpace)); 860 } 861 862 ForwardingDecision Assessment = forwardTree( 863 Stmt, RA->getAccessValue(), Stmt, InLoop, TargetToUse, false); 864 assert(Assessment != FD_DidForwardTree && Assessment != FD_DidForwardLeaf); 865 if (Assessment != FD_CanForwardProfitably) 866 return false; 867 868 ForwardingDecision Execution = forwardTree(Stmt, RA->getAccessValue(), Stmt, 869 InLoop, TargetToUse, true); 870 assert(((Execution == FD_DidForwardTree) || 871 (Execution == FD_DidForwardLeaf)) && 872 "A previous positive assessment must also be executable"); 873 874 if (Execution == FD_DidForwardTree) 875 Stmt->removeSingleMemoryAccess(RA); 876 return true; 877 } 878 879 /// Return which SCoP this instance is processing. 880 Scop *getScop() const { return S; } 881 882 /// Run the algorithm: Use value read accesses as operand tree roots and try 883 /// to forward them into the statement. 884 bool forwardOperandTrees() { 885 for (ScopStmt &Stmt : *S) { 886 bool StmtModified = false; 887 888 // Because we are modifying the MemoryAccess list, collect them first to 889 // avoid iterator invalidation. 890 SmallVector<MemoryAccess *, 16> Accs; 891 for (MemoryAccess *RA : Stmt) { 892 if (!RA->isRead()) 893 continue; 894 if (!RA->isLatestScalarKind()) 895 continue; 896 897 Accs.push_back(RA); 898 } 899 900 for (MemoryAccess *RA : Accs) { 901 if (tryForwardTree(RA)) { 902 Modified = true; 903 StmtModified = true; 904 NumForwardedTrees++; 905 TotalForwardedTrees++; 906 } 907 } 908 909 if (StmtModified) { 910 NumModifiedStmts++; 911 TotalModifiedStmts++; 912 } 913 } 914 915 if (Modified) 916 ScopsModified++; 917 return Modified; 918 } 919 920 /// Print the pass result, performed transformations and the SCoP after the 921 /// transformation. 922 void print(raw_ostream &OS, int Indent = 0) { 923 printStatistics(OS, Indent); 924 925 if (!Modified) { 926 // This line can easily be checked in regression tests. 927 OS << "ForwardOpTree executed, but did not modify anything\n"; 928 return; 929 } 930 931 printStatements(OS, Indent); 932 } 933 }; 934 935 /// Pass that redirects scalar reads to array elements that are known to contain 936 /// the same value. 937 /// 938 /// This reduces the number of scalar accesses and therefore potentially 939 /// increases the freedom of the scheduler. In the ideal case, all reads of a 940 /// scalar definition are redirected (We currently do not care about removing 941 /// the write in this case). This is also useful for the main DeLICM pass as 942 /// there are less scalars to be mapped. 943 class ForwardOpTree : public ScopPass { 944 private: 945 /// The pass implementation, also holding per-scop data. 946 std::unique_ptr<ForwardOpTreeImpl> Impl; 947 948 public: 949 static char ID; 950 951 explicit ForwardOpTree() : ScopPass(ID) {} 952 ForwardOpTree(const ForwardOpTree &) = delete; 953 ForwardOpTree &operator=(const ForwardOpTree &) = delete; 954 955 void getAnalysisUsage(AnalysisUsage &AU) const override { 956 AU.addRequiredTransitive<ScopInfoRegionPass>(); 957 AU.addRequired<LoopInfoWrapperPass>(); 958 AU.setPreservesAll(); 959 } 960 961 bool runOnScop(Scop &S) override { 962 // Free resources for previous SCoP's computation, if not yet done. 963 releaseMemory(); 964 965 LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 966 967 { 968 IslMaxOperationsGuard MaxOpGuard(S.getIslCtx().get(), MaxOps, false); 969 Impl = llvm::make_unique<ForwardOpTreeImpl>(&S, &LI, MaxOpGuard); 970 971 if (AnalyzeKnown) { 972 LLVM_DEBUG(dbgs() << "Prepare forwarders...\n"); 973 Impl->computeKnownValues(); 974 } 975 976 LLVM_DEBUG(dbgs() << "Forwarding operand trees...\n"); 977 Impl->forwardOperandTrees(); 978 979 if (MaxOpGuard.hasQuotaExceeded()) { 980 LLVM_DEBUG(dbgs() << "Not all operations completed because of " 981 "max_operations exceeded\n"); 982 KnownOutOfQuota++; 983 } 984 } 985 986 LLVM_DEBUG(dbgs() << "\nFinal Scop:\n"); 987 LLVM_DEBUG(dbgs() << S); 988 989 // Update statistics 990 auto ScopStats = S.getStatistics(); 991 NumValueWrites += ScopStats.NumValueWrites; 992 NumValueWritesInLoops += ScopStats.NumValueWritesInLoops; 993 NumPHIWrites += ScopStats.NumPHIWrites; 994 NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops; 995 NumSingletonWrites += ScopStats.NumSingletonWrites; 996 NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops; 997 998 return false; 999 } 1000 1001 void printScop(raw_ostream &OS, Scop &S) const override { 1002 if (!Impl) 1003 return; 1004 1005 assert(Impl->getScop() == &S); 1006 Impl->print(OS); 1007 } 1008 1009 void releaseMemory() override { Impl.reset(); } 1010 }; // class ForwardOpTree 1011 1012 char ForwardOpTree::ID; 1013 } // namespace 1014 1015 ScopPass *polly::createForwardOpTreePass() { return new ForwardOpTree(); } 1016 1017 INITIALIZE_PASS_BEGIN(ForwardOpTree, "polly-optree", 1018 "Polly - Forward operand tree", false, false) 1019 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 1020 INITIALIZE_PASS_END(ForwardOpTree, "polly-optree", 1021 "Polly - Forward operand tree", false, false) 1022