1 //===-- DependenceAnalysis.cpp - DA Implementation --------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // DependenceAnalysis is an LLVM pass that analyses dependences between memory 10 // accesses. Currently, it is an (incomplete) implementation of the approach 11 // described in 12 // 13 // Practical Dependence Testing 14 // Goff, Kennedy, Tseng 15 // PLDI 1991 16 // 17 // There's a single entry point that analyzes the dependence between a pair 18 // of memory references in a function, returning either NULL, for no dependence, 19 // or a more-or-less detailed description of the dependence between them. 20 // 21 // Currently, the implementation cannot propagate constraints between 22 // coupled RDIV subscripts and lacks a multi-subscript MIV test. 23 // Both of these are conservative weaknesses; 24 // that is, not a source of correctness problems. 25 // 26 // Since Clang linearizes some array subscripts, the dependence 27 // analysis is using SCEV->delinearize to recover the representation of multiple 28 // subscripts, and thus avoid the more expensive and less precise MIV tests. The 29 // delinearization is controlled by the flag -da-delinearize. 30 // 31 // We should pay some careful attention to the possibility of integer overflow 32 // in the implementation of the various tests. This could happen with Add, 33 // Subtract, or Multiply, with both APInt's and SCEV's. 34 // 35 // Some non-linear subscript pairs can be handled by the GCD test 36 // (and perhaps other tests). 37 // Should explore how often these things occur. 38 // 39 // Finally, it seems like certain test cases expose weaknesses in the SCEV 40 // simplification, especially in the handling of sign and zero extensions. 41 // It could be useful to spend time exploring these. 42 // 43 // Please note that this is work in progress and the interface is subject to 44 // change. 45 // 46 //===----------------------------------------------------------------------===// 47 // // 48 // In memory of Ken Kennedy, 1945 - 2007 // 49 // // 50 //===----------------------------------------------------------------------===// 51 52 #include "llvm/Analysis/DependenceAnalysis.h" 53 #include "llvm/ADT/Statistic.h" 54 #include "llvm/Analysis/AliasAnalysis.h" 55 #include "llvm/Analysis/Delinearization.h" 56 #include "llvm/Analysis/LoopInfo.h" 57 #include "llvm/Analysis/ScalarEvolution.h" 58 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 59 #include "llvm/Analysis/ValueTracking.h" 60 #include "llvm/IR/InstIterator.h" 61 #include "llvm/IR/Module.h" 62 #include "llvm/InitializePasses.h" 63 #include "llvm/Support/CommandLine.h" 64 #include "llvm/Support/Debug.h" 65 #include "llvm/Support/ErrorHandling.h" 66 #include "llvm/Support/raw_ostream.h" 67 68 using namespace llvm; 69 70 #define DEBUG_TYPE "da" 71 72 //===----------------------------------------------------------------------===// 73 // statistics 74 75 STATISTIC(TotalArrayPairs, "Array pairs tested"); 76 STATISTIC(SeparableSubscriptPairs, "Separable subscript pairs"); 77 STATISTIC(CoupledSubscriptPairs, "Coupled subscript pairs"); 78 STATISTIC(NonlinearSubscriptPairs, "Nonlinear subscript pairs"); 79 STATISTIC(ZIVapplications, "ZIV applications"); 80 STATISTIC(ZIVindependence, "ZIV independence"); 81 STATISTIC(StrongSIVapplications, "Strong SIV applications"); 82 STATISTIC(StrongSIVsuccesses, "Strong SIV successes"); 83 STATISTIC(StrongSIVindependence, "Strong SIV independence"); 84 STATISTIC(WeakCrossingSIVapplications, "Weak-Crossing SIV applications"); 85 STATISTIC(WeakCrossingSIVsuccesses, "Weak-Crossing SIV successes"); 86 STATISTIC(WeakCrossingSIVindependence, "Weak-Crossing SIV independence"); 87 STATISTIC(ExactSIVapplications, "Exact SIV applications"); 88 STATISTIC(ExactSIVsuccesses, "Exact SIV successes"); 89 STATISTIC(ExactSIVindependence, "Exact SIV independence"); 90 STATISTIC(WeakZeroSIVapplications, "Weak-Zero SIV applications"); 91 STATISTIC(WeakZeroSIVsuccesses, "Weak-Zero SIV successes"); 92 STATISTIC(WeakZeroSIVindependence, "Weak-Zero SIV independence"); 93 STATISTIC(ExactRDIVapplications, "Exact RDIV applications"); 94 STATISTIC(ExactRDIVindependence, "Exact RDIV independence"); 95 STATISTIC(SymbolicRDIVapplications, "Symbolic RDIV applications"); 96 STATISTIC(SymbolicRDIVindependence, "Symbolic RDIV independence"); 97 STATISTIC(DeltaApplications, "Delta applications"); 98 STATISTIC(DeltaSuccesses, "Delta successes"); 99 STATISTIC(DeltaIndependence, "Delta independence"); 100 STATISTIC(DeltaPropagations, "Delta propagations"); 101 STATISTIC(GCDapplications, "GCD applications"); 102 STATISTIC(GCDsuccesses, "GCD successes"); 103 STATISTIC(GCDindependence, "GCD independence"); 104 STATISTIC(BanerjeeApplications, "Banerjee applications"); 105 STATISTIC(BanerjeeIndependence, "Banerjee independence"); 106 STATISTIC(BanerjeeSuccesses, "Banerjee successes"); 107 108 static cl::opt<bool> 109 Delinearize("da-delinearize", cl::init(true), cl::Hidden, cl::ZeroOrMore, 110 cl::desc("Try to delinearize array references.")); 111 static cl::opt<bool> DisableDelinearizationChecks( 112 "da-disable-delinearization-checks", cl::init(false), cl::Hidden, 113 cl::ZeroOrMore, 114 cl::desc( 115 "Disable checks that try to statically verify validity of " 116 "delinearized subscripts. Enabling this option may result in incorrect " 117 "dependence vectors for languages that allow the subscript of one " 118 "dimension to underflow or overflow into another dimension.")); 119 120 static cl::opt<unsigned> MIVMaxLevelThreshold( 121 "da-miv-max-level-threshold", cl::init(7), cl::Hidden, cl::ZeroOrMore, 122 cl::desc("Maximum depth allowed for the recursive algorithm used to " 123 "explore MIV direction vectors.")); 124 125 //===----------------------------------------------------------------------===// 126 // basics 127 128 DependenceAnalysis::Result 129 DependenceAnalysis::run(Function &F, FunctionAnalysisManager &FAM) { 130 auto &AA = FAM.getResult<AAManager>(F); 131 auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F); 132 auto &LI = FAM.getResult<LoopAnalysis>(F); 133 return DependenceInfo(&F, &AA, &SE, &LI); 134 } 135 136 AnalysisKey DependenceAnalysis::Key; 137 138 INITIALIZE_PASS_BEGIN(DependenceAnalysisWrapperPass, "da", 139 "Dependence Analysis", true, true) 140 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 141 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) 142 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 143 INITIALIZE_PASS_END(DependenceAnalysisWrapperPass, "da", "Dependence Analysis", 144 true, true) 145 146 char DependenceAnalysisWrapperPass::ID = 0; 147 148 DependenceAnalysisWrapperPass::DependenceAnalysisWrapperPass() 149 : FunctionPass(ID) { 150 initializeDependenceAnalysisWrapperPassPass(*PassRegistry::getPassRegistry()); 151 } 152 153 FunctionPass *llvm::createDependenceAnalysisWrapperPass() { 154 return new DependenceAnalysisWrapperPass(); 155 } 156 157 bool DependenceAnalysisWrapperPass::runOnFunction(Function &F) { 158 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); 159 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 160 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 161 info.reset(new DependenceInfo(&F, &AA, &SE, &LI)); 162 return false; 163 } 164 165 DependenceInfo &DependenceAnalysisWrapperPass::getDI() const { return *info; } 166 167 void DependenceAnalysisWrapperPass::releaseMemory() { info.reset(); } 168 169 void DependenceAnalysisWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { 170 AU.setPreservesAll(); 171 AU.addRequiredTransitive<AAResultsWrapperPass>(); 172 AU.addRequiredTransitive<ScalarEvolutionWrapperPass>(); 173 AU.addRequiredTransitive<LoopInfoWrapperPass>(); 174 } 175 176 // Used to test the dependence analyzer. 177 // Looks through the function, noting instructions that may access memory. 178 // Calls depends() on every possible pair and prints out the result. 179 // Ignores all other instructions. 180 static void dumpExampleDependence(raw_ostream &OS, DependenceInfo *DA) { 181 auto *F = DA->getFunction(); 182 for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F); SrcI != SrcE; 183 ++SrcI) { 184 if (SrcI->mayReadOrWriteMemory()) { 185 for (inst_iterator DstI = SrcI, DstE = inst_end(F); 186 DstI != DstE; ++DstI) { 187 if (DstI->mayReadOrWriteMemory()) { 188 OS << "Src:" << *SrcI << " --> Dst:" << *DstI << "\n"; 189 OS << " da analyze - "; 190 if (auto D = DA->depends(&*SrcI, &*DstI, true)) { 191 D->dump(OS); 192 for (unsigned Level = 1; Level <= D->getLevels(); Level++) { 193 if (D->isSplitable(Level)) { 194 OS << " da analyze - split level = " << Level; 195 OS << ", iteration = " << *DA->getSplitIteration(*D, Level); 196 OS << "!\n"; 197 } 198 } 199 } 200 else 201 OS << "none!\n"; 202 } 203 } 204 } 205 } 206 } 207 208 void DependenceAnalysisWrapperPass::print(raw_ostream &OS, 209 const Module *) const { 210 dumpExampleDependence(OS, info.get()); 211 } 212 213 PreservedAnalyses 214 DependenceAnalysisPrinterPass::run(Function &F, FunctionAnalysisManager &FAM) { 215 OS << "'Dependence Analysis' for function '" << F.getName() << "':\n"; 216 dumpExampleDependence(OS, &FAM.getResult<DependenceAnalysis>(F)); 217 return PreservedAnalyses::all(); 218 } 219 220 //===----------------------------------------------------------------------===// 221 // Dependence methods 222 223 // Returns true if this is an input dependence. 224 bool Dependence::isInput() const { 225 return Src->mayReadFromMemory() && Dst->mayReadFromMemory(); 226 } 227 228 229 // Returns true if this is an output dependence. 230 bool Dependence::isOutput() const { 231 return Src->mayWriteToMemory() && Dst->mayWriteToMemory(); 232 } 233 234 235 // Returns true if this is an flow (aka true) dependence. 236 bool Dependence::isFlow() const { 237 return Src->mayWriteToMemory() && Dst->mayReadFromMemory(); 238 } 239 240 241 // Returns true if this is an anti dependence. 242 bool Dependence::isAnti() const { 243 return Src->mayReadFromMemory() && Dst->mayWriteToMemory(); 244 } 245 246 247 // Returns true if a particular level is scalar; that is, 248 // if no subscript in the source or destination mention the induction 249 // variable associated with the loop at this level. 250 // Leave this out of line, so it will serve as a virtual method anchor 251 bool Dependence::isScalar(unsigned level) const { 252 return false; 253 } 254 255 256 //===----------------------------------------------------------------------===// 257 // FullDependence methods 258 259 FullDependence::FullDependence(Instruction *Source, Instruction *Destination, 260 bool PossiblyLoopIndependent, 261 unsigned CommonLevels) 262 : Dependence(Source, Destination), Levels(CommonLevels), 263 LoopIndependent(PossiblyLoopIndependent) { 264 Consistent = true; 265 if (CommonLevels) 266 DV = std::make_unique<DVEntry[]>(CommonLevels); 267 } 268 269 // The rest are simple getters that hide the implementation. 270 271 // getDirection - Returns the direction associated with a particular level. 272 unsigned FullDependence::getDirection(unsigned Level) const { 273 assert(0 < Level && Level <= Levels && "Level out of range"); 274 return DV[Level - 1].Direction; 275 } 276 277 278 // Returns the distance (or NULL) associated with a particular level. 279 const SCEV *FullDependence::getDistance(unsigned Level) const { 280 assert(0 < Level && Level <= Levels && "Level out of range"); 281 return DV[Level - 1].Distance; 282 } 283 284 285 // Returns true if a particular level is scalar; that is, 286 // if no subscript in the source or destination mention the induction 287 // variable associated with the loop at this level. 288 bool FullDependence::isScalar(unsigned Level) const { 289 assert(0 < Level && Level <= Levels && "Level out of range"); 290 return DV[Level - 1].Scalar; 291 } 292 293 294 // Returns true if peeling the first iteration from this loop 295 // will break this dependence. 296 bool FullDependence::isPeelFirst(unsigned Level) const { 297 assert(0 < Level && Level <= Levels && "Level out of range"); 298 return DV[Level - 1].PeelFirst; 299 } 300 301 302 // Returns true if peeling the last iteration from this loop 303 // will break this dependence. 304 bool FullDependence::isPeelLast(unsigned Level) const { 305 assert(0 < Level && Level <= Levels && "Level out of range"); 306 return DV[Level - 1].PeelLast; 307 } 308 309 310 // Returns true if splitting this loop will break the dependence. 311 bool FullDependence::isSplitable(unsigned Level) const { 312 assert(0 < Level && Level <= Levels && "Level out of range"); 313 return DV[Level - 1].Splitable; 314 } 315 316 317 //===----------------------------------------------------------------------===// 318 // DependenceInfo::Constraint methods 319 320 // If constraint is a point <X, Y>, returns X. 321 // Otherwise assert. 322 const SCEV *DependenceInfo::Constraint::getX() const { 323 assert(Kind == Point && "Kind should be Point"); 324 return A; 325 } 326 327 328 // If constraint is a point <X, Y>, returns Y. 329 // Otherwise assert. 330 const SCEV *DependenceInfo::Constraint::getY() const { 331 assert(Kind == Point && "Kind should be Point"); 332 return B; 333 } 334 335 336 // If constraint is a line AX + BY = C, returns A. 337 // Otherwise assert. 338 const SCEV *DependenceInfo::Constraint::getA() const { 339 assert((Kind == Line || Kind == Distance) && 340 "Kind should be Line (or Distance)"); 341 return A; 342 } 343 344 345 // If constraint is a line AX + BY = C, returns B. 346 // Otherwise assert. 347 const SCEV *DependenceInfo::Constraint::getB() const { 348 assert((Kind == Line || Kind == Distance) && 349 "Kind should be Line (or Distance)"); 350 return B; 351 } 352 353 354 // If constraint is a line AX + BY = C, returns C. 355 // Otherwise assert. 356 const SCEV *DependenceInfo::Constraint::getC() const { 357 assert((Kind == Line || Kind == Distance) && 358 "Kind should be Line (or Distance)"); 359 return C; 360 } 361 362 363 // If constraint is a distance, returns D. 364 // Otherwise assert. 365 const SCEV *DependenceInfo::Constraint::getD() const { 366 assert(Kind == Distance && "Kind should be Distance"); 367 return SE->getNegativeSCEV(C); 368 } 369 370 371 // Returns the loop associated with this constraint. 372 const Loop *DependenceInfo::Constraint::getAssociatedLoop() const { 373 assert((Kind == Distance || Kind == Line || Kind == Point) && 374 "Kind should be Distance, Line, or Point"); 375 return AssociatedLoop; 376 } 377 378 void DependenceInfo::Constraint::setPoint(const SCEV *X, const SCEV *Y, 379 const Loop *CurLoop) { 380 Kind = Point; 381 A = X; 382 B = Y; 383 AssociatedLoop = CurLoop; 384 } 385 386 void DependenceInfo::Constraint::setLine(const SCEV *AA, const SCEV *BB, 387 const SCEV *CC, const Loop *CurLoop) { 388 Kind = Line; 389 A = AA; 390 B = BB; 391 C = CC; 392 AssociatedLoop = CurLoop; 393 } 394 395 void DependenceInfo::Constraint::setDistance(const SCEV *D, 396 const Loop *CurLoop) { 397 Kind = Distance; 398 A = SE->getOne(D->getType()); 399 B = SE->getNegativeSCEV(A); 400 C = SE->getNegativeSCEV(D); 401 AssociatedLoop = CurLoop; 402 } 403 404 void DependenceInfo::Constraint::setEmpty() { Kind = Empty; } 405 406 void DependenceInfo::Constraint::setAny(ScalarEvolution *NewSE) { 407 SE = NewSE; 408 Kind = Any; 409 } 410 411 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 412 // For debugging purposes. Dumps the constraint out to OS. 413 LLVM_DUMP_METHOD void DependenceInfo::Constraint::dump(raw_ostream &OS) const { 414 if (isEmpty()) 415 OS << " Empty\n"; 416 else if (isAny()) 417 OS << " Any\n"; 418 else if (isPoint()) 419 OS << " Point is <" << *getX() << ", " << *getY() << ">\n"; 420 else if (isDistance()) 421 OS << " Distance is " << *getD() << 422 " (" << *getA() << "*X + " << *getB() << "*Y = " << *getC() << ")\n"; 423 else if (isLine()) 424 OS << " Line is " << *getA() << "*X + " << 425 *getB() << "*Y = " << *getC() << "\n"; 426 else 427 llvm_unreachable("unknown constraint type in Constraint::dump"); 428 } 429 #endif 430 431 432 // Updates X with the intersection 433 // of the Constraints X and Y. Returns true if X has changed. 434 // Corresponds to Figure 4 from the paper 435 // 436 // Practical Dependence Testing 437 // Goff, Kennedy, Tseng 438 // PLDI 1991 439 bool DependenceInfo::intersectConstraints(Constraint *X, const Constraint *Y) { 440 ++DeltaApplications; 441 LLVM_DEBUG(dbgs() << "\tintersect constraints\n"); 442 LLVM_DEBUG(dbgs() << "\t X ="; X->dump(dbgs())); 443 LLVM_DEBUG(dbgs() << "\t Y ="; Y->dump(dbgs())); 444 assert(!Y->isPoint() && "Y must not be a Point"); 445 if (X->isAny()) { 446 if (Y->isAny()) 447 return false; 448 *X = *Y; 449 return true; 450 } 451 if (X->isEmpty()) 452 return false; 453 if (Y->isEmpty()) { 454 X->setEmpty(); 455 return true; 456 } 457 458 if (X->isDistance() && Y->isDistance()) { 459 LLVM_DEBUG(dbgs() << "\t intersect 2 distances\n"); 460 if (isKnownPredicate(CmpInst::ICMP_EQ, X->getD(), Y->getD())) 461 return false; 462 if (isKnownPredicate(CmpInst::ICMP_NE, X->getD(), Y->getD())) { 463 X->setEmpty(); 464 ++DeltaSuccesses; 465 return true; 466 } 467 // Hmmm, interesting situation. 468 // I guess if either is constant, keep it and ignore the other. 469 if (isa<SCEVConstant>(Y->getD())) { 470 *X = *Y; 471 return true; 472 } 473 return false; 474 } 475 476 // At this point, the pseudo-code in Figure 4 of the paper 477 // checks if (X->isPoint() && Y->isPoint()). 478 // This case can't occur in our implementation, 479 // since a Point can only arise as the result of intersecting 480 // two Line constraints, and the right-hand value, Y, is never 481 // the result of an intersection. 482 assert(!(X->isPoint() && Y->isPoint()) && 483 "We shouldn't ever see X->isPoint() && Y->isPoint()"); 484 485 if (X->isLine() && Y->isLine()) { 486 LLVM_DEBUG(dbgs() << "\t intersect 2 lines\n"); 487 const SCEV *Prod1 = SE->getMulExpr(X->getA(), Y->getB()); 488 const SCEV *Prod2 = SE->getMulExpr(X->getB(), Y->getA()); 489 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) { 490 // slopes are equal, so lines are parallel 491 LLVM_DEBUG(dbgs() << "\t\tsame slope\n"); 492 Prod1 = SE->getMulExpr(X->getC(), Y->getB()); 493 Prod2 = SE->getMulExpr(X->getB(), Y->getC()); 494 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) 495 return false; 496 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { 497 X->setEmpty(); 498 ++DeltaSuccesses; 499 return true; 500 } 501 return false; 502 } 503 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { 504 // slopes differ, so lines intersect 505 LLVM_DEBUG(dbgs() << "\t\tdifferent slopes\n"); 506 const SCEV *C1B2 = SE->getMulExpr(X->getC(), Y->getB()); 507 const SCEV *C1A2 = SE->getMulExpr(X->getC(), Y->getA()); 508 const SCEV *C2B1 = SE->getMulExpr(Y->getC(), X->getB()); 509 const SCEV *C2A1 = SE->getMulExpr(Y->getC(), X->getA()); 510 const SCEV *A1B2 = SE->getMulExpr(X->getA(), Y->getB()); 511 const SCEV *A2B1 = SE->getMulExpr(Y->getA(), X->getB()); 512 const SCEVConstant *C1A2_C2A1 = 513 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1A2, C2A1)); 514 const SCEVConstant *C1B2_C2B1 = 515 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1B2, C2B1)); 516 const SCEVConstant *A1B2_A2B1 = 517 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A1B2, A2B1)); 518 const SCEVConstant *A2B1_A1B2 = 519 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A2B1, A1B2)); 520 if (!C1B2_C2B1 || !C1A2_C2A1 || 521 !A1B2_A2B1 || !A2B1_A1B2) 522 return false; 523 APInt Xtop = C1B2_C2B1->getAPInt(); 524 APInt Xbot = A1B2_A2B1->getAPInt(); 525 APInt Ytop = C1A2_C2A1->getAPInt(); 526 APInt Ybot = A2B1_A1B2->getAPInt(); 527 LLVM_DEBUG(dbgs() << "\t\tXtop = " << Xtop << "\n"); 528 LLVM_DEBUG(dbgs() << "\t\tXbot = " << Xbot << "\n"); 529 LLVM_DEBUG(dbgs() << "\t\tYtop = " << Ytop << "\n"); 530 LLVM_DEBUG(dbgs() << "\t\tYbot = " << Ybot << "\n"); 531 APInt Xq = Xtop; // these need to be initialized, even 532 APInt Xr = Xtop; // though they're just going to be overwritten 533 APInt::sdivrem(Xtop, Xbot, Xq, Xr); 534 APInt Yq = Ytop; 535 APInt Yr = Ytop; 536 APInt::sdivrem(Ytop, Ybot, Yq, Yr); 537 if (Xr != 0 || Yr != 0) { 538 X->setEmpty(); 539 ++DeltaSuccesses; 540 return true; 541 } 542 LLVM_DEBUG(dbgs() << "\t\tX = " << Xq << ", Y = " << Yq << "\n"); 543 if (Xq.slt(0) || Yq.slt(0)) { 544 X->setEmpty(); 545 ++DeltaSuccesses; 546 return true; 547 } 548 if (const SCEVConstant *CUB = 549 collectConstantUpperBound(X->getAssociatedLoop(), Prod1->getType())) { 550 const APInt &UpperBound = CUB->getAPInt(); 551 LLVM_DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n"); 552 if (Xq.sgt(UpperBound) || Yq.sgt(UpperBound)) { 553 X->setEmpty(); 554 ++DeltaSuccesses; 555 return true; 556 } 557 } 558 X->setPoint(SE->getConstant(Xq), 559 SE->getConstant(Yq), 560 X->getAssociatedLoop()); 561 ++DeltaSuccesses; 562 return true; 563 } 564 return false; 565 } 566 567 // if (X->isLine() && Y->isPoint()) This case can't occur. 568 assert(!(X->isLine() && Y->isPoint()) && "This case should never occur"); 569 570 if (X->isPoint() && Y->isLine()) { 571 LLVM_DEBUG(dbgs() << "\t intersect Point and Line\n"); 572 const SCEV *A1X1 = SE->getMulExpr(Y->getA(), X->getX()); 573 const SCEV *B1Y1 = SE->getMulExpr(Y->getB(), X->getY()); 574 const SCEV *Sum = SE->getAddExpr(A1X1, B1Y1); 575 if (isKnownPredicate(CmpInst::ICMP_EQ, Sum, Y->getC())) 576 return false; 577 if (isKnownPredicate(CmpInst::ICMP_NE, Sum, Y->getC())) { 578 X->setEmpty(); 579 ++DeltaSuccesses; 580 return true; 581 } 582 return false; 583 } 584 585 llvm_unreachable("shouldn't reach the end of Constraint intersection"); 586 return false; 587 } 588 589 590 //===----------------------------------------------------------------------===// 591 // DependenceInfo methods 592 593 // For debugging purposes. Dumps a dependence to OS. 594 void Dependence::dump(raw_ostream &OS) const { 595 bool Splitable = false; 596 if (isConfused()) 597 OS << "confused"; 598 else { 599 if (isConsistent()) 600 OS << "consistent "; 601 if (isFlow()) 602 OS << "flow"; 603 else if (isOutput()) 604 OS << "output"; 605 else if (isAnti()) 606 OS << "anti"; 607 else if (isInput()) 608 OS << "input"; 609 unsigned Levels = getLevels(); 610 OS << " ["; 611 for (unsigned II = 1; II <= Levels; ++II) { 612 if (isSplitable(II)) 613 Splitable = true; 614 if (isPeelFirst(II)) 615 OS << 'p'; 616 const SCEV *Distance = getDistance(II); 617 if (Distance) 618 OS << *Distance; 619 else if (isScalar(II)) 620 OS << "S"; 621 else { 622 unsigned Direction = getDirection(II); 623 if (Direction == DVEntry::ALL) 624 OS << "*"; 625 else { 626 if (Direction & DVEntry::LT) 627 OS << "<"; 628 if (Direction & DVEntry::EQ) 629 OS << "="; 630 if (Direction & DVEntry::GT) 631 OS << ">"; 632 } 633 } 634 if (isPeelLast(II)) 635 OS << 'p'; 636 if (II < Levels) 637 OS << " "; 638 } 639 if (isLoopIndependent()) 640 OS << "|<"; 641 OS << "]"; 642 if (Splitable) 643 OS << " splitable"; 644 } 645 OS << "!\n"; 646 } 647 648 // Returns NoAlias/MayAliass/MustAlias for two memory locations based upon their 649 // underlaying objects. If LocA and LocB are known to not alias (for any reason: 650 // tbaa, non-overlapping regions etc), then it is known there is no dependecy. 651 // Otherwise the underlying objects are checked to see if they point to 652 // different identifiable objects. 653 static AliasResult underlyingObjectsAlias(AAResults *AA, 654 const DataLayout &DL, 655 const MemoryLocation &LocA, 656 const MemoryLocation &LocB) { 657 // Check the original locations (minus size) for noalias, which can happen for 658 // tbaa, incompatible underlying object locations, etc. 659 MemoryLocation LocAS = 660 MemoryLocation::getBeforeOrAfter(LocA.Ptr, LocA.AATags); 661 MemoryLocation LocBS = 662 MemoryLocation::getBeforeOrAfter(LocB.Ptr, LocB.AATags); 663 if (AA->isNoAlias(LocAS, LocBS)) 664 return AliasResult::NoAlias; 665 666 // Check the underlying objects are the same 667 const Value *AObj = getUnderlyingObject(LocA.Ptr); 668 const Value *BObj = getUnderlyingObject(LocB.Ptr); 669 670 // If the underlying objects are the same, they must alias 671 if (AObj == BObj) 672 return AliasResult::MustAlias; 673 674 // We may have hit the recursion limit for underlying objects, or have 675 // underlying objects where we don't know they will alias. 676 if (!isIdentifiedObject(AObj) || !isIdentifiedObject(BObj)) 677 return AliasResult::MayAlias; 678 679 // Otherwise we know the objects are different and both identified objects so 680 // must not alias. 681 return AliasResult::NoAlias; 682 } 683 684 685 // Returns true if the load or store can be analyzed. Atomic and volatile 686 // operations have properties which this analysis does not understand. 687 static 688 bool isLoadOrStore(const Instruction *I) { 689 if (const LoadInst *LI = dyn_cast<LoadInst>(I)) 690 return LI->isUnordered(); 691 else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) 692 return SI->isUnordered(); 693 return false; 694 } 695 696 697 // Examines the loop nesting of the Src and Dst 698 // instructions and establishes their shared loops. Sets the variables 699 // CommonLevels, SrcLevels, and MaxLevels. 700 // The source and destination instructions needn't be contained in the same 701 // loop. The routine establishNestingLevels finds the level of most deeply 702 // nested loop that contains them both, CommonLevels. An instruction that's 703 // not contained in a loop is at level = 0. MaxLevels is equal to the level 704 // of the source plus the level of the destination, minus CommonLevels. 705 // This lets us allocate vectors MaxLevels in length, with room for every 706 // distinct loop referenced in both the source and destination subscripts. 707 // The variable SrcLevels is the nesting depth of the source instruction. 708 // It's used to help calculate distinct loops referenced by the destination. 709 // Here's the map from loops to levels: 710 // 0 - unused 711 // 1 - outermost common loop 712 // ... - other common loops 713 // CommonLevels - innermost common loop 714 // ... - loops containing Src but not Dst 715 // SrcLevels - innermost loop containing Src but not Dst 716 // ... - loops containing Dst but not Src 717 // MaxLevels - innermost loops containing Dst but not Src 718 // Consider the follow code fragment: 719 // for (a = ...) { 720 // for (b = ...) { 721 // for (c = ...) { 722 // for (d = ...) { 723 // A[] = ...; 724 // } 725 // } 726 // for (e = ...) { 727 // for (f = ...) { 728 // for (g = ...) { 729 // ... = A[]; 730 // } 731 // } 732 // } 733 // } 734 // } 735 // If we're looking at the possibility of a dependence between the store 736 // to A (the Src) and the load from A (the Dst), we'll note that they 737 // have 2 loops in common, so CommonLevels will equal 2 and the direction 738 // vector for Result will have 2 entries. SrcLevels = 4 and MaxLevels = 7. 739 // A map from loop names to loop numbers would look like 740 // a - 1 741 // b - 2 = CommonLevels 742 // c - 3 743 // d - 4 = SrcLevels 744 // e - 5 745 // f - 6 746 // g - 7 = MaxLevels 747 void DependenceInfo::establishNestingLevels(const Instruction *Src, 748 const Instruction *Dst) { 749 const BasicBlock *SrcBlock = Src->getParent(); 750 const BasicBlock *DstBlock = Dst->getParent(); 751 unsigned SrcLevel = LI->getLoopDepth(SrcBlock); 752 unsigned DstLevel = LI->getLoopDepth(DstBlock); 753 const Loop *SrcLoop = LI->getLoopFor(SrcBlock); 754 const Loop *DstLoop = LI->getLoopFor(DstBlock); 755 SrcLevels = SrcLevel; 756 MaxLevels = SrcLevel + DstLevel; 757 while (SrcLevel > DstLevel) { 758 SrcLoop = SrcLoop->getParentLoop(); 759 SrcLevel--; 760 } 761 while (DstLevel > SrcLevel) { 762 DstLoop = DstLoop->getParentLoop(); 763 DstLevel--; 764 } 765 while (SrcLoop != DstLoop) { 766 SrcLoop = SrcLoop->getParentLoop(); 767 DstLoop = DstLoop->getParentLoop(); 768 SrcLevel--; 769 } 770 CommonLevels = SrcLevel; 771 MaxLevels -= CommonLevels; 772 } 773 774 775 // Given one of the loops containing the source, return 776 // its level index in our numbering scheme. 777 unsigned DependenceInfo::mapSrcLoop(const Loop *SrcLoop) const { 778 return SrcLoop->getLoopDepth(); 779 } 780 781 782 // Given one of the loops containing the destination, 783 // return its level index in our numbering scheme. 784 unsigned DependenceInfo::mapDstLoop(const Loop *DstLoop) const { 785 unsigned D = DstLoop->getLoopDepth(); 786 if (D > CommonLevels) 787 return D - CommonLevels + SrcLevels; 788 else 789 return D; 790 } 791 792 793 // Returns true if Expression is loop invariant in LoopNest. 794 bool DependenceInfo::isLoopInvariant(const SCEV *Expression, 795 const Loop *LoopNest) const { 796 if (!LoopNest) 797 return true; 798 return SE->isLoopInvariant(Expression, LoopNest) && 799 isLoopInvariant(Expression, LoopNest->getParentLoop()); 800 } 801 802 803 804 // Finds the set of loops from the LoopNest that 805 // have a level <= CommonLevels and are referred to by the SCEV Expression. 806 void DependenceInfo::collectCommonLoops(const SCEV *Expression, 807 const Loop *LoopNest, 808 SmallBitVector &Loops) const { 809 while (LoopNest) { 810 unsigned Level = LoopNest->getLoopDepth(); 811 if (Level <= CommonLevels && !SE->isLoopInvariant(Expression, LoopNest)) 812 Loops.set(Level); 813 LoopNest = LoopNest->getParentLoop(); 814 } 815 } 816 817 void DependenceInfo::unifySubscriptType(ArrayRef<Subscript *> Pairs) { 818 819 unsigned widestWidthSeen = 0; 820 Type *widestType; 821 822 // Go through each pair and find the widest bit to which we need 823 // to extend all of them. 824 for (Subscript *Pair : Pairs) { 825 const SCEV *Src = Pair->Src; 826 const SCEV *Dst = Pair->Dst; 827 IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType()); 828 IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType()); 829 if (SrcTy == nullptr || DstTy == nullptr) { 830 assert(SrcTy == DstTy && "This function only unify integer types and " 831 "expect Src and Dst share the same type " 832 "otherwise."); 833 continue; 834 } 835 if (SrcTy->getBitWidth() > widestWidthSeen) { 836 widestWidthSeen = SrcTy->getBitWidth(); 837 widestType = SrcTy; 838 } 839 if (DstTy->getBitWidth() > widestWidthSeen) { 840 widestWidthSeen = DstTy->getBitWidth(); 841 widestType = DstTy; 842 } 843 } 844 845 846 assert(widestWidthSeen > 0); 847 848 // Now extend each pair to the widest seen. 849 for (Subscript *Pair : Pairs) { 850 const SCEV *Src = Pair->Src; 851 const SCEV *Dst = Pair->Dst; 852 IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType()); 853 IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType()); 854 if (SrcTy == nullptr || DstTy == nullptr) { 855 assert(SrcTy == DstTy && "This function only unify integer types and " 856 "expect Src and Dst share the same type " 857 "otherwise."); 858 continue; 859 } 860 if (SrcTy->getBitWidth() < widestWidthSeen) 861 // Sign-extend Src to widestType 862 Pair->Src = SE->getSignExtendExpr(Src, widestType); 863 if (DstTy->getBitWidth() < widestWidthSeen) { 864 // Sign-extend Dst to widestType 865 Pair->Dst = SE->getSignExtendExpr(Dst, widestType); 866 } 867 } 868 } 869 870 // removeMatchingExtensions - Examines a subscript pair. 871 // If the source and destination are identically sign (or zero) 872 // extended, it strips off the extension in an effect to simplify 873 // the actual analysis. 874 void DependenceInfo::removeMatchingExtensions(Subscript *Pair) { 875 const SCEV *Src = Pair->Src; 876 const SCEV *Dst = Pair->Dst; 877 if ((isa<SCEVZeroExtendExpr>(Src) && isa<SCEVZeroExtendExpr>(Dst)) || 878 (isa<SCEVSignExtendExpr>(Src) && isa<SCEVSignExtendExpr>(Dst))) { 879 const SCEVIntegralCastExpr *SrcCast = cast<SCEVIntegralCastExpr>(Src); 880 const SCEVIntegralCastExpr *DstCast = cast<SCEVIntegralCastExpr>(Dst); 881 const SCEV *SrcCastOp = SrcCast->getOperand(); 882 const SCEV *DstCastOp = DstCast->getOperand(); 883 if (SrcCastOp->getType() == DstCastOp->getType()) { 884 Pair->Src = SrcCastOp; 885 Pair->Dst = DstCastOp; 886 } 887 } 888 } 889 890 // Examine the scev and return true iff it's linear. 891 // Collect any loops mentioned in the set of "Loops". 892 bool DependenceInfo::checkSubscript(const SCEV *Expr, const Loop *LoopNest, 893 SmallBitVector &Loops, bool IsSrc) { 894 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 895 if (!AddRec) 896 return isLoopInvariant(Expr, LoopNest); 897 const SCEV *Start = AddRec->getStart(); 898 const SCEV *Step = AddRec->getStepRecurrence(*SE); 899 const SCEV *UB = SE->getBackedgeTakenCount(AddRec->getLoop()); 900 if (!isa<SCEVCouldNotCompute>(UB)) { 901 if (SE->getTypeSizeInBits(Start->getType()) < 902 SE->getTypeSizeInBits(UB->getType())) { 903 if (!AddRec->getNoWrapFlags()) 904 return false; 905 } 906 } 907 if (!isLoopInvariant(Step, LoopNest)) 908 return false; 909 if (IsSrc) 910 Loops.set(mapSrcLoop(AddRec->getLoop())); 911 else 912 Loops.set(mapDstLoop(AddRec->getLoop())); 913 return checkSubscript(Start, LoopNest, Loops, IsSrc); 914 } 915 916 // Examine the scev and return true iff it's linear. 917 // Collect any loops mentioned in the set of "Loops". 918 bool DependenceInfo::checkSrcSubscript(const SCEV *Src, const Loop *LoopNest, 919 SmallBitVector &Loops) { 920 return checkSubscript(Src, LoopNest, Loops, true); 921 } 922 923 // Examine the scev and return true iff it's linear. 924 // Collect any loops mentioned in the set of "Loops". 925 bool DependenceInfo::checkDstSubscript(const SCEV *Dst, const Loop *LoopNest, 926 SmallBitVector &Loops) { 927 return checkSubscript(Dst, LoopNest, Loops, false); 928 } 929 930 931 // Examines the subscript pair (the Src and Dst SCEVs) 932 // and classifies it as either ZIV, SIV, RDIV, MIV, or Nonlinear. 933 // Collects the associated loops in a set. 934 DependenceInfo::Subscript::ClassificationKind 935 DependenceInfo::classifyPair(const SCEV *Src, const Loop *SrcLoopNest, 936 const SCEV *Dst, const Loop *DstLoopNest, 937 SmallBitVector &Loops) { 938 SmallBitVector SrcLoops(MaxLevels + 1); 939 SmallBitVector DstLoops(MaxLevels + 1); 940 if (!checkSrcSubscript(Src, SrcLoopNest, SrcLoops)) 941 return Subscript::NonLinear; 942 if (!checkDstSubscript(Dst, DstLoopNest, DstLoops)) 943 return Subscript::NonLinear; 944 Loops = SrcLoops; 945 Loops |= DstLoops; 946 unsigned N = Loops.count(); 947 if (N == 0) 948 return Subscript::ZIV; 949 if (N == 1) 950 return Subscript::SIV; 951 if (N == 2 && (SrcLoops.count() == 0 || 952 DstLoops.count() == 0 || 953 (SrcLoops.count() == 1 && DstLoops.count() == 1))) 954 return Subscript::RDIV; 955 return Subscript::MIV; 956 } 957 958 959 // A wrapper around SCEV::isKnownPredicate. 960 // Looks for cases where we're interested in comparing for equality. 961 // If both X and Y have been identically sign or zero extended, 962 // it strips off the (confusing) extensions before invoking 963 // SCEV::isKnownPredicate. Perhaps, someday, the ScalarEvolution package 964 // will be similarly updated. 965 // 966 // If SCEV::isKnownPredicate can't prove the predicate, 967 // we try simple subtraction, which seems to help in some cases 968 // involving symbolics. 969 bool DependenceInfo::isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *X, 970 const SCEV *Y) const { 971 if (Pred == CmpInst::ICMP_EQ || 972 Pred == CmpInst::ICMP_NE) { 973 if ((isa<SCEVSignExtendExpr>(X) && 974 isa<SCEVSignExtendExpr>(Y)) || 975 (isa<SCEVZeroExtendExpr>(X) && 976 isa<SCEVZeroExtendExpr>(Y))) { 977 const SCEVIntegralCastExpr *CX = cast<SCEVIntegralCastExpr>(X); 978 const SCEVIntegralCastExpr *CY = cast<SCEVIntegralCastExpr>(Y); 979 const SCEV *Xop = CX->getOperand(); 980 const SCEV *Yop = CY->getOperand(); 981 if (Xop->getType() == Yop->getType()) { 982 X = Xop; 983 Y = Yop; 984 } 985 } 986 } 987 if (SE->isKnownPredicate(Pred, X, Y)) 988 return true; 989 // If SE->isKnownPredicate can't prove the condition, 990 // we try the brute-force approach of subtracting 991 // and testing the difference. 992 // By testing with SE->isKnownPredicate first, we avoid 993 // the possibility of overflow when the arguments are constants. 994 const SCEV *Delta = SE->getMinusSCEV(X, Y); 995 switch (Pred) { 996 case CmpInst::ICMP_EQ: 997 return Delta->isZero(); 998 case CmpInst::ICMP_NE: 999 return SE->isKnownNonZero(Delta); 1000 case CmpInst::ICMP_SGE: 1001 return SE->isKnownNonNegative(Delta); 1002 case CmpInst::ICMP_SLE: 1003 return SE->isKnownNonPositive(Delta); 1004 case CmpInst::ICMP_SGT: 1005 return SE->isKnownPositive(Delta); 1006 case CmpInst::ICMP_SLT: 1007 return SE->isKnownNegative(Delta); 1008 default: 1009 llvm_unreachable("unexpected predicate in isKnownPredicate"); 1010 } 1011 } 1012 1013 /// Compare to see if S is less than Size, using isKnownNegative(S - max(Size, 1)) 1014 /// with some extra checking if S is an AddRec and we can prove less-than using 1015 /// the loop bounds. 1016 bool DependenceInfo::isKnownLessThan(const SCEV *S, const SCEV *Size) const { 1017 // First unify to the same type 1018 auto *SType = dyn_cast<IntegerType>(S->getType()); 1019 auto *SizeType = dyn_cast<IntegerType>(Size->getType()); 1020 if (!SType || !SizeType) 1021 return false; 1022 Type *MaxType = 1023 (SType->getBitWidth() >= SizeType->getBitWidth()) ? SType : SizeType; 1024 S = SE->getTruncateOrZeroExtend(S, MaxType); 1025 Size = SE->getTruncateOrZeroExtend(Size, MaxType); 1026 1027 // Special check for addrecs using BE taken count 1028 const SCEV *Bound = SE->getMinusSCEV(S, Size); 1029 if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Bound)) { 1030 if (AddRec->isAffine()) { 1031 const SCEV *BECount = SE->getBackedgeTakenCount(AddRec->getLoop()); 1032 if (!isa<SCEVCouldNotCompute>(BECount)) { 1033 const SCEV *Limit = AddRec->evaluateAtIteration(BECount, *SE); 1034 if (SE->isKnownNegative(Limit)) 1035 return true; 1036 } 1037 } 1038 } 1039 1040 // Check using normal isKnownNegative 1041 const SCEV *LimitedBound = 1042 SE->getMinusSCEV(S, SE->getSMaxExpr(Size, SE->getOne(Size->getType()))); 1043 return SE->isKnownNegative(LimitedBound); 1044 } 1045 1046 bool DependenceInfo::isKnownNonNegative(const SCEV *S, const Value *Ptr) const { 1047 bool Inbounds = false; 1048 if (auto *SrcGEP = dyn_cast<GetElementPtrInst>(Ptr)) 1049 Inbounds = SrcGEP->isInBounds(); 1050 if (Inbounds) { 1051 if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) { 1052 if (AddRec->isAffine()) { 1053 // We know S is for Ptr, the operand on a load/store, so doesn't wrap. 1054 // If both parts are NonNegative, the end result will be NonNegative 1055 if (SE->isKnownNonNegative(AddRec->getStart()) && 1056 SE->isKnownNonNegative(AddRec->getOperand(1))) 1057 return true; 1058 } 1059 } 1060 } 1061 1062 return SE->isKnownNonNegative(S); 1063 } 1064 1065 // All subscripts are all the same type. 1066 // Loop bound may be smaller (e.g., a char). 1067 // Should zero extend loop bound, since it's always >= 0. 1068 // This routine collects upper bound and extends or truncates if needed. 1069 // Truncating is safe when subscripts are known not to wrap. Cases without 1070 // nowrap flags should have been rejected earlier. 1071 // Return null if no bound available. 1072 const SCEV *DependenceInfo::collectUpperBound(const Loop *L, Type *T) const { 1073 if (SE->hasLoopInvariantBackedgeTakenCount(L)) { 1074 const SCEV *UB = SE->getBackedgeTakenCount(L); 1075 return SE->getTruncateOrZeroExtend(UB, T); 1076 } 1077 return nullptr; 1078 } 1079 1080 1081 // Calls collectUpperBound(), then attempts to cast it to SCEVConstant. 1082 // If the cast fails, returns NULL. 1083 const SCEVConstant *DependenceInfo::collectConstantUpperBound(const Loop *L, 1084 Type *T) const { 1085 if (const SCEV *UB = collectUpperBound(L, T)) 1086 return dyn_cast<SCEVConstant>(UB); 1087 return nullptr; 1088 } 1089 1090 1091 // testZIV - 1092 // When we have a pair of subscripts of the form [c1] and [c2], 1093 // where c1 and c2 are both loop invariant, we attack it using 1094 // the ZIV test. Basically, we test by comparing the two values, 1095 // but there are actually three possible results: 1096 // 1) the values are equal, so there's a dependence 1097 // 2) the values are different, so there's no dependence 1098 // 3) the values might be equal, so we have to assume a dependence. 1099 // 1100 // Return true if dependence disproved. 1101 bool DependenceInfo::testZIV(const SCEV *Src, const SCEV *Dst, 1102 FullDependence &Result) const { 1103 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n"); 1104 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n"); 1105 ++ZIVapplications; 1106 if (isKnownPredicate(CmpInst::ICMP_EQ, Src, Dst)) { 1107 LLVM_DEBUG(dbgs() << " provably dependent\n"); 1108 return false; // provably dependent 1109 } 1110 if (isKnownPredicate(CmpInst::ICMP_NE, Src, Dst)) { 1111 LLVM_DEBUG(dbgs() << " provably independent\n"); 1112 ++ZIVindependence; 1113 return true; // provably independent 1114 } 1115 LLVM_DEBUG(dbgs() << " possibly dependent\n"); 1116 Result.Consistent = false; 1117 return false; // possibly dependent 1118 } 1119 1120 1121 // strongSIVtest - 1122 // From the paper, Practical Dependence Testing, Section 4.2.1 1123 // 1124 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 + a*i], 1125 // where i is an induction variable, c1 and c2 are loop invariant, 1126 // and a is a constant, we can solve it exactly using the Strong SIV test. 1127 // 1128 // Can prove independence. Failing that, can compute distance (and direction). 1129 // In the presence of symbolic terms, we can sometimes make progress. 1130 // 1131 // If there's a dependence, 1132 // 1133 // c1 + a*i = c2 + a*i' 1134 // 1135 // The dependence distance is 1136 // 1137 // d = i' - i = (c1 - c2)/a 1138 // 1139 // A dependence only exists if d is an integer and abs(d) <= U, where U is the 1140 // loop's upper bound. If a dependence exists, the dependence direction is 1141 // defined as 1142 // 1143 // { < if d > 0 1144 // direction = { = if d = 0 1145 // { > if d < 0 1146 // 1147 // Return true if dependence disproved. 1148 bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst, 1149 const SCEV *DstConst, const Loop *CurLoop, 1150 unsigned Level, FullDependence &Result, 1151 Constraint &NewConstraint) const { 1152 LLVM_DEBUG(dbgs() << "\tStrong SIV test\n"); 1153 LLVM_DEBUG(dbgs() << "\t Coeff = " << *Coeff); 1154 LLVM_DEBUG(dbgs() << ", " << *Coeff->getType() << "\n"); 1155 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst); 1156 LLVM_DEBUG(dbgs() << ", " << *SrcConst->getType() << "\n"); 1157 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst); 1158 LLVM_DEBUG(dbgs() << ", " << *DstConst->getType() << "\n"); 1159 ++StrongSIVapplications; 1160 assert(0 < Level && Level <= CommonLevels && "level out of range"); 1161 Level--; 1162 1163 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); 1164 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta); 1165 LLVM_DEBUG(dbgs() << ", " << *Delta->getType() << "\n"); 1166 1167 // check that |Delta| < iteration count 1168 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1169 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound); 1170 LLVM_DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n"); 1171 const SCEV *AbsDelta = 1172 SE->isKnownNonNegative(Delta) ? Delta : SE->getNegativeSCEV(Delta); 1173 const SCEV *AbsCoeff = 1174 SE->isKnownNonNegative(Coeff) ? Coeff : SE->getNegativeSCEV(Coeff); 1175 const SCEV *Product = SE->getMulExpr(UpperBound, AbsCoeff); 1176 if (isKnownPredicate(CmpInst::ICMP_SGT, AbsDelta, Product)) { 1177 // Distance greater than trip count - no dependence 1178 ++StrongSIVindependence; 1179 ++StrongSIVsuccesses; 1180 return true; 1181 } 1182 } 1183 1184 // Can we compute distance? 1185 if (isa<SCEVConstant>(Delta) && isa<SCEVConstant>(Coeff)) { 1186 APInt ConstDelta = cast<SCEVConstant>(Delta)->getAPInt(); 1187 APInt ConstCoeff = cast<SCEVConstant>(Coeff)->getAPInt(); 1188 APInt Distance = ConstDelta; // these need to be initialized 1189 APInt Remainder = ConstDelta; 1190 APInt::sdivrem(ConstDelta, ConstCoeff, Distance, Remainder); 1191 LLVM_DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); 1192 LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1193 // Make sure Coeff divides Delta exactly 1194 if (Remainder != 0) { 1195 // Coeff doesn't divide Distance, no dependence 1196 ++StrongSIVindependence; 1197 ++StrongSIVsuccesses; 1198 return true; 1199 } 1200 Result.DV[Level].Distance = SE->getConstant(Distance); 1201 NewConstraint.setDistance(SE->getConstant(Distance), CurLoop); 1202 if (Distance.sgt(0)) 1203 Result.DV[Level].Direction &= Dependence::DVEntry::LT; 1204 else if (Distance.slt(0)) 1205 Result.DV[Level].Direction &= Dependence::DVEntry::GT; 1206 else 1207 Result.DV[Level].Direction &= Dependence::DVEntry::EQ; 1208 ++StrongSIVsuccesses; 1209 } 1210 else if (Delta->isZero()) { 1211 // since 0/X == 0 1212 Result.DV[Level].Distance = Delta; 1213 NewConstraint.setDistance(Delta, CurLoop); 1214 Result.DV[Level].Direction &= Dependence::DVEntry::EQ; 1215 ++StrongSIVsuccesses; 1216 } 1217 else { 1218 if (Coeff->isOne()) { 1219 LLVM_DEBUG(dbgs() << "\t Distance = " << *Delta << "\n"); 1220 Result.DV[Level].Distance = Delta; // since X/1 == X 1221 NewConstraint.setDistance(Delta, CurLoop); 1222 } 1223 else { 1224 Result.Consistent = false; 1225 NewConstraint.setLine(Coeff, 1226 SE->getNegativeSCEV(Coeff), 1227 SE->getNegativeSCEV(Delta), CurLoop); 1228 } 1229 1230 // maybe we can get a useful direction 1231 bool DeltaMaybeZero = !SE->isKnownNonZero(Delta); 1232 bool DeltaMaybePositive = !SE->isKnownNonPositive(Delta); 1233 bool DeltaMaybeNegative = !SE->isKnownNonNegative(Delta); 1234 bool CoeffMaybePositive = !SE->isKnownNonPositive(Coeff); 1235 bool CoeffMaybeNegative = !SE->isKnownNonNegative(Coeff); 1236 // The double negatives above are confusing. 1237 // It helps to read !SE->isKnownNonZero(Delta) 1238 // as "Delta might be Zero" 1239 unsigned NewDirection = Dependence::DVEntry::NONE; 1240 if ((DeltaMaybePositive && CoeffMaybePositive) || 1241 (DeltaMaybeNegative && CoeffMaybeNegative)) 1242 NewDirection = Dependence::DVEntry::LT; 1243 if (DeltaMaybeZero) 1244 NewDirection |= Dependence::DVEntry::EQ; 1245 if ((DeltaMaybeNegative && CoeffMaybePositive) || 1246 (DeltaMaybePositive && CoeffMaybeNegative)) 1247 NewDirection |= Dependence::DVEntry::GT; 1248 if (NewDirection < Result.DV[Level].Direction) 1249 ++StrongSIVsuccesses; 1250 Result.DV[Level].Direction &= NewDirection; 1251 } 1252 return false; 1253 } 1254 1255 1256 // weakCrossingSIVtest - 1257 // From the paper, Practical Dependence Testing, Section 4.2.2 1258 // 1259 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 - a*i], 1260 // where i is an induction variable, c1 and c2 are loop invariant, 1261 // and a is a constant, we can solve it exactly using the 1262 // Weak-Crossing SIV test. 1263 // 1264 // Given c1 + a*i = c2 - a*i', we can look for the intersection of 1265 // the two lines, where i = i', yielding 1266 // 1267 // c1 + a*i = c2 - a*i 1268 // 2a*i = c2 - c1 1269 // i = (c2 - c1)/2a 1270 // 1271 // If i < 0, there is no dependence. 1272 // If i > upperbound, there is no dependence. 1273 // If i = 0 (i.e., if c1 = c2), there's a dependence with distance = 0. 1274 // If i = upperbound, there's a dependence with distance = 0. 1275 // If i is integral, there's a dependence (all directions). 1276 // If the non-integer part = 1/2, there's a dependence (<> directions). 1277 // Otherwise, there's no dependence. 1278 // 1279 // Can prove independence. Failing that, 1280 // can sometimes refine the directions. 1281 // Can determine iteration for splitting. 1282 // 1283 // Return true if dependence disproved. 1284 bool DependenceInfo::weakCrossingSIVtest( 1285 const SCEV *Coeff, const SCEV *SrcConst, const SCEV *DstConst, 1286 const Loop *CurLoop, unsigned Level, FullDependence &Result, 1287 Constraint &NewConstraint, const SCEV *&SplitIter) const { 1288 LLVM_DEBUG(dbgs() << "\tWeak-Crossing SIV test\n"); 1289 LLVM_DEBUG(dbgs() << "\t Coeff = " << *Coeff << "\n"); 1290 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1291 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1292 ++WeakCrossingSIVapplications; 1293 assert(0 < Level && Level <= CommonLevels && "Level out of range"); 1294 Level--; 1295 Result.Consistent = false; 1296 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1297 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1298 NewConstraint.setLine(Coeff, Coeff, Delta, CurLoop); 1299 if (Delta->isZero()) { 1300 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT); 1301 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT); 1302 ++WeakCrossingSIVsuccesses; 1303 if (!Result.DV[Level].Direction) { 1304 ++WeakCrossingSIVindependence; 1305 return true; 1306 } 1307 Result.DV[Level].Distance = Delta; // = 0 1308 return false; 1309 } 1310 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Coeff); 1311 if (!ConstCoeff) 1312 return false; 1313 1314 Result.DV[Level].Splitable = true; 1315 if (SE->isKnownNegative(ConstCoeff)) { 1316 ConstCoeff = dyn_cast<SCEVConstant>(SE->getNegativeSCEV(ConstCoeff)); 1317 assert(ConstCoeff && 1318 "dynamic cast of negative of ConstCoeff should yield constant"); 1319 Delta = SE->getNegativeSCEV(Delta); 1320 } 1321 assert(SE->isKnownPositive(ConstCoeff) && "ConstCoeff should be positive"); 1322 1323 // compute SplitIter for use by DependenceInfo::getSplitIteration() 1324 SplitIter = SE->getUDivExpr( 1325 SE->getSMaxExpr(SE->getZero(Delta->getType()), Delta), 1326 SE->getMulExpr(SE->getConstant(Delta->getType(), 2), ConstCoeff)); 1327 LLVM_DEBUG(dbgs() << "\t Split iter = " << *SplitIter << "\n"); 1328 1329 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1330 if (!ConstDelta) 1331 return false; 1332 1333 // We're certain that ConstCoeff > 0; therefore, 1334 // if Delta < 0, then no dependence. 1335 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1336 LLVM_DEBUG(dbgs() << "\t ConstCoeff = " << *ConstCoeff << "\n"); 1337 if (SE->isKnownNegative(Delta)) { 1338 // No dependence, Delta < 0 1339 ++WeakCrossingSIVindependence; 1340 ++WeakCrossingSIVsuccesses; 1341 return true; 1342 } 1343 1344 // We're certain that Delta > 0 and ConstCoeff > 0. 1345 // Check Delta/(2*ConstCoeff) against upper loop bound 1346 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1347 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1348 const SCEV *ConstantTwo = SE->getConstant(UpperBound->getType(), 2); 1349 const SCEV *ML = SE->getMulExpr(SE->getMulExpr(ConstCoeff, UpperBound), 1350 ConstantTwo); 1351 LLVM_DEBUG(dbgs() << "\t ML = " << *ML << "\n"); 1352 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, ML)) { 1353 // Delta too big, no dependence 1354 ++WeakCrossingSIVindependence; 1355 ++WeakCrossingSIVsuccesses; 1356 return true; 1357 } 1358 if (isKnownPredicate(CmpInst::ICMP_EQ, Delta, ML)) { 1359 // i = i' = UB 1360 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT); 1361 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT); 1362 ++WeakCrossingSIVsuccesses; 1363 if (!Result.DV[Level].Direction) { 1364 ++WeakCrossingSIVindependence; 1365 return true; 1366 } 1367 Result.DV[Level].Splitable = false; 1368 Result.DV[Level].Distance = SE->getZero(Delta->getType()); 1369 return false; 1370 } 1371 } 1372 1373 // check that Coeff divides Delta 1374 APInt APDelta = ConstDelta->getAPInt(); 1375 APInt APCoeff = ConstCoeff->getAPInt(); 1376 APInt Distance = APDelta; // these need to be initialzed 1377 APInt Remainder = APDelta; 1378 APInt::sdivrem(APDelta, APCoeff, Distance, Remainder); 1379 LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1380 if (Remainder != 0) { 1381 // Coeff doesn't divide Delta, no dependence 1382 ++WeakCrossingSIVindependence; 1383 ++WeakCrossingSIVsuccesses; 1384 return true; 1385 } 1386 LLVM_DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); 1387 1388 // if 2*Coeff doesn't divide Delta, then the equal direction isn't possible 1389 APInt Two = APInt(Distance.getBitWidth(), 2, true); 1390 Remainder = Distance.srem(Two); 1391 LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1392 if (Remainder != 0) { 1393 // Equal direction isn't possible 1394 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::EQ); 1395 ++WeakCrossingSIVsuccesses; 1396 } 1397 return false; 1398 } 1399 1400 1401 // Kirch's algorithm, from 1402 // 1403 // Optimizing Supercompilers for Supercomputers 1404 // Michael Wolfe 1405 // MIT Press, 1989 1406 // 1407 // Program 2.1, page 29. 1408 // Computes the GCD of AM and BM. 1409 // Also finds a solution to the equation ax - by = gcd(a, b). 1410 // Returns true if dependence disproved; i.e., gcd does not divide Delta. 1411 static bool findGCD(unsigned Bits, const APInt &AM, const APInt &BM, 1412 const APInt &Delta, APInt &G, APInt &X, APInt &Y) { 1413 APInt A0(Bits, 1, true), A1(Bits, 0, true); 1414 APInt B0(Bits, 0, true), B1(Bits, 1, true); 1415 APInt G0 = AM.abs(); 1416 APInt G1 = BM.abs(); 1417 APInt Q = G0; // these need to be initialized 1418 APInt R = G0; 1419 APInt::sdivrem(G0, G1, Q, R); 1420 while (R != 0) { 1421 APInt A2 = A0 - Q*A1; A0 = A1; A1 = A2; 1422 APInt B2 = B0 - Q*B1; B0 = B1; B1 = B2; 1423 G0 = G1; G1 = R; 1424 APInt::sdivrem(G0, G1, Q, R); 1425 } 1426 G = G1; 1427 LLVM_DEBUG(dbgs() << "\t GCD = " << G << "\n"); 1428 X = AM.slt(0) ? -A1 : A1; 1429 Y = BM.slt(0) ? B1 : -B1; 1430 1431 // make sure gcd divides Delta 1432 R = Delta.srem(G); 1433 if (R != 0) 1434 return true; // gcd doesn't divide Delta, no dependence 1435 Q = Delta.sdiv(G); 1436 return false; 1437 } 1438 1439 static APInt floorOfQuotient(const APInt &A, const APInt &B) { 1440 APInt Q = A; // these need to be initialized 1441 APInt R = A; 1442 APInt::sdivrem(A, B, Q, R); 1443 if (R == 0) 1444 return Q; 1445 if ((A.sgt(0) && B.sgt(0)) || 1446 (A.slt(0) && B.slt(0))) 1447 return Q; 1448 else 1449 return Q - 1; 1450 } 1451 1452 static APInt ceilingOfQuotient(const APInt &A, const APInt &B) { 1453 APInt Q = A; // these need to be initialized 1454 APInt R = A; 1455 APInt::sdivrem(A, B, Q, R); 1456 if (R == 0) 1457 return Q; 1458 if ((A.sgt(0) && B.sgt(0)) || 1459 (A.slt(0) && B.slt(0))) 1460 return Q + 1; 1461 else 1462 return Q; 1463 } 1464 1465 // exactSIVtest - 1466 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i], 1467 // where i is an induction variable, c1 and c2 are loop invariant, and a1 1468 // and a2 are constant, we can solve it exactly using an algorithm developed 1469 // by Banerjee and Wolfe. See Algorithm 6.2.1 (case 2.5) in: 1470 // 1471 // Dependence Analysis for Supercomputing 1472 // Utpal Banerjee 1473 // Kluwer Academic Publishers, 1988 1474 // 1475 // It's slower than the specialized tests (strong SIV, weak-zero SIV, etc), 1476 // so use them if possible. They're also a bit better with symbolics and, 1477 // in the case of the strong SIV test, can compute Distances. 1478 // 1479 // Return true if dependence disproved. 1480 // 1481 // This is a modified version of the original Banerjee algorithm. The original 1482 // only tested whether Dst depends on Src. This algorithm extends that and 1483 // returns all the dependencies that exist between Dst and Src. 1484 bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, 1485 const SCEV *SrcConst, const SCEV *DstConst, 1486 const Loop *CurLoop, unsigned Level, 1487 FullDependence &Result, 1488 Constraint &NewConstraint) const { 1489 LLVM_DEBUG(dbgs() << "\tExact SIV test\n"); 1490 LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); 1491 LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); 1492 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1493 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1494 ++ExactSIVapplications; 1495 assert(0 < Level && Level <= CommonLevels && "Level out of range"); 1496 Level--; 1497 Result.Consistent = false; 1498 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1499 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1500 NewConstraint.setLine(SrcCoeff, SE->getNegativeSCEV(DstCoeff), Delta, 1501 CurLoop); 1502 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1503 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1504 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1505 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) 1506 return false; 1507 1508 // find gcd 1509 APInt G, X, Y; 1510 APInt AM = ConstSrcCoeff->getAPInt(); 1511 APInt BM = ConstDstCoeff->getAPInt(); 1512 APInt CM = ConstDelta->getAPInt(); 1513 unsigned Bits = AM.getBitWidth(); 1514 if (findGCD(Bits, AM, BM, CM, G, X, Y)) { 1515 // gcd doesn't divide Delta, no dependence 1516 ++ExactSIVindependence; 1517 ++ExactSIVsuccesses; 1518 return true; 1519 } 1520 1521 LLVM_DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); 1522 1523 // since SCEV construction normalizes, LM = 0 1524 APInt UM(Bits, 1, true); 1525 bool UMValid = false; 1526 // UM is perhaps unavailable, let's check 1527 if (const SCEVConstant *CUB = 1528 collectConstantUpperBound(CurLoop, Delta->getType())) { 1529 UM = CUB->getAPInt(); 1530 LLVM_DEBUG(dbgs() << "\t UM = " << UM << "\n"); 1531 UMValid = true; 1532 } 1533 1534 APInt TU(APInt::getSignedMaxValue(Bits)); 1535 APInt TL(APInt::getSignedMinValue(Bits)); 1536 APInt TC = CM.sdiv(G); 1537 APInt TX = X * TC; 1538 APInt TY = Y * TC; 1539 LLVM_DEBUG(dbgs() << "\t TC = " << TC << "\n"); 1540 LLVM_DEBUG(dbgs() << "\t TX = " << TX << "\n"); 1541 LLVM_DEBUG(dbgs() << "\t TY = " << TY << "\n"); 1542 1543 SmallVector<APInt, 2> TLVec, TUVec; 1544 APInt TB = BM.sdiv(G); 1545 if (TB.sgt(0)) { 1546 TLVec.push_back(ceilingOfQuotient(-TX, TB)); 1547 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); 1548 // New bound check - modification to Banerjee's e3 check 1549 if (UMValid) { 1550 TUVec.push_back(floorOfQuotient(UM - TX, TB)); 1551 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); 1552 } 1553 } else { 1554 TUVec.push_back(floorOfQuotient(-TX, TB)); 1555 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); 1556 // New bound check - modification to Banerjee's e3 check 1557 if (UMValid) { 1558 TLVec.push_back(ceilingOfQuotient(UM - TX, TB)); 1559 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); 1560 } 1561 } 1562 1563 APInt TA = AM.sdiv(G); 1564 if (TA.sgt(0)) { 1565 if (UMValid) { 1566 TUVec.push_back(floorOfQuotient(UM - TY, TA)); 1567 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); 1568 } 1569 // New bound check - modification to Banerjee's e3 check 1570 TLVec.push_back(ceilingOfQuotient(-TY, TA)); 1571 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); 1572 } else { 1573 if (UMValid) { 1574 TLVec.push_back(ceilingOfQuotient(UM - TY, TA)); 1575 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); 1576 } 1577 // New bound check - modification to Banerjee's e3 check 1578 TUVec.push_back(floorOfQuotient(-TY, TA)); 1579 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); 1580 } 1581 1582 LLVM_DEBUG(dbgs() << "\t TA = " << TA << "\n"); 1583 LLVM_DEBUG(dbgs() << "\t TB = " << TB << "\n"); 1584 1585 if (TLVec.empty() || TUVec.empty()) 1586 return false; 1587 TL = APIntOps::smax(TLVec.front(), TLVec.back()); 1588 TU = APIntOps::smin(TUVec.front(), TUVec.back()); 1589 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1590 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1591 1592 if (TL.sgt(TU)) { 1593 ++ExactSIVindependence; 1594 ++ExactSIVsuccesses; 1595 return true; 1596 } 1597 1598 // explore directions 1599 unsigned NewDirection = Dependence::DVEntry::NONE; 1600 APInt LowerDistance, UpperDistance; 1601 if (TA.sgt(TB)) { 1602 LowerDistance = (TY - TX) + (TA - TB) * TL; 1603 UpperDistance = (TY - TX) + (TA - TB) * TU; 1604 } else { 1605 LowerDistance = (TY - TX) + (TA - TB) * TU; 1606 UpperDistance = (TY - TX) + (TA - TB) * TL; 1607 } 1608 1609 LLVM_DEBUG(dbgs() << "\t LowerDistance = " << LowerDistance << "\n"); 1610 LLVM_DEBUG(dbgs() << "\t UpperDistance = " << UpperDistance << "\n"); 1611 1612 APInt Zero(Bits, 0, true); 1613 if (LowerDistance.sle(Zero) && UpperDistance.sge(Zero)) { 1614 NewDirection |= Dependence::DVEntry::EQ; 1615 ++ExactSIVsuccesses; 1616 } 1617 if (LowerDistance.slt(0)) { 1618 NewDirection |= Dependence::DVEntry::GT; 1619 ++ExactSIVsuccesses; 1620 } 1621 if (UpperDistance.sgt(0)) { 1622 NewDirection |= Dependence::DVEntry::LT; 1623 ++ExactSIVsuccesses; 1624 } 1625 1626 // finished 1627 Result.DV[Level].Direction &= NewDirection; 1628 if (Result.DV[Level].Direction == Dependence::DVEntry::NONE) 1629 ++ExactSIVindependence; 1630 LLVM_DEBUG(dbgs() << "\t Result = "); 1631 LLVM_DEBUG(Result.dump(dbgs())); 1632 return Result.DV[Level].Direction == Dependence::DVEntry::NONE; 1633 } 1634 1635 1636 // Return true if the divisor evenly divides the dividend. 1637 static 1638 bool isRemainderZero(const SCEVConstant *Dividend, 1639 const SCEVConstant *Divisor) { 1640 const APInt &ConstDividend = Dividend->getAPInt(); 1641 const APInt &ConstDivisor = Divisor->getAPInt(); 1642 return ConstDividend.srem(ConstDivisor) == 0; 1643 } 1644 1645 1646 // weakZeroSrcSIVtest - 1647 // From the paper, Practical Dependence Testing, Section 4.2.2 1648 // 1649 // When we have a pair of subscripts of the form [c1] and [c2 + a*i], 1650 // where i is an induction variable, c1 and c2 are loop invariant, 1651 // and a is a constant, we can solve it exactly using the 1652 // Weak-Zero SIV test. 1653 // 1654 // Given 1655 // 1656 // c1 = c2 + a*i 1657 // 1658 // we get 1659 // 1660 // (c1 - c2)/a = i 1661 // 1662 // If i is not an integer, there's no dependence. 1663 // If i < 0 or > UB, there's no dependence. 1664 // If i = 0, the direction is >= and peeling the 1665 // 1st iteration will break the dependence. 1666 // If i = UB, the direction is <= and peeling the 1667 // last iteration will break the dependence. 1668 // Otherwise, the direction is *. 1669 // 1670 // Can prove independence. Failing that, we can sometimes refine 1671 // the directions. Can sometimes show that first or last 1672 // iteration carries all the dependences (so worth peeling). 1673 // 1674 // (see also weakZeroDstSIVtest) 1675 // 1676 // Return true if dependence disproved. 1677 bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *DstCoeff, 1678 const SCEV *SrcConst, 1679 const SCEV *DstConst, 1680 const Loop *CurLoop, unsigned Level, 1681 FullDependence &Result, 1682 Constraint &NewConstraint) const { 1683 // For the WeakSIV test, it's possible the loop isn't common to 1684 // the Src and Dst loops. If it isn't, then there's no need to 1685 // record a direction. 1686 LLVM_DEBUG(dbgs() << "\tWeak-Zero (src) SIV test\n"); 1687 LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << "\n"); 1688 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1689 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1690 ++WeakZeroSIVapplications; 1691 assert(0 < Level && Level <= MaxLevels && "Level out of range"); 1692 Level--; 1693 Result.Consistent = false; 1694 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); 1695 NewConstraint.setLine(SE->getZero(Delta->getType()), DstCoeff, Delta, 1696 CurLoop); 1697 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1698 if (isKnownPredicate(CmpInst::ICMP_EQ, SrcConst, DstConst)) { 1699 if (Level < CommonLevels) { 1700 Result.DV[Level].Direction &= Dependence::DVEntry::GE; 1701 Result.DV[Level].PeelFirst = true; 1702 ++WeakZeroSIVsuccesses; 1703 } 1704 return false; // dependences caused by first iteration 1705 } 1706 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1707 if (!ConstCoeff) 1708 return false; 1709 const SCEV *AbsCoeff = 1710 SE->isKnownNegative(ConstCoeff) ? 1711 SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; 1712 const SCEV *NewDelta = 1713 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta; 1714 1715 // check that Delta/SrcCoeff < iteration count 1716 // really check NewDelta < count*AbsCoeff 1717 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1718 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1719 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); 1720 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { 1721 ++WeakZeroSIVindependence; 1722 ++WeakZeroSIVsuccesses; 1723 return true; 1724 } 1725 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { 1726 // dependences caused by last iteration 1727 if (Level < CommonLevels) { 1728 Result.DV[Level].Direction &= Dependence::DVEntry::LE; 1729 Result.DV[Level].PeelLast = true; 1730 ++WeakZeroSIVsuccesses; 1731 } 1732 return false; 1733 } 1734 } 1735 1736 // check that Delta/SrcCoeff >= 0 1737 // really check that NewDelta >= 0 1738 if (SE->isKnownNegative(NewDelta)) { 1739 // No dependence, newDelta < 0 1740 ++WeakZeroSIVindependence; 1741 ++WeakZeroSIVsuccesses; 1742 return true; 1743 } 1744 1745 // if SrcCoeff doesn't divide Delta, then no dependence 1746 if (isa<SCEVConstant>(Delta) && 1747 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { 1748 ++WeakZeroSIVindependence; 1749 ++WeakZeroSIVsuccesses; 1750 return true; 1751 } 1752 return false; 1753 } 1754 1755 1756 // weakZeroDstSIVtest - 1757 // From the paper, Practical Dependence Testing, Section 4.2.2 1758 // 1759 // When we have a pair of subscripts of the form [c1 + a*i] and [c2], 1760 // where i is an induction variable, c1 and c2 are loop invariant, 1761 // and a is a constant, we can solve it exactly using the 1762 // Weak-Zero SIV test. 1763 // 1764 // Given 1765 // 1766 // c1 + a*i = c2 1767 // 1768 // we get 1769 // 1770 // i = (c2 - c1)/a 1771 // 1772 // If i is not an integer, there's no dependence. 1773 // If i < 0 or > UB, there's no dependence. 1774 // If i = 0, the direction is <= and peeling the 1775 // 1st iteration will break the dependence. 1776 // If i = UB, the direction is >= and peeling the 1777 // last iteration will break the dependence. 1778 // Otherwise, the direction is *. 1779 // 1780 // Can prove independence. Failing that, we can sometimes refine 1781 // the directions. Can sometimes show that first or last 1782 // iteration carries all the dependences (so worth peeling). 1783 // 1784 // (see also weakZeroSrcSIVtest) 1785 // 1786 // Return true if dependence disproved. 1787 bool DependenceInfo::weakZeroDstSIVtest(const SCEV *SrcCoeff, 1788 const SCEV *SrcConst, 1789 const SCEV *DstConst, 1790 const Loop *CurLoop, unsigned Level, 1791 FullDependence &Result, 1792 Constraint &NewConstraint) const { 1793 // For the WeakSIV test, it's possible the loop isn't common to the 1794 // Src and Dst loops. If it isn't, then there's no need to record a direction. 1795 LLVM_DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n"); 1796 LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << "\n"); 1797 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1798 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1799 ++WeakZeroSIVapplications; 1800 assert(0 < Level && Level <= SrcLevels && "Level out of range"); 1801 Level--; 1802 Result.Consistent = false; 1803 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1804 NewConstraint.setLine(SrcCoeff, SE->getZero(Delta->getType()), Delta, 1805 CurLoop); 1806 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1807 if (isKnownPredicate(CmpInst::ICMP_EQ, DstConst, SrcConst)) { 1808 if (Level < CommonLevels) { 1809 Result.DV[Level].Direction &= Dependence::DVEntry::LE; 1810 Result.DV[Level].PeelFirst = true; 1811 ++WeakZeroSIVsuccesses; 1812 } 1813 return false; // dependences caused by first iteration 1814 } 1815 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1816 if (!ConstCoeff) 1817 return false; 1818 const SCEV *AbsCoeff = 1819 SE->isKnownNegative(ConstCoeff) ? 1820 SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; 1821 const SCEV *NewDelta = 1822 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta; 1823 1824 // check that Delta/SrcCoeff < iteration count 1825 // really check NewDelta < count*AbsCoeff 1826 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1827 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1828 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); 1829 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { 1830 ++WeakZeroSIVindependence; 1831 ++WeakZeroSIVsuccesses; 1832 return true; 1833 } 1834 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { 1835 // dependences caused by last iteration 1836 if (Level < CommonLevels) { 1837 Result.DV[Level].Direction &= Dependence::DVEntry::GE; 1838 Result.DV[Level].PeelLast = true; 1839 ++WeakZeroSIVsuccesses; 1840 } 1841 return false; 1842 } 1843 } 1844 1845 // check that Delta/SrcCoeff >= 0 1846 // really check that NewDelta >= 0 1847 if (SE->isKnownNegative(NewDelta)) { 1848 // No dependence, newDelta < 0 1849 ++WeakZeroSIVindependence; 1850 ++WeakZeroSIVsuccesses; 1851 return true; 1852 } 1853 1854 // if SrcCoeff doesn't divide Delta, then no dependence 1855 if (isa<SCEVConstant>(Delta) && 1856 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { 1857 ++WeakZeroSIVindependence; 1858 ++WeakZeroSIVsuccesses; 1859 return true; 1860 } 1861 return false; 1862 } 1863 1864 1865 // exactRDIVtest - Tests the RDIV subscript pair for dependence. 1866 // Things of the form [c1 + a*i] and [c2 + b*j], 1867 // where i and j are induction variable, c1 and c2 are loop invariant, 1868 // and a and b are constants. 1869 // Returns true if any possible dependence is disproved. 1870 // Marks the result as inconsistent. 1871 // Works in some cases that symbolicRDIVtest doesn't, and vice versa. 1872 bool DependenceInfo::exactRDIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, 1873 const SCEV *SrcConst, const SCEV *DstConst, 1874 const Loop *SrcLoop, const Loop *DstLoop, 1875 FullDependence &Result) const { 1876 LLVM_DEBUG(dbgs() << "\tExact RDIV test\n"); 1877 LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); 1878 LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); 1879 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1880 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1881 ++ExactRDIVapplications; 1882 Result.Consistent = false; 1883 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1884 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1885 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1886 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1887 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1888 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) 1889 return false; 1890 1891 // find gcd 1892 APInt G, X, Y; 1893 APInt AM = ConstSrcCoeff->getAPInt(); 1894 APInt BM = ConstDstCoeff->getAPInt(); 1895 APInt CM = ConstDelta->getAPInt(); 1896 unsigned Bits = AM.getBitWidth(); 1897 if (findGCD(Bits, AM, BM, CM, G, X, Y)) { 1898 // gcd doesn't divide Delta, no dependence 1899 ++ExactRDIVindependence; 1900 return true; 1901 } 1902 1903 LLVM_DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); 1904 1905 // since SCEV construction seems to normalize, LM = 0 1906 APInt SrcUM(Bits, 1, true); 1907 bool SrcUMvalid = false; 1908 // SrcUM is perhaps unavailable, let's check 1909 if (const SCEVConstant *UpperBound = 1910 collectConstantUpperBound(SrcLoop, Delta->getType())) { 1911 SrcUM = UpperBound->getAPInt(); 1912 LLVM_DEBUG(dbgs() << "\t SrcUM = " << SrcUM << "\n"); 1913 SrcUMvalid = true; 1914 } 1915 1916 APInt DstUM(Bits, 1, true); 1917 bool DstUMvalid = false; 1918 // UM is perhaps unavailable, let's check 1919 if (const SCEVConstant *UpperBound = 1920 collectConstantUpperBound(DstLoop, Delta->getType())) { 1921 DstUM = UpperBound->getAPInt(); 1922 LLVM_DEBUG(dbgs() << "\t DstUM = " << DstUM << "\n"); 1923 DstUMvalid = true; 1924 } 1925 1926 APInt TU(APInt::getSignedMaxValue(Bits)); 1927 APInt TL(APInt::getSignedMinValue(Bits)); 1928 APInt TC = CM.sdiv(G); 1929 APInt TX = X * TC; 1930 APInt TY = Y * TC; 1931 LLVM_DEBUG(dbgs() << "\t TC = " << TC << "\n"); 1932 LLVM_DEBUG(dbgs() << "\t TX = " << TX << "\n"); 1933 LLVM_DEBUG(dbgs() << "\t TY = " << TY << "\n"); 1934 1935 SmallVector<APInt, 2> TLVec, TUVec; 1936 APInt TB = BM.sdiv(G); 1937 if (TB.sgt(0)) { 1938 TLVec.push_back(ceilingOfQuotient(-TX, TB)); 1939 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); 1940 if (SrcUMvalid) { 1941 TUVec.push_back(floorOfQuotient(SrcUM - TX, TB)); 1942 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); 1943 } 1944 } else { 1945 TUVec.push_back(floorOfQuotient(-TX, TB)); 1946 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); 1947 if (SrcUMvalid) { 1948 TLVec.push_back(ceilingOfQuotient(SrcUM - TX, TB)); 1949 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); 1950 } 1951 } 1952 1953 APInt TA = AM.sdiv(G); 1954 if (TA.sgt(0)) { 1955 TLVec.push_back(ceilingOfQuotient(-TY, TA)); 1956 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); 1957 if (DstUMvalid) { 1958 TUVec.push_back(floorOfQuotient(DstUM - TY, TA)); 1959 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); 1960 } 1961 } else { 1962 TUVec.push_back(floorOfQuotient(-TY, TA)); 1963 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); 1964 if (DstUMvalid) { 1965 TLVec.push_back(ceilingOfQuotient(DstUM - TY, TA)); 1966 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); 1967 } 1968 } 1969 1970 if (TLVec.empty() || TUVec.empty()) 1971 return false; 1972 1973 LLVM_DEBUG(dbgs() << "\t TA = " << TA << "\n"); 1974 LLVM_DEBUG(dbgs() << "\t TB = " << TB << "\n"); 1975 1976 TL = APIntOps::smax(TLVec.front(), TLVec.back()); 1977 TU = APIntOps::smin(TUVec.front(), TUVec.back()); 1978 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1979 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1980 1981 if (TL.sgt(TU)) 1982 ++ExactRDIVindependence; 1983 return TL.sgt(TU); 1984 } 1985 1986 1987 // symbolicRDIVtest - 1988 // In Section 4.5 of the Practical Dependence Testing paper,the authors 1989 // introduce a special case of Banerjee's Inequalities (also called the 1990 // Extreme-Value Test) that can handle some of the SIV and RDIV cases, 1991 // particularly cases with symbolics. Since it's only able to disprove 1992 // dependence (not compute distances or directions), we'll use it as a 1993 // fall back for the other tests. 1994 // 1995 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] 1996 // where i and j are induction variables and c1 and c2 are loop invariants, 1997 // we can use the symbolic tests to disprove some dependences, serving as a 1998 // backup for the RDIV test. Note that i and j can be the same variable, 1999 // letting this test serve as a backup for the various SIV tests. 2000 // 2001 // For a dependence to exist, c1 + a1*i must equal c2 + a2*j for some 2002 // 0 <= i <= N1 and some 0 <= j <= N2, where N1 and N2 are the (normalized) 2003 // loop bounds for the i and j loops, respectively. So, ... 2004 // 2005 // c1 + a1*i = c2 + a2*j 2006 // a1*i - a2*j = c2 - c1 2007 // 2008 // To test for a dependence, we compute c2 - c1 and make sure it's in the 2009 // range of the maximum and minimum possible values of a1*i - a2*j. 2010 // Considering the signs of a1 and a2, we have 4 possible cases: 2011 // 2012 // 1) If a1 >= 0 and a2 >= 0, then 2013 // a1*0 - a2*N2 <= c2 - c1 <= a1*N1 - a2*0 2014 // -a2*N2 <= c2 - c1 <= a1*N1 2015 // 2016 // 2) If a1 >= 0 and a2 <= 0, then 2017 // a1*0 - a2*0 <= c2 - c1 <= a1*N1 - a2*N2 2018 // 0 <= c2 - c1 <= a1*N1 - a2*N2 2019 // 2020 // 3) If a1 <= 0 and a2 >= 0, then 2021 // a1*N1 - a2*N2 <= c2 - c1 <= a1*0 - a2*0 2022 // a1*N1 - a2*N2 <= c2 - c1 <= 0 2023 // 2024 // 4) If a1 <= 0 and a2 <= 0, then 2025 // a1*N1 - a2*0 <= c2 - c1 <= a1*0 - a2*N2 2026 // a1*N1 <= c2 - c1 <= -a2*N2 2027 // 2028 // return true if dependence disproved 2029 bool DependenceInfo::symbolicRDIVtest(const SCEV *A1, const SCEV *A2, 2030 const SCEV *C1, const SCEV *C2, 2031 const Loop *Loop1, 2032 const Loop *Loop2) const { 2033 ++SymbolicRDIVapplications; 2034 LLVM_DEBUG(dbgs() << "\ttry symbolic RDIV test\n"); 2035 LLVM_DEBUG(dbgs() << "\t A1 = " << *A1); 2036 LLVM_DEBUG(dbgs() << ", type = " << *A1->getType() << "\n"); 2037 LLVM_DEBUG(dbgs() << "\t A2 = " << *A2 << "\n"); 2038 LLVM_DEBUG(dbgs() << "\t C1 = " << *C1 << "\n"); 2039 LLVM_DEBUG(dbgs() << "\t C2 = " << *C2 << "\n"); 2040 const SCEV *N1 = collectUpperBound(Loop1, A1->getType()); 2041 const SCEV *N2 = collectUpperBound(Loop2, A1->getType()); 2042 LLVM_DEBUG(if (N1) dbgs() << "\t N1 = " << *N1 << "\n"); 2043 LLVM_DEBUG(if (N2) dbgs() << "\t N2 = " << *N2 << "\n"); 2044 const SCEV *C2_C1 = SE->getMinusSCEV(C2, C1); 2045 const SCEV *C1_C2 = SE->getMinusSCEV(C1, C2); 2046 LLVM_DEBUG(dbgs() << "\t C2 - C1 = " << *C2_C1 << "\n"); 2047 LLVM_DEBUG(dbgs() << "\t C1 - C2 = " << *C1_C2 << "\n"); 2048 if (SE->isKnownNonNegative(A1)) { 2049 if (SE->isKnownNonNegative(A2)) { 2050 // A1 >= 0 && A2 >= 0 2051 if (N1) { 2052 // make sure that c2 - c1 <= a1*N1 2053 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2054 LLVM_DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); 2055 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1)) { 2056 ++SymbolicRDIVindependence; 2057 return true; 2058 } 2059 } 2060 if (N2) { 2061 // make sure that -a2*N2 <= c2 - c1, or a2*N2 >= c1 - c2 2062 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2063 LLVM_DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); 2064 if (isKnownPredicate(CmpInst::ICMP_SLT, A2N2, C1_C2)) { 2065 ++SymbolicRDIVindependence; 2066 return true; 2067 } 2068 } 2069 } 2070 else if (SE->isKnownNonPositive(A2)) { 2071 // a1 >= 0 && a2 <= 0 2072 if (N1 && N2) { 2073 // make sure that c2 - c1 <= a1*N1 - a2*N2 2074 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2075 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2076 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); 2077 LLVM_DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); 2078 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1_A2N2)) { 2079 ++SymbolicRDIVindependence; 2080 return true; 2081 } 2082 } 2083 // make sure that 0 <= c2 - c1 2084 if (SE->isKnownNegative(C2_C1)) { 2085 ++SymbolicRDIVindependence; 2086 return true; 2087 } 2088 } 2089 } 2090 else if (SE->isKnownNonPositive(A1)) { 2091 if (SE->isKnownNonNegative(A2)) { 2092 // a1 <= 0 && a2 >= 0 2093 if (N1 && N2) { 2094 // make sure that a1*N1 - a2*N2 <= c2 - c1 2095 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2096 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2097 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); 2098 LLVM_DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); 2099 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1_A2N2, C2_C1)) { 2100 ++SymbolicRDIVindependence; 2101 return true; 2102 } 2103 } 2104 // make sure that c2 - c1 <= 0 2105 if (SE->isKnownPositive(C2_C1)) { 2106 ++SymbolicRDIVindependence; 2107 return true; 2108 } 2109 } 2110 else if (SE->isKnownNonPositive(A2)) { 2111 // a1 <= 0 && a2 <= 0 2112 if (N1) { 2113 // make sure that a1*N1 <= c2 - c1 2114 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2115 LLVM_DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); 2116 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1, C2_C1)) { 2117 ++SymbolicRDIVindependence; 2118 return true; 2119 } 2120 } 2121 if (N2) { 2122 // make sure that c2 - c1 <= -a2*N2, or c1 - c2 >= a2*N2 2123 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2124 LLVM_DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); 2125 if (isKnownPredicate(CmpInst::ICMP_SLT, C1_C2, A2N2)) { 2126 ++SymbolicRDIVindependence; 2127 return true; 2128 } 2129 } 2130 } 2131 } 2132 return false; 2133 } 2134 2135 2136 // testSIV - 2137 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 - a2*i] 2138 // where i is an induction variable, c1 and c2 are loop invariant, and a1 and 2139 // a2 are constant, we attack it with an SIV test. While they can all be 2140 // solved with the Exact SIV test, it's worthwhile to use simpler tests when 2141 // they apply; they're cheaper and sometimes more precise. 2142 // 2143 // Return true if dependence disproved. 2144 bool DependenceInfo::testSIV(const SCEV *Src, const SCEV *Dst, unsigned &Level, 2145 FullDependence &Result, Constraint &NewConstraint, 2146 const SCEV *&SplitIter) const { 2147 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n"); 2148 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2149 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); 2150 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); 2151 if (SrcAddRec && DstAddRec) { 2152 const SCEV *SrcConst = SrcAddRec->getStart(); 2153 const SCEV *DstConst = DstAddRec->getStart(); 2154 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2155 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); 2156 const Loop *CurLoop = SrcAddRec->getLoop(); 2157 assert(CurLoop == DstAddRec->getLoop() && 2158 "both loops in SIV should be same"); 2159 Level = mapSrcLoop(CurLoop); 2160 bool disproven; 2161 if (SrcCoeff == DstCoeff) 2162 disproven = strongSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2163 Level, Result, NewConstraint); 2164 else if (SrcCoeff == SE->getNegativeSCEV(DstCoeff)) 2165 disproven = weakCrossingSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2166 Level, Result, NewConstraint, SplitIter); 2167 else 2168 disproven = exactSIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, 2169 Level, Result, NewConstraint); 2170 return disproven || 2171 gcdMIVtest(Src, Dst, Result) || 2172 symbolicRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, CurLoop); 2173 } 2174 if (SrcAddRec) { 2175 const SCEV *SrcConst = SrcAddRec->getStart(); 2176 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2177 const SCEV *DstConst = Dst; 2178 const Loop *CurLoop = SrcAddRec->getLoop(); 2179 Level = mapSrcLoop(CurLoop); 2180 return weakZeroDstSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2181 Level, Result, NewConstraint) || 2182 gcdMIVtest(Src, Dst, Result); 2183 } 2184 if (DstAddRec) { 2185 const SCEV *DstConst = DstAddRec->getStart(); 2186 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); 2187 const SCEV *SrcConst = Src; 2188 const Loop *CurLoop = DstAddRec->getLoop(); 2189 Level = mapDstLoop(CurLoop); 2190 return weakZeroSrcSIVtest(DstCoeff, SrcConst, DstConst, 2191 CurLoop, Level, Result, NewConstraint) || 2192 gcdMIVtest(Src, Dst, Result); 2193 } 2194 llvm_unreachable("SIV test expected at least one AddRec"); 2195 return false; 2196 } 2197 2198 2199 // testRDIV - 2200 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] 2201 // where i and j are induction variables, c1 and c2 are loop invariant, 2202 // and a1 and a2 are constant, we can solve it exactly with an easy adaptation 2203 // of the Exact SIV test, the Restricted Double Index Variable (RDIV) test. 2204 // It doesn't make sense to talk about distance or direction in this case, 2205 // so there's no point in making special versions of the Strong SIV test or 2206 // the Weak-crossing SIV test. 2207 // 2208 // With minor algebra, this test can also be used for things like 2209 // [c1 + a1*i + a2*j][c2]. 2210 // 2211 // Return true if dependence disproved. 2212 bool DependenceInfo::testRDIV(const SCEV *Src, const SCEV *Dst, 2213 FullDependence &Result) const { 2214 // we have 3 possible situations here: 2215 // 1) [a*i + b] and [c*j + d] 2216 // 2) [a*i + c*j + b] and [d] 2217 // 3) [b] and [a*i + c*j + d] 2218 // We need to find what we've got and get organized 2219 2220 const SCEV *SrcConst, *DstConst; 2221 const SCEV *SrcCoeff, *DstCoeff; 2222 const Loop *SrcLoop, *DstLoop; 2223 2224 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n"); 2225 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2226 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); 2227 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); 2228 if (SrcAddRec && DstAddRec) { 2229 SrcConst = SrcAddRec->getStart(); 2230 SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2231 SrcLoop = SrcAddRec->getLoop(); 2232 DstConst = DstAddRec->getStart(); 2233 DstCoeff = DstAddRec->getStepRecurrence(*SE); 2234 DstLoop = DstAddRec->getLoop(); 2235 } 2236 else if (SrcAddRec) { 2237 if (const SCEVAddRecExpr *tmpAddRec = 2238 dyn_cast<SCEVAddRecExpr>(SrcAddRec->getStart())) { 2239 SrcConst = tmpAddRec->getStart(); 2240 SrcCoeff = tmpAddRec->getStepRecurrence(*SE); 2241 SrcLoop = tmpAddRec->getLoop(); 2242 DstConst = Dst; 2243 DstCoeff = SE->getNegativeSCEV(SrcAddRec->getStepRecurrence(*SE)); 2244 DstLoop = SrcAddRec->getLoop(); 2245 } 2246 else 2247 llvm_unreachable("RDIV reached by surprising SCEVs"); 2248 } 2249 else if (DstAddRec) { 2250 if (const SCEVAddRecExpr *tmpAddRec = 2251 dyn_cast<SCEVAddRecExpr>(DstAddRec->getStart())) { 2252 DstConst = tmpAddRec->getStart(); 2253 DstCoeff = tmpAddRec->getStepRecurrence(*SE); 2254 DstLoop = tmpAddRec->getLoop(); 2255 SrcConst = Src; 2256 SrcCoeff = SE->getNegativeSCEV(DstAddRec->getStepRecurrence(*SE)); 2257 SrcLoop = DstAddRec->getLoop(); 2258 } 2259 else 2260 llvm_unreachable("RDIV reached by surprising SCEVs"); 2261 } 2262 else 2263 llvm_unreachable("RDIV expected at least one AddRec"); 2264 return exactRDIVtest(SrcCoeff, DstCoeff, 2265 SrcConst, DstConst, 2266 SrcLoop, DstLoop, 2267 Result) || 2268 gcdMIVtest(Src, Dst, Result) || 2269 symbolicRDIVtest(SrcCoeff, DstCoeff, 2270 SrcConst, DstConst, 2271 SrcLoop, DstLoop); 2272 } 2273 2274 2275 // Tests the single-subscript MIV pair (Src and Dst) for dependence. 2276 // Return true if dependence disproved. 2277 // Can sometimes refine direction vectors. 2278 bool DependenceInfo::testMIV(const SCEV *Src, const SCEV *Dst, 2279 const SmallBitVector &Loops, 2280 FullDependence &Result) const { 2281 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n"); 2282 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2283 Result.Consistent = false; 2284 return gcdMIVtest(Src, Dst, Result) || 2285 banerjeeMIVtest(Src, Dst, Loops, Result); 2286 } 2287 2288 2289 // Given a product, e.g., 10*X*Y, returns the first constant operand, 2290 // in this case 10. If there is no constant part, returns NULL. 2291 static 2292 const SCEVConstant *getConstantPart(const SCEV *Expr) { 2293 if (const auto *Constant = dyn_cast<SCEVConstant>(Expr)) 2294 return Constant; 2295 else if (const auto *Product = dyn_cast<SCEVMulExpr>(Expr)) 2296 if (const auto *Constant = dyn_cast<SCEVConstant>(Product->getOperand(0))) 2297 return Constant; 2298 return nullptr; 2299 } 2300 2301 2302 //===----------------------------------------------------------------------===// 2303 // gcdMIVtest - 2304 // Tests an MIV subscript pair for dependence. 2305 // Returns true if any possible dependence is disproved. 2306 // Marks the result as inconsistent. 2307 // Can sometimes disprove the equal direction for 1 or more loops, 2308 // as discussed in Michael Wolfe's book, 2309 // High Performance Compilers for Parallel Computing, page 235. 2310 // 2311 // We spend some effort (code!) to handle cases like 2312 // [10*i + 5*N*j + 15*M + 6], where i and j are induction variables, 2313 // but M and N are just loop-invariant variables. 2314 // This should help us handle linearized subscripts; 2315 // also makes this test a useful backup to the various SIV tests. 2316 // 2317 // It occurs to me that the presence of loop-invariant variables 2318 // changes the nature of the test from "greatest common divisor" 2319 // to "a common divisor". 2320 bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst, 2321 FullDependence &Result) const { 2322 LLVM_DEBUG(dbgs() << "starting gcd\n"); 2323 ++GCDapplications; 2324 unsigned BitWidth = SE->getTypeSizeInBits(Src->getType()); 2325 APInt RunningGCD = APInt::getZero(BitWidth); 2326 2327 // Examine Src coefficients. 2328 // Compute running GCD and record source constant. 2329 // Because we're looking for the constant at the end of the chain, 2330 // we can't quit the loop just because the GCD == 1. 2331 const SCEV *Coefficients = Src; 2332 while (const SCEVAddRecExpr *AddRec = 2333 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2334 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2335 // If the coefficient is the product of a constant and other stuff, 2336 // we can use the constant in the GCD computation. 2337 const auto *Constant = getConstantPart(Coeff); 2338 if (!Constant) 2339 return false; 2340 APInt ConstCoeff = Constant->getAPInt(); 2341 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2342 Coefficients = AddRec->getStart(); 2343 } 2344 const SCEV *SrcConst = Coefficients; 2345 2346 // Examine Dst coefficients. 2347 // Compute running GCD and record destination constant. 2348 // Because we're looking for the constant at the end of the chain, 2349 // we can't quit the loop just because the GCD == 1. 2350 Coefficients = Dst; 2351 while (const SCEVAddRecExpr *AddRec = 2352 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2353 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2354 // If the coefficient is the product of a constant and other stuff, 2355 // we can use the constant in the GCD computation. 2356 const auto *Constant = getConstantPart(Coeff); 2357 if (!Constant) 2358 return false; 2359 APInt ConstCoeff = Constant->getAPInt(); 2360 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2361 Coefficients = AddRec->getStart(); 2362 } 2363 const SCEV *DstConst = Coefficients; 2364 2365 APInt ExtraGCD = APInt::getZero(BitWidth); 2366 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 2367 LLVM_DEBUG(dbgs() << " Delta = " << *Delta << "\n"); 2368 const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Delta); 2369 if (const SCEVAddExpr *Sum = dyn_cast<SCEVAddExpr>(Delta)) { 2370 // If Delta is a sum of products, we may be able to make further progress. 2371 for (unsigned Op = 0, Ops = Sum->getNumOperands(); Op < Ops; Op++) { 2372 const SCEV *Operand = Sum->getOperand(Op); 2373 if (isa<SCEVConstant>(Operand)) { 2374 assert(!Constant && "Surprised to find multiple constants"); 2375 Constant = cast<SCEVConstant>(Operand); 2376 } 2377 else if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Operand)) { 2378 // Search for constant operand to participate in GCD; 2379 // If none found; return false. 2380 const SCEVConstant *ConstOp = getConstantPart(Product); 2381 if (!ConstOp) 2382 return false; 2383 APInt ConstOpValue = ConstOp->getAPInt(); 2384 ExtraGCD = APIntOps::GreatestCommonDivisor(ExtraGCD, 2385 ConstOpValue.abs()); 2386 } 2387 else 2388 return false; 2389 } 2390 } 2391 if (!Constant) 2392 return false; 2393 APInt ConstDelta = cast<SCEVConstant>(Constant)->getAPInt(); 2394 LLVM_DEBUG(dbgs() << " ConstDelta = " << ConstDelta << "\n"); 2395 if (ConstDelta == 0) 2396 return false; 2397 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ExtraGCD); 2398 LLVM_DEBUG(dbgs() << " RunningGCD = " << RunningGCD << "\n"); 2399 APInt Remainder = ConstDelta.srem(RunningGCD); 2400 if (Remainder != 0) { 2401 ++GCDindependence; 2402 return true; 2403 } 2404 2405 // Try to disprove equal directions. 2406 // For example, given a subscript pair [3*i + 2*j] and [i' + 2*j' - 1], 2407 // the code above can't disprove the dependence because the GCD = 1. 2408 // So we consider what happen if i = i' and what happens if j = j'. 2409 // If i = i', we can simplify the subscript to [2*i + 2*j] and [2*j' - 1], 2410 // which is infeasible, so we can disallow the = direction for the i level. 2411 // Setting j = j' doesn't help matters, so we end up with a direction vector 2412 // of [<>, *] 2413 // 2414 // Given A[5*i + 10*j*M + 9*M*N] and A[15*i + 20*j*M - 21*N*M + 5], 2415 // we need to remember that the constant part is 5 and the RunningGCD should 2416 // be initialized to ExtraGCD = 30. 2417 LLVM_DEBUG(dbgs() << " ExtraGCD = " << ExtraGCD << '\n'); 2418 2419 bool Improved = false; 2420 Coefficients = Src; 2421 while (const SCEVAddRecExpr *AddRec = 2422 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2423 Coefficients = AddRec->getStart(); 2424 const Loop *CurLoop = AddRec->getLoop(); 2425 RunningGCD = ExtraGCD; 2426 const SCEV *SrcCoeff = AddRec->getStepRecurrence(*SE); 2427 const SCEV *DstCoeff = SE->getMinusSCEV(SrcCoeff, SrcCoeff); 2428 const SCEV *Inner = Src; 2429 while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { 2430 AddRec = cast<SCEVAddRecExpr>(Inner); 2431 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2432 if (CurLoop == AddRec->getLoop()) 2433 ; // SrcCoeff == Coeff 2434 else { 2435 // If the coefficient is the product of a constant and other stuff, 2436 // we can use the constant in the GCD computation. 2437 Constant = getConstantPart(Coeff); 2438 if (!Constant) 2439 return false; 2440 APInt ConstCoeff = Constant->getAPInt(); 2441 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2442 } 2443 Inner = AddRec->getStart(); 2444 } 2445 Inner = Dst; 2446 while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { 2447 AddRec = cast<SCEVAddRecExpr>(Inner); 2448 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2449 if (CurLoop == AddRec->getLoop()) 2450 DstCoeff = Coeff; 2451 else { 2452 // If the coefficient is the product of a constant and other stuff, 2453 // we can use the constant in the GCD computation. 2454 Constant = getConstantPart(Coeff); 2455 if (!Constant) 2456 return false; 2457 APInt ConstCoeff = Constant->getAPInt(); 2458 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2459 } 2460 Inner = AddRec->getStart(); 2461 } 2462 Delta = SE->getMinusSCEV(SrcCoeff, DstCoeff); 2463 // If the coefficient is the product of a constant and other stuff, 2464 // we can use the constant in the GCD computation. 2465 Constant = getConstantPart(Delta); 2466 if (!Constant) 2467 // The difference of the two coefficients might not be a product 2468 // or constant, in which case we give up on this direction. 2469 continue; 2470 APInt ConstCoeff = Constant->getAPInt(); 2471 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2472 LLVM_DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n"); 2473 if (RunningGCD != 0) { 2474 Remainder = ConstDelta.srem(RunningGCD); 2475 LLVM_DEBUG(dbgs() << "\tRemainder = " << Remainder << "\n"); 2476 if (Remainder != 0) { 2477 unsigned Level = mapSrcLoop(CurLoop); 2478 Result.DV[Level - 1].Direction &= unsigned(~Dependence::DVEntry::EQ); 2479 Improved = true; 2480 } 2481 } 2482 } 2483 if (Improved) 2484 ++GCDsuccesses; 2485 LLVM_DEBUG(dbgs() << "all done\n"); 2486 return false; 2487 } 2488 2489 2490 //===----------------------------------------------------------------------===// 2491 // banerjeeMIVtest - 2492 // Use Banerjee's Inequalities to test an MIV subscript pair. 2493 // (Wolfe, in the race-car book, calls this the Extreme Value Test.) 2494 // Generally follows the discussion in Section 2.5.2 of 2495 // 2496 // Optimizing Supercompilers for Supercomputers 2497 // Michael Wolfe 2498 // 2499 // The inequalities given on page 25 are simplified in that loops are 2500 // normalized so that the lower bound is always 0 and the stride is always 1. 2501 // For example, Wolfe gives 2502 // 2503 // LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2504 // 2505 // where A_k is the coefficient of the kth index in the source subscript, 2506 // B_k is the coefficient of the kth index in the destination subscript, 2507 // U_k is the upper bound of the kth index, L_k is the lower bound of the Kth 2508 // index, and N_k is the stride of the kth index. Since all loops are normalized 2509 // by the SCEV package, N_k = 1 and L_k = 0, allowing us to simplify the 2510 // equation to 2511 // 2512 // LB^<_k = (A^-_k - B_k)^- (U_k - 0 - 1) + (A_k - B_k)0 - B_k 1 2513 // = (A^-_k - B_k)^- (U_k - 1) - B_k 2514 // 2515 // Similar simplifications are possible for the other equations. 2516 // 2517 // When we can't determine the number of iterations for a loop, 2518 // we use NULL as an indicator for the worst case, infinity. 2519 // When computing the upper bound, NULL denotes +inf; 2520 // for the lower bound, NULL denotes -inf. 2521 // 2522 // Return true if dependence disproved. 2523 bool DependenceInfo::banerjeeMIVtest(const SCEV *Src, const SCEV *Dst, 2524 const SmallBitVector &Loops, 2525 FullDependence &Result) const { 2526 LLVM_DEBUG(dbgs() << "starting Banerjee\n"); 2527 ++BanerjeeApplications; 2528 LLVM_DEBUG(dbgs() << " Src = " << *Src << '\n'); 2529 const SCEV *A0; 2530 CoefficientInfo *A = collectCoeffInfo(Src, true, A0); 2531 LLVM_DEBUG(dbgs() << " Dst = " << *Dst << '\n'); 2532 const SCEV *B0; 2533 CoefficientInfo *B = collectCoeffInfo(Dst, false, B0); 2534 BoundInfo *Bound = new BoundInfo[MaxLevels + 1]; 2535 const SCEV *Delta = SE->getMinusSCEV(B0, A0); 2536 LLVM_DEBUG(dbgs() << "\tDelta = " << *Delta << '\n'); 2537 2538 // Compute bounds for all the * directions. 2539 LLVM_DEBUG(dbgs() << "\tBounds[*]\n"); 2540 for (unsigned K = 1; K <= MaxLevels; ++K) { 2541 Bound[K].Iterations = A[K].Iterations ? A[K].Iterations : B[K].Iterations; 2542 Bound[K].Direction = Dependence::DVEntry::ALL; 2543 Bound[K].DirSet = Dependence::DVEntry::NONE; 2544 findBoundsALL(A, B, Bound, K); 2545 #ifndef NDEBUG 2546 LLVM_DEBUG(dbgs() << "\t " << K << '\t'); 2547 if (Bound[K].Lower[Dependence::DVEntry::ALL]) 2548 LLVM_DEBUG(dbgs() << *Bound[K].Lower[Dependence::DVEntry::ALL] << '\t'); 2549 else 2550 LLVM_DEBUG(dbgs() << "-inf\t"); 2551 if (Bound[K].Upper[Dependence::DVEntry::ALL]) 2552 LLVM_DEBUG(dbgs() << *Bound[K].Upper[Dependence::DVEntry::ALL] << '\n'); 2553 else 2554 LLVM_DEBUG(dbgs() << "+inf\n"); 2555 #endif 2556 } 2557 2558 // Test the *, *, *, ... case. 2559 bool Disproved = false; 2560 if (testBounds(Dependence::DVEntry::ALL, 0, Bound, Delta)) { 2561 // Explore the direction vector hierarchy. 2562 unsigned DepthExpanded = 0; 2563 unsigned NewDeps = exploreDirections(1, A, B, Bound, 2564 Loops, DepthExpanded, Delta); 2565 if (NewDeps > 0) { 2566 bool Improved = false; 2567 for (unsigned K = 1; K <= CommonLevels; ++K) { 2568 if (Loops[K]) { 2569 unsigned Old = Result.DV[K - 1].Direction; 2570 Result.DV[K - 1].Direction = Old & Bound[K].DirSet; 2571 Improved |= Old != Result.DV[K - 1].Direction; 2572 if (!Result.DV[K - 1].Direction) { 2573 Improved = false; 2574 Disproved = true; 2575 break; 2576 } 2577 } 2578 } 2579 if (Improved) 2580 ++BanerjeeSuccesses; 2581 } 2582 else { 2583 ++BanerjeeIndependence; 2584 Disproved = true; 2585 } 2586 } 2587 else { 2588 ++BanerjeeIndependence; 2589 Disproved = true; 2590 } 2591 delete [] Bound; 2592 delete [] A; 2593 delete [] B; 2594 return Disproved; 2595 } 2596 2597 2598 // Hierarchically expands the direction vector 2599 // search space, combining the directions of discovered dependences 2600 // in the DirSet field of Bound. Returns the number of distinct 2601 // dependences discovered. If the dependence is disproved, 2602 // it will return 0. 2603 unsigned DependenceInfo::exploreDirections(unsigned Level, CoefficientInfo *A, 2604 CoefficientInfo *B, BoundInfo *Bound, 2605 const SmallBitVector &Loops, 2606 unsigned &DepthExpanded, 2607 const SCEV *Delta) const { 2608 // This algorithm has worst case complexity of O(3^n), where 'n' is the number 2609 // of common loop levels. To avoid excessive compile-time, pessimize all the 2610 // results and immediately return when the number of common levels is beyond 2611 // the given threshold. 2612 if (CommonLevels > MIVMaxLevelThreshold) { 2613 LLVM_DEBUG(dbgs() << "Number of common levels exceeded the threshold. MIV " 2614 "direction exploration is terminated.\n"); 2615 for (unsigned K = 1; K <= CommonLevels; ++K) 2616 if (Loops[K]) 2617 Bound[K].DirSet = Dependence::DVEntry::ALL; 2618 return 1; 2619 } 2620 2621 if (Level > CommonLevels) { 2622 // record result 2623 LLVM_DEBUG(dbgs() << "\t["); 2624 for (unsigned K = 1; K <= CommonLevels; ++K) { 2625 if (Loops[K]) { 2626 Bound[K].DirSet |= Bound[K].Direction; 2627 #ifndef NDEBUG 2628 switch (Bound[K].Direction) { 2629 case Dependence::DVEntry::LT: 2630 LLVM_DEBUG(dbgs() << " <"); 2631 break; 2632 case Dependence::DVEntry::EQ: 2633 LLVM_DEBUG(dbgs() << " ="); 2634 break; 2635 case Dependence::DVEntry::GT: 2636 LLVM_DEBUG(dbgs() << " >"); 2637 break; 2638 case Dependence::DVEntry::ALL: 2639 LLVM_DEBUG(dbgs() << " *"); 2640 break; 2641 default: 2642 llvm_unreachable("unexpected Bound[K].Direction"); 2643 } 2644 #endif 2645 } 2646 } 2647 LLVM_DEBUG(dbgs() << " ]\n"); 2648 return 1; 2649 } 2650 if (Loops[Level]) { 2651 if (Level > DepthExpanded) { 2652 DepthExpanded = Level; 2653 // compute bounds for <, =, > at current level 2654 findBoundsLT(A, B, Bound, Level); 2655 findBoundsGT(A, B, Bound, Level); 2656 findBoundsEQ(A, B, Bound, Level); 2657 #ifndef NDEBUG 2658 LLVM_DEBUG(dbgs() << "\tBound for level = " << Level << '\n'); 2659 LLVM_DEBUG(dbgs() << "\t <\t"); 2660 if (Bound[Level].Lower[Dependence::DVEntry::LT]) 2661 LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::LT] 2662 << '\t'); 2663 else 2664 LLVM_DEBUG(dbgs() << "-inf\t"); 2665 if (Bound[Level].Upper[Dependence::DVEntry::LT]) 2666 LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::LT] 2667 << '\n'); 2668 else 2669 LLVM_DEBUG(dbgs() << "+inf\n"); 2670 LLVM_DEBUG(dbgs() << "\t =\t"); 2671 if (Bound[Level].Lower[Dependence::DVEntry::EQ]) 2672 LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::EQ] 2673 << '\t'); 2674 else 2675 LLVM_DEBUG(dbgs() << "-inf\t"); 2676 if (Bound[Level].Upper[Dependence::DVEntry::EQ]) 2677 LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::EQ] 2678 << '\n'); 2679 else 2680 LLVM_DEBUG(dbgs() << "+inf\n"); 2681 LLVM_DEBUG(dbgs() << "\t >\t"); 2682 if (Bound[Level].Lower[Dependence::DVEntry::GT]) 2683 LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::GT] 2684 << '\t'); 2685 else 2686 LLVM_DEBUG(dbgs() << "-inf\t"); 2687 if (Bound[Level].Upper[Dependence::DVEntry::GT]) 2688 LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::GT] 2689 << '\n'); 2690 else 2691 LLVM_DEBUG(dbgs() << "+inf\n"); 2692 #endif 2693 } 2694 2695 unsigned NewDeps = 0; 2696 2697 // test bounds for <, *, *, ... 2698 if (testBounds(Dependence::DVEntry::LT, Level, Bound, Delta)) 2699 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2700 Loops, DepthExpanded, Delta); 2701 2702 // Test bounds for =, *, *, ... 2703 if (testBounds(Dependence::DVEntry::EQ, Level, Bound, Delta)) 2704 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2705 Loops, DepthExpanded, Delta); 2706 2707 // test bounds for >, *, *, ... 2708 if (testBounds(Dependence::DVEntry::GT, Level, Bound, Delta)) 2709 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2710 Loops, DepthExpanded, Delta); 2711 2712 Bound[Level].Direction = Dependence::DVEntry::ALL; 2713 return NewDeps; 2714 } 2715 else 2716 return exploreDirections(Level + 1, A, B, Bound, Loops, DepthExpanded, Delta); 2717 } 2718 2719 2720 // Returns true iff the current bounds are plausible. 2721 bool DependenceInfo::testBounds(unsigned char DirKind, unsigned Level, 2722 BoundInfo *Bound, const SCEV *Delta) const { 2723 Bound[Level].Direction = DirKind; 2724 if (const SCEV *LowerBound = getLowerBound(Bound)) 2725 if (isKnownPredicate(CmpInst::ICMP_SGT, LowerBound, Delta)) 2726 return false; 2727 if (const SCEV *UpperBound = getUpperBound(Bound)) 2728 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, UpperBound)) 2729 return false; 2730 return true; 2731 } 2732 2733 2734 // Computes the upper and lower bounds for level K 2735 // using the * direction. Records them in Bound. 2736 // Wolfe gives the equations 2737 // 2738 // LB^*_k = (A^-_k - B^+_k)(U_k - L_k) + (A_k - B_k)L_k 2739 // UB^*_k = (A^+_k - B^-_k)(U_k - L_k) + (A_k - B_k)L_k 2740 // 2741 // Since we normalize loops, we can simplify these equations to 2742 // 2743 // LB^*_k = (A^-_k - B^+_k)U_k 2744 // UB^*_k = (A^+_k - B^-_k)U_k 2745 // 2746 // We must be careful to handle the case where the upper bound is unknown. 2747 // Note that the lower bound is always <= 0 2748 // and the upper bound is always >= 0. 2749 void DependenceInfo::findBoundsALL(CoefficientInfo *A, CoefficientInfo *B, 2750 BoundInfo *Bound, unsigned K) const { 2751 Bound[K].Lower[Dependence::DVEntry::ALL] = nullptr; // Default value = -infinity. 2752 Bound[K].Upper[Dependence::DVEntry::ALL] = nullptr; // Default value = +infinity. 2753 if (Bound[K].Iterations) { 2754 Bound[K].Lower[Dependence::DVEntry::ALL] = 2755 SE->getMulExpr(SE->getMinusSCEV(A[K].NegPart, B[K].PosPart), 2756 Bound[K].Iterations); 2757 Bound[K].Upper[Dependence::DVEntry::ALL] = 2758 SE->getMulExpr(SE->getMinusSCEV(A[K].PosPart, B[K].NegPart), 2759 Bound[K].Iterations); 2760 } 2761 else { 2762 // If the difference is 0, we won't need to know the number of iterations. 2763 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].NegPart, B[K].PosPart)) 2764 Bound[K].Lower[Dependence::DVEntry::ALL] = 2765 SE->getZero(A[K].Coeff->getType()); 2766 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].PosPart, B[K].NegPart)) 2767 Bound[K].Upper[Dependence::DVEntry::ALL] = 2768 SE->getZero(A[K].Coeff->getType()); 2769 } 2770 } 2771 2772 2773 // Computes the upper and lower bounds for level K 2774 // using the = direction. Records them in Bound. 2775 // Wolfe gives the equations 2776 // 2777 // LB^=_k = (A_k - B_k)^- (U_k - L_k) + (A_k - B_k)L_k 2778 // UB^=_k = (A_k - B_k)^+ (U_k - L_k) + (A_k - B_k)L_k 2779 // 2780 // Since we normalize loops, we can simplify these equations to 2781 // 2782 // LB^=_k = (A_k - B_k)^- U_k 2783 // UB^=_k = (A_k - B_k)^+ U_k 2784 // 2785 // We must be careful to handle the case where the upper bound is unknown. 2786 // Note that the lower bound is always <= 0 2787 // and the upper bound is always >= 0. 2788 void DependenceInfo::findBoundsEQ(CoefficientInfo *A, CoefficientInfo *B, 2789 BoundInfo *Bound, unsigned K) const { 2790 Bound[K].Lower[Dependence::DVEntry::EQ] = nullptr; // Default value = -infinity. 2791 Bound[K].Upper[Dependence::DVEntry::EQ] = nullptr; // Default value = +infinity. 2792 if (Bound[K].Iterations) { 2793 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); 2794 const SCEV *NegativePart = getNegativePart(Delta); 2795 Bound[K].Lower[Dependence::DVEntry::EQ] = 2796 SE->getMulExpr(NegativePart, Bound[K].Iterations); 2797 const SCEV *PositivePart = getPositivePart(Delta); 2798 Bound[K].Upper[Dependence::DVEntry::EQ] = 2799 SE->getMulExpr(PositivePart, Bound[K].Iterations); 2800 } 2801 else { 2802 // If the positive/negative part of the difference is 0, 2803 // we won't need to know the number of iterations. 2804 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); 2805 const SCEV *NegativePart = getNegativePart(Delta); 2806 if (NegativePart->isZero()) 2807 Bound[K].Lower[Dependence::DVEntry::EQ] = NegativePart; // Zero 2808 const SCEV *PositivePart = getPositivePart(Delta); 2809 if (PositivePart->isZero()) 2810 Bound[K].Upper[Dependence::DVEntry::EQ] = PositivePart; // Zero 2811 } 2812 } 2813 2814 2815 // Computes the upper and lower bounds for level K 2816 // using the < direction. Records them in Bound. 2817 // Wolfe gives the equations 2818 // 2819 // LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2820 // UB^<_k = (A^+_k - B_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2821 // 2822 // Since we normalize loops, we can simplify these equations to 2823 // 2824 // LB^<_k = (A^-_k - B_k)^- (U_k - 1) - B_k 2825 // UB^<_k = (A^+_k - B_k)^+ (U_k - 1) - B_k 2826 // 2827 // We must be careful to handle the case where the upper bound is unknown. 2828 void DependenceInfo::findBoundsLT(CoefficientInfo *A, CoefficientInfo *B, 2829 BoundInfo *Bound, unsigned K) const { 2830 Bound[K].Lower[Dependence::DVEntry::LT] = nullptr; // Default value = -infinity. 2831 Bound[K].Upper[Dependence::DVEntry::LT] = nullptr; // Default value = +infinity. 2832 if (Bound[K].Iterations) { 2833 const SCEV *Iter_1 = SE->getMinusSCEV( 2834 Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType())); 2835 const SCEV *NegPart = 2836 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); 2837 Bound[K].Lower[Dependence::DVEntry::LT] = 2838 SE->getMinusSCEV(SE->getMulExpr(NegPart, Iter_1), B[K].Coeff); 2839 const SCEV *PosPart = 2840 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); 2841 Bound[K].Upper[Dependence::DVEntry::LT] = 2842 SE->getMinusSCEV(SE->getMulExpr(PosPart, Iter_1), B[K].Coeff); 2843 } 2844 else { 2845 // If the positive/negative part of the difference is 0, 2846 // we won't need to know the number of iterations. 2847 const SCEV *NegPart = 2848 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); 2849 if (NegPart->isZero()) 2850 Bound[K].Lower[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); 2851 const SCEV *PosPart = 2852 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); 2853 if (PosPart->isZero()) 2854 Bound[K].Upper[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); 2855 } 2856 } 2857 2858 2859 // Computes the upper and lower bounds for level K 2860 // using the > direction. Records them in Bound. 2861 // Wolfe gives the equations 2862 // 2863 // LB^>_k = (A_k - B^+_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k 2864 // UB^>_k = (A_k - B^-_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k 2865 // 2866 // Since we normalize loops, we can simplify these equations to 2867 // 2868 // LB^>_k = (A_k - B^+_k)^- (U_k - 1) + A_k 2869 // UB^>_k = (A_k - B^-_k)^+ (U_k - 1) + A_k 2870 // 2871 // We must be careful to handle the case where the upper bound is unknown. 2872 void DependenceInfo::findBoundsGT(CoefficientInfo *A, CoefficientInfo *B, 2873 BoundInfo *Bound, unsigned K) const { 2874 Bound[K].Lower[Dependence::DVEntry::GT] = nullptr; // Default value = -infinity. 2875 Bound[K].Upper[Dependence::DVEntry::GT] = nullptr; // Default value = +infinity. 2876 if (Bound[K].Iterations) { 2877 const SCEV *Iter_1 = SE->getMinusSCEV( 2878 Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType())); 2879 const SCEV *NegPart = 2880 getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); 2881 Bound[K].Lower[Dependence::DVEntry::GT] = 2882 SE->getAddExpr(SE->getMulExpr(NegPart, Iter_1), A[K].Coeff); 2883 const SCEV *PosPart = 2884 getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); 2885 Bound[K].Upper[Dependence::DVEntry::GT] = 2886 SE->getAddExpr(SE->getMulExpr(PosPart, Iter_1), A[K].Coeff); 2887 } 2888 else { 2889 // If the positive/negative part of the difference is 0, 2890 // we won't need to know the number of iterations. 2891 const SCEV *NegPart = getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); 2892 if (NegPart->isZero()) 2893 Bound[K].Lower[Dependence::DVEntry::GT] = A[K].Coeff; 2894 const SCEV *PosPart = getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); 2895 if (PosPart->isZero()) 2896 Bound[K].Upper[Dependence::DVEntry::GT] = A[K].Coeff; 2897 } 2898 } 2899 2900 2901 // X^+ = max(X, 0) 2902 const SCEV *DependenceInfo::getPositivePart(const SCEV *X) const { 2903 return SE->getSMaxExpr(X, SE->getZero(X->getType())); 2904 } 2905 2906 2907 // X^- = min(X, 0) 2908 const SCEV *DependenceInfo::getNegativePart(const SCEV *X) const { 2909 return SE->getSMinExpr(X, SE->getZero(X->getType())); 2910 } 2911 2912 2913 // Walks through the subscript, 2914 // collecting each coefficient, the associated loop bounds, 2915 // and recording its positive and negative parts for later use. 2916 DependenceInfo::CoefficientInfo * 2917 DependenceInfo::collectCoeffInfo(const SCEV *Subscript, bool SrcFlag, 2918 const SCEV *&Constant) const { 2919 const SCEV *Zero = SE->getZero(Subscript->getType()); 2920 CoefficientInfo *CI = new CoefficientInfo[MaxLevels + 1]; 2921 for (unsigned K = 1; K <= MaxLevels; ++K) { 2922 CI[K].Coeff = Zero; 2923 CI[K].PosPart = Zero; 2924 CI[K].NegPart = Zero; 2925 CI[K].Iterations = nullptr; 2926 } 2927 while (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Subscript)) { 2928 const Loop *L = AddRec->getLoop(); 2929 unsigned K = SrcFlag ? mapSrcLoop(L) : mapDstLoop(L); 2930 CI[K].Coeff = AddRec->getStepRecurrence(*SE); 2931 CI[K].PosPart = getPositivePart(CI[K].Coeff); 2932 CI[K].NegPart = getNegativePart(CI[K].Coeff); 2933 CI[K].Iterations = collectUpperBound(L, Subscript->getType()); 2934 Subscript = AddRec->getStart(); 2935 } 2936 Constant = Subscript; 2937 #ifndef NDEBUG 2938 LLVM_DEBUG(dbgs() << "\tCoefficient Info\n"); 2939 for (unsigned K = 1; K <= MaxLevels; ++K) { 2940 LLVM_DEBUG(dbgs() << "\t " << K << "\t" << *CI[K].Coeff); 2941 LLVM_DEBUG(dbgs() << "\tPos Part = "); 2942 LLVM_DEBUG(dbgs() << *CI[K].PosPart); 2943 LLVM_DEBUG(dbgs() << "\tNeg Part = "); 2944 LLVM_DEBUG(dbgs() << *CI[K].NegPart); 2945 LLVM_DEBUG(dbgs() << "\tUpper Bound = "); 2946 if (CI[K].Iterations) 2947 LLVM_DEBUG(dbgs() << *CI[K].Iterations); 2948 else 2949 LLVM_DEBUG(dbgs() << "+inf"); 2950 LLVM_DEBUG(dbgs() << '\n'); 2951 } 2952 LLVM_DEBUG(dbgs() << "\t Constant = " << *Subscript << '\n'); 2953 #endif 2954 return CI; 2955 } 2956 2957 2958 // Looks through all the bounds info and 2959 // computes the lower bound given the current direction settings 2960 // at each level. If the lower bound for any level is -inf, 2961 // the result is -inf. 2962 const SCEV *DependenceInfo::getLowerBound(BoundInfo *Bound) const { 2963 const SCEV *Sum = Bound[1].Lower[Bound[1].Direction]; 2964 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { 2965 if (Bound[K].Lower[Bound[K].Direction]) 2966 Sum = SE->getAddExpr(Sum, Bound[K].Lower[Bound[K].Direction]); 2967 else 2968 Sum = nullptr; 2969 } 2970 return Sum; 2971 } 2972 2973 2974 // Looks through all the bounds info and 2975 // computes the upper bound given the current direction settings 2976 // at each level. If the upper bound at any level is +inf, 2977 // the result is +inf. 2978 const SCEV *DependenceInfo::getUpperBound(BoundInfo *Bound) const { 2979 const SCEV *Sum = Bound[1].Upper[Bound[1].Direction]; 2980 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { 2981 if (Bound[K].Upper[Bound[K].Direction]) 2982 Sum = SE->getAddExpr(Sum, Bound[K].Upper[Bound[K].Direction]); 2983 else 2984 Sum = nullptr; 2985 } 2986 return Sum; 2987 } 2988 2989 2990 //===----------------------------------------------------------------------===// 2991 // Constraint manipulation for Delta test. 2992 2993 // Given a linear SCEV, 2994 // return the coefficient (the step) 2995 // corresponding to the specified loop. 2996 // If there isn't one, return 0. 2997 // For example, given a*i + b*j + c*k, finding the coefficient 2998 // corresponding to the j loop would yield b. 2999 const SCEV *DependenceInfo::findCoefficient(const SCEV *Expr, 3000 const Loop *TargetLoop) const { 3001 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 3002 if (!AddRec) 3003 return SE->getZero(Expr->getType()); 3004 if (AddRec->getLoop() == TargetLoop) 3005 return AddRec->getStepRecurrence(*SE); 3006 return findCoefficient(AddRec->getStart(), TargetLoop); 3007 } 3008 3009 3010 // Given a linear SCEV, 3011 // return the SCEV given by zeroing out the coefficient 3012 // corresponding to the specified loop. 3013 // For example, given a*i + b*j + c*k, zeroing the coefficient 3014 // corresponding to the j loop would yield a*i + c*k. 3015 const SCEV *DependenceInfo::zeroCoefficient(const SCEV *Expr, 3016 const Loop *TargetLoop) const { 3017 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 3018 if (!AddRec) 3019 return Expr; // ignore 3020 if (AddRec->getLoop() == TargetLoop) 3021 return AddRec->getStart(); 3022 return SE->getAddRecExpr(zeroCoefficient(AddRec->getStart(), TargetLoop), 3023 AddRec->getStepRecurrence(*SE), 3024 AddRec->getLoop(), 3025 AddRec->getNoWrapFlags()); 3026 } 3027 3028 3029 // Given a linear SCEV Expr, 3030 // return the SCEV given by adding some Value to the 3031 // coefficient corresponding to the specified TargetLoop. 3032 // For example, given a*i + b*j + c*k, adding 1 to the coefficient 3033 // corresponding to the j loop would yield a*i + (b+1)*j + c*k. 3034 const SCEV *DependenceInfo::addToCoefficient(const SCEV *Expr, 3035 const Loop *TargetLoop, 3036 const SCEV *Value) const { 3037 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 3038 if (!AddRec) // create a new addRec 3039 return SE->getAddRecExpr(Expr, 3040 Value, 3041 TargetLoop, 3042 SCEV::FlagAnyWrap); // Worst case, with no info. 3043 if (AddRec->getLoop() == TargetLoop) { 3044 const SCEV *Sum = SE->getAddExpr(AddRec->getStepRecurrence(*SE), Value); 3045 if (Sum->isZero()) 3046 return AddRec->getStart(); 3047 return SE->getAddRecExpr(AddRec->getStart(), 3048 Sum, 3049 AddRec->getLoop(), 3050 AddRec->getNoWrapFlags()); 3051 } 3052 if (SE->isLoopInvariant(AddRec, TargetLoop)) 3053 return SE->getAddRecExpr(AddRec, Value, TargetLoop, SCEV::FlagAnyWrap); 3054 return SE->getAddRecExpr( 3055 addToCoefficient(AddRec->getStart(), TargetLoop, Value), 3056 AddRec->getStepRecurrence(*SE), AddRec->getLoop(), 3057 AddRec->getNoWrapFlags()); 3058 } 3059 3060 3061 // Review the constraints, looking for opportunities 3062 // to simplify a subscript pair (Src and Dst). 3063 // Return true if some simplification occurs. 3064 // If the simplification isn't exact (that is, if it is conservative 3065 // in terms of dependence), set consistent to false. 3066 // Corresponds to Figure 5 from the paper 3067 // 3068 // Practical Dependence Testing 3069 // Goff, Kennedy, Tseng 3070 // PLDI 1991 3071 bool DependenceInfo::propagate(const SCEV *&Src, const SCEV *&Dst, 3072 SmallBitVector &Loops, 3073 SmallVectorImpl<Constraint> &Constraints, 3074 bool &Consistent) { 3075 bool Result = false; 3076 for (unsigned LI : Loops.set_bits()) { 3077 LLVM_DEBUG(dbgs() << "\t Constraint[" << LI << "] is"); 3078 LLVM_DEBUG(Constraints[LI].dump(dbgs())); 3079 if (Constraints[LI].isDistance()) 3080 Result |= propagateDistance(Src, Dst, Constraints[LI], Consistent); 3081 else if (Constraints[LI].isLine()) 3082 Result |= propagateLine(Src, Dst, Constraints[LI], Consistent); 3083 else if (Constraints[LI].isPoint()) 3084 Result |= propagatePoint(Src, Dst, Constraints[LI]); 3085 } 3086 return Result; 3087 } 3088 3089 3090 // Attempt to propagate a distance 3091 // constraint into a subscript pair (Src and Dst). 3092 // Return true if some simplification occurs. 3093 // If the simplification isn't exact (that is, if it is conservative 3094 // in terms of dependence), set consistent to false. 3095 bool DependenceInfo::propagateDistance(const SCEV *&Src, const SCEV *&Dst, 3096 Constraint &CurConstraint, 3097 bool &Consistent) { 3098 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3099 LLVM_DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); 3100 const SCEV *A_K = findCoefficient(Src, CurLoop); 3101 if (A_K->isZero()) 3102 return false; 3103 const SCEV *DA_K = SE->getMulExpr(A_K, CurConstraint.getD()); 3104 Src = SE->getMinusSCEV(Src, DA_K); 3105 Src = zeroCoefficient(Src, CurLoop); 3106 LLVM_DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); 3107 LLVM_DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); 3108 Dst = addToCoefficient(Dst, CurLoop, SE->getNegativeSCEV(A_K)); 3109 LLVM_DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); 3110 if (!findCoefficient(Dst, CurLoop)->isZero()) 3111 Consistent = false; 3112 return true; 3113 } 3114 3115 3116 // Attempt to propagate a line 3117 // constraint into a subscript pair (Src and Dst). 3118 // Return true if some simplification occurs. 3119 // If the simplification isn't exact (that is, if it is conservative 3120 // in terms of dependence), set consistent to false. 3121 bool DependenceInfo::propagateLine(const SCEV *&Src, const SCEV *&Dst, 3122 Constraint &CurConstraint, 3123 bool &Consistent) { 3124 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3125 const SCEV *A = CurConstraint.getA(); 3126 const SCEV *B = CurConstraint.getB(); 3127 const SCEV *C = CurConstraint.getC(); 3128 LLVM_DEBUG(dbgs() << "\t\tA = " << *A << ", B = " << *B << ", C = " << *C 3129 << "\n"); 3130 LLVM_DEBUG(dbgs() << "\t\tSrc = " << *Src << "\n"); 3131 LLVM_DEBUG(dbgs() << "\t\tDst = " << *Dst << "\n"); 3132 if (A->isZero()) { 3133 const SCEVConstant *Bconst = dyn_cast<SCEVConstant>(B); 3134 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3135 if (!Bconst || !Cconst) return false; 3136 APInt Beta = Bconst->getAPInt(); 3137 APInt Charlie = Cconst->getAPInt(); 3138 APInt CdivB = Charlie.sdiv(Beta); 3139 assert(Charlie.srem(Beta) == 0 && "C should be evenly divisible by B"); 3140 const SCEV *AP_K = findCoefficient(Dst, CurLoop); 3141 // Src = SE->getAddExpr(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); 3142 Src = SE->getMinusSCEV(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); 3143 Dst = zeroCoefficient(Dst, CurLoop); 3144 if (!findCoefficient(Src, CurLoop)->isZero()) 3145 Consistent = false; 3146 } 3147 else if (B->isZero()) { 3148 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); 3149 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3150 if (!Aconst || !Cconst) return false; 3151 APInt Alpha = Aconst->getAPInt(); 3152 APInt Charlie = Cconst->getAPInt(); 3153 APInt CdivA = Charlie.sdiv(Alpha); 3154 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); 3155 const SCEV *A_K = findCoefficient(Src, CurLoop); 3156 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); 3157 Src = zeroCoefficient(Src, CurLoop); 3158 if (!findCoefficient(Dst, CurLoop)->isZero()) 3159 Consistent = false; 3160 } 3161 else if (isKnownPredicate(CmpInst::ICMP_EQ, A, B)) { 3162 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); 3163 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3164 if (!Aconst || !Cconst) return false; 3165 APInt Alpha = Aconst->getAPInt(); 3166 APInt Charlie = Cconst->getAPInt(); 3167 APInt CdivA = Charlie.sdiv(Alpha); 3168 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); 3169 const SCEV *A_K = findCoefficient(Src, CurLoop); 3170 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); 3171 Src = zeroCoefficient(Src, CurLoop); 3172 Dst = addToCoefficient(Dst, CurLoop, A_K); 3173 if (!findCoefficient(Dst, CurLoop)->isZero()) 3174 Consistent = false; 3175 } 3176 else { 3177 // paper is incorrect here, or perhaps just misleading 3178 const SCEV *A_K = findCoefficient(Src, CurLoop); 3179 Src = SE->getMulExpr(Src, A); 3180 Dst = SE->getMulExpr(Dst, A); 3181 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, C)); 3182 Src = zeroCoefficient(Src, CurLoop); 3183 Dst = addToCoefficient(Dst, CurLoop, SE->getMulExpr(A_K, B)); 3184 if (!findCoefficient(Dst, CurLoop)->isZero()) 3185 Consistent = false; 3186 } 3187 LLVM_DEBUG(dbgs() << "\t\tnew Src = " << *Src << "\n"); 3188 LLVM_DEBUG(dbgs() << "\t\tnew Dst = " << *Dst << "\n"); 3189 return true; 3190 } 3191 3192 3193 // Attempt to propagate a point 3194 // constraint into a subscript pair (Src and Dst). 3195 // Return true if some simplification occurs. 3196 bool DependenceInfo::propagatePoint(const SCEV *&Src, const SCEV *&Dst, 3197 Constraint &CurConstraint) { 3198 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3199 const SCEV *A_K = findCoefficient(Src, CurLoop); 3200 const SCEV *AP_K = findCoefficient(Dst, CurLoop); 3201 const SCEV *XA_K = SE->getMulExpr(A_K, CurConstraint.getX()); 3202 const SCEV *YAP_K = SE->getMulExpr(AP_K, CurConstraint.getY()); 3203 LLVM_DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); 3204 Src = SE->getAddExpr(Src, SE->getMinusSCEV(XA_K, YAP_K)); 3205 Src = zeroCoefficient(Src, CurLoop); 3206 LLVM_DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); 3207 LLVM_DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); 3208 Dst = zeroCoefficient(Dst, CurLoop); 3209 LLVM_DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); 3210 return true; 3211 } 3212 3213 3214 // Update direction vector entry based on the current constraint. 3215 void DependenceInfo::updateDirection(Dependence::DVEntry &Level, 3216 const Constraint &CurConstraint) const { 3217 LLVM_DEBUG(dbgs() << "\tUpdate direction, constraint ="); 3218 LLVM_DEBUG(CurConstraint.dump(dbgs())); 3219 if (CurConstraint.isAny()) 3220 ; // use defaults 3221 else if (CurConstraint.isDistance()) { 3222 // this one is consistent, the others aren't 3223 Level.Scalar = false; 3224 Level.Distance = CurConstraint.getD(); 3225 unsigned NewDirection = Dependence::DVEntry::NONE; 3226 if (!SE->isKnownNonZero(Level.Distance)) // if may be zero 3227 NewDirection = Dependence::DVEntry::EQ; 3228 if (!SE->isKnownNonPositive(Level.Distance)) // if may be positive 3229 NewDirection |= Dependence::DVEntry::LT; 3230 if (!SE->isKnownNonNegative(Level.Distance)) // if may be negative 3231 NewDirection |= Dependence::DVEntry::GT; 3232 Level.Direction &= NewDirection; 3233 } 3234 else if (CurConstraint.isLine()) { 3235 Level.Scalar = false; 3236 Level.Distance = nullptr; 3237 // direction should be accurate 3238 } 3239 else if (CurConstraint.isPoint()) { 3240 Level.Scalar = false; 3241 Level.Distance = nullptr; 3242 unsigned NewDirection = Dependence::DVEntry::NONE; 3243 if (!isKnownPredicate(CmpInst::ICMP_NE, 3244 CurConstraint.getY(), 3245 CurConstraint.getX())) 3246 // if X may be = Y 3247 NewDirection |= Dependence::DVEntry::EQ; 3248 if (!isKnownPredicate(CmpInst::ICMP_SLE, 3249 CurConstraint.getY(), 3250 CurConstraint.getX())) 3251 // if Y may be > X 3252 NewDirection |= Dependence::DVEntry::LT; 3253 if (!isKnownPredicate(CmpInst::ICMP_SGE, 3254 CurConstraint.getY(), 3255 CurConstraint.getX())) 3256 // if Y may be < X 3257 NewDirection |= Dependence::DVEntry::GT; 3258 Level.Direction &= NewDirection; 3259 } 3260 else 3261 llvm_unreachable("constraint has unexpected kind"); 3262 } 3263 3264 /// Check if we can delinearize the subscripts. If the SCEVs representing the 3265 /// source and destination array references are recurrences on a nested loop, 3266 /// this function flattens the nested recurrences into separate recurrences 3267 /// for each loop level. 3268 bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst, 3269 SmallVectorImpl<Subscript> &Pair) { 3270 assert(isLoadOrStore(Src) && "instruction is not load or store"); 3271 assert(isLoadOrStore(Dst) && "instruction is not load or store"); 3272 Value *SrcPtr = getLoadStorePointerOperand(Src); 3273 Value *DstPtr = getLoadStorePointerOperand(Dst); 3274 Loop *SrcLoop = LI->getLoopFor(Src->getParent()); 3275 Loop *DstLoop = LI->getLoopFor(Dst->getParent()); 3276 const SCEV *SrcAccessFn = SE->getSCEVAtScope(SrcPtr, SrcLoop); 3277 const SCEV *DstAccessFn = SE->getSCEVAtScope(DstPtr, DstLoop); 3278 const SCEVUnknown *SrcBase = 3279 dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn)); 3280 const SCEVUnknown *DstBase = 3281 dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn)); 3282 3283 if (!SrcBase || !DstBase || SrcBase != DstBase) 3284 return false; 3285 3286 SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts; 3287 3288 if (!tryDelinearizeFixedSize(Src, Dst, SrcAccessFn, DstAccessFn, 3289 SrcSubscripts, DstSubscripts) && 3290 !tryDelinearizeParametricSize(Src, Dst, SrcAccessFn, DstAccessFn, 3291 SrcSubscripts, DstSubscripts)) 3292 return false; 3293 3294 int Size = SrcSubscripts.size(); 3295 LLVM_DEBUG({ 3296 dbgs() << "\nSrcSubscripts: "; 3297 for (int I = 0; I < Size; I++) 3298 dbgs() << *SrcSubscripts[I]; 3299 dbgs() << "\nDstSubscripts: "; 3300 for (int I = 0; I < Size; I++) 3301 dbgs() << *DstSubscripts[I]; 3302 }); 3303 3304 // The delinearization transforms a single-subscript MIV dependence test into 3305 // a multi-subscript SIV dependence test that is easier to compute. So we 3306 // resize Pair to contain as many pairs of subscripts as the delinearization 3307 // has found, and then initialize the pairs following the delinearization. 3308 Pair.resize(Size); 3309 for (int I = 0; I < Size; ++I) { 3310 Pair[I].Src = SrcSubscripts[I]; 3311 Pair[I].Dst = DstSubscripts[I]; 3312 unifySubscriptType(&Pair[I]); 3313 } 3314 3315 return true; 3316 } 3317 3318 bool DependenceInfo::tryDelinearizeFixedSize( 3319 Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn, 3320 const SCEV *DstAccessFn, SmallVectorImpl<const SCEV *> &SrcSubscripts, 3321 SmallVectorImpl<const SCEV *> &DstSubscripts) { 3322 3323 Value *SrcPtr = getLoadStorePointerOperand(Src); 3324 Value *DstPtr = getLoadStorePointerOperand(Dst); 3325 const SCEVUnknown *SrcBase = 3326 dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn)); 3327 const SCEVUnknown *DstBase = 3328 dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn)); 3329 assert(SrcBase && DstBase && SrcBase == DstBase && 3330 "expected src and dst scev unknowns to be equal"); 3331 3332 // Check the simple case where the array dimensions are fixed size. 3333 auto *SrcGEP = dyn_cast<GetElementPtrInst>(SrcPtr); 3334 auto *DstGEP = dyn_cast<GetElementPtrInst>(DstPtr); 3335 if (!SrcGEP || !DstGEP) 3336 return false; 3337 3338 SmallVector<int, 4> SrcSizes, DstSizes; 3339 getIndexExpressionsFromGEP(*SE, SrcGEP, SrcSubscripts, SrcSizes); 3340 getIndexExpressionsFromGEP(*SE, DstGEP, DstSubscripts, DstSizes); 3341 3342 // Check that the two size arrays are non-empty and equal in length and 3343 // value. 3344 if (SrcSizes.empty() || SrcSubscripts.size() <= 1 || 3345 SrcSizes.size() != DstSizes.size() || 3346 !std::equal(SrcSizes.begin(), SrcSizes.end(), DstSizes.begin())) { 3347 SrcSubscripts.clear(); 3348 DstSubscripts.clear(); 3349 return false; 3350 } 3351 3352 Value *SrcBasePtr = SrcGEP->getOperand(0)->stripPointerCasts(); 3353 Value *DstBasePtr = DstGEP->getOperand(0)->stripPointerCasts(); 3354 3355 // Check that for identical base pointers we do not miss index offsets 3356 // that have been added before this GEP is applied. 3357 if (SrcBasePtr != SrcBase->getValue() || DstBasePtr != DstBase->getValue()) { 3358 SrcSubscripts.clear(); 3359 DstSubscripts.clear(); 3360 return false; 3361 } 3362 3363 assert(SrcSubscripts.size() == DstSubscripts.size() && 3364 SrcSubscripts.size() == SrcSizes.size() + 1 && 3365 "Expected equal number of entries in the list of sizes and " 3366 "subscripts."); 3367 3368 // In general we cannot safely assume that the subscripts recovered from GEPs 3369 // are in the range of values defined for their corresponding array 3370 // dimensions. For example some C language usage/interpretation make it 3371 // impossible to verify this at compile-time. As such we can only delinearize 3372 // iff the subscripts are positive and are less than the range of the 3373 // dimension. 3374 if (!DisableDelinearizationChecks) { 3375 auto AllIndiciesInRange = [&](SmallVector<int, 4> &DimensionSizes, 3376 SmallVectorImpl<const SCEV *> &Subscripts, 3377 Value *Ptr) { 3378 size_t SSize = Subscripts.size(); 3379 for (size_t I = 1; I < SSize; ++I) { 3380 const SCEV *S = Subscripts[I]; 3381 if (!isKnownNonNegative(S, Ptr)) 3382 return false; 3383 if (auto *SType = dyn_cast<IntegerType>(S->getType())) { 3384 const SCEV *Range = SE->getConstant( 3385 ConstantInt::get(SType, DimensionSizes[I - 1], false)); 3386 if (!isKnownLessThan(S, Range)) 3387 return false; 3388 } 3389 } 3390 return true; 3391 }; 3392 3393 if (!AllIndiciesInRange(SrcSizes, SrcSubscripts, SrcPtr) || 3394 !AllIndiciesInRange(DstSizes, DstSubscripts, DstPtr)) { 3395 SrcSubscripts.clear(); 3396 DstSubscripts.clear(); 3397 return false; 3398 } 3399 } 3400 LLVM_DEBUG({ 3401 dbgs() << "Delinearized subscripts of fixed-size array\n" 3402 << "SrcGEP:" << *SrcGEP << "\n" 3403 << "DstGEP:" << *DstGEP << "\n"; 3404 }); 3405 return true; 3406 } 3407 3408 bool DependenceInfo::tryDelinearizeParametricSize( 3409 Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn, 3410 const SCEV *DstAccessFn, SmallVectorImpl<const SCEV *> &SrcSubscripts, 3411 SmallVectorImpl<const SCEV *> &DstSubscripts) { 3412 3413 Value *SrcPtr = getLoadStorePointerOperand(Src); 3414 Value *DstPtr = getLoadStorePointerOperand(Dst); 3415 const SCEVUnknown *SrcBase = 3416 dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn)); 3417 const SCEVUnknown *DstBase = 3418 dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn)); 3419 assert(SrcBase && DstBase && SrcBase == DstBase && 3420 "expected src and dst scev unknowns to be equal"); 3421 3422 const SCEV *ElementSize = SE->getElementSize(Src); 3423 if (ElementSize != SE->getElementSize(Dst)) 3424 return false; 3425 3426 const SCEV *SrcSCEV = SE->getMinusSCEV(SrcAccessFn, SrcBase); 3427 const SCEV *DstSCEV = SE->getMinusSCEV(DstAccessFn, DstBase); 3428 3429 const SCEVAddRecExpr *SrcAR = dyn_cast<SCEVAddRecExpr>(SrcSCEV); 3430 const SCEVAddRecExpr *DstAR = dyn_cast<SCEVAddRecExpr>(DstSCEV); 3431 if (!SrcAR || !DstAR || !SrcAR->isAffine() || !DstAR->isAffine()) 3432 return false; 3433 3434 // First step: collect parametric terms in both array references. 3435 SmallVector<const SCEV *, 4> Terms; 3436 collectParametricTerms(*SE, SrcAR, Terms); 3437 collectParametricTerms(*SE, DstAR, Terms); 3438 3439 // Second step: find subscript sizes. 3440 SmallVector<const SCEV *, 4> Sizes; 3441 findArrayDimensions(*SE, Terms, Sizes, ElementSize); 3442 3443 // Third step: compute the access functions for each subscript. 3444 computeAccessFunctions(*SE, SrcAR, SrcSubscripts, Sizes); 3445 computeAccessFunctions(*SE, DstAR, DstSubscripts, Sizes); 3446 3447 // Fail when there is only a subscript: that's a linearized access function. 3448 if (SrcSubscripts.size() < 2 || DstSubscripts.size() < 2 || 3449 SrcSubscripts.size() != DstSubscripts.size()) 3450 return false; 3451 3452 size_t Size = SrcSubscripts.size(); 3453 3454 // Statically check that the array bounds are in-range. The first subscript we 3455 // don't have a size for and it cannot overflow into another subscript, so is 3456 // always safe. The others need to be 0 <= subscript[i] < bound, for both src 3457 // and dst. 3458 // FIXME: It may be better to record these sizes and add them as constraints 3459 // to the dependency checks. 3460 if (!DisableDelinearizationChecks) 3461 for (size_t I = 1; I < Size; ++I) { 3462 if (!isKnownNonNegative(SrcSubscripts[I], SrcPtr)) 3463 return false; 3464 3465 if (!isKnownLessThan(SrcSubscripts[I], Sizes[I - 1])) 3466 return false; 3467 3468 if (!isKnownNonNegative(DstSubscripts[I], DstPtr)) 3469 return false; 3470 3471 if (!isKnownLessThan(DstSubscripts[I], Sizes[I - 1])) 3472 return false; 3473 } 3474 3475 return true; 3476 } 3477 3478 //===----------------------------------------------------------------------===// 3479 3480 #ifndef NDEBUG 3481 // For debugging purposes, dump a small bit vector to dbgs(). 3482 static void dumpSmallBitVector(SmallBitVector &BV) { 3483 dbgs() << "{"; 3484 for (unsigned VI : BV.set_bits()) { 3485 dbgs() << VI; 3486 if (BV.find_next(VI) >= 0) 3487 dbgs() << ' '; 3488 } 3489 dbgs() << "}\n"; 3490 } 3491 #endif 3492 3493 bool DependenceInfo::invalidate(Function &F, const PreservedAnalyses &PA, 3494 FunctionAnalysisManager::Invalidator &Inv) { 3495 // Check if the analysis itself has been invalidated. 3496 auto PAC = PA.getChecker<DependenceAnalysis>(); 3497 if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>()) 3498 return true; 3499 3500 // Check transitive dependencies. 3501 return Inv.invalidate<AAManager>(F, PA) || 3502 Inv.invalidate<ScalarEvolutionAnalysis>(F, PA) || 3503 Inv.invalidate<LoopAnalysis>(F, PA); 3504 } 3505 3506 // depends - 3507 // Returns NULL if there is no dependence. 3508 // Otherwise, return a Dependence with as many details as possible. 3509 // Corresponds to Section 3.1 in the paper 3510 // 3511 // Practical Dependence Testing 3512 // Goff, Kennedy, Tseng 3513 // PLDI 1991 3514 // 3515 // Care is required to keep the routine below, getSplitIteration(), 3516 // up to date with respect to this routine. 3517 std::unique_ptr<Dependence> 3518 DependenceInfo::depends(Instruction *Src, Instruction *Dst, 3519 bool PossiblyLoopIndependent) { 3520 if (Src == Dst) 3521 PossiblyLoopIndependent = false; 3522 3523 if (!(Src->mayReadOrWriteMemory() && Dst->mayReadOrWriteMemory())) 3524 // if both instructions don't reference memory, there's no dependence 3525 return nullptr; 3526 3527 if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) { 3528 // can only analyze simple loads and stores, i.e., no calls, invokes, etc. 3529 LLVM_DEBUG(dbgs() << "can only handle simple loads and stores\n"); 3530 return std::make_unique<Dependence>(Src, Dst); 3531 } 3532 3533 assert(isLoadOrStore(Src) && "instruction is not load or store"); 3534 assert(isLoadOrStore(Dst) && "instruction is not load or store"); 3535 Value *SrcPtr = getLoadStorePointerOperand(Src); 3536 Value *DstPtr = getLoadStorePointerOperand(Dst); 3537 3538 switch (underlyingObjectsAlias(AA, F->getParent()->getDataLayout(), 3539 MemoryLocation::get(Dst), 3540 MemoryLocation::get(Src))) { 3541 case AliasResult::MayAlias: 3542 case AliasResult::PartialAlias: 3543 // cannot analyse objects if we don't understand their aliasing. 3544 LLVM_DEBUG(dbgs() << "can't analyze may or partial alias\n"); 3545 return std::make_unique<Dependence>(Src, Dst); 3546 case AliasResult::NoAlias: 3547 // If the objects noalias, they are distinct, accesses are independent. 3548 LLVM_DEBUG(dbgs() << "no alias\n"); 3549 return nullptr; 3550 case AliasResult::MustAlias: 3551 break; // The underlying objects alias; test accesses for dependence. 3552 } 3553 3554 // establish loop nesting levels 3555 establishNestingLevels(Src, Dst); 3556 LLVM_DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n"); 3557 LLVM_DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n"); 3558 3559 FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels); 3560 ++TotalArrayPairs; 3561 3562 unsigned Pairs = 1; 3563 SmallVector<Subscript, 2> Pair(Pairs); 3564 const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); 3565 const SCEV *DstSCEV = SE->getSCEV(DstPtr); 3566 LLVM_DEBUG(dbgs() << " SrcSCEV = " << *SrcSCEV << "\n"); 3567 LLVM_DEBUG(dbgs() << " DstSCEV = " << *DstSCEV << "\n"); 3568 if (SE->getPointerBase(SrcSCEV) != SE->getPointerBase(DstSCEV)) { 3569 // If two pointers have different bases, trying to analyze indexes won't 3570 // work; we can't compare them to each other. This can happen, for example, 3571 // if one is produced by an LCSSA PHI node. 3572 // 3573 // We check this upfront so we don't crash in cases where getMinusSCEV() 3574 // returns a SCEVCouldNotCompute. 3575 LLVM_DEBUG(dbgs() << "can't analyze SCEV with different pointer base\n"); 3576 return std::make_unique<Dependence>(Src, Dst); 3577 } 3578 Pair[0].Src = SrcSCEV; 3579 Pair[0].Dst = DstSCEV; 3580 3581 if (Delinearize) { 3582 if (tryDelinearize(Src, Dst, Pair)) { 3583 LLVM_DEBUG(dbgs() << " delinearized\n"); 3584 Pairs = Pair.size(); 3585 } 3586 } 3587 3588 for (unsigned P = 0; P < Pairs; ++P) { 3589 Pair[P].Loops.resize(MaxLevels + 1); 3590 Pair[P].GroupLoops.resize(MaxLevels + 1); 3591 Pair[P].Group.resize(Pairs); 3592 removeMatchingExtensions(&Pair[P]); 3593 Pair[P].Classification = 3594 classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), 3595 Pair[P].Dst, LI->getLoopFor(Dst->getParent()), 3596 Pair[P].Loops); 3597 Pair[P].GroupLoops = Pair[P].Loops; 3598 Pair[P].Group.set(P); 3599 LLVM_DEBUG(dbgs() << " subscript " << P << "\n"); 3600 LLVM_DEBUG(dbgs() << "\tsrc = " << *Pair[P].Src << "\n"); 3601 LLVM_DEBUG(dbgs() << "\tdst = " << *Pair[P].Dst << "\n"); 3602 LLVM_DEBUG(dbgs() << "\tclass = " << Pair[P].Classification << "\n"); 3603 LLVM_DEBUG(dbgs() << "\tloops = "); 3604 LLVM_DEBUG(dumpSmallBitVector(Pair[P].Loops)); 3605 } 3606 3607 SmallBitVector Separable(Pairs); 3608 SmallBitVector Coupled(Pairs); 3609 3610 // Partition subscripts into separable and minimally-coupled groups 3611 // Algorithm in paper is algorithmically better; 3612 // this may be faster in practice. Check someday. 3613 // 3614 // Here's an example of how it works. Consider this code: 3615 // 3616 // for (i = ...) { 3617 // for (j = ...) { 3618 // for (k = ...) { 3619 // for (l = ...) { 3620 // for (m = ...) { 3621 // A[i][j][k][m] = ...; 3622 // ... = A[0][j][l][i + j]; 3623 // } 3624 // } 3625 // } 3626 // } 3627 // } 3628 // 3629 // There are 4 subscripts here: 3630 // 0 [i] and [0] 3631 // 1 [j] and [j] 3632 // 2 [k] and [l] 3633 // 3 [m] and [i + j] 3634 // 3635 // We've already classified each subscript pair as ZIV, SIV, etc., 3636 // and collected all the loops mentioned by pair P in Pair[P].Loops. 3637 // In addition, we've initialized Pair[P].GroupLoops to Pair[P].Loops 3638 // and set Pair[P].Group = {P}. 3639 // 3640 // Src Dst Classification Loops GroupLoops Group 3641 // 0 [i] [0] SIV {1} {1} {0} 3642 // 1 [j] [j] SIV {2} {2} {1} 3643 // 2 [k] [l] RDIV {3,4} {3,4} {2} 3644 // 3 [m] [i + j] MIV {1,2,5} {1,2,5} {3} 3645 // 3646 // For each subscript SI 0 .. 3, we consider each remaining subscript, SJ. 3647 // So, 0 is compared against 1, 2, and 3; 1 is compared against 2 and 3, etc. 3648 // 3649 // We begin by comparing 0 and 1. The intersection of the GroupLoops is empty. 3650 // Next, 0 and 2. Again, the intersection of their GroupLoops is empty. 3651 // Next 0 and 3. The intersection of their GroupLoop = {1}, not empty, 3652 // so Pair[3].Group = {0,3} and Done = false (that is, 0 will not be added 3653 // to either Separable or Coupled). 3654 // 3655 // Next, we consider 1 and 2. The intersection of the GroupLoops is empty. 3656 // Next, 1 and 3. The intersection of their GroupLoops = {2}, not empty, 3657 // so Pair[3].Group = {0, 1, 3} and Done = false. 3658 // 3659 // Next, we compare 2 against 3. The intersection of the GroupLoops is empty. 3660 // Since Done remains true, we add 2 to the set of Separable pairs. 3661 // 3662 // Finally, we consider 3. There's nothing to compare it with, 3663 // so Done remains true and we add it to the Coupled set. 3664 // Pair[3].Group = {0, 1, 3} and GroupLoops = {1, 2, 5}. 3665 // 3666 // In the end, we've got 1 separable subscript and 1 coupled group. 3667 for (unsigned SI = 0; SI < Pairs; ++SI) { 3668 if (Pair[SI].Classification == Subscript::NonLinear) { 3669 // ignore these, but collect loops for later 3670 ++NonlinearSubscriptPairs; 3671 collectCommonLoops(Pair[SI].Src, 3672 LI->getLoopFor(Src->getParent()), 3673 Pair[SI].Loops); 3674 collectCommonLoops(Pair[SI].Dst, 3675 LI->getLoopFor(Dst->getParent()), 3676 Pair[SI].Loops); 3677 Result.Consistent = false; 3678 } else if (Pair[SI].Classification == Subscript::ZIV) { 3679 // always separable 3680 Separable.set(SI); 3681 } 3682 else { 3683 // SIV, RDIV, or MIV, so check for coupled group 3684 bool Done = true; 3685 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { 3686 SmallBitVector Intersection = Pair[SI].GroupLoops; 3687 Intersection &= Pair[SJ].GroupLoops; 3688 if (Intersection.any()) { 3689 // accumulate set of all the loops in group 3690 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; 3691 // accumulate set of all subscripts in group 3692 Pair[SJ].Group |= Pair[SI].Group; 3693 Done = false; 3694 } 3695 } 3696 if (Done) { 3697 if (Pair[SI].Group.count() == 1) { 3698 Separable.set(SI); 3699 ++SeparableSubscriptPairs; 3700 } 3701 else { 3702 Coupled.set(SI); 3703 ++CoupledSubscriptPairs; 3704 } 3705 } 3706 } 3707 } 3708 3709 LLVM_DEBUG(dbgs() << " Separable = "); 3710 LLVM_DEBUG(dumpSmallBitVector(Separable)); 3711 LLVM_DEBUG(dbgs() << " Coupled = "); 3712 LLVM_DEBUG(dumpSmallBitVector(Coupled)); 3713 3714 Constraint NewConstraint; 3715 NewConstraint.setAny(SE); 3716 3717 // test separable subscripts 3718 for (unsigned SI : Separable.set_bits()) { 3719 LLVM_DEBUG(dbgs() << "testing subscript " << SI); 3720 switch (Pair[SI].Classification) { 3721 case Subscript::ZIV: 3722 LLVM_DEBUG(dbgs() << ", ZIV\n"); 3723 if (testZIV(Pair[SI].Src, Pair[SI].Dst, Result)) 3724 return nullptr; 3725 break; 3726 case Subscript::SIV: { 3727 LLVM_DEBUG(dbgs() << ", SIV\n"); 3728 unsigned Level; 3729 const SCEV *SplitIter = nullptr; 3730 if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level, Result, NewConstraint, 3731 SplitIter)) 3732 return nullptr; 3733 break; 3734 } 3735 case Subscript::RDIV: 3736 LLVM_DEBUG(dbgs() << ", RDIV\n"); 3737 if (testRDIV(Pair[SI].Src, Pair[SI].Dst, Result)) 3738 return nullptr; 3739 break; 3740 case Subscript::MIV: 3741 LLVM_DEBUG(dbgs() << ", MIV\n"); 3742 if (testMIV(Pair[SI].Src, Pair[SI].Dst, Pair[SI].Loops, Result)) 3743 return nullptr; 3744 break; 3745 default: 3746 llvm_unreachable("subscript has unexpected classification"); 3747 } 3748 } 3749 3750 if (Coupled.count()) { 3751 // test coupled subscript groups 3752 LLVM_DEBUG(dbgs() << "starting on coupled subscripts\n"); 3753 LLVM_DEBUG(dbgs() << "MaxLevels + 1 = " << MaxLevels + 1 << "\n"); 3754 SmallVector<Constraint, 4> Constraints(MaxLevels + 1); 3755 for (unsigned II = 0; II <= MaxLevels; ++II) 3756 Constraints[II].setAny(SE); 3757 for (unsigned SI : Coupled.set_bits()) { 3758 LLVM_DEBUG(dbgs() << "testing subscript group " << SI << " { "); 3759 SmallBitVector Group(Pair[SI].Group); 3760 SmallBitVector Sivs(Pairs); 3761 SmallBitVector Mivs(Pairs); 3762 SmallBitVector ConstrainedLevels(MaxLevels + 1); 3763 SmallVector<Subscript *, 4> PairsInGroup; 3764 for (unsigned SJ : Group.set_bits()) { 3765 LLVM_DEBUG(dbgs() << SJ << " "); 3766 if (Pair[SJ].Classification == Subscript::SIV) 3767 Sivs.set(SJ); 3768 else 3769 Mivs.set(SJ); 3770 PairsInGroup.push_back(&Pair[SJ]); 3771 } 3772 unifySubscriptType(PairsInGroup); 3773 LLVM_DEBUG(dbgs() << "}\n"); 3774 while (Sivs.any()) { 3775 bool Changed = false; 3776 for (unsigned SJ : Sivs.set_bits()) { 3777 LLVM_DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n"); 3778 // SJ is an SIV subscript that's part of the current coupled group 3779 unsigned Level; 3780 const SCEV *SplitIter = nullptr; 3781 LLVM_DEBUG(dbgs() << "SIV\n"); 3782 if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, Result, NewConstraint, 3783 SplitIter)) 3784 return nullptr; 3785 ConstrainedLevels.set(Level); 3786 if (intersectConstraints(&Constraints[Level], &NewConstraint)) { 3787 if (Constraints[Level].isEmpty()) { 3788 ++DeltaIndependence; 3789 return nullptr; 3790 } 3791 Changed = true; 3792 } 3793 Sivs.reset(SJ); 3794 } 3795 if (Changed) { 3796 // propagate, possibly creating new SIVs and ZIVs 3797 LLVM_DEBUG(dbgs() << " propagating\n"); 3798 LLVM_DEBUG(dbgs() << "\tMivs = "); 3799 LLVM_DEBUG(dumpSmallBitVector(Mivs)); 3800 for (unsigned SJ : Mivs.set_bits()) { 3801 // SJ is an MIV subscript that's part of the current coupled group 3802 LLVM_DEBUG(dbgs() << "\tSJ = " << SJ << "\n"); 3803 if (propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, 3804 Constraints, Result.Consistent)) { 3805 LLVM_DEBUG(dbgs() << "\t Changed\n"); 3806 ++DeltaPropagations; 3807 Pair[SJ].Classification = 3808 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), 3809 Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), 3810 Pair[SJ].Loops); 3811 switch (Pair[SJ].Classification) { 3812 case Subscript::ZIV: 3813 LLVM_DEBUG(dbgs() << "ZIV\n"); 3814 if (testZIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) 3815 return nullptr; 3816 Mivs.reset(SJ); 3817 break; 3818 case Subscript::SIV: 3819 Sivs.set(SJ); 3820 Mivs.reset(SJ); 3821 break; 3822 case Subscript::RDIV: 3823 case Subscript::MIV: 3824 break; 3825 default: 3826 llvm_unreachable("bad subscript classification"); 3827 } 3828 } 3829 } 3830 } 3831 } 3832 3833 // test & propagate remaining RDIVs 3834 for (unsigned SJ : Mivs.set_bits()) { 3835 if (Pair[SJ].Classification == Subscript::RDIV) { 3836 LLVM_DEBUG(dbgs() << "RDIV test\n"); 3837 if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) 3838 return nullptr; 3839 // I don't yet understand how to propagate RDIV results 3840 Mivs.reset(SJ); 3841 } 3842 } 3843 3844 // test remaining MIVs 3845 // This code is temporary. 3846 // Better to somehow test all remaining subscripts simultaneously. 3847 for (unsigned SJ : Mivs.set_bits()) { 3848 if (Pair[SJ].Classification == Subscript::MIV) { 3849 LLVM_DEBUG(dbgs() << "MIV test\n"); 3850 if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Result)) 3851 return nullptr; 3852 } 3853 else 3854 llvm_unreachable("expected only MIV subscripts at this point"); 3855 } 3856 3857 // update Result.DV from constraint vector 3858 LLVM_DEBUG(dbgs() << " updating\n"); 3859 for (unsigned SJ : ConstrainedLevels.set_bits()) { 3860 if (SJ > CommonLevels) 3861 break; 3862 updateDirection(Result.DV[SJ - 1], Constraints[SJ]); 3863 if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE) 3864 return nullptr; 3865 } 3866 } 3867 } 3868 3869 // Make sure the Scalar flags are set correctly. 3870 SmallBitVector CompleteLoops(MaxLevels + 1); 3871 for (unsigned SI = 0; SI < Pairs; ++SI) 3872 CompleteLoops |= Pair[SI].Loops; 3873 for (unsigned II = 1; II <= CommonLevels; ++II) 3874 if (CompleteLoops[II]) 3875 Result.DV[II - 1].Scalar = false; 3876 3877 if (PossiblyLoopIndependent) { 3878 // Make sure the LoopIndependent flag is set correctly. 3879 // All directions must include equal, otherwise no 3880 // loop-independent dependence is possible. 3881 for (unsigned II = 1; II <= CommonLevels; ++II) { 3882 if (!(Result.getDirection(II) & Dependence::DVEntry::EQ)) { 3883 Result.LoopIndependent = false; 3884 break; 3885 } 3886 } 3887 } 3888 else { 3889 // On the other hand, if all directions are equal and there's no 3890 // loop-independent dependence possible, then no dependence exists. 3891 bool AllEqual = true; 3892 for (unsigned II = 1; II <= CommonLevels; ++II) { 3893 if (Result.getDirection(II) != Dependence::DVEntry::EQ) { 3894 AllEqual = false; 3895 break; 3896 } 3897 } 3898 if (AllEqual) 3899 return nullptr; 3900 } 3901 3902 return std::make_unique<FullDependence>(std::move(Result)); 3903 } 3904 3905 //===----------------------------------------------------------------------===// 3906 // getSplitIteration - 3907 // Rather than spend rarely-used space recording the splitting iteration 3908 // during the Weak-Crossing SIV test, we re-compute it on demand. 3909 // The re-computation is basically a repeat of the entire dependence test, 3910 // though simplified since we know that the dependence exists. 3911 // It's tedious, since we must go through all propagations, etc. 3912 // 3913 // Care is required to keep this code up to date with respect to the routine 3914 // above, depends(). 3915 // 3916 // Generally, the dependence analyzer will be used to build 3917 // a dependence graph for a function (basically a map from instructions 3918 // to dependences). Looking for cycles in the graph shows us loops 3919 // that cannot be trivially vectorized/parallelized. 3920 // 3921 // We can try to improve the situation by examining all the dependences 3922 // that make up the cycle, looking for ones we can break. 3923 // Sometimes, peeling the first or last iteration of a loop will break 3924 // dependences, and we've got flags for those possibilities. 3925 // Sometimes, splitting a loop at some other iteration will do the trick, 3926 // and we've got a flag for that case. Rather than waste the space to 3927 // record the exact iteration (since we rarely know), we provide 3928 // a method that calculates the iteration. It's a drag that it must work 3929 // from scratch, but wonderful in that it's possible. 3930 // 3931 // Here's an example: 3932 // 3933 // for (i = 0; i < 10; i++) 3934 // A[i] = ... 3935 // ... = A[11 - i] 3936 // 3937 // There's a loop-carried flow dependence from the store to the load, 3938 // found by the weak-crossing SIV test. The dependence will have a flag, 3939 // indicating that the dependence can be broken by splitting the loop. 3940 // Calling getSplitIteration will return 5. 3941 // Splitting the loop breaks the dependence, like so: 3942 // 3943 // for (i = 0; i <= 5; i++) 3944 // A[i] = ... 3945 // ... = A[11 - i] 3946 // for (i = 6; i < 10; i++) 3947 // A[i] = ... 3948 // ... = A[11 - i] 3949 // 3950 // breaks the dependence and allows us to vectorize/parallelize 3951 // both loops. 3952 const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep, 3953 unsigned SplitLevel) { 3954 assert(Dep.isSplitable(SplitLevel) && 3955 "Dep should be splitable at SplitLevel"); 3956 Instruction *Src = Dep.getSrc(); 3957 Instruction *Dst = Dep.getDst(); 3958 assert(Src->mayReadFromMemory() || Src->mayWriteToMemory()); 3959 assert(Dst->mayReadFromMemory() || Dst->mayWriteToMemory()); 3960 assert(isLoadOrStore(Src)); 3961 assert(isLoadOrStore(Dst)); 3962 Value *SrcPtr = getLoadStorePointerOperand(Src); 3963 Value *DstPtr = getLoadStorePointerOperand(Dst); 3964 assert(underlyingObjectsAlias( 3965 AA, F->getParent()->getDataLayout(), MemoryLocation::get(Dst), 3966 MemoryLocation::get(Src)) == AliasResult::MustAlias); 3967 3968 // establish loop nesting levels 3969 establishNestingLevels(Src, Dst); 3970 3971 FullDependence Result(Src, Dst, false, CommonLevels); 3972 3973 unsigned Pairs = 1; 3974 SmallVector<Subscript, 2> Pair(Pairs); 3975 const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); 3976 const SCEV *DstSCEV = SE->getSCEV(DstPtr); 3977 Pair[0].Src = SrcSCEV; 3978 Pair[0].Dst = DstSCEV; 3979 3980 if (Delinearize) { 3981 if (tryDelinearize(Src, Dst, Pair)) { 3982 LLVM_DEBUG(dbgs() << " delinearized\n"); 3983 Pairs = Pair.size(); 3984 } 3985 } 3986 3987 for (unsigned P = 0; P < Pairs; ++P) { 3988 Pair[P].Loops.resize(MaxLevels + 1); 3989 Pair[P].GroupLoops.resize(MaxLevels + 1); 3990 Pair[P].Group.resize(Pairs); 3991 removeMatchingExtensions(&Pair[P]); 3992 Pair[P].Classification = 3993 classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), 3994 Pair[P].Dst, LI->getLoopFor(Dst->getParent()), 3995 Pair[P].Loops); 3996 Pair[P].GroupLoops = Pair[P].Loops; 3997 Pair[P].Group.set(P); 3998 } 3999 4000 SmallBitVector Separable(Pairs); 4001 SmallBitVector Coupled(Pairs); 4002 4003 // partition subscripts into separable and minimally-coupled groups 4004 for (unsigned SI = 0; SI < Pairs; ++SI) { 4005 if (Pair[SI].Classification == Subscript::NonLinear) { 4006 // ignore these, but collect loops for later 4007 collectCommonLoops(Pair[SI].Src, 4008 LI->getLoopFor(Src->getParent()), 4009 Pair[SI].Loops); 4010 collectCommonLoops(Pair[SI].Dst, 4011 LI->getLoopFor(Dst->getParent()), 4012 Pair[SI].Loops); 4013 Result.Consistent = false; 4014 } 4015 else if (Pair[SI].Classification == Subscript::ZIV) 4016 Separable.set(SI); 4017 else { 4018 // SIV, RDIV, or MIV, so check for coupled group 4019 bool Done = true; 4020 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { 4021 SmallBitVector Intersection = Pair[SI].GroupLoops; 4022 Intersection &= Pair[SJ].GroupLoops; 4023 if (Intersection.any()) { 4024 // accumulate set of all the loops in group 4025 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; 4026 // accumulate set of all subscripts in group 4027 Pair[SJ].Group |= Pair[SI].Group; 4028 Done = false; 4029 } 4030 } 4031 if (Done) { 4032 if (Pair[SI].Group.count() == 1) 4033 Separable.set(SI); 4034 else 4035 Coupled.set(SI); 4036 } 4037 } 4038 } 4039 4040 Constraint NewConstraint; 4041 NewConstraint.setAny(SE); 4042 4043 // test separable subscripts 4044 for (unsigned SI : Separable.set_bits()) { 4045 switch (Pair[SI].Classification) { 4046 case Subscript::SIV: { 4047 unsigned Level; 4048 const SCEV *SplitIter = nullptr; 4049 (void) testSIV(Pair[SI].Src, Pair[SI].Dst, Level, 4050 Result, NewConstraint, SplitIter); 4051 if (Level == SplitLevel) { 4052 assert(SplitIter != nullptr); 4053 return SplitIter; 4054 } 4055 break; 4056 } 4057 case Subscript::ZIV: 4058 case Subscript::RDIV: 4059 case Subscript::MIV: 4060 break; 4061 default: 4062 llvm_unreachable("subscript has unexpected classification"); 4063 } 4064 } 4065 4066 if (Coupled.count()) { 4067 // test coupled subscript groups 4068 SmallVector<Constraint, 4> Constraints(MaxLevels + 1); 4069 for (unsigned II = 0; II <= MaxLevels; ++II) 4070 Constraints[II].setAny(SE); 4071 for (unsigned SI : Coupled.set_bits()) { 4072 SmallBitVector Group(Pair[SI].Group); 4073 SmallBitVector Sivs(Pairs); 4074 SmallBitVector Mivs(Pairs); 4075 SmallBitVector ConstrainedLevels(MaxLevels + 1); 4076 for (unsigned SJ : Group.set_bits()) { 4077 if (Pair[SJ].Classification == Subscript::SIV) 4078 Sivs.set(SJ); 4079 else 4080 Mivs.set(SJ); 4081 } 4082 while (Sivs.any()) { 4083 bool Changed = false; 4084 for (unsigned SJ : Sivs.set_bits()) { 4085 // SJ is an SIV subscript that's part of the current coupled group 4086 unsigned Level; 4087 const SCEV *SplitIter = nullptr; 4088 (void) testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, 4089 Result, NewConstraint, SplitIter); 4090 if (Level == SplitLevel && SplitIter) 4091 return SplitIter; 4092 ConstrainedLevels.set(Level); 4093 if (intersectConstraints(&Constraints[Level], &NewConstraint)) 4094 Changed = true; 4095 Sivs.reset(SJ); 4096 } 4097 if (Changed) { 4098 // propagate, possibly creating new SIVs and ZIVs 4099 for (unsigned SJ : Mivs.set_bits()) { 4100 // SJ is an MIV subscript that's part of the current coupled group 4101 if (propagate(Pair[SJ].Src, Pair[SJ].Dst, 4102 Pair[SJ].Loops, Constraints, Result.Consistent)) { 4103 Pair[SJ].Classification = 4104 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), 4105 Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), 4106 Pair[SJ].Loops); 4107 switch (Pair[SJ].Classification) { 4108 case Subscript::ZIV: 4109 Mivs.reset(SJ); 4110 break; 4111 case Subscript::SIV: 4112 Sivs.set(SJ); 4113 Mivs.reset(SJ); 4114 break; 4115 case Subscript::RDIV: 4116 case Subscript::MIV: 4117 break; 4118 default: 4119 llvm_unreachable("bad subscript classification"); 4120 } 4121 } 4122 } 4123 } 4124 } 4125 } 4126 } 4127 llvm_unreachable("somehow reached end of routine"); 4128 return nullptr; 4129 } 4130