1 //===-- DependenceAnalysis.cpp - DA Implementation --------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // DependenceAnalysis is an LLVM pass that analyses dependences between memory 10 // accesses. Currently, it is an (incomplete) implementation of the approach 11 // described in 12 // 13 // Practical Dependence Testing 14 // Goff, Kennedy, Tseng 15 // PLDI 1991 16 // 17 // There's a single entry point that analyzes the dependence between a pair 18 // of memory references in a function, returning either NULL, for no dependence, 19 // or a more-or-less detailed description of the dependence between them. 20 // 21 // Currently, the implementation cannot propagate constraints between 22 // coupled RDIV subscripts and lacks a multi-subscript MIV test. 23 // Both of these are conservative weaknesses; 24 // that is, not a source of correctness problems. 25 // 26 // Since Clang linearizes some array subscripts, the dependence 27 // analysis is using SCEV->delinearize to recover the representation of multiple 28 // subscripts, and thus avoid the more expensive and less precise MIV tests. The 29 // delinearization is controlled by the flag -da-delinearize. 30 // 31 // We should pay some careful attention to the possibility of integer overflow 32 // in the implementation of the various tests. This could happen with Add, 33 // Subtract, or Multiply, with both APInt's and SCEV's. 34 // 35 // Some non-linear subscript pairs can be handled by the GCD test 36 // (and perhaps other tests). 37 // Should explore how often these things occur. 38 // 39 // Finally, it seems like certain test cases expose weaknesses in the SCEV 40 // simplification, especially in the handling of sign and zero extensions. 41 // It could be useful to spend time exploring these. 42 // 43 // Please note that this is work in progress and the interface is subject to 44 // change. 45 // 46 //===----------------------------------------------------------------------===// 47 // // 48 // In memory of Ken Kennedy, 1945 - 2007 // 49 // // 50 //===----------------------------------------------------------------------===// 51 52 #include "llvm/Analysis/DependenceAnalysis.h" 53 #include "llvm/ADT/Statistic.h" 54 #include "llvm/Analysis/AliasAnalysis.h" 55 #include "llvm/Analysis/Delinearization.h" 56 #include "llvm/Analysis/LoopInfo.h" 57 #include "llvm/Analysis/ScalarEvolution.h" 58 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 59 #include "llvm/Analysis/ValueTracking.h" 60 #include "llvm/IR/InstIterator.h" 61 #include "llvm/IR/Module.h" 62 #include "llvm/InitializePasses.h" 63 #include "llvm/Support/CommandLine.h" 64 #include "llvm/Support/Debug.h" 65 #include "llvm/Support/ErrorHandling.h" 66 #include "llvm/Support/raw_ostream.h" 67 68 using namespace llvm; 69 70 #define DEBUG_TYPE "da" 71 72 //===----------------------------------------------------------------------===// 73 // statistics 74 75 STATISTIC(TotalArrayPairs, "Array pairs tested"); 76 STATISTIC(SeparableSubscriptPairs, "Separable subscript pairs"); 77 STATISTIC(CoupledSubscriptPairs, "Coupled subscript pairs"); 78 STATISTIC(NonlinearSubscriptPairs, "Nonlinear subscript pairs"); 79 STATISTIC(ZIVapplications, "ZIV applications"); 80 STATISTIC(ZIVindependence, "ZIV independence"); 81 STATISTIC(StrongSIVapplications, "Strong SIV applications"); 82 STATISTIC(StrongSIVsuccesses, "Strong SIV successes"); 83 STATISTIC(StrongSIVindependence, "Strong SIV independence"); 84 STATISTIC(WeakCrossingSIVapplications, "Weak-Crossing SIV applications"); 85 STATISTIC(WeakCrossingSIVsuccesses, "Weak-Crossing SIV successes"); 86 STATISTIC(WeakCrossingSIVindependence, "Weak-Crossing SIV independence"); 87 STATISTIC(ExactSIVapplications, "Exact SIV applications"); 88 STATISTIC(ExactSIVsuccesses, "Exact SIV successes"); 89 STATISTIC(ExactSIVindependence, "Exact SIV independence"); 90 STATISTIC(WeakZeroSIVapplications, "Weak-Zero SIV applications"); 91 STATISTIC(WeakZeroSIVsuccesses, "Weak-Zero SIV successes"); 92 STATISTIC(WeakZeroSIVindependence, "Weak-Zero SIV independence"); 93 STATISTIC(ExactRDIVapplications, "Exact RDIV applications"); 94 STATISTIC(ExactRDIVindependence, "Exact RDIV independence"); 95 STATISTIC(SymbolicRDIVapplications, "Symbolic RDIV applications"); 96 STATISTIC(SymbolicRDIVindependence, "Symbolic RDIV independence"); 97 STATISTIC(DeltaApplications, "Delta applications"); 98 STATISTIC(DeltaSuccesses, "Delta successes"); 99 STATISTIC(DeltaIndependence, "Delta independence"); 100 STATISTIC(DeltaPropagations, "Delta propagations"); 101 STATISTIC(GCDapplications, "GCD applications"); 102 STATISTIC(GCDsuccesses, "GCD successes"); 103 STATISTIC(GCDindependence, "GCD independence"); 104 STATISTIC(BanerjeeApplications, "Banerjee applications"); 105 STATISTIC(BanerjeeIndependence, "Banerjee independence"); 106 STATISTIC(BanerjeeSuccesses, "Banerjee successes"); 107 108 static cl::opt<bool> 109 Delinearize("da-delinearize", cl::init(true), cl::Hidden, 110 cl::desc("Try to delinearize array references.")); 111 static cl::opt<bool> DisableDelinearizationChecks( 112 "da-disable-delinearization-checks", cl::Hidden, 113 cl::desc( 114 "Disable checks that try to statically verify validity of " 115 "delinearized subscripts. Enabling this option may result in incorrect " 116 "dependence vectors for languages that allow the subscript of one " 117 "dimension to underflow or overflow into another dimension.")); 118 119 static cl::opt<unsigned> MIVMaxLevelThreshold( 120 "da-miv-max-level-threshold", cl::init(7), cl::Hidden, 121 cl::desc("Maximum depth allowed for the recursive algorithm used to " 122 "explore MIV direction vectors.")); 123 124 //===----------------------------------------------------------------------===// 125 // basics 126 127 DependenceAnalysis::Result 128 DependenceAnalysis::run(Function &F, FunctionAnalysisManager &FAM) { 129 auto &AA = FAM.getResult<AAManager>(F); 130 auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F); 131 auto &LI = FAM.getResult<LoopAnalysis>(F); 132 return DependenceInfo(&F, &AA, &SE, &LI); 133 } 134 135 AnalysisKey DependenceAnalysis::Key; 136 137 INITIALIZE_PASS_BEGIN(DependenceAnalysisWrapperPass, "da", 138 "Dependence Analysis", true, true) 139 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 140 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) 141 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 142 INITIALIZE_PASS_END(DependenceAnalysisWrapperPass, "da", "Dependence Analysis", 143 true, true) 144 145 char DependenceAnalysisWrapperPass::ID = 0; 146 147 DependenceAnalysisWrapperPass::DependenceAnalysisWrapperPass() 148 : FunctionPass(ID) { 149 initializeDependenceAnalysisWrapperPassPass(*PassRegistry::getPassRegistry()); 150 } 151 152 FunctionPass *llvm::createDependenceAnalysisWrapperPass() { 153 return new DependenceAnalysisWrapperPass(); 154 } 155 156 bool DependenceAnalysisWrapperPass::runOnFunction(Function &F) { 157 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); 158 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 159 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 160 info.reset(new DependenceInfo(&F, &AA, &SE, &LI)); 161 return false; 162 } 163 164 DependenceInfo &DependenceAnalysisWrapperPass::getDI() const { return *info; } 165 166 void DependenceAnalysisWrapperPass::releaseMemory() { info.reset(); } 167 168 void DependenceAnalysisWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { 169 AU.setPreservesAll(); 170 AU.addRequiredTransitive<AAResultsWrapperPass>(); 171 AU.addRequiredTransitive<ScalarEvolutionWrapperPass>(); 172 AU.addRequiredTransitive<LoopInfoWrapperPass>(); 173 } 174 175 // Used to test the dependence analyzer. 176 // Looks through the function, noting instructions that may access memory. 177 // Calls depends() on every possible pair and prints out the result. 178 // Ignores all other instructions. 179 static void dumpExampleDependence(raw_ostream &OS, DependenceInfo *DA) { 180 auto *F = DA->getFunction(); 181 for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F); SrcI != SrcE; 182 ++SrcI) { 183 if (SrcI->mayReadOrWriteMemory()) { 184 for (inst_iterator DstI = SrcI, DstE = inst_end(F); 185 DstI != DstE; ++DstI) { 186 if (DstI->mayReadOrWriteMemory()) { 187 OS << "Src:" << *SrcI << " --> Dst:" << *DstI << "\n"; 188 OS << " da analyze - "; 189 if (auto D = DA->depends(&*SrcI, &*DstI, true)) { 190 D->dump(OS); 191 for (unsigned Level = 1; Level <= D->getLevels(); Level++) { 192 if (D->isSplitable(Level)) { 193 OS << " da analyze - split level = " << Level; 194 OS << ", iteration = " << *DA->getSplitIteration(*D, Level); 195 OS << "!\n"; 196 } 197 } 198 } 199 else 200 OS << "none!\n"; 201 } 202 } 203 } 204 } 205 } 206 207 void DependenceAnalysisWrapperPass::print(raw_ostream &OS, 208 const Module *) const { 209 dumpExampleDependence(OS, info.get()); 210 } 211 212 PreservedAnalyses 213 DependenceAnalysisPrinterPass::run(Function &F, FunctionAnalysisManager &FAM) { 214 OS << "'Dependence Analysis' for function '" << F.getName() << "':\n"; 215 dumpExampleDependence(OS, &FAM.getResult<DependenceAnalysis>(F)); 216 return PreservedAnalyses::all(); 217 } 218 219 //===----------------------------------------------------------------------===// 220 // Dependence methods 221 222 // Returns true if this is an input dependence. 223 bool Dependence::isInput() const { 224 return Src->mayReadFromMemory() && Dst->mayReadFromMemory(); 225 } 226 227 228 // Returns true if this is an output dependence. 229 bool Dependence::isOutput() const { 230 return Src->mayWriteToMemory() && Dst->mayWriteToMemory(); 231 } 232 233 234 // Returns true if this is an flow (aka true) dependence. 235 bool Dependence::isFlow() const { 236 return Src->mayWriteToMemory() && Dst->mayReadFromMemory(); 237 } 238 239 240 // Returns true if this is an anti dependence. 241 bool Dependence::isAnti() const { 242 return Src->mayReadFromMemory() && Dst->mayWriteToMemory(); 243 } 244 245 246 // Returns true if a particular level is scalar; that is, 247 // if no subscript in the source or destination mention the induction 248 // variable associated with the loop at this level. 249 // Leave this out of line, so it will serve as a virtual method anchor 250 bool Dependence::isScalar(unsigned level) const { 251 return false; 252 } 253 254 255 //===----------------------------------------------------------------------===// 256 // FullDependence methods 257 258 FullDependence::FullDependence(Instruction *Source, Instruction *Destination, 259 bool PossiblyLoopIndependent, 260 unsigned CommonLevels) 261 : Dependence(Source, Destination), Levels(CommonLevels), 262 LoopIndependent(PossiblyLoopIndependent) { 263 Consistent = true; 264 if (CommonLevels) 265 DV = std::make_unique<DVEntry[]>(CommonLevels); 266 } 267 268 // The rest are simple getters that hide the implementation. 269 270 // getDirection - Returns the direction associated with a particular level. 271 unsigned FullDependence::getDirection(unsigned Level) const { 272 assert(0 < Level && Level <= Levels && "Level out of range"); 273 return DV[Level - 1].Direction; 274 } 275 276 277 // Returns the distance (or NULL) associated with a particular level. 278 const SCEV *FullDependence::getDistance(unsigned Level) const { 279 assert(0 < Level && Level <= Levels && "Level out of range"); 280 return DV[Level - 1].Distance; 281 } 282 283 284 // Returns true if a particular level is scalar; that is, 285 // if no subscript in the source or destination mention the induction 286 // variable associated with the loop at this level. 287 bool FullDependence::isScalar(unsigned Level) const { 288 assert(0 < Level && Level <= Levels && "Level out of range"); 289 return DV[Level - 1].Scalar; 290 } 291 292 293 // Returns true if peeling the first iteration from this loop 294 // will break this dependence. 295 bool FullDependence::isPeelFirst(unsigned Level) const { 296 assert(0 < Level && Level <= Levels && "Level out of range"); 297 return DV[Level - 1].PeelFirst; 298 } 299 300 301 // Returns true if peeling the last iteration from this loop 302 // will break this dependence. 303 bool FullDependence::isPeelLast(unsigned Level) const { 304 assert(0 < Level && Level <= Levels && "Level out of range"); 305 return DV[Level - 1].PeelLast; 306 } 307 308 309 // Returns true if splitting this loop will break the dependence. 310 bool FullDependence::isSplitable(unsigned Level) const { 311 assert(0 < Level && Level <= Levels && "Level out of range"); 312 return DV[Level - 1].Splitable; 313 } 314 315 316 //===----------------------------------------------------------------------===// 317 // DependenceInfo::Constraint methods 318 319 // If constraint is a point <X, Y>, returns X. 320 // Otherwise assert. 321 const SCEV *DependenceInfo::Constraint::getX() const { 322 assert(Kind == Point && "Kind should be Point"); 323 return A; 324 } 325 326 327 // If constraint is a point <X, Y>, returns Y. 328 // Otherwise assert. 329 const SCEV *DependenceInfo::Constraint::getY() const { 330 assert(Kind == Point && "Kind should be Point"); 331 return B; 332 } 333 334 335 // If constraint is a line AX + BY = C, returns A. 336 // Otherwise assert. 337 const SCEV *DependenceInfo::Constraint::getA() const { 338 assert((Kind == Line || Kind == Distance) && 339 "Kind should be Line (or Distance)"); 340 return A; 341 } 342 343 344 // If constraint is a line AX + BY = C, returns B. 345 // Otherwise assert. 346 const SCEV *DependenceInfo::Constraint::getB() const { 347 assert((Kind == Line || Kind == Distance) && 348 "Kind should be Line (or Distance)"); 349 return B; 350 } 351 352 353 // If constraint is a line AX + BY = C, returns C. 354 // Otherwise assert. 355 const SCEV *DependenceInfo::Constraint::getC() const { 356 assert((Kind == Line || Kind == Distance) && 357 "Kind should be Line (or Distance)"); 358 return C; 359 } 360 361 362 // If constraint is a distance, returns D. 363 // Otherwise assert. 364 const SCEV *DependenceInfo::Constraint::getD() const { 365 assert(Kind == Distance && "Kind should be Distance"); 366 return SE->getNegativeSCEV(C); 367 } 368 369 370 // Returns the loop associated with this constraint. 371 const Loop *DependenceInfo::Constraint::getAssociatedLoop() const { 372 assert((Kind == Distance || Kind == Line || Kind == Point) && 373 "Kind should be Distance, Line, or Point"); 374 return AssociatedLoop; 375 } 376 377 void DependenceInfo::Constraint::setPoint(const SCEV *X, const SCEV *Y, 378 const Loop *CurLoop) { 379 Kind = Point; 380 A = X; 381 B = Y; 382 AssociatedLoop = CurLoop; 383 } 384 385 void DependenceInfo::Constraint::setLine(const SCEV *AA, const SCEV *BB, 386 const SCEV *CC, const Loop *CurLoop) { 387 Kind = Line; 388 A = AA; 389 B = BB; 390 C = CC; 391 AssociatedLoop = CurLoop; 392 } 393 394 void DependenceInfo::Constraint::setDistance(const SCEV *D, 395 const Loop *CurLoop) { 396 Kind = Distance; 397 A = SE->getOne(D->getType()); 398 B = SE->getNegativeSCEV(A); 399 C = SE->getNegativeSCEV(D); 400 AssociatedLoop = CurLoop; 401 } 402 403 void DependenceInfo::Constraint::setEmpty() { Kind = Empty; } 404 405 void DependenceInfo::Constraint::setAny(ScalarEvolution *NewSE) { 406 SE = NewSE; 407 Kind = Any; 408 } 409 410 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 411 // For debugging purposes. Dumps the constraint out to OS. 412 LLVM_DUMP_METHOD void DependenceInfo::Constraint::dump(raw_ostream &OS) const { 413 if (isEmpty()) 414 OS << " Empty\n"; 415 else if (isAny()) 416 OS << " Any\n"; 417 else if (isPoint()) 418 OS << " Point is <" << *getX() << ", " << *getY() << ">\n"; 419 else if (isDistance()) 420 OS << " Distance is " << *getD() << 421 " (" << *getA() << "*X + " << *getB() << "*Y = " << *getC() << ")\n"; 422 else if (isLine()) 423 OS << " Line is " << *getA() << "*X + " << 424 *getB() << "*Y = " << *getC() << "\n"; 425 else 426 llvm_unreachable("unknown constraint type in Constraint::dump"); 427 } 428 #endif 429 430 431 // Updates X with the intersection 432 // of the Constraints X and Y. Returns true if X has changed. 433 // Corresponds to Figure 4 from the paper 434 // 435 // Practical Dependence Testing 436 // Goff, Kennedy, Tseng 437 // PLDI 1991 438 bool DependenceInfo::intersectConstraints(Constraint *X, const Constraint *Y) { 439 ++DeltaApplications; 440 LLVM_DEBUG(dbgs() << "\tintersect constraints\n"); 441 LLVM_DEBUG(dbgs() << "\t X ="; X->dump(dbgs())); 442 LLVM_DEBUG(dbgs() << "\t Y ="; Y->dump(dbgs())); 443 assert(!Y->isPoint() && "Y must not be a Point"); 444 if (X->isAny()) { 445 if (Y->isAny()) 446 return false; 447 *X = *Y; 448 return true; 449 } 450 if (X->isEmpty()) 451 return false; 452 if (Y->isEmpty()) { 453 X->setEmpty(); 454 return true; 455 } 456 457 if (X->isDistance() && Y->isDistance()) { 458 LLVM_DEBUG(dbgs() << "\t intersect 2 distances\n"); 459 if (isKnownPredicate(CmpInst::ICMP_EQ, X->getD(), Y->getD())) 460 return false; 461 if (isKnownPredicate(CmpInst::ICMP_NE, X->getD(), Y->getD())) { 462 X->setEmpty(); 463 ++DeltaSuccesses; 464 return true; 465 } 466 // Hmmm, interesting situation. 467 // I guess if either is constant, keep it and ignore the other. 468 if (isa<SCEVConstant>(Y->getD())) { 469 *X = *Y; 470 return true; 471 } 472 return false; 473 } 474 475 // At this point, the pseudo-code in Figure 4 of the paper 476 // checks if (X->isPoint() && Y->isPoint()). 477 // This case can't occur in our implementation, 478 // since a Point can only arise as the result of intersecting 479 // two Line constraints, and the right-hand value, Y, is never 480 // the result of an intersection. 481 assert(!(X->isPoint() && Y->isPoint()) && 482 "We shouldn't ever see X->isPoint() && Y->isPoint()"); 483 484 if (X->isLine() && Y->isLine()) { 485 LLVM_DEBUG(dbgs() << "\t intersect 2 lines\n"); 486 const SCEV *Prod1 = SE->getMulExpr(X->getA(), Y->getB()); 487 const SCEV *Prod2 = SE->getMulExpr(X->getB(), Y->getA()); 488 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) { 489 // slopes are equal, so lines are parallel 490 LLVM_DEBUG(dbgs() << "\t\tsame slope\n"); 491 Prod1 = SE->getMulExpr(X->getC(), Y->getB()); 492 Prod2 = SE->getMulExpr(X->getB(), Y->getC()); 493 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) 494 return false; 495 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { 496 X->setEmpty(); 497 ++DeltaSuccesses; 498 return true; 499 } 500 return false; 501 } 502 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { 503 // slopes differ, so lines intersect 504 LLVM_DEBUG(dbgs() << "\t\tdifferent slopes\n"); 505 const SCEV *C1B2 = SE->getMulExpr(X->getC(), Y->getB()); 506 const SCEV *C1A2 = SE->getMulExpr(X->getC(), Y->getA()); 507 const SCEV *C2B1 = SE->getMulExpr(Y->getC(), X->getB()); 508 const SCEV *C2A1 = SE->getMulExpr(Y->getC(), X->getA()); 509 const SCEV *A1B2 = SE->getMulExpr(X->getA(), Y->getB()); 510 const SCEV *A2B1 = SE->getMulExpr(Y->getA(), X->getB()); 511 const SCEVConstant *C1A2_C2A1 = 512 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1A2, C2A1)); 513 const SCEVConstant *C1B2_C2B1 = 514 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1B2, C2B1)); 515 const SCEVConstant *A1B2_A2B1 = 516 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A1B2, A2B1)); 517 const SCEVConstant *A2B1_A1B2 = 518 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A2B1, A1B2)); 519 if (!C1B2_C2B1 || !C1A2_C2A1 || 520 !A1B2_A2B1 || !A2B1_A1B2) 521 return false; 522 APInt Xtop = C1B2_C2B1->getAPInt(); 523 APInt Xbot = A1B2_A2B1->getAPInt(); 524 APInt Ytop = C1A2_C2A1->getAPInt(); 525 APInt Ybot = A2B1_A1B2->getAPInt(); 526 LLVM_DEBUG(dbgs() << "\t\tXtop = " << Xtop << "\n"); 527 LLVM_DEBUG(dbgs() << "\t\tXbot = " << Xbot << "\n"); 528 LLVM_DEBUG(dbgs() << "\t\tYtop = " << Ytop << "\n"); 529 LLVM_DEBUG(dbgs() << "\t\tYbot = " << Ybot << "\n"); 530 APInt Xq = Xtop; // these need to be initialized, even 531 APInt Xr = Xtop; // though they're just going to be overwritten 532 APInt::sdivrem(Xtop, Xbot, Xq, Xr); 533 APInt Yq = Ytop; 534 APInt Yr = Ytop; 535 APInt::sdivrem(Ytop, Ybot, Yq, Yr); 536 if (Xr != 0 || Yr != 0) { 537 X->setEmpty(); 538 ++DeltaSuccesses; 539 return true; 540 } 541 LLVM_DEBUG(dbgs() << "\t\tX = " << Xq << ", Y = " << Yq << "\n"); 542 if (Xq.slt(0) || Yq.slt(0)) { 543 X->setEmpty(); 544 ++DeltaSuccesses; 545 return true; 546 } 547 if (const SCEVConstant *CUB = 548 collectConstantUpperBound(X->getAssociatedLoop(), Prod1->getType())) { 549 const APInt &UpperBound = CUB->getAPInt(); 550 LLVM_DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n"); 551 if (Xq.sgt(UpperBound) || Yq.sgt(UpperBound)) { 552 X->setEmpty(); 553 ++DeltaSuccesses; 554 return true; 555 } 556 } 557 X->setPoint(SE->getConstant(Xq), 558 SE->getConstant(Yq), 559 X->getAssociatedLoop()); 560 ++DeltaSuccesses; 561 return true; 562 } 563 return false; 564 } 565 566 // if (X->isLine() && Y->isPoint()) This case can't occur. 567 assert(!(X->isLine() && Y->isPoint()) && "This case should never occur"); 568 569 if (X->isPoint() && Y->isLine()) { 570 LLVM_DEBUG(dbgs() << "\t intersect Point and Line\n"); 571 const SCEV *A1X1 = SE->getMulExpr(Y->getA(), X->getX()); 572 const SCEV *B1Y1 = SE->getMulExpr(Y->getB(), X->getY()); 573 const SCEV *Sum = SE->getAddExpr(A1X1, B1Y1); 574 if (isKnownPredicate(CmpInst::ICMP_EQ, Sum, Y->getC())) 575 return false; 576 if (isKnownPredicate(CmpInst::ICMP_NE, Sum, Y->getC())) { 577 X->setEmpty(); 578 ++DeltaSuccesses; 579 return true; 580 } 581 return false; 582 } 583 584 llvm_unreachable("shouldn't reach the end of Constraint intersection"); 585 return false; 586 } 587 588 589 //===----------------------------------------------------------------------===// 590 // DependenceInfo methods 591 592 // For debugging purposes. Dumps a dependence to OS. 593 void Dependence::dump(raw_ostream &OS) const { 594 bool Splitable = false; 595 if (isConfused()) 596 OS << "confused"; 597 else { 598 if (isConsistent()) 599 OS << "consistent "; 600 if (isFlow()) 601 OS << "flow"; 602 else if (isOutput()) 603 OS << "output"; 604 else if (isAnti()) 605 OS << "anti"; 606 else if (isInput()) 607 OS << "input"; 608 unsigned Levels = getLevels(); 609 OS << " ["; 610 for (unsigned II = 1; II <= Levels; ++II) { 611 if (isSplitable(II)) 612 Splitable = true; 613 if (isPeelFirst(II)) 614 OS << 'p'; 615 const SCEV *Distance = getDistance(II); 616 if (Distance) 617 OS << *Distance; 618 else if (isScalar(II)) 619 OS << "S"; 620 else { 621 unsigned Direction = getDirection(II); 622 if (Direction == DVEntry::ALL) 623 OS << "*"; 624 else { 625 if (Direction & DVEntry::LT) 626 OS << "<"; 627 if (Direction & DVEntry::EQ) 628 OS << "="; 629 if (Direction & DVEntry::GT) 630 OS << ">"; 631 } 632 } 633 if (isPeelLast(II)) 634 OS << 'p'; 635 if (II < Levels) 636 OS << " "; 637 } 638 if (isLoopIndependent()) 639 OS << "|<"; 640 OS << "]"; 641 if (Splitable) 642 OS << " splitable"; 643 } 644 OS << "!\n"; 645 } 646 647 // Returns NoAlias/MayAliass/MustAlias for two memory locations based upon their 648 // underlaying objects. If LocA and LocB are known to not alias (for any reason: 649 // tbaa, non-overlapping regions etc), then it is known there is no dependecy. 650 // Otherwise the underlying objects are checked to see if they point to 651 // different identifiable objects. 652 static AliasResult underlyingObjectsAlias(AAResults *AA, 653 const DataLayout &DL, 654 const MemoryLocation &LocA, 655 const MemoryLocation &LocB) { 656 // Check the original locations (minus size) for noalias, which can happen for 657 // tbaa, incompatible underlying object locations, etc. 658 MemoryLocation LocAS = 659 MemoryLocation::getBeforeOrAfter(LocA.Ptr, LocA.AATags); 660 MemoryLocation LocBS = 661 MemoryLocation::getBeforeOrAfter(LocB.Ptr, LocB.AATags); 662 if (AA->isNoAlias(LocAS, LocBS)) 663 return AliasResult::NoAlias; 664 665 // Check the underlying objects are the same 666 const Value *AObj = getUnderlyingObject(LocA.Ptr); 667 const Value *BObj = getUnderlyingObject(LocB.Ptr); 668 669 // If the underlying objects are the same, they must alias 670 if (AObj == BObj) 671 return AliasResult::MustAlias; 672 673 // We may have hit the recursion limit for underlying objects, or have 674 // underlying objects where we don't know they will alias. 675 if (!isIdentifiedObject(AObj) || !isIdentifiedObject(BObj)) 676 return AliasResult::MayAlias; 677 678 // Otherwise we know the objects are different and both identified objects so 679 // must not alias. 680 return AliasResult::NoAlias; 681 } 682 683 684 // Returns true if the load or store can be analyzed. Atomic and volatile 685 // operations have properties which this analysis does not understand. 686 static 687 bool isLoadOrStore(const Instruction *I) { 688 if (const LoadInst *LI = dyn_cast<LoadInst>(I)) 689 return LI->isUnordered(); 690 else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) 691 return SI->isUnordered(); 692 return false; 693 } 694 695 696 // Examines the loop nesting of the Src and Dst 697 // instructions and establishes their shared loops. Sets the variables 698 // CommonLevels, SrcLevels, and MaxLevels. 699 // The source and destination instructions needn't be contained in the same 700 // loop. The routine establishNestingLevels finds the level of most deeply 701 // nested loop that contains them both, CommonLevels. An instruction that's 702 // not contained in a loop is at level = 0. MaxLevels is equal to the level 703 // of the source plus the level of the destination, minus CommonLevels. 704 // This lets us allocate vectors MaxLevels in length, with room for every 705 // distinct loop referenced in both the source and destination subscripts. 706 // The variable SrcLevels is the nesting depth of the source instruction. 707 // It's used to help calculate distinct loops referenced by the destination. 708 // Here's the map from loops to levels: 709 // 0 - unused 710 // 1 - outermost common loop 711 // ... - other common loops 712 // CommonLevels - innermost common loop 713 // ... - loops containing Src but not Dst 714 // SrcLevels - innermost loop containing Src but not Dst 715 // ... - loops containing Dst but not Src 716 // MaxLevels - innermost loops containing Dst but not Src 717 // Consider the follow code fragment: 718 // for (a = ...) { 719 // for (b = ...) { 720 // for (c = ...) { 721 // for (d = ...) { 722 // A[] = ...; 723 // } 724 // } 725 // for (e = ...) { 726 // for (f = ...) { 727 // for (g = ...) { 728 // ... = A[]; 729 // } 730 // } 731 // } 732 // } 733 // } 734 // If we're looking at the possibility of a dependence between the store 735 // to A (the Src) and the load from A (the Dst), we'll note that they 736 // have 2 loops in common, so CommonLevels will equal 2 and the direction 737 // vector for Result will have 2 entries. SrcLevels = 4 and MaxLevels = 7. 738 // A map from loop names to loop numbers would look like 739 // a - 1 740 // b - 2 = CommonLevels 741 // c - 3 742 // d - 4 = SrcLevels 743 // e - 5 744 // f - 6 745 // g - 7 = MaxLevels 746 void DependenceInfo::establishNestingLevels(const Instruction *Src, 747 const Instruction *Dst) { 748 const BasicBlock *SrcBlock = Src->getParent(); 749 const BasicBlock *DstBlock = Dst->getParent(); 750 unsigned SrcLevel = LI->getLoopDepth(SrcBlock); 751 unsigned DstLevel = LI->getLoopDepth(DstBlock); 752 const Loop *SrcLoop = LI->getLoopFor(SrcBlock); 753 const Loop *DstLoop = LI->getLoopFor(DstBlock); 754 SrcLevels = SrcLevel; 755 MaxLevels = SrcLevel + DstLevel; 756 while (SrcLevel > DstLevel) { 757 SrcLoop = SrcLoop->getParentLoop(); 758 SrcLevel--; 759 } 760 while (DstLevel > SrcLevel) { 761 DstLoop = DstLoop->getParentLoop(); 762 DstLevel--; 763 } 764 while (SrcLoop != DstLoop) { 765 SrcLoop = SrcLoop->getParentLoop(); 766 DstLoop = DstLoop->getParentLoop(); 767 SrcLevel--; 768 } 769 CommonLevels = SrcLevel; 770 MaxLevels -= CommonLevels; 771 } 772 773 774 // Given one of the loops containing the source, return 775 // its level index in our numbering scheme. 776 unsigned DependenceInfo::mapSrcLoop(const Loop *SrcLoop) const { 777 return SrcLoop->getLoopDepth(); 778 } 779 780 781 // Given one of the loops containing the destination, 782 // return its level index in our numbering scheme. 783 unsigned DependenceInfo::mapDstLoop(const Loop *DstLoop) const { 784 unsigned D = DstLoop->getLoopDepth(); 785 if (D > CommonLevels) 786 // This tries to make sure that we assign unique numbers to src and dst when 787 // the memory accesses reside in different loops that have the same depth. 788 return D - CommonLevels + SrcLevels; 789 else 790 return D; 791 } 792 793 794 // Returns true if Expression is loop invariant in LoopNest. 795 bool DependenceInfo::isLoopInvariant(const SCEV *Expression, 796 const Loop *LoopNest) const { 797 // Unlike ScalarEvolution::isLoopInvariant() we consider an access outside of 798 // any loop as invariant, because we only consier expression evaluation at a 799 // specific position (where the array access takes place), and not across the 800 // entire function. 801 if (!LoopNest) 802 return true; 803 804 // If the expression is invariant in the outermost loop of the loop nest, it 805 // is invariant anywhere in the loop nest. 806 const Loop *OutermostLoop = LoopNest; 807 while (OutermostLoop->getParentLoop()) 808 OutermostLoop = OutermostLoop->getParentLoop(); 809 return SE->isLoopInvariant(Expression, OutermostLoop); 810 } 811 812 813 814 // Finds the set of loops from the LoopNest that 815 // have a level <= CommonLevels and are referred to by the SCEV Expression. 816 void DependenceInfo::collectCommonLoops(const SCEV *Expression, 817 const Loop *LoopNest, 818 SmallBitVector &Loops) const { 819 while (LoopNest) { 820 unsigned Level = LoopNest->getLoopDepth(); 821 if (Level <= CommonLevels && !SE->isLoopInvariant(Expression, LoopNest)) 822 Loops.set(Level); 823 LoopNest = LoopNest->getParentLoop(); 824 } 825 } 826 827 void DependenceInfo::unifySubscriptType(ArrayRef<Subscript *> Pairs) { 828 829 unsigned widestWidthSeen = 0; 830 Type *widestType; 831 832 // Go through each pair and find the widest bit to which we need 833 // to extend all of them. 834 for (Subscript *Pair : Pairs) { 835 const SCEV *Src = Pair->Src; 836 const SCEV *Dst = Pair->Dst; 837 IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType()); 838 IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType()); 839 if (SrcTy == nullptr || DstTy == nullptr) { 840 assert(SrcTy == DstTy && "This function only unify integer types and " 841 "expect Src and Dst share the same type " 842 "otherwise."); 843 continue; 844 } 845 if (SrcTy->getBitWidth() > widestWidthSeen) { 846 widestWidthSeen = SrcTy->getBitWidth(); 847 widestType = SrcTy; 848 } 849 if (DstTy->getBitWidth() > widestWidthSeen) { 850 widestWidthSeen = DstTy->getBitWidth(); 851 widestType = DstTy; 852 } 853 } 854 855 856 assert(widestWidthSeen > 0); 857 858 // Now extend each pair to the widest seen. 859 for (Subscript *Pair : Pairs) { 860 const SCEV *Src = Pair->Src; 861 const SCEV *Dst = Pair->Dst; 862 IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType()); 863 IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType()); 864 if (SrcTy == nullptr || DstTy == nullptr) { 865 assert(SrcTy == DstTy && "This function only unify integer types and " 866 "expect Src and Dst share the same type " 867 "otherwise."); 868 continue; 869 } 870 if (SrcTy->getBitWidth() < widestWidthSeen) 871 // Sign-extend Src to widestType 872 Pair->Src = SE->getSignExtendExpr(Src, widestType); 873 if (DstTy->getBitWidth() < widestWidthSeen) { 874 // Sign-extend Dst to widestType 875 Pair->Dst = SE->getSignExtendExpr(Dst, widestType); 876 } 877 } 878 } 879 880 // removeMatchingExtensions - Examines a subscript pair. 881 // If the source and destination are identically sign (or zero) 882 // extended, it strips off the extension in an effect to simplify 883 // the actual analysis. 884 void DependenceInfo::removeMatchingExtensions(Subscript *Pair) { 885 const SCEV *Src = Pair->Src; 886 const SCEV *Dst = Pair->Dst; 887 if ((isa<SCEVZeroExtendExpr>(Src) && isa<SCEVZeroExtendExpr>(Dst)) || 888 (isa<SCEVSignExtendExpr>(Src) && isa<SCEVSignExtendExpr>(Dst))) { 889 const SCEVIntegralCastExpr *SrcCast = cast<SCEVIntegralCastExpr>(Src); 890 const SCEVIntegralCastExpr *DstCast = cast<SCEVIntegralCastExpr>(Dst); 891 const SCEV *SrcCastOp = SrcCast->getOperand(); 892 const SCEV *DstCastOp = DstCast->getOperand(); 893 if (SrcCastOp->getType() == DstCastOp->getType()) { 894 Pair->Src = SrcCastOp; 895 Pair->Dst = DstCastOp; 896 } 897 } 898 } 899 900 // Examine the scev and return true iff it's affine. 901 // Collect any loops mentioned in the set of "Loops". 902 bool DependenceInfo::checkSubscript(const SCEV *Expr, const Loop *LoopNest, 903 SmallBitVector &Loops, bool IsSrc) { 904 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 905 if (!AddRec) 906 return isLoopInvariant(Expr, LoopNest); 907 908 // The AddRec must depend on one of the containing loops. Otherwise, 909 // mapSrcLoop and mapDstLoop return indices outside the intended range. This 910 // can happen when a subscript in one loop references an IV from a sibling 911 // loop that could not be replaced with a concrete exit value by 912 // getSCEVAtScope. 913 const Loop *L = LoopNest; 914 while (L && AddRec->getLoop() != L) 915 L = L->getParentLoop(); 916 if (!L) 917 return false; 918 919 const SCEV *Start = AddRec->getStart(); 920 const SCEV *Step = AddRec->getStepRecurrence(*SE); 921 const SCEV *UB = SE->getBackedgeTakenCount(AddRec->getLoop()); 922 if (!isa<SCEVCouldNotCompute>(UB)) { 923 if (SE->getTypeSizeInBits(Start->getType()) < 924 SE->getTypeSizeInBits(UB->getType())) { 925 if (!AddRec->getNoWrapFlags()) 926 return false; 927 } 928 } 929 if (!isLoopInvariant(Step, LoopNest)) 930 return false; 931 if (IsSrc) 932 Loops.set(mapSrcLoop(AddRec->getLoop())); 933 else 934 Loops.set(mapDstLoop(AddRec->getLoop())); 935 return checkSubscript(Start, LoopNest, Loops, IsSrc); 936 } 937 938 // Examine the scev and return true iff it's linear. 939 // Collect any loops mentioned in the set of "Loops". 940 bool DependenceInfo::checkSrcSubscript(const SCEV *Src, const Loop *LoopNest, 941 SmallBitVector &Loops) { 942 return checkSubscript(Src, LoopNest, Loops, true); 943 } 944 945 // Examine the scev and return true iff it's linear. 946 // Collect any loops mentioned in the set of "Loops". 947 bool DependenceInfo::checkDstSubscript(const SCEV *Dst, const Loop *LoopNest, 948 SmallBitVector &Loops) { 949 return checkSubscript(Dst, LoopNest, Loops, false); 950 } 951 952 953 // Examines the subscript pair (the Src and Dst SCEVs) 954 // and classifies it as either ZIV, SIV, RDIV, MIV, or Nonlinear. 955 // Collects the associated loops in a set. 956 DependenceInfo::Subscript::ClassificationKind 957 DependenceInfo::classifyPair(const SCEV *Src, const Loop *SrcLoopNest, 958 const SCEV *Dst, const Loop *DstLoopNest, 959 SmallBitVector &Loops) { 960 SmallBitVector SrcLoops(MaxLevels + 1); 961 SmallBitVector DstLoops(MaxLevels + 1); 962 if (!checkSrcSubscript(Src, SrcLoopNest, SrcLoops)) 963 return Subscript::NonLinear; 964 if (!checkDstSubscript(Dst, DstLoopNest, DstLoops)) 965 return Subscript::NonLinear; 966 Loops = SrcLoops; 967 Loops |= DstLoops; 968 unsigned N = Loops.count(); 969 if (N == 0) 970 return Subscript::ZIV; 971 if (N == 1) 972 return Subscript::SIV; 973 if (N == 2 && (SrcLoops.count() == 0 || 974 DstLoops.count() == 0 || 975 (SrcLoops.count() == 1 && DstLoops.count() == 1))) 976 return Subscript::RDIV; 977 return Subscript::MIV; 978 } 979 980 981 // A wrapper around SCEV::isKnownPredicate. 982 // Looks for cases where we're interested in comparing for equality. 983 // If both X and Y have been identically sign or zero extended, 984 // it strips off the (confusing) extensions before invoking 985 // SCEV::isKnownPredicate. Perhaps, someday, the ScalarEvolution package 986 // will be similarly updated. 987 // 988 // If SCEV::isKnownPredicate can't prove the predicate, 989 // we try simple subtraction, which seems to help in some cases 990 // involving symbolics. 991 bool DependenceInfo::isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *X, 992 const SCEV *Y) const { 993 if (Pred == CmpInst::ICMP_EQ || 994 Pred == CmpInst::ICMP_NE) { 995 if ((isa<SCEVSignExtendExpr>(X) && 996 isa<SCEVSignExtendExpr>(Y)) || 997 (isa<SCEVZeroExtendExpr>(X) && 998 isa<SCEVZeroExtendExpr>(Y))) { 999 const SCEVIntegralCastExpr *CX = cast<SCEVIntegralCastExpr>(X); 1000 const SCEVIntegralCastExpr *CY = cast<SCEVIntegralCastExpr>(Y); 1001 const SCEV *Xop = CX->getOperand(); 1002 const SCEV *Yop = CY->getOperand(); 1003 if (Xop->getType() == Yop->getType()) { 1004 X = Xop; 1005 Y = Yop; 1006 } 1007 } 1008 } 1009 if (SE->isKnownPredicate(Pred, X, Y)) 1010 return true; 1011 // If SE->isKnownPredicate can't prove the condition, 1012 // we try the brute-force approach of subtracting 1013 // and testing the difference. 1014 // By testing with SE->isKnownPredicate first, we avoid 1015 // the possibility of overflow when the arguments are constants. 1016 const SCEV *Delta = SE->getMinusSCEV(X, Y); 1017 switch (Pred) { 1018 case CmpInst::ICMP_EQ: 1019 return Delta->isZero(); 1020 case CmpInst::ICMP_NE: 1021 return SE->isKnownNonZero(Delta); 1022 case CmpInst::ICMP_SGE: 1023 return SE->isKnownNonNegative(Delta); 1024 case CmpInst::ICMP_SLE: 1025 return SE->isKnownNonPositive(Delta); 1026 case CmpInst::ICMP_SGT: 1027 return SE->isKnownPositive(Delta); 1028 case CmpInst::ICMP_SLT: 1029 return SE->isKnownNegative(Delta); 1030 default: 1031 llvm_unreachable("unexpected predicate in isKnownPredicate"); 1032 } 1033 } 1034 1035 /// Compare to see if S is less than Size, using isKnownNegative(S - max(Size, 1)) 1036 /// with some extra checking if S is an AddRec and we can prove less-than using 1037 /// the loop bounds. 1038 bool DependenceInfo::isKnownLessThan(const SCEV *S, const SCEV *Size) const { 1039 // First unify to the same type 1040 auto *SType = dyn_cast<IntegerType>(S->getType()); 1041 auto *SizeType = dyn_cast<IntegerType>(Size->getType()); 1042 if (!SType || !SizeType) 1043 return false; 1044 Type *MaxType = 1045 (SType->getBitWidth() >= SizeType->getBitWidth()) ? SType : SizeType; 1046 S = SE->getTruncateOrZeroExtend(S, MaxType); 1047 Size = SE->getTruncateOrZeroExtend(Size, MaxType); 1048 1049 // Special check for addrecs using BE taken count 1050 const SCEV *Bound = SE->getMinusSCEV(S, Size); 1051 if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Bound)) { 1052 if (AddRec->isAffine()) { 1053 const SCEV *BECount = SE->getBackedgeTakenCount(AddRec->getLoop()); 1054 if (!isa<SCEVCouldNotCompute>(BECount)) { 1055 const SCEV *Limit = AddRec->evaluateAtIteration(BECount, *SE); 1056 if (SE->isKnownNegative(Limit)) 1057 return true; 1058 } 1059 } 1060 } 1061 1062 // Check using normal isKnownNegative 1063 const SCEV *LimitedBound = 1064 SE->getMinusSCEV(S, SE->getSMaxExpr(Size, SE->getOne(Size->getType()))); 1065 return SE->isKnownNegative(LimitedBound); 1066 } 1067 1068 bool DependenceInfo::isKnownNonNegative(const SCEV *S, const Value *Ptr) const { 1069 bool Inbounds = false; 1070 if (auto *SrcGEP = dyn_cast<GetElementPtrInst>(Ptr)) 1071 Inbounds = SrcGEP->isInBounds(); 1072 if (Inbounds) { 1073 if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) { 1074 if (AddRec->isAffine()) { 1075 // We know S is for Ptr, the operand on a load/store, so doesn't wrap. 1076 // If both parts are NonNegative, the end result will be NonNegative 1077 if (SE->isKnownNonNegative(AddRec->getStart()) && 1078 SE->isKnownNonNegative(AddRec->getOperand(1))) 1079 return true; 1080 } 1081 } 1082 } 1083 1084 return SE->isKnownNonNegative(S); 1085 } 1086 1087 // All subscripts are all the same type. 1088 // Loop bound may be smaller (e.g., a char). 1089 // Should zero extend loop bound, since it's always >= 0. 1090 // This routine collects upper bound and extends or truncates if needed. 1091 // Truncating is safe when subscripts are known not to wrap. Cases without 1092 // nowrap flags should have been rejected earlier. 1093 // Return null if no bound available. 1094 const SCEV *DependenceInfo::collectUpperBound(const Loop *L, Type *T) const { 1095 if (SE->hasLoopInvariantBackedgeTakenCount(L)) { 1096 const SCEV *UB = SE->getBackedgeTakenCount(L); 1097 return SE->getTruncateOrZeroExtend(UB, T); 1098 } 1099 return nullptr; 1100 } 1101 1102 1103 // Calls collectUpperBound(), then attempts to cast it to SCEVConstant. 1104 // If the cast fails, returns NULL. 1105 const SCEVConstant *DependenceInfo::collectConstantUpperBound(const Loop *L, 1106 Type *T) const { 1107 if (const SCEV *UB = collectUpperBound(L, T)) 1108 return dyn_cast<SCEVConstant>(UB); 1109 return nullptr; 1110 } 1111 1112 1113 // testZIV - 1114 // When we have a pair of subscripts of the form [c1] and [c2], 1115 // where c1 and c2 are both loop invariant, we attack it using 1116 // the ZIV test. Basically, we test by comparing the two values, 1117 // but there are actually three possible results: 1118 // 1) the values are equal, so there's a dependence 1119 // 2) the values are different, so there's no dependence 1120 // 3) the values might be equal, so we have to assume a dependence. 1121 // 1122 // Return true if dependence disproved. 1123 bool DependenceInfo::testZIV(const SCEV *Src, const SCEV *Dst, 1124 FullDependence &Result) const { 1125 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n"); 1126 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n"); 1127 ++ZIVapplications; 1128 if (isKnownPredicate(CmpInst::ICMP_EQ, Src, Dst)) { 1129 LLVM_DEBUG(dbgs() << " provably dependent\n"); 1130 return false; // provably dependent 1131 } 1132 if (isKnownPredicate(CmpInst::ICMP_NE, Src, Dst)) { 1133 LLVM_DEBUG(dbgs() << " provably independent\n"); 1134 ++ZIVindependence; 1135 return true; // provably independent 1136 } 1137 LLVM_DEBUG(dbgs() << " possibly dependent\n"); 1138 Result.Consistent = false; 1139 return false; // possibly dependent 1140 } 1141 1142 1143 // strongSIVtest - 1144 // From the paper, Practical Dependence Testing, Section 4.2.1 1145 // 1146 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 + a*i], 1147 // where i is an induction variable, c1 and c2 are loop invariant, 1148 // and a is a constant, we can solve it exactly using the Strong SIV test. 1149 // 1150 // Can prove independence. Failing that, can compute distance (and direction). 1151 // In the presence of symbolic terms, we can sometimes make progress. 1152 // 1153 // If there's a dependence, 1154 // 1155 // c1 + a*i = c2 + a*i' 1156 // 1157 // The dependence distance is 1158 // 1159 // d = i' - i = (c1 - c2)/a 1160 // 1161 // A dependence only exists if d is an integer and abs(d) <= U, where U is the 1162 // loop's upper bound. If a dependence exists, the dependence direction is 1163 // defined as 1164 // 1165 // { < if d > 0 1166 // direction = { = if d = 0 1167 // { > if d < 0 1168 // 1169 // Return true if dependence disproved. 1170 bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst, 1171 const SCEV *DstConst, const Loop *CurLoop, 1172 unsigned Level, FullDependence &Result, 1173 Constraint &NewConstraint) const { 1174 LLVM_DEBUG(dbgs() << "\tStrong SIV test\n"); 1175 LLVM_DEBUG(dbgs() << "\t Coeff = " << *Coeff); 1176 LLVM_DEBUG(dbgs() << ", " << *Coeff->getType() << "\n"); 1177 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst); 1178 LLVM_DEBUG(dbgs() << ", " << *SrcConst->getType() << "\n"); 1179 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst); 1180 LLVM_DEBUG(dbgs() << ", " << *DstConst->getType() << "\n"); 1181 ++StrongSIVapplications; 1182 assert(0 < Level && Level <= CommonLevels && "level out of range"); 1183 Level--; 1184 1185 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); 1186 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta); 1187 LLVM_DEBUG(dbgs() << ", " << *Delta->getType() << "\n"); 1188 1189 // check that |Delta| < iteration count 1190 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1191 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound); 1192 LLVM_DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n"); 1193 const SCEV *AbsDelta = 1194 SE->isKnownNonNegative(Delta) ? Delta : SE->getNegativeSCEV(Delta); 1195 const SCEV *AbsCoeff = 1196 SE->isKnownNonNegative(Coeff) ? Coeff : SE->getNegativeSCEV(Coeff); 1197 const SCEV *Product = SE->getMulExpr(UpperBound, AbsCoeff); 1198 if (isKnownPredicate(CmpInst::ICMP_SGT, AbsDelta, Product)) { 1199 // Distance greater than trip count - no dependence 1200 ++StrongSIVindependence; 1201 ++StrongSIVsuccesses; 1202 return true; 1203 } 1204 } 1205 1206 // Can we compute distance? 1207 if (isa<SCEVConstant>(Delta) && isa<SCEVConstant>(Coeff)) { 1208 APInt ConstDelta = cast<SCEVConstant>(Delta)->getAPInt(); 1209 APInt ConstCoeff = cast<SCEVConstant>(Coeff)->getAPInt(); 1210 APInt Distance = ConstDelta; // these need to be initialized 1211 APInt Remainder = ConstDelta; 1212 APInt::sdivrem(ConstDelta, ConstCoeff, Distance, Remainder); 1213 LLVM_DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); 1214 LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1215 // Make sure Coeff divides Delta exactly 1216 if (Remainder != 0) { 1217 // Coeff doesn't divide Distance, no dependence 1218 ++StrongSIVindependence; 1219 ++StrongSIVsuccesses; 1220 return true; 1221 } 1222 Result.DV[Level].Distance = SE->getConstant(Distance); 1223 NewConstraint.setDistance(SE->getConstant(Distance), CurLoop); 1224 if (Distance.sgt(0)) 1225 Result.DV[Level].Direction &= Dependence::DVEntry::LT; 1226 else if (Distance.slt(0)) 1227 Result.DV[Level].Direction &= Dependence::DVEntry::GT; 1228 else 1229 Result.DV[Level].Direction &= Dependence::DVEntry::EQ; 1230 ++StrongSIVsuccesses; 1231 } 1232 else if (Delta->isZero()) { 1233 // since 0/X == 0 1234 Result.DV[Level].Distance = Delta; 1235 NewConstraint.setDistance(Delta, CurLoop); 1236 Result.DV[Level].Direction &= Dependence::DVEntry::EQ; 1237 ++StrongSIVsuccesses; 1238 } 1239 else { 1240 if (Coeff->isOne()) { 1241 LLVM_DEBUG(dbgs() << "\t Distance = " << *Delta << "\n"); 1242 Result.DV[Level].Distance = Delta; // since X/1 == X 1243 NewConstraint.setDistance(Delta, CurLoop); 1244 } 1245 else { 1246 Result.Consistent = false; 1247 NewConstraint.setLine(Coeff, 1248 SE->getNegativeSCEV(Coeff), 1249 SE->getNegativeSCEV(Delta), CurLoop); 1250 } 1251 1252 // maybe we can get a useful direction 1253 bool DeltaMaybeZero = !SE->isKnownNonZero(Delta); 1254 bool DeltaMaybePositive = !SE->isKnownNonPositive(Delta); 1255 bool DeltaMaybeNegative = !SE->isKnownNonNegative(Delta); 1256 bool CoeffMaybePositive = !SE->isKnownNonPositive(Coeff); 1257 bool CoeffMaybeNegative = !SE->isKnownNonNegative(Coeff); 1258 // The double negatives above are confusing. 1259 // It helps to read !SE->isKnownNonZero(Delta) 1260 // as "Delta might be Zero" 1261 unsigned NewDirection = Dependence::DVEntry::NONE; 1262 if ((DeltaMaybePositive && CoeffMaybePositive) || 1263 (DeltaMaybeNegative && CoeffMaybeNegative)) 1264 NewDirection = Dependence::DVEntry::LT; 1265 if (DeltaMaybeZero) 1266 NewDirection |= Dependence::DVEntry::EQ; 1267 if ((DeltaMaybeNegative && CoeffMaybePositive) || 1268 (DeltaMaybePositive && CoeffMaybeNegative)) 1269 NewDirection |= Dependence::DVEntry::GT; 1270 if (NewDirection < Result.DV[Level].Direction) 1271 ++StrongSIVsuccesses; 1272 Result.DV[Level].Direction &= NewDirection; 1273 } 1274 return false; 1275 } 1276 1277 1278 // weakCrossingSIVtest - 1279 // From the paper, Practical Dependence Testing, Section 4.2.2 1280 // 1281 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 - a*i], 1282 // where i is an induction variable, c1 and c2 are loop invariant, 1283 // and a is a constant, we can solve it exactly using the 1284 // Weak-Crossing SIV test. 1285 // 1286 // Given c1 + a*i = c2 - a*i', we can look for the intersection of 1287 // the two lines, where i = i', yielding 1288 // 1289 // c1 + a*i = c2 - a*i 1290 // 2a*i = c2 - c1 1291 // i = (c2 - c1)/2a 1292 // 1293 // If i < 0, there is no dependence. 1294 // If i > upperbound, there is no dependence. 1295 // If i = 0 (i.e., if c1 = c2), there's a dependence with distance = 0. 1296 // If i = upperbound, there's a dependence with distance = 0. 1297 // If i is integral, there's a dependence (all directions). 1298 // If the non-integer part = 1/2, there's a dependence (<> directions). 1299 // Otherwise, there's no dependence. 1300 // 1301 // Can prove independence. Failing that, 1302 // can sometimes refine the directions. 1303 // Can determine iteration for splitting. 1304 // 1305 // Return true if dependence disproved. 1306 bool DependenceInfo::weakCrossingSIVtest( 1307 const SCEV *Coeff, const SCEV *SrcConst, const SCEV *DstConst, 1308 const Loop *CurLoop, unsigned Level, FullDependence &Result, 1309 Constraint &NewConstraint, const SCEV *&SplitIter) const { 1310 LLVM_DEBUG(dbgs() << "\tWeak-Crossing SIV test\n"); 1311 LLVM_DEBUG(dbgs() << "\t Coeff = " << *Coeff << "\n"); 1312 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1313 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1314 ++WeakCrossingSIVapplications; 1315 assert(0 < Level && Level <= CommonLevels && "Level out of range"); 1316 Level--; 1317 Result.Consistent = false; 1318 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1319 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1320 NewConstraint.setLine(Coeff, Coeff, Delta, CurLoop); 1321 if (Delta->isZero()) { 1322 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT); 1323 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT); 1324 ++WeakCrossingSIVsuccesses; 1325 if (!Result.DV[Level].Direction) { 1326 ++WeakCrossingSIVindependence; 1327 return true; 1328 } 1329 Result.DV[Level].Distance = Delta; // = 0 1330 return false; 1331 } 1332 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Coeff); 1333 if (!ConstCoeff) 1334 return false; 1335 1336 Result.DV[Level].Splitable = true; 1337 if (SE->isKnownNegative(ConstCoeff)) { 1338 ConstCoeff = dyn_cast<SCEVConstant>(SE->getNegativeSCEV(ConstCoeff)); 1339 assert(ConstCoeff && 1340 "dynamic cast of negative of ConstCoeff should yield constant"); 1341 Delta = SE->getNegativeSCEV(Delta); 1342 } 1343 assert(SE->isKnownPositive(ConstCoeff) && "ConstCoeff should be positive"); 1344 1345 // compute SplitIter for use by DependenceInfo::getSplitIteration() 1346 SplitIter = SE->getUDivExpr( 1347 SE->getSMaxExpr(SE->getZero(Delta->getType()), Delta), 1348 SE->getMulExpr(SE->getConstant(Delta->getType(), 2), ConstCoeff)); 1349 LLVM_DEBUG(dbgs() << "\t Split iter = " << *SplitIter << "\n"); 1350 1351 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1352 if (!ConstDelta) 1353 return false; 1354 1355 // We're certain that ConstCoeff > 0; therefore, 1356 // if Delta < 0, then no dependence. 1357 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1358 LLVM_DEBUG(dbgs() << "\t ConstCoeff = " << *ConstCoeff << "\n"); 1359 if (SE->isKnownNegative(Delta)) { 1360 // No dependence, Delta < 0 1361 ++WeakCrossingSIVindependence; 1362 ++WeakCrossingSIVsuccesses; 1363 return true; 1364 } 1365 1366 // We're certain that Delta > 0 and ConstCoeff > 0. 1367 // Check Delta/(2*ConstCoeff) against upper loop bound 1368 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1369 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1370 const SCEV *ConstantTwo = SE->getConstant(UpperBound->getType(), 2); 1371 const SCEV *ML = SE->getMulExpr(SE->getMulExpr(ConstCoeff, UpperBound), 1372 ConstantTwo); 1373 LLVM_DEBUG(dbgs() << "\t ML = " << *ML << "\n"); 1374 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, ML)) { 1375 // Delta too big, no dependence 1376 ++WeakCrossingSIVindependence; 1377 ++WeakCrossingSIVsuccesses; 1378 return true; 1379 } 1380 if (isKnownPredicate(CmpInst::ICMP_EQ, Delta, ML)) { 1381 // i = i' = UB 1382 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT); 1383 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT); 1384 ++WeakCrossingSIVsuccesses; 1385 if (!Result.DV[Level].Direction) { 1386 ++WeakCrossingSIVindependence; 1387 return true; 1388 } 1389 Result.DV[Level].Splitable = false; 1390 Result.DV[Level].Distance = SE->getZero(Delta->getType()); 1391 return false; 1392 } 1393 } 1394 1395 // check that Coeff divides Delta 1396 APInt APDelta = ConstDelta->getAPInt(); 1397 APInt APCoeff = ConstCoeff->getAPInt(); 1398 APInt Distance = APDelta; // these need to be initialzed 1399 APInt Remainder = APDelta; 1400 APInt::sdivrem(APDelta, APCoeff, Distance, Remainder); 1401 LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1402 if (Remainder != 0) { 1403 // Coeff doesn't divide Delta, no dependence 1404 ++WeakCrossingSIVindependence; 1405 ++WeakCrossingSIVsuccesses; 1406 return true; 1407 } 1408 LLVM_DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); 1409 1410 // if 2*Coeff doesn't divide Delta, then the equal direction isn't possible 1411 APInt Two = APInt(Distance.getBitWidth(), 2, true); 1412 Remainder = Distance.srem(Two); 1413 LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1414 if (Remainder != 0) { 1415 // Equal direction isn't possible 1416 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::EQ); 1417 ++WeakCrossingSIVsuccesses; 1418 } 1419 return false; 1420 } 1421 1422 1423 // Kirch's algorithm, from 1424 // 1425 // Optimizing Supercompilers for Supercomputers 1426 // Michael Wolfe 1427 // MIT Press, 1989 1428 // 1429 // Program 2.1, page 29. 1430 // Computes the GCD of AM and BM. 1431 // Also finds a solution to the equation ax - by = gcd(a, b). 1432 // Returns true if dependence disproved; i.e., gcd does not divide Delta. 1433 static bool findGCD(unsigned Bits, const APInt &AM, const APInt &BM, 1434 const APInt &Delta, APInt &G, APInt &X, APInt &Y) { 1435 APInt A0(Bits, 1, true), A1(Bits, 0, true); 1436 APInt B0(Bits, 0, true), B1(Bits, 1, true); 1437 APInt G0 = AM.abs(); 1438 APInt G1 = BM.abs(); 1439 APInt Q = G0; // these need to be initialized 1440 APInt R = G0; 1441 APInt::sdivrem(G0, G1, Q, R); 1442 while (R != 0) { 1443 APInt A2 = A0 - Q*A1; A0 = A1; A1 = A2; 1444 APInt B2 = B0 - Q*B1; B0 = B1; B1 = B2; 1445 G0 = G1; G1 = R; 1446 APInt::sdivrem(G0, G1, Q, R); 1447 } 1448 G = G1; 1449 LLVM_DEBUG(dbgs() << "\t GCD = " << G << "\n"); 1450 X = AM.slt(0) ? -A1 : A1; 1451 Y = BM.slt(0) ? B1 : -B1; 1452 1453 // make sure gcd divides Delta 1454 R = Delta.srem(G); 1455 if (R != 0) 1456 return true; // gcd doesn't divide Delta, no dependence 1457 Q = Delta.sdiv(G); 1458 return false; 1459 } 1460 1461 static APInt floorOfQuotient(const APInt &A, const APInt &B) { 1462 APInt Q = A; // these need to be initialized 1463 APInt R = A; 1464 APInt::sdivrem(A, B, Q, R); 1465 if (R == 0) 1466 return Q; 1467 if ((A.sgt(0) && B.sgt(0)) || 1468 (A.slt(0) && B.slt(0))) 1469 return Q; 1470 else 1471 return Q - 1; 1472 } 1473 1474 static APInt ceilingOfQuotient(const APInt &A, const APInt &B) { 1475 APInt Q = A; // these need to be initialized 1476 APInt R = A; 1477 APInt::sdivrem(A, B, Q, R); 1478 if (R == 0) 1479 return Q; 1480 if ((A.sgt(0) && B.sgt(0)) || 1481 (A.slt(0) && B.slt(0))) 1482 return Q + 1; 1483 else 1484 return Q; 1485 } 1486 1487 // exactSIVtest - 1488 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i], 1489 // where i is an induction variable, c1 and c2 are loop invariant, and a1 1490 // and a2 are constant, we can solve it exactly using an algorithm developed 1491 // by Banerjee and Wolfe. See Algorithm 6.2.1 (case 2.5) in: 1492 // 1493 // Dependence Analysis for Supercomputing 1494 // Utpal Banerjee 1495 // Kluwer Academic Publishers, 1988 1496 // 1497 // It's slower than the specialized tests (strong SIV, weak-zero SIV, etc), 1498 // so use them if possible. They're also a bit better with symbolics and, 1499 // in the case of the strong SIV test, can compute Distances. 1500 // 1501 // Return true if dependence disproved. 1502 // 1503 // This is a modified version of the original Banerjee algorithm. The original 1504 // only tested whether Dst depends on Src. This algorithm extends that and 1505 // returns all the dependencies that exist between Dst and Src. 1506 bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, 1507 const SCEV *SrcConst, const SCEV *DstConst, 1508 const Loop *CurLoop, unsigned Level, 1509 FullDependence &Result, 1510 Constraint &NewConstraint) const { 1511 LLVM_DEBUG(dbgs() << "\tExact SIV test\n"); 1512 LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); 1513 LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); 1514 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1515 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1516 ++ExactSIVapplications; 1517 assert(0 < Level && Level <= CommonLevels && "Level out of range"); 1518 Level--; 1519 Result.Consistent = false; 1520 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1521 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1522 NewConstraint.setLine(SrcCoeff, SE->getNegativeSCEV(DstCoeff), Delta, 1523 CurLoop); 1524 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1525 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1526 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1527 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) 1528 return false; 1529 1530 // find gcd 1531 APInt G, X, Y; 1532 APInt AM = ConstSrcCoeff->getAPInt(); 1533 APInt BM = ConstDstCoeff->getAPInt(); 1534 APInt CM = ConstDelta->getAPInt(); 1535 unsigned Bits = AM.getBitWidth(); 1536 if (findGCD(Bits, AM, BM, CM, G, X, Y)) { 1537 // gcd doesn't divide Delta, no dependence 1538 ++ExactSIVindependence; 1539 ++ExactSIVsuccesses; 1540 return true; 1541 } 1542 1543 LLVM_DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); 1544 1545 // since SCEV construction normalizes, LM = 0 1546 APInt UM(Bits, 1, true); 1547 bool UMValid = false; 1548 // UM is perhaps unavailable, let's check 1549 if (const SCEVConstant *CUB = 1550 collectConstantUpperBound(CurLoop, Delta->getType())) { 1551 UM = CUB->getAPInt(); 1552 LLVM_DEBUG(dbgs() << "\t UM = " << UM << "\n"); 1553 UMValid = true; 1554 } 1555 1556 APInt TU(APInt::getSignedMaxValue(Bits)); 1557 APInt TL(APInt::getSignedMinValue(Bits)); 1558 APInt TC = CM.sdiv(G); 1559 APInt TX = X * TC; 1560 APInt TY = Y * TC; 1561 LLVM_DEBUG(dbgs() << "\t TC = " << TC << "\n"); 1562 LLVM_DEBUG(dbgs() << "\t TX = " << TX << "\n"); 1563 LLVM_DEBUG(dbgs() << "\t TY = " << TY << "\n"); 1564 1565 SmallVector<APInt, 2> TLVec, TUVec; 1566 APInt TB = BM.sdiv(G); 1567 if (TB.sgt(0)) { 1568 TLVec.push_back(ceilingOfQuotient(-TX, TB)); 1569 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); 1570 // New bound check - modification to Banerjee's e3 check 1571 if (UMValid) { 1572 TUVec.push_back(floorOfQuotient(UM - TX, TB)); 1573 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); 1574 } 1575 } else { 1576 TUVec.push_back(floorOfQuotient(-TX, TB)); 1577 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); 1578 // New bound check - modification to Banerjee's e3 check 1579 if (UMValid) { 1580 TLVec.push_back(ceilingOfQuotient(UM - TX, TB)); 1581 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); 1582 } 1583 } 1584 1585 APInt TA = AM.sdiv(G); 1586 if (TA.sgt(0)) { 1587 if (UMValid) { 1588 TUVec.push_back(floorOfQuotient(UM - TY, TA)); 1589 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); 1590 } 1591 // New bound check - modification to Banerjee's e3 check 1592 TLVec.push_back(ceilingOfQuotient(-TY, TA)); 1593 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); 1594 } else { 1595 if (UMValid) { 1596 TLVec.push_back(ceilingOfQuotient(UM - TY, TA)); 1597 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); 1598 } 1599 // New bound check - modification to Banerjee's e3 check 1600 TUVec.push_back(floorOfQuotient(-TY, TA)); 1601 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); 1602 } 1603 1604 LLVM_DEBUG(dbgs() << "\t TA = " << TA << "\n"); 1605 LLVM_DEBUG(dbgs() << "\t TB = " << TB << "\n"); 1606 1607 if (TLVec.empty() || TUVec.empty()) 1608 return false; 1609 TL = APIntOps::smax(TLVec.front(), TLVec.back()); 1610 TU = APIntOps::smin(TUVec.front(), TUVec.back()); 1611 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1612 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1613 1614 if (TL.sgt(TU)) { 1615 ++ExactSIVindependence; 1616 ++ExactSIVsuccesses; 1617 return true; 1618 } 1619 1620 // explore directions 1621 unsigned NewDirection = Dependence::DVEntry::NONE; 1622 APInt LowerDistance, UpperDistance; 1623 if (TA.sgt(TB)) { 1624 LowerDistance = (TY - TX) + (TA - TB) * TL; 1625 UpperDistance = (TY - TX) + (TA - TB) * TU; 1626 } else { 1627 LowerDistance = (TY - TX) + (TA - TB) * TU; 1628 UpperDistance = (TY - TX) + (TA - TB) * TL; 1629 } 1630 1631 LLVM_DEBUG(dbgs() << "\t LowerDistance = " << LowerDistance << "\n"); 1632 LLVM_DEBUG(dbgs() << "\t UpperDistance = " << UpperDistance << "\n"); 1633 1634 APInt Zero(Bits, 0, true); 1635 if (LowerDistance.sle(Zero) && UpperDistance.sge(Zero)) { 1636 NewDirection |= Dependence::DVEntry::EQ; 1637 ++ExactSIVsuccesses; 1638 } 1639 if (LowerDistance.slt(0)) { 1640 NewDirection |= Dependence::DVEntry::GT; 1641 ++ExactSIVsuccesses; 1642 } 1643 if (UpperDistance.sgt(0)) { 1644 NewDirection |= Dependence::DVEntry::LT; 1645 ++ExactSIVsuccesses; 1646 } 1647 1648 // finished 1649 Result.DV[Level].Direction &= NewDirection; 1650 if (Result.DV[Level].Direction == Dependence::DVEntry::NONE) 1651 ++ExactSIVindependence; 1652 LLVM_DEBUG(dbgs() << "\t Result = "); 1653 LLVM_DEBUG(Result.dump(dbgs())); 1654 return Result.DV[Level].Direction == Dependence::DVEntry::NONE; 1655 } 1656 1657 1658 // Return true if the divisor evenly divides the dividend. 1659 static 1660 bool isRemainderZero(const SCEVConstant *Dividend, 1661 const SCEVConstant *Divisor) { 1662 const APInt &ConstDividend = Dividend->getAPInt(); 1663 const APInt &ConstDivisor = Divisor->getAPInt(); 1664 return ConstDividend.srem(ConstDivisor) == 0; 1665 } 1666 1667 1668 // weakZeroSrcSIVtest - 1669 // From the paper, Practical Dependence Testing, Section 4.2.2 1670 // 1671 // When we have a pair of subscripts of the form [c1] and [c2 + a*i], 1672 // where i is an induction variable, c1 and c2 are loop invariant, 1673 // and a is a constant, we can solve it exactly using the 1674 // Weak-Zero SIV test. 1675 // 1676 // Given 1677 // 1678 // c1 = c2 + a*i 1679 // 1680 // we get 1681 // 1682 // (c1 - c2)/a = i 1683 // 1684 // If i is not an integer, there's no dependence. 1685 // If i < 0 or > UB, there's no dependence. 1686 // If i = 0, the direction is >= and peeling the 1687 // 1st iteration will break the dependence. 1688 // If i = UB, the direction is <= and peeling the 1689 // last iteration will break the dependence. 1690 // Otherwise, the direction is *. 1691 // 1692 // Can prove independence. Failing that, we can sometimes refine 1693 // the directions. Can sometimes show that first or last 1694 // iteration carries all the dependences (so worth peeling). 1695 // 1696 // (see also weakZeroDstSIVtest) 1697 // 1698 // Return true if dependence disproved. 1699 bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *DstCoeff, 1700 const SCEV *SrcConst, 1701 const SCEV *DstConst, 1702 const Loop *CurLoop, unsigned Level, 1703 FullDependence &Result, 1704 Constraint &NewConstraint) const { 1705 // For the WeakSIV test, it's possible the loop isn't common to 1706 // the Src and Dst loops. If it isn't, then there's no need to 1707 // record a direction. 1708 LLVM_DEBUG(dbgs() << "\tWeak-Zero (src) SIV test\n"); 1709 LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << "\n"); 1710 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1711 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1712 ++WeakZeroSIVapplications; 1713 assert(0 < Level && Level <= MaxLevels && "Level out of range"); 1714 Level--; 1715 Result.Consistent = false; 1716 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); 1717 NewConstraint.setLine(SE->getZero(Delta->getType()), DstCoeff, Delta, 1718 CurLoop); 1719 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1720 if (isKnownPredicate(CmpInst::ICMP_EQ, SrcConst, DstConst)) { 1721 if (Level < CommonLevels) { 1722 Result.DV[Level].Direction &= Dependence::DVEntry::GE; 1723 Result.DV[Level].PeelFirst = true; 1724 ++WeakZeroSIVsuccesses; 1725 } 1726 return false; // dependences caused by first iteration 1727 } 1728 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1729 if (!ConstCoeff) 1730 return false; 1731 const SCEV *AbsCoeff = 1732 SE->isKnownNegative(ConstCoeff) ? 1733 SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; 1734 const SCEV *NewDelta = 1735 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta; 1736 1737 // check that Delta/SrcCoeff < iteration count 1738 // really check NewDelta < count*AbsCoeff 1739 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1740 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1741 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); 1742 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { 1743 ++WeakZeroSIVindependence; 1744 ++WeakZeroSIVsuccesses; 1745 return true; 1746 } 1747 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { 1748 // dependences caused by last iteration 1749 if (Level < CommonLevels) { 1750 Result.DV[Level].Direction &= Dependence::DVEntry::LE; 1751 Result.DV[Level].PeelLast = true; 1752 ++WeakZeroSIVsuccesses; 1753 } 1754 return false; 1755 } 1756 } 1757 1758 // check that Delta/SrcCoeff >= 0 1759 // really check that NewDelta >= 0 1760 if (SE->isKnownNegative(NewDelta)) { 1761 // No dependence, newDelta < 0 1762 ++WeakZeroSIVindependence; 1763 ++WeakZeroSIVsuccesses; 1764 return true; 1765 } 1766 1767 // if SrcCoeff doesn't divide Delta, then no dependence 1768 if (isa<SCEVConstant>(Delta) && 1769 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { 1770 ++WeakZeroSIVindependence; 1771 ++WeakZeroSIVsuccesses; 1772 return true; 1773 } 1774 return false; 1775 } 1776 1777 1778 // weakZeroDstSIVtest - 1779 // From the paper, Practical Dependence Testing, Section 4.2.2 1780 // 1781 // When we have a pair of subscripts of the form [c1 + a*i] and [c2], 1782 // where i is an induction variable, c1 and c2 are loop invariant, 1783 // and a is a constant, we can solve it exactly using the 1784 // Weak-Zero SIV test. 1785 // 1786 // Given 1787 // 1788 // c1 + a*i = c2 1789 // 1790 // we get 1791 // 1792 // i = (c2 - c1)/a 1793 // 1794 // If i is not an integer, there's no dependence. 1795 // If i < 0 or > UB, there's no dependence. 1796 // If i = 0, the direction is <= and peeling the 1797 // 1st iteration will break the dependence. 1798 // If i = UB, the direction is >= and peeling the 1799 // last iteration will break the dependence. 1800 // Otherwise, the direction is *. 1801 // 1802 // Can prove independence. Failing that, we can sometimes refine 1803 // the directions. Can sometimes show that first or last 1804 // iteration carries all the dependences (so worth peeling). 1805 // 1806 // (see also weakZeroSrcSIVtest) 1807 // 1808 // Return true if dependence disproved. 1809 bool DependenceInfo::weakZeroDstSIVtest(const SCEV *SrcCoeff, 1810 const SCEV *SrcConst, 1811 const SCEV *DstConst, 1812 const Loop *CurLoop, unsigned Level, 1813 FullDependence &Result, 1814 Constraint &NewConstraint) const { 1815 // For the WeakSIV test, it's possible the loop isn't common to the 1816 // Src and Dst loops. If it isn't, then there's no need to record a direction. 1817 LLVM_DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n"); 1818 LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << "\n"); 1819 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1820 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1821 ++WeakZeroSIVapplications; 1822 assert(0 < Level && Level <= SrcLevels && "Level out of range"); 1823 Level--; 1824 Result.Consistent = false; 1825 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1826 NewConstraint.setLine(SrcCoeff, SE->getZero(Delta->getType()), Delta, 1827 CurLoop); 1828 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1829 if (isKnownPredicate(CmpInst::ICMP_EQ, DstConst, SrcConst)) { 1830 if (Level < CommonLevels) { 1831 Result.DV[Level].Direction &= Dependence::DVEntry::LE; 1832 Result.DV[Level].PeelFirst = true; 1833 ++WeakZeroSIVsuccesses; 1834 } 1835 return false; // dependences caused by first iteration 1836 } 1837 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1838 if (!ConstCoeff) 1839 return false; 1840 const SCEV *AbsCoeff = 1841 SE->isKnownNegative(ConstCoeff) ? 1842 SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; 1843 const SCEV *NewDelta = 1844 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta; 1845 1846 // check that Delta/SrcCoeff < iteration count 1847 // really check NewDelta < count*AbsCoeff 1848 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1849 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1850 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); 1851 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { 1852 ++WeakZeroSIVindependence; 1853 ++WeakZeroSIVsuccesses; 1854 return true; 1855 } 1856 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { 1857 // dependences caused by last iteration 1858 if (Level < CommonLevels) { 1859 Result.DV[Level].Direction &= Dependence::DVEntry::GE; 1860 Result.DV[Level].PeelLast = true; 1861 ++WeakZeroSIVsuccesses; 1862 } 1863 return false; 1864 } 1865 } 1866 1867 // check that Delta/SrcCoeff >= 0 1868 // really check that NewDelta >= 0 1869 if (SE->isKnownNegative(NewDelta)) { 1870 // No dependence, newDelta < 0 1871 ++WeakZeroSIVindependence; 1872 ++WeakZeroSIVsuccesses; 1873 return true; 1874 } 1875 1876 // if SrcCoeff doesn't divide Delta, then no dependence 1877 if (isa<SCEVConstant>(Delta) && 1878 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { 1879 ++WeakZeroSIVindependence; 1880 ++WeakZeroSIVsuccesses; 1881 return true; 1882 } 1883 return false; 1884 } 1885 1886 1887 // exactRDIVtest - Tests the RDIV subscript pair for dependence. 1888 // Things of the form [c1 + a*i] and [c2 + b*j], 1889 // where i and j are induction variable, c1 and c2 are loop invariant, 1890 // and a and b are constants. 1891 // Returns true if any possible dependence is disproved. 1892 // Marks the result as inconsistent. 1893 // Works in some cases that symbolicRDIVtest doesn't, and vice versa. 1894 bool DependenceInfo::exactRDIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, 1895 const SCEV *SrcConst, const SCEV *DstConst, 1896 const Loop *SrcLoop, const Loop *DstLoop, 1897 FullDependence &Result) const { 1898 LLVM_DEBUG(dbgs() << "\tExact RDIV test\n"); 1899 LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); 1900 LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); 1901 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1902 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1903 ++ExactRDIVapplications; 1904 Result.Consistent = false; 1905 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1906 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1907 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1908 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1909 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1910 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) 1911 return false; 1912 1913 // find gcd 1914 APInt G, X, Y; 1915 APInt AM = ConstSrcCoeff->getAPInt(); 1916 APInt BM = ConstDstCoeff->getAPInt(); 1917 APInt CM = ConstDelta->getAPInt(); 1918 unsigned Bits = AM.getBitWidth(); 1919 if (findGCD(Bits, AM, BM, CM, G, X, Y)) { 1920 // gcd doesn't divide Delta, no dependence 1921 ++ExactRDIVindependence; 1922 return true; 1923 } 1924 1925 LLVM_DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); 1926 1927 // since SCEV construction seems to normalize, LM = 0 1928 APInt SrcUM(Bits, 1, true); 1929 bool SrcUMvalid = false; 1930 // SrcUM is perhaps unavailable, let's check 1931 if (const SCEVConstant *UpperBound = 1932 collectConstantUpperBound(SrcLoop, Delta->getType())) { 1933 SrcUM = UpperBound->getAPInt(); 1934 LLVM_DEBUG(dbgs() << "\t SrcUM = " << SrcUM << "\n"); 1935 SrcUMvalid = true; 1936 } 1937 1938 APInt DstUM(Bits, 1, true); 1939 bool DstUMvalid = false; 1940 // UM is perhaps unavailable, let's check 1941 if (const SCEVConstant *UpperBound = 1942 collectConstantUpperBound(DstLoop, Delta->getType())) { 1943 DstUM = UpperBound->getAPInt(); 1944 LLVM_DEBUG(dbgs() << "\t DstUM = " << DstUM << "\n"); 1945 DstUMvalid = true; 1946 } 1947 1948 APInt TU(APInt::getSignedMaxValue(Bits)); 1949 APInt TL(APInt::getSignedMinValue(Bits)); 1950 APInt TC = CM.sdiv(G); 1951 APInt TX = X * TC; 1952 APInt TY = Y * TC; 1953 LLVM_DEBUG(dbgs() << "\t TC = " << TC << "\n"); 1954 LLVM_DEBUG(dbgs() << "\t TX = " << TX << "\n"); 1955 LLVM_DEBUG(dbgs() << "\t TY = " << TY << "\n"); 1956 1957 SmallVector<APInt, 2> TLVec, TUVec; 1958 APInt TB = BM.sdiv(G); 1959 if (TB.sgt(0)) { 1960 TLVec.push_back(ceilingOfQuotient(-TX, TB)); 1961 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); 1962 if (SrcUMvalid) { 1963 TUVec.push_back(floorOfQuotient(SrcUM - TX, TB)); 1964 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); 1965 } 1966 } else { 1967 TUVec.push_back(floorOfQuotient(-TX, TB)); 1968 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); 1969 if (SrcUMvalid) { 1970 TLVec.push_back(ceilingOfQuotient(SrcUM - TX, TB)); 1971 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); 1972 } 1973 } 1974 1975 APInt TA = AM.sdiv(G); 1976 if (TA.sgt(0)) { 1977 TLVec.push_back(ceilingOfQuotient(-TY, TA)); 1978 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); 1979 if (DstUMvalid) { 1980 TUVec.push_back(floorOfQuotient(DstUM - TY, TA)); 1981 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); 1982 } 1983 } else { 1984 TUVec.push_back(floorOfQuotient(-TY, TA)); 1985 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); 1986 if (DstUMvalid) { 1987 TLVec.push_back(ceilingOfQuotient(DstUM - TY, TA)); 1988 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); 1989 } 1990 } 1991 1992 if (TLVec.empty() || TUVec.empty()) 1993 return false; 1994 1995 LLVM_DEBUG(dbgs() << "\t TA = " << TA << "\n"); 1996 LLVM_DEBUG(dbgs() << "\t TB = " << TB << "\n"); 1997 1998 TL = APIntOps::smax(TLVec.front(), TLVec.back()); 1999 TU = APIntOps::smin(TUVec.front(), TUVec.back()); 2000 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); 2001 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); 2002 2003 if (TL.sgt(TU)) 2004 ++ExactRDIVindependence; 2005 return TL.sgt(TU); 2006 } 2007 2008 2009 // symbolicRDIVtest - 2010 // In Section 4.5 of the Practical Dependence Testing paper,the authors 2011 // introduce a special case of Banerjee's Inequalities (also called the 2012 // Extreme-Value Test) that can handle some of the SIV and RDIV cases, 2013 // particularly cases with symbolics. Since it's only able to disprove 2014 // dependence (not compute distances or directions), we'll use it as a 2015 // fall back for the other tests. 2016 // 2017 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] 2018 // where i and j are induction variables and c1 and c2 are loop invariants, 2019 // we can use the symbolic tests to disprove some dependences, serving as a 2020 // backup for the RDIV test. Note that i and j can be the same variable, 2021 // letting this test serve as a backup for the various SIV tests. 2022 // 2023 // For a dependence to exist, c1 + a1*i must equal c2 + a2*j for some 2024 // 0 <= i <= N1 and some 0 <= j <= N2, where N1 and N2 are the (normalized) 2025 // loop bounds for the i and j loops, respectively. So, ... 2026 // 2027 // c1 + a1*i = c2 + a2*j 2028 // a1*i - a2*j = c2 - c1 2029 // 2030 // To test for a dependence, we compute c2 - c1 and make sure it's in the 2031 // range of the maximum and minimum possible values of a1*i - a2*j. 2032 // Considering the signs of a1 and a2, we have 4 possible cases: 2033 // 2034 // 1) If a1 >= 0 and a2 >= 0, then 2035 // a1*0 - a2*N2 <= c2 - c1 <= a1*N1 - a2*0 2036 // -a2*N2 <= c2 - c1 <= a1*N1 2037 // 2038 // 2) If a1 >= 0 and a2 <= 0, then 2039 // a1*0 - a2*0 <= c2 - c1 <= a1*N1 - a2*N2 2040 // 0 <= c2 - c1 <= a1*N1 - a2*N2 2041 // 2042 // 3) If a1 <= 0 and a2 >= 0, then 2043 // a1*N1 - a2*N2 <= c2 - c1 <= a1*0 - a2*0 2044 // a1*N1 - a2*N2 <= c2 - c1 <= 0 2045 // 2046 // 4) If a1 <= 0 and a2 <= 0, then 2047 // a1*N1 - a2*0 <= c2 - c1 <= a1*0 - a2*N2 2048 // a1*N1 <= c2 - c1 <= -a2*N2 2049 // 2050 // return true if dependence disproved 2051 bool DependenceInfo::symbolicRDIVtest(const SCEV *A1, const SCEV *A2, 2052 const SCEV *C1, const SCEV *C2, 2053 const Loop *Loop1, 2054 const Loop *Loop2) const { 2055 ++SymbolicRDIVapplications; 2056 LLVM_DEBUG(dbgs() << "\ttry symbolic RDIV test\n"); 2057 LLVM_DEBUG(dbgs() << "\t A1 = " << *A1); 2058 LLVM_DEBUG(dbgs() << ", type = " << *A1->getType() << "\n"); 2059 LLVM_DEBUG(dbgs() << "\t A2 = " << *A2 << "\n"); 2060 LLVM_DEBUG(dbgs() << "\t C1 = " << *C1 << "\n"); 2061 LLVM_DEBUG(dbgs() << "\t C2 = " << *C2 << "\n"); 2062 const SCEV *N1 = collectUpperBound(Loop1, A1->getType()); 2063 const SCEV *N2 = collectUpperBound(Loop2, A1->getType()); 2064 LLVM_DEBUG(if (N1) dbgs() << "\t N1 = " << *N1 << "\n"); 2065 LLVM_DEBUG(if (N2) dbgs() << "\t N2 = " << *N2 << "\n"); 2066 const SCEV *C2_C1 = SE->getMinusSCEV(C2, C1); 2067 const SCEV *C1_C2 = SE->getMinusSCEV(C1, C2); 2068 LLVM_DEBUG(dbgs() << "\t C2 - C1 = " << *C2_C1 << "\n"); 2069 LLVM_DEBUG(dbgs() << "\t C1 - C2 = " << *C1_C2 << "\n"); 2070 if (SE->isKnownNonNegative(A1)) { 2071 if (SE->isKnownNonNegative(A2)) { 2072 // A1 >= 0 && A2 >= 0 2073 if (N1) { 2074 // make sure that c2 - c1 <= a1*N1 2075 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2076 LLVM_DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); 2077 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1)) { 2078 ++SymbolicRDIVindependence; 2079 return true; 2080 } 2081 } 2082 if (N2) { 2083 // make sure that -a2*N2 <= c2 - c1, or a2*N2 >= c1 - c2 2084 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2085 LLVM_DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); 2086 if (isKnownPredicate(CmpInst::ICMP_SLT, A2N2, C1_C2)) { 2087 ++SymbolicRDIVindependence; 2088 return true; 2089 } 2090 } 2091 } 2092 else if (SE->isKnownNonPositive(A2)) { 2093 // a1 >= 0 && a2 <= 0 2094 if (N1 && N2) { 2095 // make sure that c2 - c1 <= a1*N1 - a2*N2 2096 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2097 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2098 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); 2099 LLVM_DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); 2100 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1_A2N2)) { 2101 ++SymbolicRDIVindependence; 2102 return true; 2103 } 2104 } 2105 // make sure that 0 <= c2 - c1 2106 if (SE->isKnownNegative(C2_C1)) { 2107 ++SymbolicRDIVindependence; 2108 return true; 2109 } 2110 } 2111 } 2112 else if (SE->isKnownNonPositive(A1)) { 2113 if (SE->isKnownNonNegative(A2)) { 2114 // a1 <= 0 && a2 >= 0 2115 if (N1 && N2) { 2116 // make sure that a1*N1 - a2*N2 <= c2 - c1 2117 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2118 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2119 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); 2120 LLVM_DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); 2121 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1_A2N2, C2_C1)) { 2122 ++SymbolicRDIVindependence; 2123 return true; 2124 } 2125 } 2126 // make sure that c2 - c1 <= 0 2127 if (SE->isKnownPositive(C2_C1)) { 2128 ++SymbolicRDIVindependence; 2129 return true; 2130 } 2131 } 2132 else if (SE->isKnownNonPositive(A2)) { 2133 // a1 <= 0 && a2 <= 0 2134 if (N1) { 2135 // make sure that a1*N1 <= c2 - c1 2136 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2137 LLVM_DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); 2138 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1, C2_C1)) { 2139 ++SymbolicRDIVindependence; 2140 return true; 2141 } 2142 } 2143 if (N2) { 2144 // make sure that c2 - c1 <= -a2*N2, or c1 - c2 >= a2*N2 2145 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2146 LLVM_DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); 2147 if (isKnownPredicate(CmpInst::ICMP_SLT, C1_C2, A2N2)) { 2148 ++SymbolicRDIVindependence; 2149 return true; 2150 } 2151 } 2152 } 2153 } 2154 return false; 2155 } 2156 2157 2158 // testSIV - 2159 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 - a2*i] 2160 // where i is an induction variable, c1 and c2 are loop invariant, and a1 and 2161 // a2 are constant, we attack it with an SIV test. While they can all be 2162 // solved with the Exact SIV test, it's worthwhile to use simpler tests when 2163 // they apply; they're cheaper and sometimes more precise. 2164 // 2165 // Return true if dependence disproved. 2166 bool DependenceInfo::testSIV(const SCEV *Src, const SCEV *Dst, unsigned &Level, 2167 FullDependence &Result, Constraint &NewConstraint, 2168 const SCEV *&SplitIter) const { 2169 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n"); 2170 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2171 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); 2172 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); 2173 if (SrcAddRec && DstAddRec) { 2174 const SCEV *SrcConst = SrcAddRec->getStart(); 2175 const SCEV *DstConst = DstAddRec->getStart(); 2176 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2177 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); 2178 const Loop *CurLoop = SrcAddRec->getLoop(); 2179 assert(CurLoop == DstAddRec->getLoop() && 2180 "both loops in SIV should be same"); 2181 Level = mapSrcLoop(CurLoop); 2182 bool disproven; 2183 if (SrcCoeff == DstCoeff) 2184 disproven = strongSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2185 Level, Result, NewConstraint); 2186 else if (SrcCoeff == SE->getNegativeSCEV(DstCoeff)) 2187 disproven = weakCrossingSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2188 Level, Result, NewConstraint, SplitIter); 2189 else 2190 disproven = exactSIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, 2191 Level, Result, NewConstraint); 2192 return disproven || 2193 gcdMIVtest(Src, Dst, Result) || 2194 symbolicRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, CurLoop); 2195 } 2196 if (SrcAddRec) { 2197 const SCEV *SrcConst = SrcAddRec->getStart(); 2198 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2199 const SCEV *DstConst = Dst; 2200 const Loop *CurLoop = SrcAddRec->getLoop(); 2201 Level = mapSrcLoop(CurLoop); 2202 return weakZeroDstSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2203 Level, Result, NewConstraint) || 2204 gcdMIVtest(Src, Dst, Result); 2205 } 2206 if (DstAddRec) { 2207 const SCEV *DstConst = DstAddRec->getStart(); 2208 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); 2209 const SCEV *SrcConst = Src; 2210 const Loop *CurLoop = DstAddRec->getLoop(); 2211 Level = mapDstLoop(CurLoop); 2212 return weakZeroSrcSIVtest(DstCoeff, SrcConst, DstConst, 2213 CurLoop, Level, Result, NewConstraint) || 2214 gcdMIVtest(Src, Dst, Result); 2215 } 2216 llvm_unreachable("SIV test expected at least one AddRec"); 2217 return false; 2218 } 2219 2220 2221 // testRDIV - 2222 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] 2223 // where i and j are induction variables, c1 and c2 are loop invariant, 2224 // and a1 and a2 are constant, we can solve it exactly with an easy adaptation 2225 // of the Exact SIV test, the Restricted Double Index Variable (RDIV) test. 2226 // It doesn't make sense to talk about distance or direction in this case, 2227 // so there's no point in making special versions of the Strong SIV test or 2228 // the Weak-crossing SIV test. 2229 // 2230 // With minor algebra, this test can also be used for things like 2231 // [c1 + a1*i + a2*j][c2]. 2232 // 2233 // Return true if dependence disproved. 2234 bool DependenceInfo::testRDIV(const SCEV *Src, const SCEV *Dst, 2235 FullDependence &Result) const { 2236 // we have 3 possible situations here: 2237 // 1) [a*i + b] and [c*j + d] 2238 // 2) [a*i + c*j + b] and [d] 2239 // 3) [b] and [a*i + c*j + d] 2240 // We need to find what we've got and get organized 2241 2242 const SCEV *SrcConst, *DstConst; 2243 const SCEV *SrcCoeff, *DstCoeff; 2244 const Loop *SrcLoop, *DstLoop; 2245 2246 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n"); 2247 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2248 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); 2249 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); 2250 if (SrcAddRec && DstAddRec) { 2251 SrcConst = SrcAddRec->getStart(); 2252 SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2253 SrcLoop = SrcAddRec->getLoop(); 2254 DstConst = DstAddRec->getStart(); 2255 DstCoeff = DstAddRec->getStepRecurrence(*SE); 2256 DstLoop = DstAddRec->getLoop(); 2257 } 2258 else if (SrcAddRec) { 2259 if (const SCEVAddRecExpr *tmpAddRec = 2260 dyn_cast<SCEVAddRecExpr>(SrcAddRec->getStart())) { 2261 SrcConst = tmpAddRec->getStart(); 2262 SrcCoeff = tmpAddRec->getStepRecurrence(*SE); 2263 SrcLoop = tmpAddRec->getLoop(); 2264 DstConst = Dst; 2265 DstCoeff = SE->getNegativeSCEV(SrcAddRec->getStepRecurrence(*SE)); 2266 DstLoop = SrcAddRec->getLoop(); 2267 } 2268 else 2269 llvm_unreachable("RDIV reached by surprising SCEVs"); 2270 } 2271 else if (DstAddRec) { 2272 if (const SCEVAddRecExpr *tmpAddRec = 2273 dyn_cast<SCEVAddRecExpr>(DstAddRec->getStart())) { 2274 DstConst = tmpAddRec->getStart(); 2275 DstCoeff = tmpAddRec->getStepRecurrence(*SE); 2276 DstLoop = tmpAddRec->getLoop(); 2277 SrcConst = Src; 2278 SrcCoeff = SE->getNegativeSCEV(DstAddRec->getStepRecurrence(*SE)); 2279 SrcLoop = DstAddRec->getLoop(); 2280 } 2281 else 2282 llvm_unreachable("RDIV reached by surprising SCEVs"); 2283 } 2284 else 2285 llvm_unreachable("RDIV expected at least one AddRec"); 2286 return exactRDIVtest(SrcCoeff, DstCoeff, 2287 SrcConst, DstConst, 2288 SrcLoop, DstLoop, 2289 Result) || 2290 gcdMIVtest(Src, Dst, Result) || 2291 symbolicRDIVtest(SrcCoeff, DstCoeff, 2292 SrcConst, DstConst, 2293 SrcLoop, DstLoop); 2294 } 2295 2296 2297 // Tests the single-subscript MIV pair (Src and Dst) for dependence. 2298 // Return true if dependence disproved. 2299 // Can sometimes refine direction vectors. 2300 bool DependenceInfo::testMIV(const SCEV *Src, const SCEV *Dst, 2301 const SmallBitVector &Loops, 2302 FullDependence &Result) const { 2303 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n"); 2304 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2305 Result.Consistent = false; 2306 return gcdMIVtest(Src, Dst, Result) || 2307 banerjeeMIVtest(Src, Dst, Loops, Result); 2308 } 2309 2310 2311 // Given a product, e.g., 10*X*Y, returns the first constant operand, 2312 // in this case 10. If there is no constant part, returns NULL. 2313 static 2314 const SCEVConstant *getConstantPart(const SCEV *Expr) { 2315 if (const auto *Constant = dyn_cast<SCEVConstant>(Expr)) 2316 return Constant; 2317 else if (const auto *Product = dyn_cast<SCEVMulExpr>(Expr)) 2318 if (const auto *Constant = dyn_cast<SCEVConstant>(Product->getOperand(0))) 2319 return Constant; 2320 return nullptr; 2321 } 2322 2323 2324 //===----------------------------------------------------------------------===// 2325 // gcdMIVtest - 2326 // Tests an MIV subscript pair for dependence. 2327 // Returns true if any possible dependence is disproved. 2328 // Marks the result as inconsistent. 2329 // Can sometimes disprove the equal direction for 1 or more loops, 2330 // as discussed in Michael Wolfe's book, 2331 // High Performance Compilers for Parallel Computing, page 235. 2332 // 2333 // We spend some effort (code!) to handle cases like 2334 // [10*i + 5*N*j + 15*M + 6], where i and j are induction variables, 2335 // but M and N are just loop-invariant variables. 2336 // This should help us handle linearized subscripts; 2337 // also makes this test a useful backup to the various SIV tests. 2338 // 2339 // It occurs to me that the presence of loop-invariant variables 2340 // changes the nature of the test from "greatest common divisor" 2341 // to "a common divisor". 2342 bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst, 2343 FullDependence &Result) const { 2344 LLVM_DEBUG(dbgs() << "starting gcd\n"); 2345 ++GCDapplications; 2346 unsigned BitWidth = SE->getTypeSizeInBits(Src->getType()); 2347 APInt RunningGCD = APInt::getZero(BitWidth); 2348 2349 // Examine Src coefficients. 2350 // Compute running GCD and record source constant. 2351 // Because we're looking for the constant at the end of the chain, 2352 // we can't quit the loop just because the GCD == 1. 2353 const SCEV *Coefficients = Src; 2354 while (const SCEVAddRecExpr *AddRec = 2355 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2356 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2357 // If the coefficient is the product of a constant and other stuff, 2358 // we can use the constant in the GCD computation. 2359 const auto *Constant = getConstantPart(Coeff); 2360 if (!Constant) 2361 return false; 2362 APInt ConstCoeff = Constant->getAPInt(); 2363 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2364 Coefficients = AddRec->getStart(); 2365 } 2366 const SCEV *SrcConst = Coefficients; 2367 2368 // Examine Dst coefficients. 2369 // Compute running GCD and record destination constant. 2370 // Because we're looking for the constant at the end of the chain, 2371 // we can't quit the loop just because the GCD == 1. 2372 Coefficients = Dst; 2373 while (const SCEVAddRecExpr *AddRec = 2374 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2375 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2376 // If the coefficient is the product of a constant and other stuff, 2377 // we can use the constant in the GCD computation. 2378 const auto *Constant = getConstantPart(Coeff); 2379 if (!Constant) 2380 return false; 2381 APInt ConstCoeff = Constant->getAPInt(); 2382 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2383 Coefficients = AddRec->getStart(); 2384 } 2385 const SCEV *DstConst = Coefficients; 2386 2387 APInt ExtraGCD = APInt::getZero(BitWidth); 2388 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 2389 LLVM_DEBUG(dbgs() << " Delta = " << *Delta << "\n"); 2390 const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Delta); 2391 if (const SCEVAddExpr *Sum = dyn_cast<SCEVAddExpr>(Delta)) { 2392 // If Delta is a sum of products, we may be able to make further progress. 2393 for (unsigned Op = 0, Ops = Sum->getNumOperands(); Op < Ops; Op++) { 2394 const SCEV *Operand = Sum->getOperand(Op); 2395 if (isa<SCEVConstant>(Operand)) { 2396 assert(!Constant && "Surprised to find multiple constants"); 2397 Constant = cast<SCEVConstant>(Operand); 2398 } 2399 else if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Operand)) { 2400 // Search for constant operand to participate in GCD; 2401 // If none found; return false. 2402 const SCEVConstant *ConstOp = getConstantPart(Product); 2403 if (!ConstOp) 2404 return false; 2405 APInt ConstOpValue = ConstOp->getAPInt(); 2406 ExtraGCD = APIntOps::GreatestCommonDivisor(ExtraGCD, 2407 ConstOpValue.abs()); 2408 } 2409 else 2410 return false; 2411 } 2412 } 2413 if (!Constant) 2414 return false; 2415 APInt ConstDelta = cast<SCEVConstant>(Constant)->getAPInt(); 2416 LLVM_DEBUG(dbgs() << " ConstDelta = " << ConstDelta << "\n"); 2417 if (ConstDelta == 0) 2418 return false; 2419 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ExtraGCD); 2420 LLVM_DEBUG(dbgs() << " RunningGCD = " << RunningGCD << "\n"); 2421 APInt Remainder = ConstDelta.srem(RunningGCD); 2422 if (Remainder != 0) { 2423 ++GCDindependence; 2424 return true; 2425 } 2426 2427 // Try to disprove equal directions. 2428 // For example, given a subscript pair [3*i + 2*j] and [i' + 2*j' - 1], 2429 // the code above can't disprove the dependence because the GCD = 1. 2430 // So we consider what happen if i = i' and what happens if j = j'. 2431 // If i = i', we can simplify the subscript to [2*i + 2*j] and [2*j' - 1], 2432 // which is infeasible, so we can disallow the = direction for the i level. 2433 // Setting j = j' doesn't help matters, so we end up with a direction vector 2434 // of [<>, *] 2435 // 2436 // Given A[5*i + 10*j*M + 9*M*N] and A[15*i + 20*j*M - 21*N*M + 5], 2437 // we need to remember that the constant part is 5 and the RunningGCD should 2438 // be initialized to ExtraGCD = 30. 2439 LLVM_DEBUG(dbgs() << " ExtraGCD = " << ExtraGCD << '\n'); 2440 2441 bool Improved = false; 2442 Coefficients = Src; 2443 while (const SCEVAddRecExpr *AddRec = 2444 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2445 Coefficients = AddRec->getStart(); 2446 const Loop *CurLoop = AddRec->getLoop(); 2447 RunningGCD = ExtraGCD; 2448 const SCEV *SrcCoeff = AddRec->getStepRecurrence(*SE); 2449 const SCEV *DstCoeff = SE->getMinusSCEV(SrcCoeff, SrcCoeff); 2450 const SCEV *Inner = Src; 2451 while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { 2452 AddRec = cast<SCEVAddRecExpr>(Inner); 2453 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2454 if (CurLoop == AddRec->getLoop()) 2455 ; // SrcCoeff == Coeff 2456 else { 2457 // If the coefficient is the product of a constant and other stuff, 2458 // we can use the constant in the GCD computation. 2459 Constant = getConstantPart(Coeff); 2460 if (!Constant) 2461 return false; 2462 APInt ConstCoeff = Constant->getAPInt(); 2463 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2464 } 2465 Inner = AddRec->getStart(); 2466 } 2467 Inner = Dst; 2468 while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { 2469 AddRec = cast<SCEVAddRecExpr>(Inner); 2470 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2471 if (CurLoop == AddRec->getLoop()) 2472 DstCoeff = Coeff; 2473 else { 2474 // If the coefficient is the product of a constant and other stuff, 2475 // we can use the constant in the GCD computation. 2476 Constant = getConstantPart(Coeff); 2477 if (!Constant) 2478 return false; 2479 APInt ConstCoeff = Constant->getAPInt(); 2480 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2481 } 2482 Inner = AddRec->getStart(); 2483 } 2484 Delta = SE->getMinusSCEV(SrcCoeff, DstCoeff); 2485 // If the coefficient is the product of a constant and other stuff, 2486 // we can use the constant in the GCD computation. 2487 Constant = getConstantPart(Delta); 2488 if (!Constant) 2489 // The difference of the two coefficients might not be a product 2490 // or constant, in which case we give up on this direction. 2491 continue; 2492 APInt ConstCoeff = Constant->getAPInt(); 2493 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2494 LLVM_DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n"); 2495 if (RunningGCD != 0) { 2496 Remainder = ConstDelta.srem(RunningGCD); 2497 LLVM_DEBUG(dbgs() << "\tRemainder = " << Remainder << "\n"); 2498 if (Remainder != 0) { 2499 unsigned Level = mapSrcLoop(CurLoop); 2500 Result.DV[Level - 1].Direction &= unsigned(~Dependence::DVEntry::EQ); 2501 Improved = true; 2502 } 2503 } 2504 } 2505 if (Improved) 2506 ++GCDsuccesses; 2507 LLVM_DEBUG(dbgs() << "all done\n"); 2508 return false; 2509 } 2510 2511 2512 //===----------------------------------------------------------------------===// 2513 // banerjeeMIVtest - 2514 // Use Banerjee's Inequalities to test an MIV subscript pair. 2515 // (Wolfe, in the race-car book, calls this the Extreme Value Test.) 2516 // Generally follows the discussion in Section 2.5.2 of 2517 // 2518 // Optimizing Supercompilers for Supercomputers 2519 // Michael Wolfe 2520 // 2521 // The inequalities given on page 25 are simplified in that loops are 2522 // normalized so that the lower bound is always 0 and the stride is always 1. 2523 // For example, Wolfe gives 2524 // 2525 // LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2526 // 2527 // where A_k is the coefficient of the kth index in the source subscript, 2528 // B_k is the coefficient of the kth index in the destination subscript, 2529 // U_k is the upper bound of the kth index, L_k is the lower bound of the Kth 2530 // index, and N_k is the stride of the kth index. Since all loops are normalized 2531 // by the SCEV package, N_k = 1 and L_k = 0, allowing us to simplify the 2532 // equation to 2533 // 2534 // LB^<_k = (A^-_k - B_k)^- (U_k - 0 - 1) + (A_k - B_k)0 - B_k 1 2535 // = (A^-_k - B_k)^- (U_k - 1) - B_k 2536 // 2537 // Similar simplifications are possible for the other equations. 2538 // 2539 // When we can't determine the number of iterations for a loop, 2540 // we use NULL as an indicator for the worst case, infinity. 2541 // When computing the upper bound, NULL denotes +inf; 2542 // for the lower bound, NULL denotes -inf. 2543 // 2544 // Return true if dependence disproved. 2545 bool DependenceInfo::banerjeeMIVtest(const SCEV *Src, const SCEV *Dst, 2546 const SmallBitVector &Loops, 2547 FullDependence &Result) const { 2548 LLVM_DEBUG(dbgs() << "starting Banerjee\n"); 2549 ++BanerjeeApplications; 2550 LLVM_DEBUG(dbgs() << " Src = " << *Src << '\n'); 2551 const SCEV *A0; 2552 CoefficientInfo *A = collectCoeffInfo(Src, true, A0); 2553 LLVM_DEBUG(dbgs() << " Dst = " << *Dst << '\n'); 2554 const SCEV *B0; 2555 CoefficientInfo *B = collectCoeffInfo(Dst, false, B0); 2556 BoundInfo *Bound = new BoundInfo[MaxLevels + 1]; 2557 const SCEV *Delta = SE->getMinusSCEV(B0, A0); 2558 LLVM_DEBUG(dbgs() << "\tDelta = " << *Delta << '\n'); 2559 2560 // Compute bounds for all the * directions. 2561 LLVM_DEBUG(dbgs() << "\tBounds[*]\n"); 2562 for (unsigned K = 1; K <= MaxLevels; ++K) { 2563 Bound[K].Iterations = A[K].Iterations ? A[K].Iterations : B[K].Iterations; 2564 Bound[K].Direction = Dependence::DVEntry::ALL; 2565 Bound[K].DirSet = Dependence::DVEntry::NONE; 2566 findBoundsALL(A, B, Bound, K); 2567 #ifndef NDEBUG 2568 LLVM_DEBUG(dbgs() << "\t " << K << '\t'); 2569 if (Bound[K].Lower[Dependence::DVEntry::ALL]) 2570 LLVM_DEBUG(dbgs() << *Bound[K].Lower[Dependence::DVEntry::ALL] << '\t'); 2571 else 2572 LLVM_DEBUG(dbgs() << "-inf\t"); 2573 if (Bound[K].Upper[Dependence::DVEntry::ALL]) 2574 LLVM_DEBUG(dbgs() << *Bound[K].Upper[Dependence::DVEntry::ALL] << '\n'); 2575 else 2576 LLVM_DEBUG(dbgs() << "+inf\n"); 2577 #endif 2578 } 2579 2580 // Test the *, *, *, ... case. 2581 bool Disproved = false; 2582 if (testBounds(Dependence::DVEntry::ALL, 0, Bound, Delta)) { 2583 // Explore the direction vector hierarchy. 2584 unsigned DepthExpanded = 0; 2585 unsigned NewDeps = exploreDirections(1, A, B, Bound, 2586 Loops, DepthExpanded, Delta); 2587 if (NewDeps > 0) { 2588 bool Improved = false; 2589 for (unsigned K = 1; K <= CommonLevels; ++K) { 2590 if (Loops[K]) { 2591 unsigned Old = Result.DV[K - 1].Direction; 2592 Result.DV[K - 1].Direction = Old & Bound[K].DirSet; 2593 Improved |= Old != Result.DV[K - 1].Direction; 2594 if (!Result.DV[K - 1].Direction) { 2595 Improved = false; 2596 Disproved = true; 2597 break; 2598 } 2599 } 2600 } 2601 if (Improved) 2602 ++BanerjeeSuccesses; 2603 } 2604 else { 2605 ++BanerjeeIndependence; 2606 Disproved = true; 2607 } 2608 } 2609 else { 2610 ++BanerjeeIndependence; 2611 Disproved = true; 2612 } 2613 delete [] Bound; 2614 delete [] A; 2615 delete [] B; 2616 return Disproved; 2617 } 2618 2619 2620 // Hierarchically expands the direction vector 2621 // search space, combining the directions of discovered dependences 2622 // in the DirSet field of Bound. Returns the number of distinct 2623 // dependences discovered. If the dependence is disproved, 2624 // it will return 0. 2625 unsigned DependenceInfo::exploreDirections(unsigned Level, CoefficientInfo *A, 2626 CoefficientInfo *B, BoundInfo *Bound, 2627 const SmallBitVector &Loops, 2628 unsigned &DepthExpanded, 2629 const SCEV *Delta) const { 2630 // This algorithm has worst case complexity of O(3^n), where 'n' is the number 2631 // of common loop levels. To avoid excessive compile-time, pessimize all the 2632 // results and immediately return when the number of common levels is beyond 2633 // the given threshold. 2634 if (CommonLevels > MIVMaxLevelThreshold) { 2635 LLVM_DEBUG(dbgs() << "Number of common levels exceeded the threshold. MIV " 2636 "direction exploration is terminated.\n"); 2637 for (unsigned K = 1; K <= CommonLevels; ++K) 2638 if (Loops[K]) 2639 Bound[K].DirSet = Dependence::DVEntry::ALL; 2640 return 1; 2641 } 2642 2643 if (Level > CommonLevels) { 2644 // record result 2645 LLVM_DEBUG(dbgs() << "\t["); 2646 for (unsigned K = 1; K <= CommonLevels; ++K) { 2647 if (Loops[K]) { 2648 Bound[K].DirSet |= Bound[K].Direction; 2649 #ifndef NDEBUG 2650 switch (Bound[K].Direction) { 2651 case Dependence::DVEntry::LT: 2652 LLVM_DEBUG(dbgs() << " <"); 2653 break; 2654 case Dependence::DVEntry::EQ: 2655 LLVM_DEBUG(dbgs() << " ="); 2656 break; 2657 case Dependence::DVEntry::GT: 2658 LLVM_DEBUG(dbgs() << " >"); 2659 break; 2660 case Dependence::DVEntry::ALL: 2661 LLVM_DEBUG(dbgs() << " *"); 2662 break; 2663 default: 2664 llvm_unreachable("unexpected Bound[K].Direction"); 2665 } 2666 #endif 2667 } 2668 } 2669 LLVM_DEBUG(dbgs() << " ]\n"); 2670 return 1; 2671 } 2672 if (Loops[Level]) { 2673 if (Level > DepthExpanded) { 2674 DepthExpanded = Level; 2675 // compute bounds for <, =, > at current level 2676 findBoundsLT(A, B, Bound, Level); 2677 findBoundsGT(A, B, Bound, Level); 2678 findBoundsEQ(A, B, Bound, Level); 2679 #ifndef NDEBUG 2680 LLVM_DEBUG(dbgs() << "\tBound for level = " << Level << '\n'); 2681 LLVM_DEBUG(dbgs() << "\t <\t"); 2682 if (Bound[Level].Lower[Dependence::DVEntry::LT]) 2683 LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::LT] 2684 << '\t'); 2685 else 2686 LLVM_DEBUG(dbgs() << "-inf\t"); 2687 if (Bound[Level].Upper[Dependence::DVEntry::LT]) 2688 LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::LT] 2689 << '\n'); 2690 else 2691 LLVM_DEBUG(dbgs() << "+inf\n"); 2692 LLVM_DEBUG(dbgs() << "\t =\t"); 2693 if (Bound[Level].Lower[Dependence::DVEntry::EQ]) 2694 LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::EQ] 2695 << '\t'); 2696 else 2697 LLVM_DEBUG(dbgs() << "-inf\t"); 2698 if (Bound[Level].Upper[Dependence::DVEntry::EQ]) 2699 LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::EQ] 2700 << '\n'); 2701 else 2702 LLVM_DEBUG(dbgs() << "+inf\n"); 2703 LLVM_DEBUG(dbgs() << "\t >\t"); 2704 if (Bound[Level].Lower[Dependence::DVEntry::GT]) 2705 LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::GT] 2706 << '\t'); 2707 else 2708 LLVM_DEBUG(dbgs() << "-inf\t"); 2709 if (Bound[Level].Upper[Dependence::DVEntry::GT]) 2710 LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::GT] 2711 << '\n'); 2712 else 2713 LLVM_DEBUG(dbgs() << "+inf\n"); 2714 #endif 2715 } 2716 2717 unsigned NewDeps = 0; 2718 2719 // test bounds for <, *, *, ... 2720 if (testBounds(Dependence::DVEntry::LT, Level, Bound, Delta)) 2721 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2722 Loops, DepthExpanded, Delta); 2723 2724 // Test bounds for =, *, *, ... 2725 if (testBounds(Dependence::DVEntry::EQ, Level, Bound, Delta)) 2726 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2727 Loops, DepthExpanded, Delta); 2728 2729 // test bounds for >, *, *, ... 2730 if (testBounds(Dependence::DVEntry::GT, Level, Bound, Delta)) 2731 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2732 Loops, DepthExpanded, Delta); 2733 2734 Bound[Level].Direction = Dependence::DVEntry::ALL; 2735 return NewDeps; 2736 } 2737 else 2738 return exploreDirections(Level + 1, A, B, Bound, Loops, DepthExpanded, Delta); 2739 } 2740 2741 2742 // Returns true iff the current bounds are plausible. 2743 bool DependenceInfo::testBounds(unsigned char DirKind, unsigned Level, 2744 BoundInfo *Bound, const SCEV *Delta) const { 2745 Bound[Level].Direction = DirKind; 2746 if (const SCEV *LowerBound = getLowerBound(Bound)) 2747 if (isKnownPredicate(CmpInst::ICMP_SGT, LowerBound, Delta)) 2748 return false; 2749 if (const SCEV *UpperBound = getUpperBound(Bound)) 2750 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, UpperBound)) 2751 return false; 2752 return true; 2753 } 2754 2755 2756 // Computes the upper and lower bounds for level K 2757 // using the * direction. Records them in Bound. 2758 // Wolfe gives the equations 2759 // 2760 // LB^*_k = (A^-_k - B^+_k)(U_k - L_k) + (A_k - B_k)L_k 2761 // UB^*_k = (A^+_k - B^-_k)(U_k - L_k) + (A_k - B_k)L_k 2762 // 2763 // Since we normalize loops, we can simplify these equations to 2764 // 2765 // LB^*_k = (A^-_k - B^+_k)U_k 2766 // UB^*_k = (A^+_k - B^-_k)U_k 2767 // 2768 // We must be careful to handle the case where the upper bound is unknown. 2769 // Note that the lower bound is always <= 0 2770 // and the upper bound is always >= 0. 2771 void DependenceInfo::findBoundsALL(CoefficientInfo *A, CoefficientInfo *B, 2772 BoundInfo *Bound, unsigned K) const { 2773 Bound[K].Lower[Dependence::DVEntry::ALL] = nullptr; // Default value = -infinity. 2774 Bound[K].Upper[Dependence::DVEntry::ALL] = nullptr; // Default value = +infinity. 2775 if (Bound[K].Iterations) { 2776 Bound[K].Lower[Dependence::DVEntry::ALL] = 2777 SE->getMulExpr(SE->getMinusSCEV(A[K].NegPart, B[K].PosPart), 2778 Bound[K].Iterations); 2779 Bound[K].Upper[Dependence::DVEntry::ALL] = 2780 SE->getMulExpr(SE->getMinusSCEV(A[K].PosPart, B[K].NegPart), 2781 Bound[K].Iterations); 2782 } 2783 else { 2784 // If the difference is 0, we won't need to know the number of iterations. 2785 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].NegPart, B[K].PosPart)) 2786 Bound[K].Lower[Dependence::DVEntry::ALL] = 2787 SE->getZero(A[K].Coeff->getType()); 2788 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].PosPart, B[K].NegPart)) 2789 Bound[K].Upper[Dependence::DVEntry::ALL] = 2790 SE->getZero(A[K].Coeff->getType()); 2791 } 2792 } 2793 2794 2795 // Computes the upper and lower bounds for level K 2796 // using the = direction. Records them in Bound. 2797 // Wolfe gives the equations 2798 // 2799 // LB^=_k = (A_k - B_k)^- (U_k - L_k) + (A_k - B_k)L_k 2800 // UB^=_k = (A_k - B_k)^+ (U_k - L_k) + (A_k - B_k)L_k 2801 // 2802 // Since we normalize loops, we can simplify these equations to 2803 // 2804 // LB^=_k = (A_k - B_k)^- U_k 2805 // UB^=_k = (A_k - B_k)^+ U_k 2806 // 2807 // We must be careful to handle the case where the upper bound is unknown. 2808 // Note that the lower bound is always <= 0 2809 // and the upper bound is always >= 0. 2810 void DependenceInfo::findBoundsEQ(CoefficientInfo *A, CoefficientInfo *B, 2811 BoundInfo *Bound, unsigned K) const { 2812 Bound[K].Lower[Dependence::DVEntry::EQ] = nullptr; // Default value = -infinity. 2813 Bound[K].Upper[Dependence::DVEntry::EQ] = nullptr; // Default value = +infinity. 2814 if (Bound[K].Iterations) { 2815 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); 2816 const SCEV *NegativePart = getNegativePart(Delta); 2817 Bound[K].Lower[Dependence::DVEntry::EQ] = 2818 SE->getMulExpr(NegativePart, Bound[K].Iterations); 2819 const SCEV *PositivePart = getPositivePart(Delta); 2820 Bound[K].Upper[Dependence::DVEntry::EQ] = 2821 SE->getMulExpr(PositivePart, Bound[K].Iterations); 2822 } 2823 else { 2824 // If the positive/negative part of the difference is 0, 2825 // we won't need to know the number of iterations. 2826 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); 2827 const SCEV *NegativePart = getNegativePart(Delta); 2828 if (NegativePart->isZero()) 2829 Bound[K].Lower[Dependence::DVEntry::EQ] = NegativePart; // Zero 2830 const SCEV *PositivePart = getPositivePart(Delta); 2831 if (PositivePart->isZero()) 2832 Bound[K].Upper[Dependence::DVEntry::EQ] = PositivePart; // Zero 2833 } 2834 } 2835 2836 2837 // Computes the upper and lower bounds for level K 2838 // using the < direction. Records them in Bound. 2839 // Wolfe gives the equations 2840 // 2841 // LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2842 // UB^<_k = (A^+_k - B_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2843 // 2844 // Since we normalize loops, we can simplify these equations to 2845 // 2846 // LB^<_k = (A^-_k - B_k)^- (U_k - 1) - B_k 2847 // UB^<_k = (A^+_k - B_k)^+ (U_k - 1) - B_k 2848 // 2849 // We must be careful to handle the case where the upper bound is unknown. 2850 void DependenceInfo::findBoundsLT(CoefficientInfo *A, CoefficientInfo *B, 2851 BoundInfo *Bound, unsigned K) const { 2852 Bound[K].Lower[Dependence::DVEntry::LT] = nullptr; // Default value = -infinity. 2853 Bound[K].Upper[Dependence::DVEntry::LT] = nullptr; // Default value = +infinity. 2854 if (Bound[K].Iterations) { 2855 const SCEV *Iter_1 = SE->getMinusSCEV( 2856 Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType())); 2857 const SCEV *NegPart = 2858 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); 2859 Bound[K].Lower[Dependence::DVEntry::LT] = 2860 SE->getMinusSCEV(SE->getMulExpr(NegPart, Iter_1), B[K].Coeff); 2861 const SCEV *PosPart = 2862 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); 2863 Bound[K].Upper[Dependence::DVEntry::LT] = 2864 SE->getMinusSCEV(SE->getMulExpr(PosPart, Iter_1), B[K].Coeff); 2865 } 2866 else { 2867 // If the positive/negative part of the difference is 0, 2868 // we won't need to know the number of iterations. 2869 const SCEV *NegPart = 2870 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); 2871 if (NegPart->isZero()) 2872 Bound[K].Lower[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); 2873 const SCEV *PosPart = 2874 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); 2875 if (PosPart->isZero()) 2876 Bound[K].Upper[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); 2877 } 2878 } 2879 2880 2881 // Computes the upper and lower bounds for level K 2882 // using the > direction. Records them in Bound. 2883 // Wolfe gives the equations 2884 // 2885 // LB^>_k = (A_k - B^+_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k 2886 // UB^>_k = (A_k - B^-_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k 2887 // 2888 // Since we normalize loops, we can simplify these equations to 2889 // 2890 // LB^>_k = (A_k - B^+_k)^- (U_k - 1) + A_k 2891 // UB^>_k = (A_k - B^-_k)^+ (U_k - 1) + A_k 2892 // 2893 // We must be careful to handle the case where the upper bound is unknown. 2894 void DependenceInfo::findBoundsGT(CoefficientInfo *A, CoefficientInfo *B, 2895 BoundInfo *Bound, unsigned K) const { 2896 Bound[K].Lower[Dependence::DVEntry::GT] = nullptr; // Default value = -infinity. 2897 Bound[K].Upper[Dependence::DVEntry::GT] = nullptr; // Default value = +infinity. 2898 if (Bound[K].Iterations) { 2899 const SCEV *Iter_1 = SE->getMinusSCEV( 2900 Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType())); 2901 const SCEV *NegPart = 2902 getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); 2903 Bound[K].Lower[Dependence::DVEntry::GT] = 2904 SE->getAddExpr(SE->getMulExpr(NegPart, Iter_1), A[K].Coeff); 2905 const SCEV *PosPart = 2906 getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); 2907 Bound[K].Upper[Dependence::DVEntry::GT] = 2908 SE->getAddExpr(SE->getMulExpr(PosPart, Iter_1), A[K].Coeff); 2909 } 2910 else { 2911 // If the positive/negative part of the difference is 0, 2912 // we won't need to know the number of iterations. 2913 const SCEV *NegPart = getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); 2914 if (NegPart->isZero()) 2915 Bound[K].Lower[Dependence::DVEntry::GT] = A[K].Coeff; 2916 const SCEV *PosPart = getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); 2917 if (PosPart->isZero()) 2918 Bound[K].Upper[Dependence::DVEntry::GT] = A[K].Coeff; 2919 } 2920 } 2921 2922 2923 // X^+ = max(X, 0) 2924 const SCEV *DependenceInfo::getPositivePart(const SCEV *X) const { 2925 return SE->getSMaxExpr(X, SE->getZero(X->getType())); 2926 } 2927 2928 2929 // X^- = min(X, 0) 2930 const SCEV *DependenceInfo::getNegativePart(const SCEV *X) const { 2931 return SE->getSMinExpr(X, SE->getZero(X->getType())); 2932 } 2933 2934 2935 // Walks through the subscript, 2936 // collecting each coefficient, the associated loop bounds, 2937 // and recording its positive and negative parts for later use. 2938 DependenceInfo::CoefficientInfo * 2939 DependenceInfo::collectCoeffInfo(const SCEV *Subscript, bool SrcFlag, 2940 const SCEV *&Constant) const { 2941 const SCEV *Zero = SE->getZero(Subscript->getType()); 2942 CoefficientInfo *CI = new CoefficientInfo[MaxLevels + 1]; 2943 for (unsigned K = 1; K <= MaxLevels; ++K) { 2944 CI[K].Coeff = Zero; 2945 CI[K].PosPart = Zero; 2946 CI[K].NegPart = Zero; 2947 CI[K].Iterations = nullptr; 2948 } 2949 while (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Subscript)) { 2950 const Loop *L = AddRec->getLoop(); 2951 unsigned K = SrcFlag ? mapSrcLoop(L) : mapDstLoop(L); 2952 CI[K].Coeff = AddRec->getStepRecurrence(*SE); 2953 CI[K].PosPart = getPositivePart(CI[K].Coeff); 2954 CI[K].NegPart = getNegativePart(CI[K].Coeff); 2955 CI[K].Iterations = collectUpperBound(L, Subscript->getType()); 2956 Subscript = AddRec->getStart(); 2957 } 2958 Constant = Subscript; 2959 #ifndef NDEBUG 2960 LLVM_DEBUG(dbgs() << "\tCoefficient Info\n"); 2961 for (unsigned K = 1; K <= MaxLevels; ++K) { 2962 LLVM_DEBUG(dbgs() << "\t " << K << "\t" << *CI[K].Coeff); 2963 LLVM_DEBUG(dbgs() << "\tPos Part = "); 2964 LLVM_DEBUG(dbgs() << *CI[K].PosPart); 2965 LLVM_DEBUG(dbgs() << "\tNeg Part = "); 2966 LLVM_DEBUG(dbgs() << *CI[K].NegPart); 2967 LLVM_DEBUG(dbgs() << "\tUpper Bound = "); 2968 if (CI[K].Iterations) 2969 LLVM_DEBUG(dbgs() << *CI[K].Iterations); 2970 else 2971 LLVM_DEBUG(dbgs() << "+inf"); 2972 LLVM_DEBUG(dbgs() << '\n'); 2973 } 2974 LLVM_DEBUG(dbgs() << "\t Constant = " << *Subscript << '\n'); 2975 #endif 2976 return CI; 2977 } 2978 2979 2980 // Looks through all the bounds info and 2981 // computes the lower bound given the current direction settings 2982 // at each level. If the lower bound for any level is -inf, 2983 // the result is -inf. 2984 const SCEV *DependenceInfo::getLowerBound(BoundInfo *Bound) const { 2985 const SCEV *Sum = Bound[1].Lower[Bound[1].Direction]; 2986 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { 2987 if (Bound[K].Lower[Bound[K].Direction]) 2988 Sum = SE->getAddExpr(Sum, Bound[K].Lower[Bound[K].Direction]); 2989 else 2990 Sum = nullptr; 2991 } 2992 return Sum; 2993 } 2994 2995 2996 // Looks through all the bounds info and 2997 // computes the upper bound given the current direction settings 2998 // at each level. If the upper bound at any level is +inf, 2999 // the result is +inf. 3000 const SCEV *DependenceInfo::getUpperBound(BoundInfo *Bound) const { 3001 const SCEV *Sum = Bound[1].Upper[Bound[1].Direction]; 3002 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { 3003 if (Bound[K].Upper[Bound[K].Direction]) 3004 Sum = SE->getAddExpr(Sum, Bound[K].Upper[Bound[K].Direction]); 3005 else 3006 Sum = nullptr; 3007 } 3008 return Sum; 3009 } 3010 3011 3012 //===----------------------------------------------------------------------===// 3013 // Constraint manipulation for Delta test. 3014 3015 // Given a linear SCEV, 3016 // return the coefficient (the step) 3017 // corresponding to the specified loop. 3018 // If there isn't one, return 0. 3019 // For example, given a*i + b*j + c*k, finding the coefficient 3020 // corresponding to the j loop would yield b. 3021 const SCEV *DependenceInfo::findCoefficient(const SCEV *Expr, 3022 const Loop *TargetLoop) const { 3023 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 3024 if (!AddRec) 3025 return SE->getZero(Expr->getType()); 3026 if (AddRec->getLoop() == TargetLoop) 3027 return AddRec->getStepRecurrence(*SE); 3028 return findCoefficient(AddRec->getStart(), TargetLoop); 3029 } 3030 3031 3032 // Given a linear SCEV, 3033 // return the SCEV given by zeroing out the coefficient 3034 // corresponding to the specified loop. 3035 // For example, given a*i + b*j + c*k, zeroing the coefficient 3036 // corresponding to the j loop would yield a*i + c*k. 3037 const SCEV *DependenceInfo::zeroCoefficient(const SCEV *Expr, 3038 const Loop *TargetLoop) const { 3039 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 3040 if (!AddRec) 3041 return Expr; // ignore 3042 if (AddRec->getLoop() == TargetLoop) 3043 return AddRec->getStart(); 3044 return SE->getAddRecExpr(zeroCoefficient(AddRec->getStart(), TargetLoop), 3045 AddRec->getStepRecurrence(*SE), 3046 AddRec->getLoop(), 3047 AddRec->getNoWrapFlags()); 3048 } 3049 3050 3051 // Given a linear SCEV Expr, 3052 // return the SCEV given by adding some Value to the 3053 // coefficient corresponding to the specified TargetLoop. 3054 // For example, given a*i + b*j + c*k, adding 1 to the coefficient 3055 // corresponding to the j loop would yield a*i + (b+1)*j + c*k. 3056 const SCEV *DependenceInfo::addToCoefficient(const SCEV *Expr, 3057 const Loop *TargetLoop, 3058 const SCEV *Value) const { 3059 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 3060 if (!AddRec) // create a new addRec 3061 return SE->getAddRecExpr(Expr, 3062 Value, 3063 TargetLoop, 3064 SCEV::FlagAnyWrap); // Worst case, with no info. 3065 if (AddRec->getLoop() == TargetLoop) { 3066 const SCEV *Sum = SE->getAddExpr(AddRec->getStepRecurrence(*SE), Value); 3067 if (Sum->isZero()) 3068 return AddRec->getStart(); 3069 return SE->getAddRecExpr(AddRec->getStart(), 3070 Sum, 3071 AddRec->getLoop(), 3072 AddRec->getNoWrapFlags()); 3073 } 3074 if (SE->isLoopInvariant(AddRec, TargetLoop)) 3075 return SE->getAddRecExpr(AddRec, Value, TargetLoop, SCEV::FlagAnyWrap); 3076 return SE->getAddRecExpr( 3077 addToCoefficient(AddRec->getStart(), TargetLoop, Value), 3078 AddRec->getStepRecurrence(*SE), AddRec->getLoop(), 3079 AddRec->getNoWrapFlags()); 3080 } 3081 3082 3083 // Review the constraints, looking for opportunities 3084 // to simplify a subscript pair (Src and Dst). 3085 // Return true if some simplification occurs. 3086 // If the simplification isn't exact (that is, if it is conservative 3087 // in terms of dependence), set consistent to false. 3088 // Corresponds to Figure 5 from the paper 3089 // 3090 // Practical Dependence Testing 3091 // Goff, Kennedy, Tseng 3092 // PLDI 1991 3093 bool DependenceInfo::propagate(const SCEV *&Src, const SCEV *&Dst, 3094 SmallBitVector &Loops, 3095 SmallVectorImpl<Constraint> &Constraints, 3096 bool &Consistent) { 3097 bool Result = false; 3098 for (unsigned LI : Loops.set_bits()) { 3099 LLVM_DEBUG(dbgs() << "\t Constraint[" << LI << "] is"); 3100 LLVM_DEBUG(Constraints[LI].dump(dbgs())); 3101 if (Constraints[LI].isDistance()) 3102 Result |= propagateDistance(Src, Dst, Constraints[LI], Consistent); 3103 else if (Constraints[LI].isLine()) 3104 Result |= propagateLine(Src, Dst, Constraints[LI], Consistent); 3105 else if (Constraints[LI].isPoint()) 3106 Result |= propagatePoint(Src, Dst, Constraints[LI]); 3107 } 3108 return Result; 3109 } 3110 3111 3112 // Attempt to propagate a distance 3113 // constraint into a subscript pair (Src and Dst). 3114 // Return true if some simplification occurs. 3115 // If the simplification isn't exact (that is, if it is conservative 3116 // in terms of dependence), set consistent to false. 3117 bool DependenceInfo::propagateDistance(const SCEV *&Src, const SCEV *&Dst, 3118 Constraint &CurConstraint, 3119 bool &Consistent) { 3120 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3121 LLVM_DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); 3122 const SCEV *A_K = findCoefficient(Src, CurLoop); 3123 if (A_K->isZero()) 3124 return false; 3125 const SCEV *DA_K = SE->getMulExpr(A_K, CurConstraint.getD()); 3126 Src = SE->getMinusSCEV(Src, DA_K); 3127 Src = zeroCoefficient(Src, CurLoop); 3128 LLVM_DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); 3129 LLVM_DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); 3130 Dst = addToCoefficient(Dst, CurLoop, SE->getNegativeSCEV(A_K)); 3131 LLVM_DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); 3132 if (!findCoefficient(Dst, CurLoop)->isZero()) 3133 Consistent = false; 3134 return true; 3135 } 3136 3137 3138 // Attempt to propagate a line 3139 // constraint into a subscript pair (Src and Dst). 3140 // Return true if some simplification occurs. 3141 // If the simplification isn't exact (that is, if it is conservative 3142 // in terms of dependence), set consistent to false. 3143 bool DependenceInfo::propagateLine(const SCEV *&Src, const SCEV *&Dst, 3144 Constraint &CurConstraint, 3145 bool &Consistent) { 3146 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3147 const SCEV *A = CurConstraint.getA(); 3148 const SCEV *B = CurConstraint.getB(); 3149 const SCEV *C = CurConstraint.getC(); 3150 LLVM_DEBUG(dbgs() << "\t\tA = " << *A << ", B = " << *B << ", C = " << *C 3151 << "\n"); 3152 LLVM_DEBUG(dbgs() << "\t\tSrc = " << *Src << "\n"); 3153 LLVM_DEBUG(dbgs() << "\t\tDst = " << *Dst << "\n"); 3154 if (A->isZero()) { 3155 const SCEVConstant *Bconst = dyn_cast<SCEVConstant>(B); 3156 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3157 if (!Bconst || !Cconst) return false; 3158 APInt Beta = Bconst->getAPInt(); 3159 APInt Charlie = Cconst->getAPInt(); 3160 APInt CdivB = Charlie.sdiv(Beta); 3161 assert(Charlie.srem(Beta) == 0 && "C should be evenly divisible by B"); 3162 const SCEV *AP_K = findCoefficient(Dst, CurLoop); 3163 // Src = SE->getAddExpr(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); 3164 Src = SE->getMinusSCEV(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); 3165 Dst = zeroCoefficient(Dst, CurLoop); 3166 if (!findCoefficient(Src, CurLoop)->isZero()) 3167 Consistent = false; 3168 } 3169 else if (B->isZero()) { 3170 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); 3171 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3172 if (!Aconst || !Cconst) return false; 3173 APInt Alpha = Aconst->getAPInt(); 3174 APInt Charlie = Cconst->getAPInt(); 3175 APInt CdivA = Charlie.sdiv(Alpha); 3176 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); 3177 const SCEV *A_K = findCoefficient(Src, CurLoop); 3178 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); 3179 Src = zeroCoefficient(Src, CurLoop); 3180 if (!findCoefficient(Dst, CurLoop)->isZero()) 3181 Consistent = false; 3182 } 3183 else if (isKnownPredicate(CmpInst::ICMP_EQ, A, B)) { 3184 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); 3185 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3186 if (!Aconst || !Cconst) return false; 3187 APInt Alpha = Aconst->getAPInt(); 3188 APInt Charlie = Cconst->getAPInt(); 3189 APInt CdivA = Charlie.sdiv(Alpha); 3190 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); 3191 const SCEV *A_K = findCoefficient(Src, CurLoop); 3192 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); 3193 Src = zeroCoefficient(Src, CurLoop); 3194 Dst = addToCoefficient(Dst, CurLoop, A_K); 3195 if (!findCoefficient(Dst, CurLoop)->isZero()) 3196 Consistent = false; 3197 } 3198 else { 3199 // paper is incorrect here, or perhaps just misleading 3200 const SCEV *A_K = findCoefficient(Src, CurLoop); 3201 Src = SE->getMulExpr(Src, A); 3202 Dst = SE->getMulExpr(Dst, A); 3203 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, C)); 3204 Src = zeroCoefficient(Src, CurLoop); 3205 Dst = addToCoefficient(Dst, CurLoop, SE->getMulExpr(A_K, B)); 3206 if (!findCoefficient(Dst, CurLoop)->isZero()) 3207 Consistent = false; 3208 } 3209 LLVM_DEBUG(dbgs() << "\t\tnew Src = " << *Src << "\n"); 3210 LLVM_DEBUG(dbgs() << "\t\tnew Dst = " << *Dst << "\n"); 3211 return true; 3212 } 3213 3214 3215 // Attempt to propagate a point 3216 // constraint into a subscript pair (Src and Dst). 3217 // Return true if some simplification occurs. 3218 bool DependenceInfo::propagatePoint(const SCEV *&Src, const SCEV *&Dst, 3219 Constraint &CurConstraint) { 3220 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3221 const SCEV *A_K = findCoefficient(Src, CurLoop); 3222 const SCEV *AP_K = findCoefficient(Dst, CurLoop); 3223 const SCEV *XA_K = SE->getMulExpr(A_K, CurConstraint.getX()); 3224 const SCEV *YAP_K = SE->getMulExpr(AP_K, CurConstraint.getY()); 3225 LLVM_DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); 3226 Src = SE->getAddExpr(Src, SE->getMinusSCEV(XA_K, YAP_K)); 3227 Src = zeroCoefficient(Src, CurLoop); 3228 LLVM_DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); 3229 LLVM_DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); 3230 Dst = zeroCoefficient(Dst, CurLoop); 3231 LLVM_DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); 3232 return true; 3233 } 3234 3235 3236 // Update direction vector entry based on the current constraint. 3237 void DependenceInfo::updateDirection(Dependence::DVEntry &Level, 3238 const Constraint &CurConstraint) const { 3239 LLVM_DEBUG(dbgs() << "\tUpdate direction, constraint ="); 3240 LLVM_DEBUG(CurConstraint.dump(dbgs())); 3241 if (CurConstraint.isAny()) 3242 ; // use defaults 3243 else if (CurConstraint.isDistance()) { 3244 // this one is consistent, the others aren't 3245 Level.Scalar = false; 3246 Level.Distance = CurConstraint.getD(); 3247 unsigned NewDirection = Dependence::DVEntry::NONE; 3248 if (!SE->isKnownNonZero(Level.Distance)) // if may be zero 3249 NewDirection = Dependence::DVEntry::EQ; 3250 if (!SE->isKnownNonPositive(Level.Distance)) // if may be positive 3251 NewDirection |= Dependence::DVEntry::LT; 3252 if (!SE->isKnownNonNegative(Level.Distance)) // if may be negative 3253 NewDirection |= Dependence::DVEntry::GT; 3254 Level.Direction &= NewDirection; 3255 } 3256 else if (CurConstraint.isLine()) { 3257 Level.Scalar = false; 3258 Level.Distance = nullptr; 3259 // direction should be accurate 3260 } 3261 else if (CurConstraint.isPoint()) { 3262 Level.Scalar = false; 3263 Level.Distance = nullptr; 3264 unsigned NewDirection = Dependence::DVEntry::NONE; 3265 if (!isKnownPredicate(CmpInst::ICMP_NE, 3266 CurConstraint.getY(), 3267 CurConstraint.getX())) 3268 // if X may be = Y 3269 NewDirection |= Dependence::DVEntry::EQ; 3270 if (!isKnownPredicate(CmpInst::ICMP_SLE, 3271 CurConstraint.getY(), 3272 CurConstraint.getX())) 3273 // if Y may be > X 3274 NewDirection |= Dependence::DVEntry::LT; 3275 if (!isKnownPredicate(CmpInst::ICMP_SGE, 3276 CurConstraint.getY(), 3277 CurConstraint.getX())) 3278 // if Y may be < X 3279 NewDirection |= Dependence::DVEntry::GT; 3280 Level.Direction &= NewDirection; 3281 } 3282 else 3283 llvm_unreachable("constraint has unexpected kind"); 3284 } 3285 3286 /// Check if we can delinearize the subscripts. If the SCEVs representing the 3287 /// source and destination array references are recurrences on a nested loop, 3288 /// this function flattens the nested recurrences into separate recurrences 3289 /// for each loop level. 3290 bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst, 3291 SmallVectorImpl<Subscript> &Pair) { 3292 assert(isLoadOrStore(Src) && "instruction is not load or store"); 3293 assert(isLoadOrStore(Dst) && "instruction is not load or store"); 3294 Value *SrcPtr = getLoadStorePointerOperand(Src); 3295 Value *DstPtr = getLoadStorePointerOperand(Dst); 3296 Loop *SrcLoop = LI->getLoopFor(Src->getParent()); 3297 Loop *DstLoop = LI->getLoopFor(Dst->getParent()); 3298 const SCEV *SrcAccessFn = SE->getSCEVAtScope(SrcPtr, SrcLoop); 3299 const SCEV *DstAccessFn = SE->getSCEVAtScope(DstPtr, DstLoop); 3300 const SCEVUnknown *SrcBase = 3301 dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn)); 3302 const SCEVUnknown *DstBase = 3303 dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn)); 3304 3305 if (!SrcBase || !DstBase || SrcBase != DstBase) 3306 return false; 3307 3308 SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts; 3309 3310 if (!tryDelinearizeFixedSize(Src, Dst, SrcAccessFn, DstAccessFn, 3311 SrcSubscripts, DstSubscripts) && 3312 !tryDelinearizeParametricSize(Src, Dst, SrcAccessFn, DstAccessFn, 3313 SrcSubscripts, DstSubscripts)) 3314 return false; 3315 3316 int Size = SrcSubscripts.size(); 3317 LLVM_DEBUG({ 3318 dbgs() << "\nSrcSubscripts: "; 3319 for (int I = 0; I < Size; I++) 3320 dbgs() << *SrcSubscripts[I]; 3321 dbgs() << "\nDstSubscripts: "; 3322 for (int I = 0; I < Size; I++) 3323 dbgs() << *DstSubscripts[I]; 3324 }); 3325 3326 // The delinearization transforms a single-subscript MIV dependence test into 3327 // a multi-subscript SIV dependence test that is easier to compute. So we 3328 // resize Pair to contain as many pairs of subscripts as the delinearization 3329 // has found, and then initialize the pairs following the delinearization. 3330 Pair.resize(Size); 3331 for (int I = 0; I < Size; ++I) { 3332 Pair[I].Src = SrcSubscripts[I]; 3333 Pair[I].Dst = DstSubscripts[I]; 3334 unifySubscriptType(&Pair[I]); 3335 } 3336 3337 return true; 3338 } 3339 3340 bool DependenceInfo::tryDelinearizeFixedSize( 3341 Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn, 3342 const SCEV *DstAccessFn, SmallVectorImpl<const SCEV *> &SrcSubscripts, 3343 SmallVectorImpl<const SCEV *> &DstSubscripts) { 3344 3345 Value *SrcPtr = getLoadStorePointerOperand(Src); 3346 Value *DstPtr = getLoadStorePointerOperand(Dst); 3347 const SCEVUnknown *SrcBase = 3348 dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn)); 3349 const SCEVUnknown *DstBase = 3350 dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn)); 3351 assert(SrcBase && DstBase && SrcBase == DstBase && 3352 "expected src and dst scev unknowns to be equal"); 3353 3354 // Check the simple case where the array dimensions are fixed size. 3355 auto *SrcGEP = dyn_cast<GetElementPtrInst>(SrcPtr); 3356 auto *DstGEP = dyn_cast<GetElementPtrInst>(DstPtr); 3357 if (!SrcGEP || !DstGEP) 3358 return false; 3359 3360 SmallVector<int, 4> SrcSizes, DstSizes; 3361 getIndexExpressionsFromGEP(*SE, SrcGEP, SrcSubscripts, SrcSizes); 3362 getIndexExpressionsFromGEP(*SE, DstGEP, DstSubscripts, DstSizes); 3363 3364 // Check that the two size arrays are non-empty and equal in length and 3365 // value. 3366 if (SrcSizes.empty() || SrcSubscripts.size() <= 1 || 3367 SrcSizes.size() != DstSizes.size() || 3368 !std::equal(SrcSizes.begin(), SrcSizes.end(), DstSizes.begin())) { 3369 SrcSubscripts.clear(); 3370 DstSubscripts.clear(); 3371 return false; 3372 } 3373 3374 Value *SrcBasePtr = SrcGEP->getOperand(0)->stripPointerCasts(); 3375 Value *DstBasePtr = DstGEP->getOperand(0)->stripPointerCasts(); 3376 3377 // Check that for identical base pointers we do not miss index offsets 3378 // that have been added before this GEP is applied. 3379 if (SrcBasePtr != SrcBase->getValue() || DstBasePtr != DstBase->getValue()) { 3380 SrcSubscripts.clear(); 3381 DstSubscripts.clear(); 3382 return false; 3383 } 3384 3385 assert(SrcSubscripts.size() == DstSubscripts.size() && 3386 SrcSubscripts.size() == SrcSizes.size() + 1 && 3387 "Expected equal number of entries in the list of sizes and " 3388 "subscripts."); 3389 3390 // In general we cannot safely assume that the subscripts recovered from GEPs 3391 // are in the range of values defined for their corresponding array 3392 // dimensions. For example some C language usage/interpretation make it 3393 // impossible to verify this at compile-time. As such we can only delinearize 3394 // iff the subscripts are positive and are less than the range of the 3395 // dimension. 3396 if (!DisableDelinearizationChecks) { 3397 auto AllIndiciesInRange = [&](SmallVector<int, 4> &DimensionSizes, 3398 SmallVectorImpl<const SCEV *> &Subscripts, 3399 Value *Ptr) { 3400 size_t SSize = Subscripts.size(); 3401 for (size_t I = 1; I < SSize; ++I) { 3402 const SCEV *S = Subscripts[I]; 3403 if (!isKnownNonNegative(S, Ptr)) 3404 return false; 3405 if (auto *SType = dyn_cast<IntegerType>(S->getType())) { 3406 const SCEV *Range = SE->getConstant( 3407 ConstantInt::get(SType, DimensionSizes[I - 1], false)); 3408 if (!isKnownLessThan(S, Range)) 3409 return false; 3410 } 3411 } 3412 return true; 3413 }; 3414 3415 if (!AllIndiciesInRange(SrcSizes, SrcSubscripts, SrcPtr) || 3416 !AllIndiciesInRange(DstSizes, DstSubscripts, DstPtr)) { 3417 SrcSubscripts.clear(); 3418 DstSubscripts.clear(); 3419 return false; 3420 } 3421 } 3422 LLVM_DEBUG({ 3423 dbgs() << "Delinearized subscripts of fixed-size array\n" 3424 << "SrcGEP:" << *SrcGEP << "\n" 3425 << "DstGEP:" << *DstGEP << "\n"; 3426 }); 3427 return true; 3428 } 3429 3430 bool DependenceInfo::tryDelinearizeParametricSize( 3431 Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn, 3432 const SCEV *DstAccessFn, SmallVectorImpl<const SCEV *> &SrcSubscripts, 3433 SmallVectorImpl<const SCEV *> &DstSubscripts) { 3434 3435 Value *SrcPtr = getLoadStorePointerOperand(Src); 3436 Value *DstPtr = getLoadStorePointerOperand(Dst); 3437 const SCEVUnknown *SrcBase = 3438 dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn)); 3439 const SCEVUnknown *DstBase = 3440 dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn)); 3441 assert(SrcBase && DstBase && SrcBase == DstBase && 3442 "expected src and dst scev unknowns to be equal"); 3443 3444 const SCEV *ElementSize = SE->getElementSize(Src); 3445 if (ElementSize != SE->getElementSize(Dst)) 3446 return false; 3447 3448 const SCEV *SrcSCEV = SE->getMinusSCEV(SrcAccessFn, SrcBase); 3449 const SCEV *DstSCEV = SE->getMinusSCEV(DstAccessFn, DstBase); 3450 3451 const SCEVAddRecExpr *SrcAR = dyn_cast<SCEVAddRecExpr>(SrcSCEV); 3452 const SCEVAddRecExpr *DstAR = dyn_cast<SCEVAddRecExpr>(DstSCEV); 3453 if (!SrcAR || !DstAR || !SrcAR->isAffine() || !DstAR->isAffine()) 3454 return false; 3455 3456 // First step: collect parametric terms in both array references. 3457 SmallVector<const SCEV *, 4> Terms; 3458 collectParametricTerms(*SE, SrcAR, Terms); 3459 collectParametricTerms(*SE, DstAR, Terms); 3460 3461 // Second step: find subscript sizes. 3462 SmallVector<const SCEV *, 4> Sizes; 3463 findArrayDimensions(*SE, Terms, Sizes, ElementSize); 3464 3465 // Third step: compute the access functions for each subscript. 3466 computeAccessFunctions(*SE, SrcAR, SrcSubscripts, Sizes); 3467 computeAccessFunctions(*SE, DstAR, DstSubscripts, Sizes); 3468 3469 // Fail when there is only a subscript: that's a linearized access function. 3470 if (SrcSubscripts.size() < 2 || DstSubscripts.size() < 2 || 3471 SrcSubscripts.size() != DstSubscripts.size()) 3472 return false; 3473 3474 size_t Size = SrcSubscripts.size(); 3475 3476 // Statically check that the array bounds are in-range. The first subscript we 3477 // don't have a size for and it cannot overflow into another subscript, so is 3478 // always safe. The others need to be 0 <= subscript[i] < bound, for both src 3479 // and dst. 3480 // FIXME: It may be better to record these sizes and add them as constraints 3481 // to the dependency checks. 3482 if (!DisableDelinearizationChecks) 3483 for (size_t I = 1; I < Size; ++I) { 3484 if (!isKnownNonNegative(SrcSubscripts[I], SrcPtr)) 3485 return false; 3486 3487 if (!isKnownLessThan(SrcSubscripts[I], Sizes[I - 1])) 3488 return false; 3489 3490 if (!isKnownNonNegative(DstSubscripts[I], DstPtr)) 3491 return false; 3492 3493 if (!isKnownLessThan(DstSubscripts[I], Sizes[I - 1])) 3494 return false; 3495 } 3496 3497 return true; 3498 } 3499 3500 //===----------------------------------------------------------------------===// 3501 3502 #ifndef NDEBUG 3503 // For debugging purposes, dump a small bit vector to dbgs(). 3504 static void dumpSmallBitVector(SmallBitVector &BV) { 3505 dbgs() << "{"; 3506 for (unsigned VI : BV.set_bits()) { 3507 dbgs() << VI; 3508 if (BV.find_next(VI) >= 0) 3509 dbgs() << ' '; 3510 } 3511 dbgs() << "}\n"; 3512 } 3513 #endif 3514 3515 bool DependenceInfo::invalidate(Function &F, const PreservedAnalyses &PA, 3516 FunctionAnalysisManager::Invalidator &Inv) { 3517 // Check if the analysis itself has been invalidated. 3518 auto PAC = PA.getChecker<DependenceAnalysis>(); 3519 if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>()) 3520 return true; 3521 3522 // Check transitive dependencies. 3523 return Inv.invalidate<AAManager>(F, PA) || 3524 Inv.invalidate<ScalarEvolutionAnalysis>(F, PA) || 3525 Inv.invalidate<LoopAnalysis>(F, PA); 3526 } 3527 3528 // depends - 3529 // Returns NULL if there is no dependence. 3530 // Otherwise, return a Dependence with as many details as possible. 3531 // Corresponds to Section 3.1 in the paper 3532 // 3533 // Practical Dependence Testing 3534 // Goff, Kennedy, Tseng 3535 // PLDI 1991 3536 // 3537 // Care is required to keep the routine below, getSplitIteration(), 3538 // up to date with respect to this routine. 3539 std::unique_ptr<Dependence> 3540 DependenceInfo::depends(Instruction *Src, Instruction *Dst, 3541 bool PossiblyLoopIndependent) { 3542 if (Src == Dst) 3543 PossiblyLoopIndependent = false; 3544 3545 if (!(Src->mayReadOrWriteMemory() && Dst->mayReadOrWriteMemory())) 3546 // if both instructions don't reference memory, there's no dependence 3547 return nullptr; 3548 3549 if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) { 3550 // can only analyze simple loads and stores, i.e., no calls, invokes, etc. 3551 LLVM_DEBUG(dbgs() << "can only handle simple loads and stores\n"); 3552 return std::make_unique<Dependence>(Src, Dst); 3553 } 3554 3555 assert(isLoadOrStore(Src) && "instruction is not load or store"); 3556 assert(isLoadOrStore(Dst) && "instruction is not load or store"); 3557 Value *SrcPtr = getLoadStorePointerOperand(Src); 3558 Value *DstPtr = getLoadStorePointerOperand(Dst); 3559 3560 switch (underlyingObjectsAlias(AA, F->getParent()->getDataLayout(), 3561 MemoryLocation::get(Dst), 3562 MemoryLocation::get(Src))) { 3563 case AliasResult::MayAlias: 3564 case AliasResult::PartialAlias: 3565 // cannot analyse objects if we don't understand their aliasing. 3566 LLVM_DEBUG(dbgs() << "can't analyze may or partial alias\n"); 3567 return std::make_unique<Dependence>(Src, Dst); 3568 case AliasResult::NoAlias: 3569 // If the objects noalias, they are distinct, accesses are independent. 3570 LLVM_DEBUG(dbgs() << "no alias\n"); 3571 return nullptr; 3572 case AliasResult::MustAlias: 3573 break; // The underlying objects alias; test accesses for dependence. 3574 } 3575 3576 // establish loop nesting levels 3577 establishNestingLevels(Src, Dst); 3578 LLVM_DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n"); 3579 LLVM_DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n"); 3580 3581 FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels); 3582 ++TotalArrayPairs; 3583 3584 unsigned Pairs = 1; 3585 SmallVector<Subscript, 2> Pair(Pairs); 3586 const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); 3587 const SCEV *DstSCEV = SE->getSCEV(DstPtr); 3588 LLVM_DEBUG(dbgs() << " SrcSCEV = " << *SrcSCEV << "\n"); 3589 LLVM_DEBUG(dbgs() << " DstSCEV = " << *DstSCEV << "\n"); 3590 if (SE->getPointerBase(SrcSCEV) != SE->getPointerBase(DstSCEV)) { 3591 // If two pointers have different bases, trying to analyze indexes won't 3592 // work; we can't compare them to each other. This can happen, for example, 3593 // if one is produced by an LCSSA PHI node. 3594 // 3595 // We check this upfront so we don't crash in cases where getMinusSCEV() 3596 // returns a SCEVCouldNotCompute. 3597 LLVM_DEBUG(dbgs() << "can't analyze SCEV with different pointer base\n"); 3598 return std::make_unique<Dependence>(Src, Dst); 3599 } 3600 Pair[0].Src = SrcSCEV; 3601 Pair[0].Dst = DstSCEV; 3602 3603 if (Delinearize) { 3604 if (tryDelinearize(Src, Dst, Pair)) { 3605 LLVM_DEBUG(dbgs() << " delinearized\n"); 3606 Pairs = Pair.size(); 3607 } 3608 } 3609 3610 for (unsigned P = 0; P < Pairs; ++P) { 3611 Pair[P].Loops.resize(MaxLevels + 1); 3612 Pair[P].GroupLoops.resize(MaxLevels + 1); 3613 Pair[P].Group.resize(Pairs); 3614 removeMatchingExtensions(&Pair[P]); 3615 Pair[P].Classification = 3616 classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), 3617 Pair[P].Dst, LI->getLoopFor(Dst->getParent()), 3618 Pair[P].Loops); 3619 Pair[P].GroupLoops = Pair[P].Loops; 3620 Pair[P].Group.set(P); 3621 LLVM_DEBUG(dbgs() << " subscript " << P << "\n"); 3622 LLVM_DEBUG(dbgs() << "\tsrc = " << *Pair[P].Src << "\n"); 3623 LLVM_DEBUG(dbgs() << "\tdst = " << *Pair[P].Dst << "\n"); 3624 LLVM_DEBUG(dbgs() << "\tclass = " << Pair[P].Classification << "\n"); 3625 LLVM_DEBUG(dbgs() << "\tloops = "); 3626 LLVM_DEBUG(dumpSmallBitVector(Pair[P].Loops)); 3627 } 3628 3629 SmallBitVector Separable(Pairs); 3630 SmallBitVector Coupled(Pairs); 3631 3632 // Partition subscripts into separable and minimally-coupled groups 3633 // Algorithm in paper is algorithmically better; 3634 // this may be faster in practice. Check someday. 3635 // 3636 // Here's an example of how it works. Consider this code: 3637 // 3638 // for (i = ...) { 3639 // for (j = ...) { 3640 // for (k = ...) { 3641 // for (l = ...) { 3642 // for (m = ...) { 3643 // A[i][j][k][m] = ...; 3644 // ... = A[0][j][l][i + j]; 3645 // } 3646 // } 3647 // } 3648 // } 3649 // } 3650 // 3651 // There are 4 subscripts here: 3652 // 0 [i] and [0] 3653 // 1 [j] and [j] 3654 // 2 [k] and [l] 3655 // 3 [m] and [i + j] 3656 // 3657 // We've already classified each subscript pair as ZIV, SIV, etc., 3658 // and collected all the loops mentioned by pair P in Pair[P].Loops. 3659 // In addition, we've initialized Pair[P].GroupLoops to Pair[P].Loops 3660 // and set Pair[P].Group = {P}. 3661 // 3662 // Src Dst Classification Loops GroupLoops Group 3663 // 0 [i] [0] SIV {1} {1} {0} 3664 // 1 [j] [j] SIV {2} {2} {1} 3665 // 2 [k] [l] RDIV {3,4} {3,4} {2} 3666 // 3 [m] [i + j] MIV {1,2,5} {1,2,5} {3} 3667 // 3668 // For each subscript SI 0 .. 3, we consider each remaining subscript, SJ. 3669 // So, 0 is compared against 1, 2, and 3; 1 is compared against 2 and 3, etc. 3670 // 3671 // We begin by comparing 0 and 1. The intersection of the GroupLoops is empty. 3672 // Next, 0 and 2. Again, the intersection of their GroupLoops is empty. 3673 // Next 0 and 3. The intersection of their GroupLoop = {1}, not empty, 3674 // so Pair[3].Group = {0,3} and Done = false (that is, 0 will not be added 3675 // to either Separable or Coupled). 3676 // 3677 // Next, we consider 1 and 2. The intersection of the GroupLoops is empty. 3678 // Next, 1 and 3. The intersection of their GroupLoops = {2}, not empty, 3679 // so Pair[3].Group = {0, 1, 3} and Done = false. 3680 // 3681 // Next, we compare 2 against 3. The intersection of the GroupLoops is empty. 3682 // Since Done remains true, we add 2 to the set of Separable pairs. 3683 // 3684 // Finally, we consider 3. There's nothing to compare it with, 3685 // so Done remains true and we add it to the Coupled set. 3686 // Pair[3].Group = {0, 1, 3} and GroupLoops = {1, 2, 5}. 3687 // 3688 // In the end, we've got 1 separable subscript and 1 coupled group. 3689 for (unsigned SI = 0; SI < Pairs; ++SI) { 3690 if (Pair[SI].Classification == Subscript::NonLinear) { 3691 // ignore these, but collect loops for later 3692 ++NonlinearSubscriptPairs; 3693 collectCommonLoops(Pair[SI].Src, 3694 LI->getLoopFor(Src->getParent()), 3695 Pair[SI].Loops); 3696 collectCommonLoops(Pair[SI].Dst, 3697 LI->getLoopFor(Dst->getParent()), 3698 Pair[SI].Loops); 3699 Result.Consistent = false; 3700 } else if (Pair[SI].Classification == Subscript::ZIV) { 3701 // always separable 3702 Separable.set(SI); 3703 } 3704 else { 3705 // SIV, RDIV, or MIV, so check for coupled group 3706 bool Done = true; 3707 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { 3708 SmallBitVector Intersection = Pair[SI].GroupLoops; 3709 Intersection &= Pair[SJ].GroupLoops; 3710 if (Intersection.any()) { 3711 // accumulate set of all the loops in group 3712 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; 3713 // accumulate set of all subscripts in group 3714 Pair[SJ].Group |= Pair[SI].Group; 3715 Done = false; 3716 } 3717 } 3718 if (Done) { 3719 if (Pair[SI].Group.count() == 1) { 3720 Separable.set(SI); 3721 ++SeparableSubscriptPairs; 3722 } 3723 else { 3724 Coupled.set(SI); 3725 ++CoupledSubscriptPairs; 3726 } 3727 } 3728 } 3729 } 3730 3731 LLVM_DEBUG(dbgs() << " Separable = "); 3732 LLVM_DEBUG(dumpSmallBitVector(Separable)); 3733 LLVM_DEBUG(dbgs() << " Coupled = "); 3734 LLVM_DEBUG(dumpSmallBitVector(Coupled)); 3735 3736 Constraint NewConstraint; 3737 NewConstraint.setAny(SE); 3738 3739 // test separable subscripts 3740 for (unsigned SI : Separable.set_bits()) { 3741 LLVM_DEBUG(dbgs() << "testing subscript " << SI); 3742 switch (Pair[SI].Classification) { 3743 case Subscript::ZIV: 3744 LLVM_DEBUG(dbgs() << ", ZIV\n"); 3745 if (testZIV(Pair[SI].Src, Pair[SI].Dst, Result)) 3746 return nullptr; 3747 break; 3748 case Subscript::SIV: { 3749 LLVM_DEBUG(dbgs() << ", SIV\n"); 3750 unsigned Level; 3751 const SCEV *SplitIter = nullptr; 3752 if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level, Result, NewConstraint, 3753 SplitIter)) 3754 return nullptr; 3755 break; 3756 } 3757 case Subscript::RDIV: 3758 LLVM_DEBUG(dbgs() << ", RDIV\n"); 3759 if (testRDIV(Pair[SI].Src, Pair[SI].Dst, Result)) 3760 return nullptr; 3761 break; 3762 case Subscript::MIV: 3763 LLVM_DEBUG(dbgs() << ", MIV\n"); 3764 if (testMIV(Pair[SI].Src, Pair[SI].Dst, Pair[SI].Loops, Result)) 3765 return nullptr; 3766 break; 3767 default: 3768 llvm_unreachable("subscript has unexpected classification"); 3769 } 3770 } 3771 3772 if (Coupled.count()) { 3773 // test coupled subscript groups 3774 LLVM_DEBUG(dbgs() << "starting on coupled subscripts\n"); 3775 LLVM_DEBUG(dbgs() << "MaxLevels + 1 = " << MaxLevels + 1 << "\n"); 3776 SmallVector<Constraint, 4> Constraints(MaxLevels + 1); 3777 for (unsigned II = 0; II <= MaxLevels; ++II) 3778 Constraints[II].setAny(SE); 3779 for (unsigned SI : Coupled.set_bits()) { 3780 LLVM_DEBUG(dbgs() << "testing subscript group " << SI << " { "); 3781 SmallBitVector Group(Pair[SI].Group); 3782 SmallBitVector Sivs(Pairs); 3783 SmallBitVector Mivs(Pairs); 3784 SmallBitVector ConstrainedLevels(MaxLevels + 1); 3785 SmallVector<Subscript *, 4> PairsInGroup; 3786 for (unsigned SJ : Group.set_bits()) { 3787 LLVM_DEBUG(dbgs() << SJ << " "); 3788 if (Pair[SJ].Classification == Subscript::SIV) 3789 Sivs.set(SJ); 3790 else 3791 Mivs.set(SJ); 3792 PairsInGroup.push_back(&Pair[SJ]); 3793 } 3794 unifySubscriptType(PairsInGroup); 3795 LLVM_DEBUG(dbgs() << "}\n"); 3796 while (Sivs.any()) { 3797 bool Changed = false; 3798 for (unsigned SJ : Sivs.set_bits()) { 3799 LLVM_DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n"); 3800 // SJ is an SIV subscript that's part of the current coupled group 3801 unsigned Level; 3802 const SCEV *SplitIter = nullptr; 3803 LLVM_DEBUG(dbgs() << "SIV\n"); 3804 if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, Result, NewConstraint, 3805 SplitIter)) 3806 return nullptr; 3807 ConstrainedLevels.set(Level); 3808 if (intersectConstraints(&Constraints[Level], &NewConstraint)) { 3809 if (Constraints[Level].isEmpty()) { 3810 ++DeltaIndependence; 3811 return nullptr; 3812 } 3813 Changed = true; 3814 } 3815 Sivs.reset(SJ); 3816 } 3817 if (Changed) { 3818 // propagate, possibly creating new SIVs and ZIVs 3819 LLVM_DEBUG(dbgs() << " propagating\n"); 3820 LLVM_DEBUG(dbgs() << "\tMivs = "); 3821 LLVM_DEBUG(dumpSmallBitVector(Mivs)); 3822 for (unsigned SJ : Mivs.set_bits()) { 3823 // SJ is an MIV subscript that's part of the current coupled group 3824 LLVM_DEBUG(dbgs() << "\tSJ = " << SJ << "\n"); 3825 if (propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, 3826 Constraints, Result.Consistent)) { 3827 LLVM_DEBUG(dbgs() << "\t Changed\n"); 3828 ++DeltaPropagations; 3829 Pair[SJ].Classification = 3830 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), 3831 Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), 3832 Pair[SJ].Loops); 3833 switch (Pair[SJ].Classification) { 3834 case Subscript::ZIV: 3835 LLVM_DEBUG(dbgs() << "ZIV\n"); 3836 if (testZIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) 3837 return nullptr; 3838 Mivs.reset(SJ); 3839 break; 3840 case Subscript::SIV: 3841 Sivs.set(SJ); 3842 Mivs.reset(SJ); 3843 break; 3844 case Subscript::RDIV: 3845 case Subscript::MIV: 3846 break; 3847 default: 3848 llvm_unreachable("bad subscript classification"); 3849 } 3850 } 3851 } 3852 } 3853 } 3854 3855 // test & propagate remaining RDIVs 3856 for (unsigned SJ : Mivs.set_bits()) { 3857 if (Pair[SJ].Classification == Subscript::RDIV) { 3858 LLVM_DEBUG(dbgs() << "RDIV test\n"); 3859 if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) 3860 return nullptr; 3861 // I don't yet understand how to propagate RDIV results 3862 Mivs.reset(SJ); 3863 } 3864 } 3865 3866 // test remaining MIVs 3867 // This code is temporary. 3868 // Better to somehow test all remaining subscripts simultaneously. 3869 for (unsigned SJ : Mivs.set_bits()) { 3870 if (Pair[SJ].Classification == Subscript::MIV) { 3871 LLVM_DEBUG(dbgs() << "MIV test\n"); 3872 if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Result)) 3873 return nullptr; 3874 } 3875 else 3876 llvm_unreachable("expected only MIV subscripts at this point"); 3877 } 3878 3879 // update Result.DV from constraint vector 3880 LLVM_DEBUG(dbgs() << " updating\n"); 3881 for (unsigned SJ : ConstrainedLevels.set_bits()) { 3882 if (SJ > CommonLevels) 3883 break; 3884 updateDirection(Result.DV[SJ - 1], Constraints[SJ]); 3885 if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE) 3886 return nullptr; 3887 } 3888 } 3889 } 3890 3891 // Make sure the Scalar flags are set correctly. 3892 SmallBitVector CompleteLoops(MaxLevels + 1); 3893 for (unsigned SI = 0; SI < Pairs; ++SI) 3894 CompleteLoops |= Pair[SI].Loops; 3895 for (unsigned II = 1; II <= CommonLevels; ++II) 3896 if (CompleteLoops[II]) 3897 Result.DV[II - 1].Scalar = false; 3898 3899 if (PossiblyLoopIndependent) { 3900 // Make sure the LoopIndependent flag is set correctly. 3901 // All directions must include equal, otherwise no 3902 // loop-independent dependence is possible. 3903 for (unsigned II = 1; II <= CommonLevels; ++II) { 3904 if (!(Result.getDirection(II) & Dependence::DVEntry::EQ)) { 3905 Result.LoopIndependent = false; 3906 break; 3907 } 3908 } 3909 } 3910 else { 3911 // On the other hand, if all directions are equal and there's no 3912 // loop-independent dependence possible, then no dependence exists. 3913 bool AllEqual = true; 3914 for (unsigned II = 1; II <= CommonLevels; ++II) { 3915 if (Result.getDirection(II) != Dependence::DVEntry::EQ) { 3916 AllEqual = false; 3917 break; 3918 } 3919 } 3920 if (AllEqual) 3921 return nullptr; 3922 } 3923 3924 return std::make_unique<FullDependence>(std::move(Result)); 3925 } 3926 3927 //===----------------------------------------------------------------------===// 3928 // getSplitIteration - 3929 // Rather than spend rarely-used space recording the splitting iteration 3930 // during the Weak-Crossing SIV test, we re-compute it on demand. 3931 // The re-computation is basically a repeat of the entire dependence test, 3932 // though simplified since we know that the dependence exists. 3933 // It's tedious, since we must go through all propagations, etc. 3934 // 3935 // Care is required to keep this code up to date with respect to the routine 3936 // above, depends(). 3937 // 3938 // Generally, the dependence analyzer will be used to build 3939 // a dependence graph for a function (basically a map from instructions 3940 // to dependences). Looking for cycles in the graph shows us loops 3941 // that cannot be trivially vectorized/parallelized. 3942 // 3943 // We can try to improve the situation by examining all the dependences 3944 // that make up the cycle, looking for ones we can break. 3945 // Sometimes, peeling the first or last iteration of a loop will break 3946 // dependences, and we've got flags for those possibilities. 3947 // Sometimes, splitting a loop at some other iteration will do the trick, 3948 // and we've got a flag for that case. Rather than waste the space to 3949 // record the exact iteration (since we rarely know), we provide 3950 // a method that calculates the iteration. It's a drag that it must work 3951 // from scratch, but wonderful in that it's possible. 3952 // 3953 // Here's an example: 3954 // 3955 // for (i = 0; i < 10; i++) 3956 // A[i] = ... 3957 // ... = A[11 - i] 3958 // 3959 // There's a loop-carried flow dependence from the store to the load, 3960 // found by the weak-crossing SIV test. The dependence will have a flag, 3961 // indicating that the dependence can be broken by splitting the loop. 3962 // Calling getSplitIteration will return 5. 3963 // Splitting the loop breaks the dependence, like so: 3964 // 3965 // for (i = 0; i <= 5; i++) 3966 // A[i] = ... 3967 // ... = A[11 - i] 3968 // for (i = 6; i < 10; i++) 3969 // A[i] = ... 3970 // ... = A[11 - i] 3971 // 3972 // breaks the dependence and allows us to vectorize/parallelize 3973 // both loops. 3974 const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep, 3975 unsigned SplitLevel) { 3976 assert(Dep.isSplitable(SplitLevel) && 3977 "Dep should be splitable at SplitLevel"); 3978 Instruction *Src = Dep.getSrc(); 3979 Instruction *Dst = Dep.getDst(); 3980 assert(Src->mayReadFromMemory() || Src->mayWriteToMemory()); 3981 assert(Dst->mayReadFromMemory() || Dst->mayWriteToMemory()); 3982 assert(isLoadOrStore(Src)); 3983 assert(isLoadOrStore(Dst)); 3984 Value *SrcPtr = getLoadStorePointerOperand(Src); 3985 Value *DstPtr = getLoadStorePointerOperand(Dst); 3986 assert(underlyingObjectsAlias( 3987 AA, F->getParent()->getDataLayout(), MemoryLocation::get(Dst), 3988 MemoryLocation::get(Src)) == AliasResult::MustAlias); 3989 3990 // establish loop nesting levels 3991 establishNestingLevels(Src, Dst); 3992 3993 FullDependence Result(Src, Dst, false, CommonLevels); 3994 3995 unsigned Pairs = 1; 3996 SmallVector<Subscript, 2> Pair(Pairs); 3997 const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); 3998 const SCEV *DstSCEV = SE->getSCEV(DstPtr); 3999 Pair[0].Src = SrcSCEV; 4000 Pair[0].Dst = DstSCEV; 4001 4002 if (Delinearize) { 4003 if (tryDelinearize(Src, Dst, Pair)) { 4004 LLVM_DEBUG(dbgs() << " delinearized\n"); 4005 Pairs = Pair.size(); 4006 } 4007 } 4008 4009 for (unsigned P = 0; P < Pairs; ++P) { 4010 Pair[P].Loops.resize(MaxLevels + 1); 4011 Pair[P].GroupLoops.resize(MaxLevels + 1); 4012 Pair[P].Group.resize(Pairs); 4013 removeMatchingExtensions(&Pair[P]); 4014 Pair[P].Classification = 4015 classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), 4016 Pair[P].Dst, LI->getLoopFor(Dst->getParent()), 4017 Pair[P].Loops); 4018 Pair[P].GroupLoops = Pair[P].Loops; 4019 Pair[P].Group.set(P); 4020 } 4021 4022 SmallBitVector Separable(Pairs); 4023 SmallBitVector Coupled(Pairs); 4024 4025 // partition subscripts into separable and minimally-coupled groups 4026 for (unsigned SI = 0; SI < Pairs; ++SI) { 4027 if (Pair[SI].Classification == Subscript::NonLinear) { 4028 // ignore these, but collect loops for later 4029 collectCommonLoops(Pair[SI].Src, 4030 LI->getLoopFor(Src->getParent()), 4031 Pair[SI].Loops); 4032 collectCommonLoops(Pair[SI].Dst, 4033 LI->getLoopFor(Dst->getParent()), 4034 Pair[SI].Loops); 4035 Result.Consistent = false; 4036 } 4037 else if (Pair[SI].Classification == Subscript::ZIV) 4038 Separable.set(SI); 4039 else { 4040 // SIV, RDIV, or MIV, so check for coupled group 4041 bool Done = true; 4042 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { 4043 SmallBitVector Intersection = Pair[SI].GroupLoops; 4044 Intersection &= Pair[SJ].GroupLoops; 4045 if (Intersection.any()) { 4046 // accumulate set of all the loops in group 4047 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; 4048 // accumulate set of all subscripts in group 4049 Pair[SJ].Group |= Pair[SI].Group; 4050 Done = false; 4051 } 4052 } 4053 if (Done) { 4054 if (Pair[SI].Group.count() == 1) 4055 Separable.set(SI); 4056 else 4057 Coupled.set(SI); 4058 } 4059 } 4060 } 4061 4062 Constraint NewConstraint; 4063 NewConstraint.setAny(SE); 4064 4065 // test separable subscripts 4066 for (unsigned SI : Separable.set_bits()) { 4067 switch (Pair[SI].Classification) { 4068 case Subscript::SIV: { 4069 unsigned Level; 4070 const SCEV *SplitIter = nullptr; 4071 (void) testSIV(Pair[SI].Src, Pair[SI].Dst, Level, 4072 Result, NewConstraint, SplitIter); 4073 if (Level == SplitLevel) { 4074 assert(SplitIter != nullptr); 4075 return SplitIter; 4076 } 4077 break; 4078 } 4079 case Subscript::ZIV: 4080 case Subscript::RDIV: 4081 case Subscript::MIV: 4082 break; 4083 default: 4084 llvm_unreachable("subscript has unexpected classification"); 4085 } 4086 } 4087 4088 if (Coupled.count()) { 4089 // test coupled subscript groups 4090 SmallVector<Constraint, 4> Constraints(MaxLevels + 1); 4091 for (unsigned II = 0; II <= MaxLevels; ++II) 4092 Constraints[II].setAny(SE); 4093 for (unsigned SI : Coupled.set_bits()) { 4094 SmallBitVector Group(Pair[SI].Group); 4095 SmallBitVector Sivs(Pairs); 4096 SmallBitVector Mivs(Pairs); 4097 SmallBitVector ConstrainedLevels(MaxLevels + 1); 4098 for (unsigned SJ : Group.set_bits()) { 4099 if (Pair[SJ].Classification == Subscript::SIV) 4100 Sivs.set(SJ); 4101 else 4102 Mivs.set(SJ); 4103 } 4104 while (Sivs.any()) { 4105 bool Changed = false; 4106 for (unsigned SJ : Sivs.set_bits()) { 4107 // SJ is an SIV subscript that's part of the current coupled group 4108 unsigned Level; 4109 const SCEV *SplitIter = nullptr; 4110 (void) testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, 4111 Result, NewConstraint, SplitIter); 4112 if (Level == SplitLevel && SplitIter) 4113 return SplitIter; 4114 ConstrainedLevels.set(Level); 4115 if (intersectConstraints(&Constraints[Level], &NewConstraint)) 4116 Changed = true; 4117 Sivs.reset(SJ); 4118 } 4119 if (Changed) { 4120 // propagate, possibly creating new SIVs and ZIVs 4121 for (unsigned SJ : Mivs.set_bits()) { 4122 // SJ is an MIV subscript that's part of the current coupled group 4123 if (propagate(Pair[SJ].Src, Pair[SJ].Dst, 4124 Pair[SJ].Loops, Constraints, Result.Consistent)) { 4125 Pair[SJ].Classification = 4126 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), 4127 Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), 4128 Pair[SJ].Loops); 4129 switch (Pair[SJ].Classification) { 4130 case Subscript::ZIV: 4131 Mivs.reset(SJ); 4132 break; 4133 case Subscript::SIV: 4134 Sivs.set(SJ); 4135 Mivs.reset(SJ); 4136 break; 4137 case Subscript::RDIV: 4138 case Subscript::MIV: 4139 break; 4140 default: 4141 llvm_unreachable("bad subscript classification"); 4142 } 4143 } 4144 } 4145 } 4146 } 4147 } 4148 } 4149 llvm_unreachable("somehow reached end of routine"); 4150 return nullptr; 4151 } 4152