1 //===-- DependenceAnalysis.cpp - DA Implementation --------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // DependenceAnalysis is an LLVM pass that analyses dependences between memory 10 // accesses. Currently, it is an (incomplete) implementation of the approach 11 // described in 12 // 13 // Practical Dependence Testing 14 // Goff, Kennedy, Tseng 15 // PLDI 1991 16 // 17 // There's a single entry point that analyzes the dependence between a pair 18 // of memory references in a function, returning either NULL, for no dependence, 19 // or a more-or-less detailed description of the dependence between them. 20 // 21 // Currently, the implementation cannot propagate constraints between 22 // coupled RDIV subscripts and lacks a multi-subscript MIV test. 23 // Both of these are conservative weaknesses; 24 // that is, not a source of correctness problems. 25 // 26 // Since Clang linearizes some array subscripts, the dependence 27 // analysis is using SCEV->delinearize to recover the representation of multiple 28 // subscripts, and thus avoid the more expensive and less precise MIV tests. The 29 // delinearization is controlled by the flag -da-delinearize. 30 // 31 // We should pay some careful attention to the possibility of integer overflow 32 // in the implementation of the various tests. This could happen with Add, 33 // Subtract, or Multiply, with both APInt's and SCEV's. 34 // 35 // Some non-linear subscript pairs can be handled by the GCD test 36 // (and perhaps other tests). 37 // Should explore how often these things occur. 38 // 39 // Finally, it seems like certain test cases expose weaknesses in the SCEV 40 // simplification, especially in the handling of sign and zero extensions. 41 // It could be useful to spend time exploring these. 42 // 43 // Please note that this is work in progress and the interface is subject to 44 // change. 45 // 46 //===----------------------------------------------------------------------===// 47 // // 48 // In memory of Ken Kennedy, 1945 - 2007 // 49 // // 50 //===----------------------------------------------------------------------===// 51 52 #include "llvm/Analysis/DependenceAnalysis.h" 53 #include "llvm/ADT/STLExtras.h" 54 #include "llvm/ADT/Statistic.h" 55 #include "llvm/Analysis/AliasAnalysis.h" 56 #include "llvm/Analysis/LoopInfo.h" 57 #include "llvm/Analysis/ScalarEvolution.h" 58 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 59 #include "llvm/Analysis/ValueTracking.h" 60 #include "llvm/Config/llvm-config.h" 61 #include "llvm/IR/InstIterator.h" 62 #include "llvm/IR/Module.h" 63 #include "llvm/IR/Operator.h" 64 #include "llvm/InitializePasses.h" 65 #include "llvm/Support/CommandLine.h" 66 #include "llvm/Support/Debug.h" 67 #include "llvm/Support/ErrorHandling.h" 68 #include "llvm/Support/raw_ostream.h" 69 70 using namespace llvm; 71 72 #define DEBUG_TYPE "da" 73 74 //===----------------------------------------------------------------------===// 75 // statistics 76 77 STATISTIC(TotalArrayPairs, "Array pairs tested"); 78 STATISTIC(SeparableSubscriptPairs, "Separable subscript pairs"); 79 STATISTIC(CoupledSubscriptPairs, "Coupled subscript pairs"); 80 STATISTIC(NonlinearSubscriptPairs, "Nonlinear subscript pairs"); 81 STATISTIC(ZIVapplications, "ZIV applications"); 82 STATISTIC(ZIVindependence, "ZIV independence"); 83 STATISTIC(StrongSIVapplications, "Strong SIV applications"); 84 STATISTIC(StrongSIVsuccesses, "Strong SIV successes"); 85 STATISTIC(StrongSIVindependence, "Strong SIV independence"); 86 STATISTIC(WeakCrossingSIVapplications, "Weak-Crossing SIV applications"); 87 STATISTIC(WeakCrossingSIVsuccesses, "Weak-Crossing SIV successes"); 88 STATISTIC(WeakCrossingSIVindependence, "Weak-Crossing SIV independence"); 89 STATISTIC(ExactSIVapplications, "Exact SIV applications"); 90 STATISTIC(ExactSIVsuccesses, "Exact SIV successes"); 91 STATISTIC(ExactSIVindependence, "Exact SIV independence"); 92 STATISTIC(WeakZeroSIVapplications, "Weak-Zero SIV applications"); 93 STATISTIC(WeakZeroSIVsuccesses, "Weak-Zero SIV successes"); 94 STATISTIC(WeakZeroSIVindependence, "Weak-Zero SIV independence"); 95 STATISTIC(ExactRDIVapplications, "Exact RDIV applications"); 96 STATISTIC(ExactRDIVindependence, "Exact RDIV independence"); 97 STATISTIC(SymbolicRDIVapplications, "Symbolic RDIV applications"); 98 STATISTIC(SymbolicRDIVindependence, "Symbolic RDIV independence"); 99 STATISTIC(DeltaApplications, "Delta applications"); 100 STATISTIC(DeltaSuccesses, "Delta successes"); 101 STATISTIC(DeltaIndependence, "Delta independence"); 102 STATISTIC(DeltaPropagations, "Delta propagations"); 103 STATISTIC(GCDapplications, "GCD applications"); 104 STATISTIC(GCDsuccesses, "GCD successes"); 105 STATISTIC(GCDindependence, "GCD independence"); 106 STATISTIC(BanerjeeApplications, "Banerjee applications"); 107 STATISTIC(BanerjeeIndependence, "Banerjee independence"); 108 STATISTIC(BanerjeeSuccesses, "Banerjee successes"); 109 110 static cl::opt<bool> 111 Delinearize("da-delinearize", cl::init(true), cl::Hidden, cl::ZeroOrMore, 112 cl::desc("Try to delinearize array references.")); 113 static cl::opt<bool> DisableDelinearizationChecks( 114 "da-disable-delinearization-checks", cl::init(false), cl::Hidden, 115 cl::ZeroOrMore, 116 cl::desc( 117 "Disable checks that try to statically verify validity of " 118 "delinearized subscripts. Enabling this option may result in incorrect " 119 "dependence vectors for languages that allow the subscript of one " 120 "dimension to underflow or overflow into another dimension.")); 121 122 static cl::opt<unsigned> MIVMaxLevelThreshold( 123 "da-miv-max-level-threshold", cl::init(7), cl::Hidden, cl::ZeroOrMore, 124 cl::desc("Maximum depth allowed for the recursive algorithm used to " 125 "explore MIV direction vectors.")); 126 127 //===----------------------------------------------------------------------===// 128 // basics 129 130 DependenceAnalysis::Result 131 DependenceAnalysis::run(Function &F, FunctionAnalysisManager &FAM) { 132 auto &AA = FAM.getResult<AAManager>(F); 133 auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F); 134 auto &LI = FAM.getResult<LoopAnalysis>(F); 135 return DependenceInfo(&F, &AA, &SE, &LI); 136 } 137 138 AnalysisKey DependenceAnalysis::Key; 139 140 INITIALIZE_PASS_BEGIN(DependenceAnalysisWrapperPass, "da", 141 "Dependence Analysis", true, true) 142 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 143 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) 144 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 145 INITIALIZE_PASS_END(DependenceAnalysisWrapperPass, "da", "Dependence Analysis", 146 true, true) 147 148 char DependenceAnalysisWrapperPass::ID = 0; 149 150 DependenceAnalysisWrapperPass::DependenceAnalysisWrapperPass() 151 : FunctionPass(ID) { 152 initializeDependenceAnalysisWrapperPassPass(*PassRegistry::getPassRegistry()); 153 } 154 155 FunctionPass *llvm::createDependenceAnalysisWrapperPass() { 156 return new DependenceAnalysisWrapperPass(); 157 } 158 159 bool DependenceAnalysisWrapperPass::runOnFunction(Function &F) { 160 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); 161 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 162 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 163 info.reset(new DependenceInfo(&F, &AA, &SE, &LI)); 164 return false; 165 } 166 167 DependenceInfo &DependenceAnalysisWrapperPass::getDI() const { return *info; } 168 169 void DependenceAnalysisWrapperPass::releaseMemory() { info.reset(); } 170 171 void DependenceAnalysisWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { 172 AU.setPreservesAll(); 173 AU.addRequiredTransitive<AAResultsWrapperPass>(); 174 AU.addRequiredTransitive<ScalarEvolutionWrapperPass>(); 175 AU.addRequiredTransitive<LoopInfoWrapperPass>(); 176 } 177 178 // Used to test the dependence analyzer. 179 // Looks through the function, noting instructions that may access memory. 180 // Calls depends() on every possible pair and prints out the result. 181 // Ignores all other instructions. 182 static void dumpExampleDependence(raw_ostream &OS, DependenceInfo *DA) { 183 auto *F = DA->getFunction(); 184 for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F); SrcI != SrcE; 185 ++SrcI) { 186 if (SrcI->mayReadOrWriteMemory()) { 187 for (inst_iterator DstI = SrcI, DstE = inst_end(F); 188 DstI != DstE; ++DstI) { 189 if (DstI->mayReadOrWriteMemory()) { 190 OS << "Src:" << *SrcI << " --> Dst:" << *DstI << "\n"; 191 OS << " da analyze - "; 192 if (auto D = DA->depends(&*SrcI, &*DstI, true)) { 193 D->dump(OS); 194 for (unsigned Level = 1; Level <= D->getLevels(); Level++) { 195 if (D->isSplitable(Level)) { 196 OS << " da analyze - split level = " << Level; 197 OS << ", iteration = " << *DA->getSplitIteration(*D, Level); 198 OS << "!\n"; 199 } 200 } 201 } 202 else 203 OS << "none!\n"; 204 } 205 } 206 } 207 } 208 } 209 210 void DependenceAnalysisWrapperPass::print(raw_ostream &OS, 211 const Module *) const { 212 dumpExampleDependence(OS, info.get()); 213 } 214 215 PreservedAnalyses 216 DependenceAnalysisPrinterPass::run(Function &F, FunctionAnalysisManager &FAM) { 217 OS << "'Dependence Analysis' for function '" << F.getName() << "':\n"; 218 dumpExampleDependence(OS, &FAM.getResult<DependenceAnalysis>(F)); 219 return PreservedAnalyses::all(); 220 } 221 222 //===----------------------------------------------------------------------===// 223 // Dependence methods 224 225 // Returns true if this is an input dependence. 226 bool Dependence::isInput() const { 227 return Src->mayReadFromMemory() && Dst->mayReadFromMemory(); 228 } 229 230 231 // Returns true if this is an output dependence. 232 bool Dependence::isOutput() const { 233 return Src->mayWriteToMemory() && Dst->mayWriteToMemory(); 234 } 235 236 237 // Returns true if this is an flow (aka true) dependence. 238 bool Dependence::isFlow() const { 239 return Src->mayWriteToMemory() && Dst->mayReadFromMemory(); 240 } 241 242 243 // Returns true if this is an anti dependence. 244 bool Dependence::isAnti() const { 245 return Src->mayReadFromMemory() && Dst->mayWriteToMemory(); 246 } 247 248 249 // Returns true if a particular level is scalar; that is, 250 // if no subscript in the source or destination mention the induction 251 // variable associated with the loop at this level. 252 // Leave this out of line, so it will serve as a virtual method anchor 253 bool Dependence::isScalar(unsigned level) const { 254 return false; 255 } 256 257 258 //===----------------------------------------------------------------------===// 259 // FullDependence methods 260 261 FullDependence::FullDependence(Instruction *Source, Instruction *Destination, 262 bool PossiblyLoopIndependent, 263 unsigned CommonLevels) 264 : Dependence(Source, Destination), Levels(CommonLevels), 265 LoopIndependent(PossiblyLoopIndependent) { 266 Consistent = true; 267 if (CommonLevels) 268 DV = std::make_unique<DVEntry[]>(CommonLevels); 269 } 270 271 // The rest are simple getters that hide the implementation. 272 273 // getDirection - Returns the direction associated with a particular level. 274 unsigned FullDependence::getDirection(unsigned Level) const { 275 assert(0 < Level && Level <= Levels && "Level out of range"); 276 return DV[Level - 1].Direction; 277 } 278 279 280 // Returns the distance (or NULL) associated with a particular level. 281 const SCEV *FullDependence::getDistance(unsigned Level) const { 282 assert(0 < Level && Level <= Levels && "Level out of range"); 283 return DV[Level - 1].Distance; 284 } 285 286 287 // Returns true if a particular level is scalar; that is, 288 // if no subscript in the source or destination mention the induction 289 // variable associated with the loop at this level. 290 bool FullDependence::isScalar(unsigned Level) const { 291 assert(0 < Level && Level <= Levels && "Level out of range"); 292 return DV[Level - 1].Scalar; 293 } 294 295 296 // Returns true if peeling the first iteration from this loop 297 // will break this dependence. 298 bool FullDependence::isPeelFirst(unsigned Level) const { 299 assert(0 < Level && Level <= Levels && "Level out of range"); 300 return DV[Level - 1].PeelFirst; 301 } 302 303 304 // Returns true if peeling the last iteration from this loop 305 // will break this dependence. 306 bool FullDependence::isPeelLast(unsigned Level) const { 307 assert(0 < Level && Level <= Levels && "Level out of range"); 308 return DV[Level - 1].PeelLast; 309 } 310 311 312 // Returns true if splitting this loop will break the dependence. 313 bool FullDependence::isSplitable(unsigned Level) const { 314 assert(0 < Level && Level <= Levels && "Level out of range"); 315 return DV[Level - 1].Splitable; 316 } 317 318 319 //===----------------------------------------------------------------------===// 320 // DependenceInfo::Constraint methods 321 322 // If constraint is a point <X, Y>, returns X. 323 // Otherwise assert. 324 const SCEV *DependenceInfo::Constraint::getX() const { 325 assert(Kind == Point && "Kind should be Point"); 326 return A; 327 } 328 329 330 // If constraint is a point <X, Y>, returns Y. 331 // Otherwise assert. 332 const SCEV *DependenceInfo::Constraint::getY() const { 333 assert(Kind == Point && "Kind should be Point"); 334 return B; 335 } 336 337 338 // If constraint is a line AX + BY = C, returns A. 339 // Otherwise assert. 340 const SCEV *DependenceInfo::Constraint::getA() const { 341 assert((Kind == Line || Kind == Distance) && 342 "Kind should be Line (or Distance)"); 343 return A; 344 } 345 346 347 // If constraint is a line AX + BY = C, returns B. 348 // Otherwise assert. 349 const SCEV *DependenceInfo::Constraint::getB() const { 350 assert((Kind == Line || Kind == Distance) && 351 "Kind should be Line (or Distance)"); 352 return B; 353 } 354 355 356 // If constraint is a line AX + BY = C, returns C. 357 // Otherwise assert. 358 const SCEV *DependenceInfo::Constraint::getC() const { 359 assert((Kind == Line || Kind == Distance) && 360 "Kind should be Line (or Distance)"); 361 return C; 362 } 363 364 365 // If constraint is a distance, returns D. 366 // Otherwise assert. 367 const SCEV *DependenceInfo::Constraint::getD() const { 368 assert(Kind == Distance && "Kind should be Distance"); 369 return SE->getNegativeSCEV(C); 370 } 371 372 373 // Returns the loop associated with this constraint. 374 const Loop *DependenceInfo::Constraint::getAssociatedLoop() const { 375 assert((Kind == Distance || Kind == Line || Kind == Point) && 376 "Kind should be Distance, Line, or Point"); 377 return AssociatedLoop; 378 } 379 380 void DependenceInfo::Constraint::setPoint(const SCEV *X, const SCEV *Y, 381 const Loop *CurLoop) { 382 Kind = Point; 383 A = X; 384 B = Y; 385 AssociatedLoop = CurLoop; 386 } 387 388 void DependenceInfo::Constraint::setLine(const SCEV *AA, const SCEV *BB, 389 const SCEV *CC, const Loop *CurLoop) { 390 Kind = Line; 391 A = AA; 392 B = BB; 393 C = CC; 394 AssociatedLoop = CurLoop; 395 } 396 397 void DependenceInfo::Constraint::setDistance(const SCEV *D, 398 const Loop *CurLoop) { 399 Kind = Distance; 400 A = SE->getOne(D->getType()); 401 B = SE->getNegativeSCEV(A); 402 C = SE->getNegativeSCEV(D); 403 AssociatedLoop = CurLoop; 404 } 405 406 void DependenceInfo::Constraint::setEmpty() { Kind = Empty; } 407 408 void DependenceInfo::Constraint::setAny(ScalarEvolution *NewSE) { 409 SE = NewSE; 410 Kind = Any; 411 } 412 413 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 414 // For debugging purposes. Dumps the constraint out to OS. 415 LLVM_DUMP_METHOD void DependenceInfo::Constraint::dump(raw_ostream &OS) const { 416 if (isEmpty()) 417 OS << " Empty\n"; 418 else if (isAny()) 419 OS << " Any\n"; 420 else if (isPoint()) 421 OS << " Point is <" << *getX() << ", " << *getY() << ">\n"; 422 else if (isDistance()) 423 OS << " Distance is " << *getD() << 424 " (" << *getA() << "*X + " << *getB() << "*Y = " << *getC() << ")\n"; 425 else if (isLine()) 426 OS << " Line is " << *getA() << "*X + " << 427 *getB() << "*Y = " << *getC() << "\n"; 428 else 429 llvm_unreachable("unknown constraint type in Constraint::dump"); 430 } 431 #endif 432 433 434 // Updates X with the intersection 435 // of the Constraints X and Y. Returns true if X has changed. 436 // Corresponds to Figure 4 from the paper 437 // 438 // Practical Dependence Testing 439 // Goff, Kennedy, Tseng 440 // PLDI 1991 441 bool DependenceInfo::intersectConstraints(Constraint *X, const Constraint *Y) { 442 ++DeltaApplications; 443 LLVM_DEBUG(dbgs() << "\tintersect constraints\n"); 444 LLVM_DEBUG(dbgs() << "\t X ="; X->dump(dbgs())); 445 LLVM_DEBUG(dbgs() << "\t Y ="; Y->dump(dbgs())); 446 assert(!Y->isPoint() && "Y must not be a Point"); 447 if (X->isAny()) { 448 if (Y->isAny()) 449 return false; 450 *X = *Y; 451 return true; 452 } 453 if (X->isEmpty()) 454 return false; 455 if (Y->isEmpty()) { 456 X->setEmpty(); 457 return true; 458 } 459 460 if (X->isDistance() && Y->isDistance()) { 461 LLVM_DEBUG(dbgs() << "\t intersect 2 distances\n"); 462 if (isKnownPredicate(CmpInst::ICMP_EQ, X->getD(), Y->getD())) 463 return false; 464 if (isKnownPredicate(CmpInst::ICMP_NE, X->getD(), Y->getD())) { 465 X->setEmpty(); 466 ++DeltaSuccesses; 467 return true; 468 } 469 // Hmmm, interesting situation. 470 // I guess if either is constant, keep it and ignore the other. 471 if (isa<SCEVConstant>(Y->getD())) { 472 *X = *Y; 473 return true; 474 } 475 return false; 476 } 477 478 // At this point, the pseudo-code in Figure 4 of the paper 479 // checks if (X->isPoint() && Y->isPoint()). 480 // This case can't occur in our implementation, 481 // since a Point can only arise as the result of intersecting 482 // two Line constraints, and the right-hand value, Y, is never 483 // the result of an intersection. 484 assert(!(X->isPoint() && Y->isPoint()) && 485 "We shouldn't ever see X->isPoint() && Y->isPoint()"); 486 487 if (X->isLine() && Y->isLine()) { 488 LLVM_DEBUG(dbgs() << "\t intersect 2 lines\n"); 489 const SCEV *Prod1 = SE->getMulExpr(X->getA(), Y->getB()); 490 const SCEV *Prod2 = SE->getMulExpr(X->getB(), Y->getA()); 491 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) { 492 // slopes are equal, so lines are parallel 493 LLVM_DEBUG(dbgs() << "\t\tsame slope\n"); 494 Prod1 = SE->getMulExpr(X->getC(), Y->getB()); 495 Prod2 = SE->getMulExpr(X->getB(), Y->getC()); 496 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) 497 return false; 498 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { 499 X->setEmpty(); 500 ++DeltaSuccesses; 501 return true; 502 } 503 return false; 504 } 505 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { 506 // slopes differ, so lines intersect 507 LLVM_DEBUG(dbgs() << "\t\tdifferent slopes\n"); 508 const SCEV *C1B2 = SE->getMulExpr(X->getC(), Y->getB()); 509 const SCEV *C1A2 = SE->getMulExpr(X->getC(), Y->getA()); 510 const SCEV *C2B1 = SE->getMulExpr(Y->getC(), X->getB()); 511 const SCEV *C2A1 = SE->getMulExpr(Y->getC(), X->getA()); 512 const SCEV *A1B2 = SE->getMulExpr(X->getA(), Y->getB()); 513 const SCEV *A2B1 = SE->getMulExpr(Y->getA(), X->getB()); 514 const SCEVConstant *C1A2_C2A1 = 515 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1A2, C2A1)); 516 const SCEVConstant *C1B2_C2B1 = 517 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1B2, C2B1)); 518 const SCEVConstant *A1B2_A2B1 = 519 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A1B2, A2B1)); 520 const SCEVConstant *A2B1_A1B2 = 521 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A2B1, A1B2)); 522 if (!C1B2_C2B1 || !C1A2_C2A1 || 523 !A1B2_A2B1 || !A2B1_A1B2) 524 return false; 525 APInt Xtop = C1B2_C2B1->getAPInt(); 526 APInt Xbot = A1B2_A2B1->getAPInt(); 527 APInt Ytop = C1A2_C2A1->getAPInt(); 528 APInt Ybot = A2B1_A1B2->getAPInt(); 529 LLVM_DEBUG(dbgs() << "\t\tXtop = " << Xtop << "\n"); 530 LLVM_DEBUG(dbgs() << "\t\tXbot = " << Xbot << "\n"); 531 LLVM_DEBUG(dbgs() << "\t\tYtop = " << Ytop << "\n"); 532 LLVM_DEBUG(dbgs() << "\t\tYbot = " << Ybot << "\n"); 533 APInt Xq = Xtop; // these need to be initialized, even 534 APInt Xr = Xtop; // though they're just going to be overwritten 535 APInt::sdivrem(Xtop, Xbot, Xq, Xr); 536 APInt Yq = Ytop; 537 APInt Yr = Ytop; 538 APInt::sdivrem(Ytop, Ybot, Yq, Yr); 539 if (Xr != 0 || Yr != 0) { 540 X->setEmpty(); 541 ++DeltaSuccesses; 542 return true; 543 } 544 LLVM_DEBUG(dbgs() << "\t\tX = " << Xq << ", Y = " << Yq << "\n"); 545 if (Xq.slt(0) || Yq.slt(0)) { 546 X->setEmpty(); 547 ++DeltaSuccesses; 548 return true; 549 } 550 if (const SCEVConstant *CUB = 551 collectConstantUpperBound(X->getAssociatedLoop(), Prod1->getType())) { 552 const APInt &UpperBound = CUB->getAPInt(); 553 LLVM_DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n"); 554 if (Xq.sgt(UpperBound) || Yq.sgt(UpperBound)) { 555 X->setEmpty(); 556 ++DeltaSuccesses; 557 return true; 558 } 559 } 560 X->setPoint(SE->getConstant(Xq), 561 SE->getConstant(Yq), 562 X->getAssociatedLoop()); 563 ++DeltaSuccesses; 564 return true; 565 } 566 return false; 567 } 568 569 // if (X->isLine() && Y->isPoint()) This case can't occur. 570 assert(!(X->isLine() && Y->isPoint()) && "This case should never occur"); 571 572 if (X->isPoint() && Y->isLine()) { 573 LLVM_DEBUG(dbgs() << "\t intersect Point and Line\n"); 574 const SCEV *A1X1 = SE->getMulExpr(Y->getA(), X->getX()); 575 const SCEV *B1Y1 = SE->getMulExpr(Y->getB(), X->getY()); 576 const SCEV *Sum = SE->getAddExpr(A1X1, B1Y1); 577 if (isKnownPredicate(CmpInst::ICMP_EQ, Sum, Y->getC())) 578 return false; 579 if (isKnownPredicate(CmpInst::ICMP_NE, Sum, Y->getC())) { 580 X->setEmpty(); 581 ++DeltaSuccesses; 582 return true; 583 } 584 return false; 585 } 586 587 llvm_unreachable("shouldn't reach the end of Constraint intersection"); 588 return false; 589 } 590 591 592 //===----------------------------------------------------------------------===// 593 // DependenceInfo methods 594 595 // For debugging purposes. Dumps a dependence to OS. 596 void Dependence::dump(raw_ostream &OS) const { 597 bool Splitable = false; 598 if (isConfused()) 599 OS << "confused"; 600 else { 601 if (isConsistent()) 602 OS << "consistent "; 603 if (isFlow()) 604 OS << "flow"; 605 else if (isOutput()) 606 OS << "output"; 607 else if (isAnti()) 608 OS << "anti"; 609 else if (isInput()) 610 OS << "input"; 611 unsigned Levels = getLevels(); 612 OS << " ["; 613 for (unsigned II = 1; II <= Levels; ++II) { 614 if (isSplitable(II)) 615 Splitable = true; 616 if (isPeelFirst(II)) 617 OS << 'p'; 618 const SCEV *Distance = getDistance(II); 619 if (Distance) 620 OS << *Distance; 621 else if (isScalar(II)) 622 OS << "S"; 623 else { 624 unsigned Direction = getDirection(II); 625 if (Direction == DVEntry::ALL) 626 OS << "*"; 627 else { 628 if (Direction & DVEntry::LT) 629 OS << "<"; 630 if (Direction & DVEntry::EQ) 631 OS << "="; 632 if (Direction & DVEntry::GT) 633 OS << ">"; 634 } 635 } 636 if (isPeelLast(II)) 637 OS << 'p'; 638 if (II < Levels) 639 OS << " "; 640 } 641 if (isLoopIndependent()) 642 OS << "|<"; 643 OS << "]"; 644 if (Splitable) 645 OS << " splitable"; 646 } 647 OS << "!\n"; 648 } 649 650 // Returns NoAlias/MayAliass/MustAlias for two memory locations based upon their 651 // underlaying objects. If LocA and LocB are known to not alias (for any reason: 652 // tbaa, non-overlapping regions etc), then it is known there is no dependecy. 653 // Otherwise the underlying objects are checked to see if they point to 654 // different identifiable objects. 655 static AliasResult underlyingObjectsAlias(AAResults *AA, 656 const DataLayout &DL, 657 const MemoryLocation &LocA, 658 const MemoryLocation &LocB) { 659 // Check the original locations (minus size) for noalias, which can happen for 660 // tbaa, incompatible underlying object locations, etc. 661 MemoryLocation LocAS = 662 MemoryLocation::getBeforeOrAfter(LocA.Ptr, LocA.AATags); 663 MemoryLocation LocBS = 664 MemoryLocation::getBeforeOrAfter(LocB.Ptr, LocB.AATags); 665 if (AA->isNoAlias(LocAS, LocBS)) 666 return AliasResult::NoAlias; 667 668 // Check the underlying objects are the same 669 const Value *AObj = getUnderlyingObject(LocA.Ptr); 670 const Value *BObj = getUnderlyingObject(LocB.Ptr); 671 672 // If the underlying objects are the same, they must alias 673 if (AObj == BObj) 674 return AliasResult::MustAlias; 675 676 // We may have hit the recursion limit for underlying objects, or have 677 // underlying objects where we don't know they will alias. 678 if (!isIdentifiedObject(AObj) || !isIdentifiedObject(BObj)) 679 return AliasResult::MayAlias; 680 681 // Otherwise we know the objects are different and both identified objects so 682 // must not alias. 683 return AliasResult::NoAlias; 684 } 685 686 687 // Returns true if the load or store can be analyzed. Atomic and volatile 688 // operations have properties which this analysis does not understand. 689 static 690 bool isLoadOrStore(const Instruction *I) { 691 if (const LoadInst *LI = dyn_cast<LoadInst>(I)) 692 return LI->isUnordered(); 693 else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) 694 return SI->isUnordered(); 695 return false; 696 } 697 698 699 // Examines the loop nesting of the Src and Dst 700 // instructions and establishes their shared loops. Sets the variables 701 // CommonLevels, SrcLevels, and MaxLevels. 702 // The source and destination instructions needn't be contained in the same 703 // loop. The routine establishNestingLevels finds the level of most deeply 704 // nested loop that contains them both, CommonLevels. An instruction that's 705 // not contained in a loop is at level = 0. MaxLevels is equal to the level 706 // of the source plus the level of the destination, minus CommonLevels. 707 // This lets us allocate vectors MaxLevels in length, with room for every 708 // distinct loop referenced in both the source and destination subscripts. 709 // The variable SrcLevels is the nesting depth of the source instruction. 710 // It's used to help calculate distinct loops referenced by the destination. 711 // Here's the map from loops to levels: 712 // 0 - unused 713 // 1 - outermost common loop 714 // ... - other common loops 715 // CommonLevels - innermost common loop 716 // ... - loops containing Src but not Dst 717 // SrcLevels - innermost loop containing Src but not Dst 718 // ... - loops containing Dst but not Src 719 // MaxLevels - innermost loops containing Dst but not Src 720 // Consider the follow code fragment: 721 // for (a = ...) { 722 // for (b = ...) { 723 // for (c = ...) { 724 // for (d = ...) { 725 // A[] = ...; 726 // } 727 // } 728 // for (e = ...) { 729 // for (f = ...) { 730 // for (g = ...) { 731 // ... = A[]; 732 // } 733 // } 734 // } 735 // } 736 // } 737 // If we're looking at the possibility of a dependence between the store 738 // to A (the Src) and the load from A (the Dst), we'll note that they 739 // have 2 loops in common, so CommonLevels will equal 2 and the direction 740 // vector for Result will have 2 entries. SrcLevels = 4 and MaxLevels = 7. 741 // A map from loop names to loop numbers would look like 742 // a - 1 743 // b - 2 = CommonLevels 744 // c - 3 745 // d - 4 = SrcLevels 746 // e - 5 747 // f - 6 748 // g - 7 = MaxLevels 749 void DependenceInfo::establishNestingLevels(const Instruction *Src, 750 const Instruction *Dst) { 751 const BasicBlock *SrcBlock = Src->getParent(); 752 const BasicBlock *DstBlock = Dst->getParent(); 753 unsigned SrcLevel = LI->getLoopDepth(SrcBlock); 754 unsigned DstLevel = LI->getLoopDepth(DstBlock); 755 const Loop *SrcLoop = LI->getLoopFor(SrcBlock); 756 const Loop *DstLoop = LI->getLoopFor(DstBlock); 757 SrcLevels = SrcLevel; 758 MaxLevels = SrcLevel + DstLevel; 759 while (SrcLevel > DstLevel) { 760 SrcLoop = SrcLoop->getParentLoop(); 761 SrcLevel--; 762 } 763 while (DstLevel > SrcLevel) { 764 DstLoop = DstLoop->getParentLoop(); 765 DstLevel--; 766 } 767 while (SrcLoop != DstLoop) { 768 SrcLoop = SrcLoop->getParentLoop(); 769 DstLoop = DstLoop->getParentLoop(); 770 SrcLevel--; 771 } 772 CommonLevels = SrcLevel; 773 MaxLevels -= CommonLevels; 774 } 775 776 777 // Given one of the loops containing the source, return 778 // its level index in our numbering scheme. 779 unsigned DependenceInfo::mapSrcLoop(const Loop *SrcLoop) const { 780 return SrcLoop->getLoopDepth(); 781 } 782 783 784 // Given one of the loops containing the destination, 785 // return its level index in our numbering scheme. 786 unsigned DependenceInfo::mapDstLoop(const Loop *DstLoop) const { 787 unsigned D = DstLoop->getLoopDepth(); 788 if (D > CommonLevels) 789 return D - CommonLevels + SrcLevels; 790 else 791 return D; 792 } 793 794 795 // Returns true if Expression is loop invariant in LoopNest. 796 bool DependenceInfo::isLoopInvariant(const SCEV *Expression, 797 const Loop *LoopNest) const { 798 if (!LoopNest) 799 return true; 800 return SE->isLoopInvariant(Expression, LoopNest) && 801 isLoopInvariant(Expression, LoopNest->getParentLoop()); 802 } 803 804 805 806 // Finds the set of loops from the LoopNest that 807 // have a level <= CommonLevels and are referred to by the SCEV Expression. 808 void DependenceInfo::collectCommonLoops(const SCEV *Expression, 809 const Loop *LoopNest, 810 SmallBitVector &Loops) const { 811 while (LoopNest) { 812 unsigned Level = LoopNest->getLoopDepth(); 813 if (Level <= CommonLevels && !SE->isLoopInvariant(Expression, LoopNest)) 814 Loops.set(Level); 815 LoopNest = LoopNest->getParentLoop(); 816 } 817 } 818 819 void DependenceInfo::unifySubscriptType(ArrayRef<Subscript *> Pairs) { 820 821 unsigned widestWidthSeen = 0; 822 Type *widestType; 823 824 // Go through each pair and find the widest bit to which we need 825 // to extend all of them. 826 for (Subscript *Pair : Pairs) { 827 const SCEV *Src = Pair->Src; 828 const SCEV *Dst = Pair->Dst; 829 IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType()); 830 IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType()); 831 if (SrcTy == nullptr || DstTy == nullptr) { 832 assert(SrcTy == DstTy && "This function only unify integer types and " 833 "expect Src and Dst share the same type " 834 "otherwise."); 835 continue; 836 } 837 if (SrcTy->getBitWidth() > widestWidthSeen) { 838 widestWidthSeen = SrcTy->getBitWidth(); 839 widestType = SrcTy; 840 } 841 if (DstTy->getBitWidth() > widestWidthSeen) { 842 widestWidthSeen = DstTy->getBitWidth(); 843 widestType = DstTy; 844 } 845 } 846 847 848 assert(widestWidthSeen > 0); 849 850 // Now extend each pair to the widest seen. 851 for (Subscript *Pair : Pairs) { 852 const SCEV *Src = Pair->Src; 853 const SCEV *Dst = Pair->Dst; 854 IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType()); 855 IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType()); 856 if (SrcTy == nullptr || DstTy == nullptr) { 857 assert(SrcTy == DstTy && "This function only unify integer types and " 858 "expect Src and Dst share the same type " 859 "otherwise."); 860 continue; 861 } 862 if (SrcTy->getBitWidth() < widestWidthSeen) 863 // Sign-extend Src to widestType 864 Pair->Src = SE->getSignExtendExpr(Src, widestType); 865 if (DstTy->getBitWidth() < widestWidthSeen) { 866 // Sign-extend Dst to widestType 867 Pair->Dst = SE->getSignExtendExpr(Dst, widestType); 868 } 869 } 870 } 871 872 // removeMatchingExtensions - Examines a subscript pair. 873 // If the source and destination are identically sign (or zero) 874 // extended, it strips off the extension in an effect to simplify 875 // the actual analysis. 876 void DependenceInfo::removeMatchingExtensions(Subscript *Pair) { 877 const SCEV *Src = Pair->Src; 878 const SCEV *Dst = Pair->Dst; 879 if ((isa<SCEVZeroExtendExpr>(Src) && isa<SCEVZeroExtendExpr>(Dst)) || 880 (isa<SCEVSignExtendExpr>(Src) && isa<SCEVSignExtendExpr>(Dst))) { 881 const SCEVIntegralCastExpr *SrcCast = cast<SCEVIntegralCastExpr>(Src); 882 const SCEVIntegralCastExpr *DstCast = cast<SCEVIntegralCastExpr>(Dst); 883 const SCEV *SrcCastOp = SrcCast->getOperand(); 884 const SCEV *DstCastOp = DstCast->getOperand(); 885 if (SrcCastOp->getType() == DstCastOp->getType()) { 886 Pair->Src = SrcCastOp; 887 Pair->Dst = DstCastOp; 888 } 889 } 890 } 891 892 // Examine the scev and return true iff it's linear. 893 // Collect any loops mentioned in the set of "Loops". 894 bool DependenceInfo::checkSubscript(const SCEV *Expr, const Loop *LoopNest, 895 SmallBitVector &Loops, bool IsSrc) { 896 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 897 if (!AddRec) 898 return isLoopInvariant(Expr, LoopNest); 899 const SCEV *Start = AddRec->getStart(); 900 const SCEV *Step = AddRec->getStepRecurrence(*SE); 901 const SCEV *UB = SE->getBackedgeTakenCount(AddRec->getLoop()); 902 if (!isa<SCEVCouldNotCompute>(UB)) { 903 if (SE->getTypeSizeInBits(Start->getType()) < 904 SE->getTypeSizeInBits(UB->getType())) { 905 if (!AddRec->getNoWrapFlags()) 906 return false; 907 } 908 } 909 if (!isLoopInvariant(Step, LoopNest)) 910 return false; 911 if (IsSrc) 912 Loops.set(mapSrcLoop(AddRec->getLoop())); 913 else 914 Loops.set(mapDstLoop(AddRec->getLoop())); 915 return checkSubscript(Start, LoopNest, Loops, IsSrc); 916 } 917 918 // Examine the scev and return true iff it's linear. 919 // Collect any loops mentioned in the set of "Loops". 920 bool DependenceInfo::checkSrcSubscript(const SCEV *Src, const Loop *LoopNest, 921 SmallBitVector &Loops) { 922 return checkSubscript(Src, LoopNest, Loops, true); 923 } 924 925 // Examine the scev and return true iff it's linear. 926 // Collect any loops mentioned in the set of "Loops". 927 bool DependenceInfo::checkDstSubscript(const SCEV *Dst, const Loop *LoopNest, 928 SmallBitVector &Loops) { 929 return checkSubscript(Dst, LoopNest, Loops, false); 930 } 931 932 933 // Examines the subscript pair (the Src and Dst SCEVs) 934 // and classifies it as either ZIV, SIV, RDIV, MIV, or Nonlinear. 935 // Collects the associated loops in a set. 936 DependenceInfo::Subscript::ClassificationKind 937 DependenceInfo::classifyPair(const SCEV *Src, const Loop *SrcLoopNest, 938 const SCEV *Dst, const Loop *DstLoopNest, 939 SmallBitVector &Loops) { 940 SmallBitVector SrcLoops(MaxLevels + 1); 941 SmallBitVector DstLoops(MaxLevels + 1); 942 if (!checkSrcSubscript(Src, SrcLoopNest, SrcLoops)) 943 return Subscript::NonLinear; 944 if (!checkDstSubscript(Dst, DstLoopNest, DstLoops)) 945 return Subscript::NonLinear; 946 Loops = SrcLoops; 947 Loops |= DstLoops; 948 unsigned N = Loops.count(); 949 if (N == 0) 950 return Subscript::ZIV; 951 if (N == 1) 952 return Subscript::SIV; 953 if (N == 2 && (SrcLoops.count() == 0 || 954 DstLoops.count() == 0 || 955 (SrcLoops.count() == 1 && DstLoops.count() == 1))) 956 return Subscript::RDIV; 957 return Subscript::MIV; 958 } 959 960 961 // A wrapper around SCEV::isKnownPredicate. 962 // Looks for cases where we're interested in comparing for equality. 963 // If both X and Y have been identically sign or zero extended, 964 // it strips off the (confusing) extensions before invoking 965 // SCEV::isKnownPredicate. Perhaps, someday, the ScalarEvolution package 966 // will be similarly updated. 967 // 968 // If SCEV::isKnownPredicate can't prove the predicate, 969 // we try simple subtraction, which seems to help in some cases 970 // involving symbolics. 971 bool DependenceInfo::isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *X, 972 const SCEV *Y) const { 973 if (Pred == CmpInst::ICMP_EQ || 974 Pred == CmpInst::ICMP_NE) { 975 if ((isa<SCEVSignExtendExpr>(X) && 976 isa<SCEVSignExtendExpr>(Y)) || 977 (isa<SCEVZeroExtendExpr>(X) && 978 isa<SCEVZeroExtendExpr>(Y))) { 979 const SCEVIntegralCastExpr *CX = cast<SCEVIntegralCastExpr>(X); 980 const SCEVIntegralCastExpr *CY = cast<SCEVIntegralCastExpr>(Y); 981 const SCEV *Xop = CX->getOperand(); 982 const SCEV *Yop = CY->getOperand(); 983 if (Xop->getType() == Yop->getType()) { 984 X = Xop; 985 Y = Yop; 986 } 987 } 988 } 989 if (SE->isKnownPredicate(Pred, X, Y)) 990 return true; 991 // If SE->isKnownPredicate can't prove the condition, 992 // we try the brute-force approach of subtracting 993 // and testing the difference. 994 // By testing with SE->isKnownPredicate first, we avoid 995 // the possibility of overflow when the arguments are constants. 996 const SCEV *Delta = SE->getMinusSCEV(X, Y); 997 switch (Pred) { 998 case CmpInst::ICMP_EQ: 999 return Delta->isZero(); 1000 case CmpInst::ICMP_NE: 1001 return SE->isKnownNonZero(Delta); 1002 case CmpInst::ICMP_SGE: 1003 return SE->isKnownNonNegative(Delta); 1004 case CmpInst::ICMP_SLE: 1005 return SE->isKnownNonPositive(Delta); 1006 case CmpInst::ICMP_SGT: 1007 return SE->isKnownPositive(Delta); 1008 case CmpInst::ICMP_SLT: 1009 return SE->isKnownNegative(Delta); 1010 default: 1011 llvm_unreachable("unexpected predicate in isKnownPredicate"); 1012 } 1013 } 1014 1015 /// Compare to see if S is less than Size, using isKnownNegative(S - max(Size, 1)) 1016 /// with some extra checking if S is an AddRec and we can prove less-than using 1017 /// the loop bounds. 1018 bool DependenceInfo::isKnownLessThan(const SCEV *S, const SCEV *Size) const { 1019 // First unify to the same type 1020 auto *SType = dyn_cast<IntegerType>(S->getType()); 1021 auto *SizeType = dyn_cast<IntegerType>(Size->getType()); 1022 if (!SType || !SizeType) 1023 return false; 1024 Type *MaxType = 1025 (SType->getBitWidth() >= SizeType->getBitWidth()) ? SType : SizeType; 1026 S = SE->getTruncateOrZeroExtend(S, MaxType); 1027 Size = SE->getTruncateOrZeroExtend(Size, MaxType); 1028 1029 // Special check for addrecs using BE taken count 1030 const SCEV *Bound = SE->getMinusSCEV(S, Size); 1031 if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Bound)) { 1032 if (AddRec->isAffine()) { 1033 const SCEV *BECount = SE->getBackedgeTakenCount(AddRec->getLoop()); 1034 if (!isa<SCEVCouldNotCompute>(BECount)) { 1035 const SCEV *Limit = AddRec->evaluateAtIteration(BECount, *SE); 1036 if (SE->isKnownNegative(Limit)) 1037 return true; 1038 } 1039 } 1040 } 1041 1042 // Check using normal isKnownNegative 1043 const SCEV *LimitedBound = 1044 SE->getMinusSCEV(S, SE->getSMaxExpr(Size, SE->getOne(Size->getType()))); 1045 return SE->isKnownNegative(LimitedBound); 1046 } 1047 1048 bool DependenceInfo::isKnownNonNegative(const SCEV *S, const Value *Ptr) const { 1049 bool Inbounds = false; 1050 if (auto *SrcGEP = dyn_cast<GetElementPtrInst>(Ptr)) 1051 Inbounds = SrcGEP->isInBounds(); 1052 if (Inbounds) { 1053 if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) { 1054 if (AddRec->isAffine()) { 1055 // We know S is for Ptr, the operand on a load/store, so doesn't wrap. 1056 // If both parts are NonNegative, the end result will be NonNegative 1057 if (SE->isKnownNonNegative(AddRec->getStart()) && 1058 SE->isKnownNonNegative(AddRec->getOperand(1))) 1059 return true; 1060 } 1061 } 1062 } 1063 1064 return SE->isKnownNonNegative(S); 1065 } 1066 1067 // All subscripts are all the same type. 1068 // Loop bound may be smaller (e.g., a char). 1069 // Should zero extend loop bound, since it's always >= 0. 1070 // This routine collects upper bound and extends or truncates if needed. 1071 // Truncating is safe when subscripts are known not to wrap. Cases without 1072 // nowrap flags should have been rejected earlier. 1073 // Return null if no bound available. 1074 const SCEV *DependenceInfo::collectUpperBound(const Loop *L, Type *T) const { 1075 if (SE->hasLoopInvariantBackedgeTakenCount(L)) { 1076 const SCEV *UB = SE->getBackedgeTakenCount(L); 1077 return SE->getTruncateOrZeroExtend(UB, T); 1078 } 1079 return nullptr; 1080 } 1081 1082 1083 // Calls collectUpperBound(), then attempts to cast it to SCEVConstant. 1084 // If the cast fails, returns NULL. 1085 const SCEVConstant *DependenceInfo::collectConstantUpperBound(const Loop *L, 1086 Type *T) const { 1087 if (const SCEV *UB = collectUpperBound(L, T)) 1088 return dyn_cast<SCEVConstant>(UB); 1089 return nullptr; 1090 } 1091 1092 1093 // testZIV - 1094 // When we have a pair of subscripts of the form [c1] and [c2], 1095 // where c1 and c2 are both loop invariant, we attack it using 1096 // the ZIV test. Basically, we test by comparing the two values, 1097 // but there are actually three possible results: 1098 // 1) the values are equal, so there's a dependence 1099 // 2) the values are different, so there's no dependence 1100 // 3) the values might be equal, so we have to assume a dependence. 1101 // 1102 // Return true if dependence disproved. 1103 bool DependenceInfo::testZIV(const SCEV *Src, const SCEV *Dst, 1104 FullDependence &Result) const { 1105 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n"); 1106 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n"); 1107 ++ZIVapplications; 1108 if (isKnownPredicate(CmpInst::ICMP_EQ, Src, Dst)) { 1109 LLVM_DEBUG(dbgs() << " provably dependent\n"); 1110 return false; // provably dependent 1111 } 1112 if (isKnownPredicate(CmpInst::ICMP_NE, Src, Dst)) { 1113 LLVM_DEBUG(dbgs() << " provably independent\n"); 1114 ++ZIVindependence; 1115 return true; // provably independent 1116 } 1117 LLVM_DEBUG(dbgs() << " possibly dependent\n"); 1118 Result.Consistent = false; 1119 return false; // possibly dependent 1120 } 1121 1122 1123 // strongSIVtest - 1124 // From the paper, Practical Dependence Testing, Section 4.2.1 1125 // 1126 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 + a*i], 1127 // where i is an induction variable, c1 and c2 are loop invariant, 1128 // and a is a constant, we can solve it exactly using the Strong SIV test. 1129 // 1130 // Can prove independence. Failing that, can compute distance (and direction). 1131 // In the presence of symbolic terms, we can sometimes make progress. 1132 // 1133 // If there's a dependence, 1134 // 1135 // c1 + a*i = c2 + a*i' 1136 // 1137 // The dependence distance is 1138 // 1139 // d = i' - i = (c1 - c2)/a 1140 // 1141 // A dependence only exists if d is an integer and abs(d) <= U, where U is the 1142 // loop's upper bound. If a dependence exists, the dependence direction is 1143 // defined as 1144 // 1145 // { < if d > 0 1146 // direction = { = if d = 0 1147 // { > if d < 0 1148 // 1149 // Return true if dependence disproved. 1150 bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst, 1151 const SCEV *DstConst, const Loop *CurLoop, 1152 unsigned Level, FullDependence &Result, 1153 Constraint &NewConstraint) const { 1154 LLVM_DEBUG(dbgs() << "\tStrong SIV test\n"); 1155 LLVM_DEBUG(dbgs() << "\t Coeff = " << *Coeff); 1156 LLVM_DEBUG(dbgs() << ", " << *Coeff->getType() << "\n"); 1157 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst); 1158 LLVM_DEBUG(dbgs() << ", " << *SrcConst->getType() << "\n"); 1159 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst); 1160 LLVM_DEBUG(dbgs() << ", " << *DstConst->getType() << "\n"); 1161 ++StrongSIVapplications; 1162 assert(0 < Level && Level <= CommonLevels && "level out of range"); 1163 Level--; 1164 1165 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); 1166 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta); 1167 LLVM_DEBUG(dbgs() << ", " << *Delta->getType() << "\n"); 1168 1169 // check that |Delta| < iteration count 1170 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1171 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound); 1172 LLVM_DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n"); 1173 const SCEV *AbsDelta = 1174 SE->isKnownNonNegative(Delta) ? Delta : SE->getNegativeSCEV(Delta); 1175 const SCEV *AbsCoeff = 1176 SE->isKnownNonNegative(Coeff) ? Coeff : SE->getNegativeSCEV(Coeff); 1177 const SCEV *Product = SE->getMulExpr(UpperBound, AbsCoeff); 1178 if (isKnownPredicate(CmpInst::ICMP_SGT, AbsDelta, Product)) { 1179 // Distance greater than trip count - no dependence 1180 ++StrongSIVindependence; 1181 ++StrongSIVsuccesses; 1182 return true; 1183 } 1184 } 1185 1186 // Can we compute distance? 1187 if (isa<SCEVConstant>(Delta) && isa<SCEVConstant>(Coeff)) { 1188 APInt ConstDelta = cast<SCEVConstant>(Delta)->getAPInt(); 1189 APInt ConstCoeff = cast<SCEVConstant>(Coeff)->getAPInt(); 1190 APInt Distance = ConstDelta; // these need to be initialized 1191 APInt Remainder = ConstDelta; 1192 APInt::sdivrem(ConstDelta, ConstCoeff, Distance, Remainder); 1193 LLVM_DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); 1194 LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1195 // Make sure Coeff divides Delta exactly 1196 if (Remainder != 0) { 1197 // Coeff doesn't divide Distance, no dependence 1198 ++StrongSIVindependence; 1199 ++StrongSIVsuccesses; 1200 return true; 1201 } 1202 Result.DV[Level].Distance = SE->getConstant(Distance); 1203 NewConstraint.setDistance(SE->getConstant(Distance), CurLoop); 1204 if (Distance.sgt(0)) 1205 Result.DV[Level].Direction &= Dependence::DVEntry::LT; 1206 else if (Distance.slt(0)) 1207 Result.DV[Level].Direction &= Dependence::DVEntry::GT; 1208 else 1209 Result.DV[Level].Direction &= Dependence::DVEntry::EQ; 1210 ++StrongSIVsuccesses; 1211 } 1212 else if (Delta->isZero()) { 1213 // since 0/X == 0 1214 Result.DV[Level].Distance = Delta; 1215 NewConstraint.setDistance(Delta, CurLoop); 1216 Result.DV[Level].Direction &= Dependence::DVEntry::EQ; 1217 ++StrongSIVsuccesses; 1218 } 1219 else { 1220 if (Coeff->isOne()) { 1221 LLVM_DEBUG(dbgs() << "\t Distance = " << *Delta << "\n"); 1222 Result.DV[Level].Distance = Delta; // since X/1 == X 1223 NewConstraint.setDistance(Delta, CurLoop); 1224 } 1225 else { 1226 Result.Consistent = false; 1227 NewConstraint.setLine(Coeff, 1228 SE->getNegativeSCEV(Coeff), 1229 SE->getNegativeSCEV(Delta), CurLoop); 1230 } 1231 1232 // maybe we can get a useful direction 1233 bool DeltaMaybeZero = !SE->isKnownNonZero(Delta); 1234 bool DeltaMaybePositive = !SE->isKnownNonPositive(Delta); 1235 bool DeltaMaybeNegative = !SE->isKnownNonNegative(Delta); 1236 bool CoeffMaybePositive = !SE->isKnownNonPositive(Coeff); 1237 bool CoeffMaybeNegative = !SE->isKnownNonNegative(Coeff); 1238 // The double negatives above are confusing. 1239 // It helps to read !SE->isKnownNonZero(Delta) 1240 // as "Delta might be Zero" 1241 unsigned NewDirection = Dependence::DVEntry::NONE; 1242 if ((DeltaMaybePositive && CoeffMaybePositive) || 1243 (DeltaMaybeNegative && CoeffMaybeNegative)) 1244 NewDirection = Dependence::DVEntry::LT; 1245 if (DeltaMaybeZero) 1246 NewDirection |= Dependence::DVEntry::EQ; 1247 if ((DeltaMaybeNegative && CoeffMaybePositive) || 1248 (DeltaMaybePositive && CoeffMaybeNegative)) 1249 NewDirection |= Dependence::DVEntry::GT; 1250 if (NewDirection < Result.DV[Level].Direction) 1251 ++StrongSIVsuccesses; 1252 Result.DV[Level].Direction &= NewDirection; 1253 } 1254 return false; 1255 } 1256 1257 1258 // weakCrossingSIVtest - 1259 // From the paper, Practical Dependence Testing, Section 4.2.2 1260 // 1261 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 - a*i], 1262 // where i is an induction variable, c1 and c2 are loop invariant, 1263 // and a is a constant, we can solve it exactly using the 1264 // Weak-Crossing SIV test. 1265 // 1266 // Given c1 + a*i = c2 - a*i', we can look for the intersection of 1267 // the two lines, where i = i', yielding 1268 // 1269 // c1 + a*i = c2 - a*i 1270 // 2a*i = c2 - c1 1271 // i = (c2 - c1)/2a 1272 // 1273 // If i < 0, there is no dependence. 1274 // If i > upperbound, there is no dependence. 1275 // If i = 0 (i.e., if c1 = c2), there's a dependence with distance = 0. 1276 // If i = upperbound, there's a dependence with distance = 0. 1277 // If i is integral, there's a dependence (all directions). 1278 // If the non-integer part = 1/2, there's a dependence (<> directions). 1279 // Otherwise, there's no dependence. 1280 // 1281 // Can prove independence. Failing that, 1282 // can sometimes refine the directions. 1283 // Can determine iteration for splitting. 1284 // 1285 // Return true if dependence disproved. 1286 bool DependenceInfo::weakCrossingSIVtest( 1287 const SCEV *Coeff, const SCEV *SrcConst, const SCEV *DstConst, 1288 const Loop *CurLoop, unsigned Level, FullDependence &Result, 1289 Constraint &NewConstraint, const SCEV *&SplitIter) const { 1290 LLVM_DEBUG(dbgs() << "\tWeak-Crossing SIV test\n"); 1291 LLVM_DEBUG(dbgs() << "\t Coeff = " << *Coeff << "\n"); 1292 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1293 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1294 ++WeakCrossingSIVapplications; 1295 assert(0 < Level && Level <= CommonLevels && "Level out of range"); 1296 Level--; 1297 Result.Consistent = false; 1298 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1299 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1300 NewConstraint.setLine(Coeff, Coeff, Delta, CurLoop); 1301 if (Delta->isZero()) { 1302 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT); 1303 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT); 1304 ++WeakCrossingSIVsuccesses; 1305 if (!Result.DV[Level].Direction) { 1306 ++WeakCrossingSIVindependence; 1307 return true; 1308 } 1309 Result.DV[Level].Distance = Delta; // = 0 1310 return false; 1311 } 1312 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Coeff); 1313 if (!ConstCoeff) 1314 return false; 1315 1316 Result.DV[Level].Splitable = true; 1317 if (SE->isKnownNegative(ConstCoeff)) { 1318 ConstCoeff = dyn_cast<SCEVConstant>(SE->getNegativeSCEV(ConstCoeff)); 1319 assert(ConstCoeff && 1320 "dynamic cast of negative of ConstCoeff should yield constant"); 1321 Delta = SE->getNegativeSCEV(Delta); 1322 } 1323 assert(SE->isKnownPositive(ConstCoeff) && "ConstCoeff should be positive"); 1324 1325 // compute SplitIter for use by DependenceInfo::getSplitIteration() 1326 SplitIter = SE->getUDivExpr( 1327 SE->getSMaxExpr(SE->getZero(Delta->getType()), Delta), 1328 SE->getMulExpr(SE->getConstant(Delta->getType(), 2), ConstCoeff)); 1329 LLVM_DEBUG(dbgs() << "\t Split iter = " << *SplitIter << "\n"); 1330 1331 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1332 if (!ConstDelta) 1333 return false; 1334 1335 // We're certain that ConstCoeff > 0; therefore, 1336 // if Delta < 0, then no dependence. 1337 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1338 LLVM_DEBUG(dbgs() << "\t ConstCoeff = " << *ConstCoeff << "\n"); 1339 if (SE->isKnownNegative(Delta)) { 1340 // No dependence, Delta < 0 1341 ++WeakCrossingSIVindependence; 1342 ++WeakCrossingSIVsuccesses; 1343 return true; 1344 } 1345 1346 // We're certain that Delta > 0 and ConstCoeff > 0. 1347 // Check Delta/(2*ConstCoeff) against upper loop bound 1348 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1349 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1350 const SCEV *ConstantTwo = SE->getConstant(UpperBound->getType(), 2); 1351 const SCEV *ML = SE->getMulExpr(SE->getMulExpr(ConstCoeff, UpperBound), 1352 ConstantTwo); 1353 LLVM_DEBUG(dbgs() << "\t ML = " << *ML << "\n"); 1354 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, ML)) { 1355 // Delta too big, no dependence 1356 ++WeakCrossingSIVindependence; 1357 ++WeakCrossingSIVsuccesses; 1358 return true; 1359 } 1360 if (isKnownPredicate(CmpInst::ICMP_EQ, Delta, ML)) { 1361 // i = i' = UB 1362 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT); 1363 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT); 1364 ++WeakCrossingSIVsuccesses; 1365 if (!Result.DV[Level].Direction) { 1366 ++WeakCrossingSIVindependence; 1367 return true; 1368 } 1369 Result.DV[Level].Splitable = false; 1370 Result.DV[Level].Distance = SE->getZero(Delta->getType()); 1371 return false; 1372 } 1373 } 1374 1375 // check that Coeff divides Delta 1376 APInt APDelta = ConstDelta->getAPInt(); 1377 APInt APCoeff = ConstCoeff->getAPInt(); 1378 APInt Distance = APDelta; // these need to be initialzed 1379 APInt Remainder = APDelta; 1380 APInt::sdivrem(APDelta, APCoeff, Distance, Remainder); 1381 LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1382 if (Remainder != 0) { 1383 // Coeff doesn't divide Delta, no dependence 1384 ++WeakCrossingSIVindependence; 1385 ++WeakCrossingSIVsuccesses; 1386 return true; 1387 } 1388 LLVM_DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); 1389 1390 // if 2*Coeff doesn't divide Delta, then the equal direction isn't possible 1391 APInt Two = APInt(Distance.getBitWidth(), 2, true); 1392 Remainder = Distance.srem(Two); 1393 LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1394 if (Remainder != 0) { 1395 // Equal direction isn't possible 1396 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::EQ); 1397 ++WeakCrossingSIVsuccesses; 1398 } 1399 return false; 1400 } 1401 1402 1403 // Kirch's algorithm, from 1404 // 1405 // Optimizing Supercompilers for Supercomputers 1406 // Michael Wolfe 1407 // MIT Press, 1989 1408 // 1409 // Program 2.1, page 29. 1410 // Computes the GCD of AM and BM. 1411 // Also finds a solution to the equation ax - by = gcd(a, b). 1412 // Returns true if dependence disproved; i.e., gcd does not divide Delta. 1413 static bool findGCD(unsigned Bits, const APInt &AM, const APInt &BM, 1414 const APInt &Delta, APInt &G, APInt &X, APInt &Y) { 1415 APInt A0(Bits, 1, true), A1(Bits, 0, true); 1416 APInt B0(Bits, 0, true), B1(Bits, 1, true); 1417 APInt G0 = AM.abs(); 1418 APInt G1 = BM.abs(); 1419 APInt Q = G0; // these need to be initialized 1420 APInt R = G0; 1421 APInt::sdivrem(G0, G1, Q, R); 1422 while (R != 0) { 1423 APInt A2 = A0 - Q*A1; A0 = A1; A1 = A2; 1424 APInt B2 = B0 - Q*B1; B0 = B1; B1 = B2; 1425 G0 = G1; G1 = R; 1426 APInt::sdivrem(G0, G1, Q, R); 1427 } 1428 G = G1; 1429 LLVM_DEBUG(dbgs() << "\t GCD = " << G << "\n"); 1430 X = AM.slt(0) ? -A1 : A1; 1431 Y = BM.slt(0) ? B1 : -B1; 1432 1433 // make sure gcd divides Delta 1434 R = Delta.srem(G); 1435 if (R != 0) 1436 return true; // gcd doesn't divide Delta, no dependence 1437 Q = Delta.sdiv(G); 1438 return false; 1439 } 1440 1441 static APInt floorOfQuotient(const APInt &A, const APInt &B) { 1442 APInt Q = A; // these need to be initialized 1443 APInt R = A; 1444 APInt::sdivrem(A, B, Q, R); 1445 if (R == 0) 1446 return Q; 1447 if ((A.sgt(0) && B.sgt(0)) || 1448 (A.slt(0) && B.slt(0))) 1449 return Q; 1450 else 1451 return Q - 1; 1452 } 1453 1454 static APInt ceilingOfQuotient(const APInt &A, const APInt &B) { 1455 APInt Q = A; // these need to be initialized 1456 APInt R = A; 1457 APInt::sdivrem(A, B, Q, R); 1458 if (R == 0) 1459 return Q; 1460 if ((A.sgt(0) && B.sgt(0)) || 1461 (A.slt(0) && B.slt(0))) 1462 return Q + 1; 1463 else 1464 return Q; 1465 } 1466 1467 // exactSIVtest - 1468 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i], 1469 // where i is an induction variable, c1 and c2 are loop invariant, and a1 1470 // and a2 are constant, we can solve it exactly using an algorithm developed 1471 // by Banerjee and Wolfe. See Algorithm 6.2.1 (case 2.5) in: 1472 // 1473 // Dependence Analysis for Supercomputing 1474 // Utpal Banerjee 1475 // Kluwer Academic Publishers, 1988 1476 // 1477 // It's slower than the specialized tests (strong SIV, weak-zero SIV, etc), 1478 // so use them if possible. They're also a bit better with symbolics and, 1479 // in the case of the strong SIV test, can compute Distances. 1480 // 1481 // Return true if dependence disproved. 1482 // 1483 // This is a modified version of the original Banerjee algorithm. The original 1484 // only tested whether Dst depends on Src. This algorithm extends that and 1485 // returns all the dependencies that exist between Dst and Src. 1486 bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, 1487 const SCEV *SrcConst, const SCEV *DstConst, 1488 const Loop *CurLoop, unsigned Level, 1489 FullDependence &Result, 1490 Constraint &NewConstraint) const { 1491 LLVM_DEBUG(dbgs() << "\tExact SIV test\n"); 1492 LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); 1493 LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); 1494 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1495 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1496 ++ExactSIVapplications; 1497 assert(0 < Level && Level <= CommonLevels && "Level out of range"); 1498 Level--; 1499 Result.Consistent = false; 1500 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1501 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1502 NewConstraint.setLine(SrcCoeff, SE->getNegativeSCEV(DstCoeff), Delta, 1503 CurLoop); 1504 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1505 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1506 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1507 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) 1508 return false; 1509 1510 // find gcd 1511 APInt G, X, Y; 1512 APInt AM = ConstSrcCoeff->getAPInt(); 1513 APInt BM = ConstDstCoeff->getAPInt(); 1514 APInt CM = ConstDelta->getAPInt(); 1515 unsigned Bits = AM.getBitWidth(); 1516 if (findGCD(Bits, AM, BM, CM, G, X, Y)) { 1517 // gcd doesn't divide Delta, no dependence 1518 ++ExactSIVindependence; 1519 ++ExactSIVsuccesses; 1520 return true; 1521 } 1522 1523 LLVM_DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); 1524 1525 // since SCEV construction normalizes, LM = 0 1526 APInt UM(Bits, 1, true); 1527 bool UMValid = false; 1528 // UM is perhaps unavailable, let's check 1529 if (const SCEVConstant *CUB = 1530 collectConstantUpperBound(CurLoop, Delta->getType())) { 1531 UM = CUB->getAPInt(); 1532 LLVM_DEBUG(dbgs() << "\t UM = " << UM << "\n"); 1533 UMValid = true; 1534 } 1535 1536 APInt TU(APInt::getSignedMaxValue(Bits)); 1537 APInt TL(APInt::getSignedMinValue(Bits)); 1538 APInt TC = CM.sdiv(G); 1539 APInt TX = X * TC; 1540 APInt TY = Y * TC; 1541 LLVM_DEBUG(dbgs() << "\t TC = " << TC << "\n"); 1542 LLVM_DEBUG(dbgs() << "\t TX = " << TX << "\n"); 1543 LLVM_DEBUG(dbgs() << "\t TY = " << TY << "\n"); 1544 1545 SmallVector<APInt, 2> TLVec, TUVec; 1546 APInt TB = BM.sdiv(G); 1547 if (TB.sgt(0)) { 1548 TLVec.push_back(ceilingOfQuotient(-TX, TB)); 1549 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); 1550 // New bound check - modification to Banerjee's e3 check 1551 if (UMValid) { 1552 TUVec.push_back(floorOfQuotient(UM - TX, TB)); 1553 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); 1554 } 1555 } else { 1556 TUVec.push_back(floorOfQuotient(-TX, TB)); 1557 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); 1558 // New bound check - modification to Banerjee's e3 check 1559 if (UMValid) { 1560 TLVec.push_back(ceilingOfQuotient(UM - TX, TB)); 1561 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); 1562 } 1563 } 1564 1565 APInt TA = AM.sdiv(G); 1566 if (TA.sgt(0)) { 1567 if (UMValid) { 1568 TUVec.push_back(floorOfQuotient(UM - TY, TA)); 1569 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); 1570 } 1571 // New bound check - modification to Banerjee's e3 check 1572 TLVec.push_back(ceilingOfQuotient(-TY, TA)); 1573 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); 1574 } else { 1575 if (UMValid) { 1576 TLVec.push_back(ceilingOfQuotient(UM - TY, TA)); 1577 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); 1578 } 1579 // New bound check - modification to Banerjee's e3 check 1580 TUVec.push_back(floorOfQuotient(-TY, TA)); 1581 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); 1582 } 1583 1584 LLVM_DEBUG(dbgs() << "\t TA = " << TA << "\n"); 1585 LLVM_DEBUG(dbgs() << "\t TB = " << TB << "\n"); 1586 1587 if (TLVec.empty() || TUVec.empty()) 1588 return false; 1589 TL = APIntOps::smax(TLVec.front(), TLVec.back()); 1590 TU = APIntOps::smin(TUVec.front(), TUVec.back()); 1591 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1592 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1593 1594 if (TL.sgt(TU)) { 1595 ++ExactSIVindependence; 1596 ++ExactSIVsuccesses; 1597 return true; 1598 } 1599 1600 // explore directions 1601 unsigned NewDirection = Dependence::DVEntry::NONE; 1602 APInt LowerDistance, UpperDistance; 1603 if (TA.sgt(TB)) { 1604 LowerDistance = (TY - TX) + (TA - TB) * TL; 1605 UpperDistance = (TY - TX) + (TA - TB) * TU; 1606 } else { 1607 LowerDistance = (TY - TX) + (TA - TB) * TU; 1608 UpperDistance = (TY - TX) + (TA - TB) * TL; 1609 } 1610 1611 LLVM_DEBUG(dbgs() << "\t LowerDistance = " << LowerDistance << "\n"); 1612 LLVM_DEBUG(dbgs() << "\t UpperDistance = " << UpperDistance << "\n"); 1613 1614 APInt Zero(Bits, 0, true); 1615 if (LowerDistance.sle(Zero) && UpperDistance.sge(Zero)) { 1616 NewDirection |= Dependence::DVEntry::EQ; 1617 ++ExactSIVsuccesses; 1618 } 1619 if (LowerDistance.slt(0)) { 1620 NewDirection |= Dependence::DVEntry::GT; 1621 ++ExactSIVsuccesses; 1622 } 1623 if (UpperDistance.sgt(0)) { 1624 NewDirection |= Dependence::DVEntry::LT; 1625 ++ExactSIVsuccesses; 1626 } 1627 1628 // finished 1629 Result.DV[Level].Direction &= NewDirection; 1630 if (Result.DV[Level].Direction == Dependence::DVEntry::NONE) 1631 ++ExactSIVindependence; 1632 LLVM_DEBUG(dbgs() << "\t Result = "); 1633 LLVM_DEBUG(Result.dump(dbgs())); 1634 return Result.DV[Level].Direction == Dependence::DVEntry::NONE; 1635 } 1636 1637 1638 // Return true if the divisor evenly divides the dividend. 1639 static 1640 bool isRemainderZero(const SCEVConstant *Dividend, 1641 const SCEVConstant *Divisor) { 1642 const APInt &ConstDividend = Dividend->getAPInt(); 1643 const APInt &ConstDivisor = Divisor->getAPInt(); 1644 return ConstDividend.srem(ConstDivisor) == 0; 1645 } 1646 1647 1648 // weakZeroSrcSIVtest - 1649 // From the paper, Practical Dependence Testing, Section 4.2.2 1650 // 1651 // When we have a pair of subscripts of the form [c1] and [c2 + a*i], 1652 // where i is an induction variable, c1 and c2 are loop invariant, 1653 // and a is a constant, we can solve it exactly using the 1654 // Weak-Zero SIV test. 1655 // 1656 // Given 1657 // 1658 // c1 = c2 + a*i 1659 // 1660 // we get 1661 // 1662 // (c1 - c2)/a = i 1663 // 1664 // If i is not an integer, there's no dependence. 1665 // If i < 0 or > UB, there's no dependence. 1666 // If i = 0, the direction is >= and peeling the 1667 // 1st iteration will break the dependence. 1668 // If i = UB, the direction is <= and peeling the 1669 // last iteration will break the dependence. 1670 // Otherwise, the direction is *. 1671 // 1672 // Can prove independence. Failing that, we can sometimes refine 1673 // the directions. Can sometimes show that first or last 1674 // iteration carries all the dependences (so worth peeling). 1675 // 1676 // (see also weakZeroDstSIVtest) 1677 // 1678 // Return true if dependence disproved. 1679 bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *DstCoeff, 1680 const SCEV *SrcConst, 1681 const SCEV *DstConst, 1682 const Loop *CurLoop, unsigned Level, 1683 FullDependence &Result, 1684 Constraint &NewConstraint) const { 1685 // For the WeakSIV test, it's possible the loop isn't common to 1686 // the Src and Dst loops. If it isn't, then there's no need to 1687 // record a direction. 1688 LLVM_DEBUG(dbgs() << "\tWeak-Zero (src) SIV test\n"); 1689 LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << "\n"); 1690 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1691 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1692 ++WeakZeroSIVapplications; 1693 assert(0 < Level && Level <= MaxLevels && "Level out of range"); 1694 Level--; 1695 Result.Consistent = false; 1696 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); 1697 NewConstraint.setLine(SE->getZero(Delta->getType()), DstCoeff, Delta, 1698 CurLoop); 1699 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1700 if (isKnownPredicate(CmpInst::ICMP_EQ, SrcConst, DstConst)) { 1701 if (Level < CommonLevels) { 1702 Result.DV[Level].Direction &= Dependence::DVEntry::GE; 1703 Result.DV[Level].PeelFirst = true; 1704 ++WeakZeroSIVsuccesses; 1705 } 1706 return false; // dependences caused by first iteration 1707 } 1708 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1709 if (!ConstCoeff) 1710 return false; 1711 const SCEV *AbsCoeff = 1712 SE->isKnownNegative(ConstCoeff) ? 1713 SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; 1714 const SCEV *NewDelta = 1715 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta; 1716 1717 // check that Delta/SrcCoeff < iteration count 1718 // really check NewDelta < count*AbsCoeff 1719 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1720 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1721 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); 1722 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { 1723 ++WeakZeroSIVindependence; 1724 ++WeakZeroSIVsuccesses; 1725 return true; 1726 } 1727 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { 1728 // dependences caused by last iteration 1729 if (Level < CommonLevels) { 1730 Result.DV[Level].Direction &= Dependence::DVEntry::LE; 1731 Result.DV[Level].PeelLast = true; 1732 ++WeakZeroSIVsuccesses; 1733 } 1734 return false; 1735 } 1736 } 1737 1738 // check that Delta/SrcCoeff >= 0 1739 // really check that NewDelta >= 0 1740 if (SE->isKnownNegative(NewDelta)) { 1741 // No dependence, newDelta < 0 1742 ++WeakZeroSIVindependence; 1743 ++WeakZeroSIVsuccesses; 1744 return true; 1745 } 1746 1747 // if SrcCoeff doesn't divide Delta, then no dependence 1748 if (isa<SCEVConstant>(Delta) && 1749 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { 1750 ++WeakZeroSIVindependence; 1751 ++WeakZeroSIVsuccesses; 1752 return true; 1753 } 1754 return false; 1755 } 1756 1757 1758 // weakZeroDstSIVtest - 1759 // From the paper, Practical Dependence Testing, Section 4.2.2 1760 // 1761 // When we have a pair of subscripts of the form [c1 + a*i] and [c2], 1762 // where i is an induction variable, c1 and c2 are loop invariant, 1763 // and a is a constant, we can solve it exactly using the 1764 // Weak-Zero SIV test. 1765 // 1766 // Given 1767 // 1768 // c1 + a*i = c2 1769 // 1770 // we get 1771 // 1772 // i = (c2 - c1)/a 1773 // 1774 // If i is not an integer, there's no dependence. 1775 // If i < 0 or > UB, there's no dependence. 1776 // If i = 0, the direction is <= and peeling the 1777 // 1st iteration will break the dependence. 1778 // If i = UB, the direction is >= and peeling the 1779 // last iteration will break the dependence. 1780 // Otherwise, the direction is *. 1781 // 1782 // Can prove independence. Failing that, we can sometimes refine 1783 // the directions. Can sometimes show that first or last 1784 // iteration carries all the dependences (so worth peeling). 1785 // 1786 // (see also weakZeroSrcSIVtest) 1787 // 1788 // Return true if dependence disproved. 1789 bool DependenceInfo::weakZeroDstSIVtest(const SCEV *SrcCoeff, 1790 const SCEV *SrcConst, 1791 const SCEV *DstConst, 1792 const Loop *CurLoop, unsigned Level, 1793 FullDependence &Result, 1794 Constraint &NewConstraint) const { 1795 // For the WeakSIV test, it's possible the loop isn't common to the 1796 // Src and Dst loops. If it isn't, then there's no need to record a direction. 1797 LLVM_DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n"); 1798 LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << "\n"); 1799 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1800 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1801 ++WeakZeroSIVapplications; 1802 assert(0 < Level && Level <= SrcLevels && "Level out of range"); 1803 Level--; 1804 Result.Consistent = false; 1805 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1806 NewConstraint.setLine(SrcCoeff, SE->getZero(Delta->getType()), Delta, 1807 CurLoop); 1808 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1809 if (isKnownPredicate(CmpInst::ICMP_EQ, DstConst, SrcConst)) { 1810 if (Level < CommonLevels) { 1811 Result.DV[Level].Direction &= Dependence::DVEntry::LE; 1812 Result.DV[Level].PeelFirst = true; 1813 ++WeakZeroSIVsuccesses; 1814 } 1815 return false; // dependences caused by first iteration 1816 } 1817 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1818 if (!ConstCoeff) 1819 return false; 1820 const SCEV *AbsCoeff = 1821 SE->isKnownNegative(ConstCoeff) ? 1822 SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; 1823 const SCEV *NewDelta = 1824 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta; 1825 1826 // check that Delta/SrcCoeff < iteration count 1827 // really check NewDelta < count*AbsCoeff 1828 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1829 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1830 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); 1831 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { 1832 ++WeakZeroSIVindependence; 1833 ++WeakZeroSIVsuccesses; 1834 return true; 1835 } 1836 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { 1837 // dependences caused by last iteration 1838 if (Level < CommonLevels) { 1839 Result.DV[Level].Direction &= Dependence::DVEntry::GE; 1840 Result.DV[Level].PeelLast = true; 1841 ++WeakZeroSIVsuccesses; 1842 } 1843 return false; 1844 } 1845 } 1846 1847 // check that Delta/SrcCoeff >= 0 1848 // really check that NewDelta >= 0 1849 if (SE->isKnownNegative(NewDelta)) { 1850 // No dependence, newDelta < 0 1851 ++WeakZeroSIVindependence; 1852 ++WeakZeroSIVsuccesses; 1853 return true; 1854 } 1855 1856 // if SrcCoeff doesn't divide Delta, then no dependence 1857 if (isa<SCEVConstant>(Delta) && 1858 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { 1859 ++WeakZeroSIVindependence; 1860 ++WeakZeroSIVsuccesses; 1861 return true; 1862 } 1863 return false; 1864 } 1865 1866 1867 // exactRDIVtest - Tests the RDIV subscript pair for dependence. 1868 // Things of the form [c1 + a*i] and [c2 + b*j], 1869 // where i and j are induction variable, c1 and c2 are loop invariant, 1870 // and a and b are constants. 1871 // Returns true if any possible dependence is disproved. 1872 // Marks the result as inconsistent. 1873 // Works in some cases that symbolicRDIVtest doesn't, and vice versa. 1874 bool DependenceInfo::exactRDIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, 1875 const SCEV *SrcConst, const SCEV *DstConst, 1876 const Loop *SrcLoop, const Loop *DstLoop, 1877 FullDependence &Result) const { 1878 LLVM_DEBUG(dbgs() << "\tExact RDIV test\n"); 1879 LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); 1880 LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); 1881 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1882 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1883 ++ExactRDIVapplications; 1884 Result.Consistent = false; 1885 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1886 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1887 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1888 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1889 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1890 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) 1891 return false; 1892 1893 // find gcd 1894 APInt G, X, Y; 1895 APInt AM = ConstSrcCoeff->getAPInt(); 1896 APInt BM = ConstDstCoeff->getAPInt(); 1897 APInt CM = ConstDelta->getAPInt(); 1898 unsigned Bits = AM.getBitWidth(); 1899 if (findGCD(Bits, AM, BM, CM, G, X, Y)) { 1900 // gcd doesn't divide Delta, no dependence 1901 ++ExactRDIVindependence; 1902 return true; 1903 } 1904 1905 LLVM_DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); 1906 1907 // since SCEV construction seems to normalize, LM = 0 1908 APInt SrcUM(Bits, 1, true); 1909 bool SrcUMvalid = false; 1910 // SrcUM is perhaps unavailable, let's check 1911 if (const SCEVConstant *UpperBound = 1912 collectConstantUpperBound(SrcLoop, Delta->getType())) { 1913 SrcUM = UpperBound->getAPInt(); 1914 LLVM_DEBUG(dbgs() << "\t SrcUM = " << SrcUM << "\n"); 1915 SrcUMvalid = true; 1916 } 1917 1918 APInt DstUM(Bits, 1, true); 1919 bool DstUMvalid = false; 1920 // UM is perhaps unavailable, let's check 1921 if (const SCEVConstant *UpperBound = 1922 collectConstantUpperBound(DstLoop, Delta->getType())) { 1923 DstUM = UpperBound->getAPInt(); 1924 LLVM_DEBUG(dbgs() << "\t DstUM = " << DstUM << "\n"); 1925 DstUMvalid = true; 1926 } 1927 1928 APInt TU(APInt::getSignedMaxValue(Bits)); 1929 APInt TL(APInt::getSignedMinValue(Bits)); 1930 APInt TC = CM.sdiv(G); 1931 APInt TX = X * TC; 1932 APInt TY = Y * TC; 1933 LLVM_DEBUG(dbgs() << "\t TC = " << TC << "\n"); 1934 LLVM_DEBUG(dbgs() << "\t TX = " << TX << "\n"); 1935 LLVM_DEBUG(dbgs() << "\t TY = " << TY << "\n"); 1936 1937 SmallVector<APInt, 2> TLVec, TUVec; 1938 APInt TB = BM.sdiv(G); 1939 if (TB.sgt(0)) { 1940 TLVec.push_back(ceilingOfQuotient(-TX, TB)); 1941 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); 1942 if (SrcUMvalid) { 1943 TUVec.push_back(floorOfQuotient(SrcUM - TX, TB)); 1944 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); 1945 } 1946 } else { 1947 TUVec.push_back(floorOfQuotient(-TX, TB)); 1948 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); 1949 if (SrcUMvalid) { 1950 TLVec.push_back(ceilingOfQuotient(SrcUM - TX, TB)); 1951 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); 1952 } 1953 } 1954 1955 APInt TA = AM.sdiv(G); 1956 if (TA.sgt(0)) { 1957 TLVec.push_back(ceilingOfQuotient(-TY, TA)); 1958 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); 1959 if (DstUMvalid) { 1960 TUVec.push_back(floorOfQuotient(DstUM - TY, TA)); 1961 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); 1962 } 1963 } else { 1964 TUVec.push_back(floorOfQuotient(-TY, TA)); 1965 LLVM_DEBUG(dbgs() << "\t Possible TU = " << TUVec.back() << "\n"); 1966 if (DstUMvalid) { 1967 TLVec.push_back(ceilingOfQuotient(DstUM - TY, TA)); 1968 LLVM_DEBUG(dbgs() << "\t Possible TL = " << TLVec.back() << "\n"); 1969 } 1970 } 1971 1972 if (TLVec.empty() || TUVec.empty()) 1973 return false; 1974 1975 LLVM_DEBUG(dbgs() << "\t TA = " << TA << "\n"); 1976 LLVM_DEBUG(dbgs() << "\t TB = " << TB << "\n"); 1977 1978 TL = APIntOps::smax(TLVec.front(), TLVec.back()); 1979 TU = APIntOps::smin(TUVec.front(), TUVec.back()); 1980 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1981 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1982 1983 if (TL.sgt(TU)) 1984 ++ExactRDIVindependence; 1985 return TL.sgt(TU); 1986 } 1987 1988 1989 // symbolicRDIVtest - 1990 // In Section 4.5 of the Practical Dependence Testing paper,the authors 1991 // introduce a special case of Banerjee's Inequalities (also called the 1992 // Extreme-Value Test) that can handle some of the SIV and RDIV cases, 1993 // particularly cases with symbolics. Since it's only able to disprove 1994 // dependence (not compute distances or directions), we'll use it as a 1995 // fall back for the other tests. 1996 // 1997 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] 1998 // where i and j are induction variables and c1 and c2 are loop invariants, 1999 // we can use the symbolic tests to disprove some dependences, serving as a 2000 // backup for the RDIV test. Note that i and j can be the same variable, 2001 // letting this test serve as a backup for the various SIV tests. 2002 // 2003 // For a dependence to exist, c1 + a1*i must equal c2 + a2*j for some 2004 // 0 <= i <= N1 and some 0 <= j <= N2, where N1 and N2 are the (normalized) 2005 // loop bounds for the i and j loops, respectively. So, ... 2006 // 2007 // c1 + a1*i = c2 + a2*j 2008 // a1*i - a2*j = c2 - c1 2009 // 2010 // To test for a dependence, we compute c2 - c1 and make sure it's in the 2011 // range of the maximum and minimum possible values of a1*i - a2*j. 2012 // Considering the signs of a1 and a2, we have 4 possible cases: 2013 // 2014 // 1) If a1 >= 0 and a2 >= 0, then 2015 // a1*0 - a2*N2 <= c2 - c1 <= a1*N1 - a2*0 2016 // -a2*N2 <= c2 - c1 <= a1*N1 2017 // 2018 // 2) If a1 >= 0 and a2 <= 0, then 2019 // a1*0 - a2*0 <= c2 - c1 <= a1*N1 - a2*N2 2020 // 0 <= c2 - c1 <= a1*N1 - a2*N2 2021 // 2022 // 3) If a1 <= 0 and a2 >= 0, then 2023 // a1*N1 - a2*N2 <= c2 - c1 <= a1*0 - a2*0 2024 // a1*N1 - a2*N2 <= c2 - c1 <= 0 2025 // 2026 // 4) If a1 <= 0 and a2 <= 0, then 2027 // a1*N1 - a2*0 <= c2 - c1 <= a1*0 - a2*N2 2028 // a1*N1 <= c2 - c1 <= -a2*N2 2029 // 2030 // return true if dependence disproved 2031 bool DependenceInfo::symbolicRDIVtest(const SCEV *A1, const SCEV *A2, 2032 const SCEV *C1, const SCEV *C2, 2033 const Loop *Loop1, 2034 const Loop *Loop2) const { 2035 ++SymbolicRDIVapplications; 2036 LLVM_DEBUG(dbgs() << "\ttry symbolic RDIV test\n"); 2037 LLVM_DEBUG(dbgs() << "\t A1 = " << *A1); 2038 LLVM_DEBUG(dbgs() << ", type = " << *A1->getType() << "\n"); 2039 LLVM_DEBUG(dbgs() << "\t A2 = " << *A2 << "\n"); 2040 LLVM_DEBUG(dbgs() << "\t C1 = " << *C1 << "\n"); 2041 LLVM_DEBUG(dbgs() << "\t C2 = " << *C2 << "\n"); 2042 const SCEV *N1 = collectUpperBound(Loop1, A1->getType()); 2043 const SCEV *N2 = collectUpperBound(Loop2, A1->getType()); 2044 LLVM_DEBUG(if (N1) dbgs() << "\t N1 = " << *N1 << "\n"); 2045 LLVM_DEBUG(if (N2) dbgs() << "\t N2 = " << *N2 << "\n"); 2046 const SCEV *C2_C1 = SE->getMinusSCEV(C2, C1); 2047 const SCEV *C1_C2 = SE->getMinusSCEV(C1, C2); 2048 LLVM_DEBUG(dbgs() << "\t C2 - C1 = " << *C2_C1 << "\n"); 2049 LLVM_DEBUG(dbgs() << "\t C1 - C2 = " << *C1_C2 << "\n"); 2050 if (SE->isKnownNonNegative(A1)) { 2051 if (SE->isKnownNonNegative(A2)) { 2052 // A1 >= 0 && A2 >= 0 2053 if (N1) { 2054 // make sure that c2 - c1 <= a1*N1 2055 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2056 LLVM_DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); 2057 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1)) { 2058 ++SymbolicRDIVindependence; 2059 return true; 2060 } 2061 } 2062 if (N2) { 2063 // make sure that -a2*N2 <= c2 - c1, or a2*N2 >= c1 - c2 2064 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2065 LLVM_DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); 2066 if (isKnownPredicate(CmpInst::ICMP_SLT, A2N2, C1_C2)) { 2067 ++SymbolicRDIVindependence; 2068 return true; 2069 } 2070 } 2071 } 2072 else if (SE->isKnownNonPositive(A2)) { 2073 // a1 >= 0 && a2 <= 0 2074 if (N1 && N2) { 2075 // make sure that c2 - c1 <= a1*N1 - a2*N2 2076 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2077 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2078 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); 2079 LLVM_DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); 2080 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1_A2N2)) { 2081 ++SymbolicRDIVindependence; 2082 return true; 2083 } 2084 } 2085 // make sure that 0 <= c2 - c1 2086 if (SE->isKnownNegative(C2_C1)) { 2087 ++SymbolicRDIVindependence; 2088 return true; 2089 } 2090 } 2091 } 2092 else if (SE->isKnownNonPositive(A1)) { 2093 if (SE->isKnownNonNegative(A2)) { 2094 // a1 <= 0 && a2 >= 0 2095 if (N1 && N2) { 2096 // make sure that a1*N1 - a2*N2 <= c2 - c1 2097 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2098 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2099 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); 2100 LLVM_DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); 2101 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1_A2N2, C2_C1)) { 2102 ++SymbolicRDIVindependence; 2103 return true; 2104 } 2105 } 2106 // make sure that c2 - c1 <= 0 2107 if (SE->isKnownPositive(C2_C1)) { 2108 ++SymbolicRDIVindependence; 2109 return true; 2110 } 2111 } 2112 else if (SE->isKnownNonPositive(A2)) { 2113 // a1 <= 0 && a2 <= 0 2114 if (N1) { 2115 // make sure that a1*N1 <= c2 - c1 2116 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2117 LLVM_DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); 2118 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1, C2_C1)) { 2119 ++SymbolicRDIVindependence; 2120 return true; 2121 } 2122 } 2123 if (N2) { 2124 // make sure that c2 - c1 <= -a2*N2, or c1 - c2 >= a2*N2 2125 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2126 LLVM_DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); 2127 if (isKnownPredicate(CmpInst::ICMP_SLT, C1_C2, A2N2)) { 2128 ++SymbolicRDIVindependence; 2129 return true; 2130 } 2131 } 2132 } 2133 } 2134 return false; 2135 } 2136 2137 2138 // testSIV - 2139 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 - a2*i] 2140 // where i is an induction variable, c1 and c2 are loop invariant, and a1 and 2141 // a2 are constant, we attack it with an SIV test. While they can all be 2142 // solved with the Exact SIV test, it's worthwhile to use simpler tests when 2143 // they apply; they're cheaper and sometimes more precise. 2144 // 2145 // Return true if dependence disproved. 2146 bool DependenceInfo::testSIV(const SCEV *Src, const SCEV *Dst, unsigned &Level, 2147 FullDependence &Result, Constraint &NewConstraint, 2148 const SCEV *&SplitIter) const { 2149 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n"); 2150 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2151 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); 2152 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); 2153 if (SrcAddRec && DstAddRec) { 2154 const SCEV *SrcConst = SrcAddRec->getStart(); 2155 const SCEV *DstConst = DstAddRec->getStart(); 2156 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2157 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); 2158 const Loop *CurLoop = SrcAddRec->getLoop(); 2159 assert(CurLoop == DstAddRec->getLoop() && 2160 "both loops in SIV should be same"); 2161 Level = mapSrcLoop(CurLoop); 2162 bool disproven; 2163 if (SrcCoeff == DstCoeff) 2164 disproven = strongSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2165 Level, Result, NewConstraint); 2166 else if (SrcCoeff == SE->getNegativeSCEV(DstCoeff)) 2167 disproven = weakCrossingSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2168 Level, Result, NewConstraint, SplitIter); 2169 else 2170 disproven = exactSIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, 2171 Level, Result, NewConstraint); 2172 return disproven || 2173 gcdMIVtest(Src, Dst, Result) || 2174 symbolicRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, CurLoop); 2175 } 2176 if (SrcAddRec) { 2177 const SCEV *SrcConst = SrcAddRec->getStart(); 2178 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2179 const SCEV *DstConst = Dst; 2180 const Loop *CurLoop = SrcAddRec->getLoop(); 2181 Level = mapSrcLoop(CurLoop); 2182 return weakZeroDstSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2183 Level, Result, NewConstraint) || 2184 gcdMIVtest(Src, Dst, Result); 2185 } 2186 if (DstAddRec) { 2187 const SCEV *DstConst = DstAddRec->getStart(); 2188 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); 2189 const SCEV *SrcConst = Src; 2190 const Loop *CurLoop = DstAddRec->getLoop(); 2191 Level = mapDstLoop(CurLoop); 2192 return weakZeroSrcSIVtest(DstCoeff, SrcConst, DstConst, 2193 CurLoop, Level, Result, NewConstraint) || 2194 gcdMIVtest(Src, Dst, Result); 2195 } 2196 llvm_unreachable("SIV test expected at least one AddRec"); 2197 return false; 2198 } 2199 2200 2201 // testRDIV - 2202 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] 2203 // where i and j are induction variables, c1 and c2 are loop invariant, 2204 // and a1 and a2 are constant, we can solve it exactly with an easy adaptation 2205 // of the Exact SIV test, the Restricted Double Index Variable (RDIV) test. 2206 // It doesn't make sense to talk about distance or direction in this case, 2207 // so there's no point in making special versions of the Strong SIV test or 2208 // the Weak-crossing SIV test. 2209 // 2210 // With minor algebra, this test can also be used for things like 2211 // [c1 + a1*i + a2*j][c2]. 2212 // 2213 // Return true if dependence disproved. 2214 bool DependenceInfo::testRDIV(const SCEV *Src, const SCEV *Dst, 2215 FullDependence &Result) const { 2216 // we have 3 possible situations here: 2217 // 1) [a*i + b] and [c*j + d] 2218 // 2) [a*i + c*j + b] and [d] 2219 // 3) [b] and [a*i + c*j + d] 2220 // We need to find what we've got and get organized 2221 2222 const SCEV *SrcConst, *DstConst; 2223 const SCEV *SrcCoeff, *DstCoeff; 2224 const Loop *SrcLoop, *DstLoop; 2225 2226 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n"); 2227 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2228 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); 2229 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); 2230 if (SrcAddRec && DstAddRec) { 2231 SrcConst = SrcAddRec->getStart(); 2232 SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2233 SrcLoop = SrcAddRec->getLoop(); 2234 DstConst = DstAddRec->getStart(); 2235 DstCoeff = DstAddRec->getStepRecurrence(*SE); 2236 DstLoop = DstAddRec->getLoop(); 2237 } 2238 else if (SrcAddRec) { 2239 if (const SCEVAddRecExpr *tmpAddRec = 2240 dyn_cast<SCEVAddRecExpr>(SrcAddRec->getStart())) { 2241 SrcConst = tmpAddRec->getStart(); 2242 SrcCoeff = tmpAddRec->getStepRecurrence(*SE); 2243 SrcLoop = tmpAddRec->getLoop(); 2244 DstConst = Dst; 2245 DstCoeff = SE->getNegativeSCEV(SrcAddRec->getStepRecurrence(*SE)); 2246 DstLoop = SrcAddRec->getLoop(); 2247 } 2248 else 2249 llvm_unreachable("RDIV reached by surprising SCEVs"); 2250 } 2251 else if (DstAddRec) { 2252 if (const SCEVAddRecExpr *tmpAddRec = 2253 dyn_cast<SCEVAddRecExpr>(DstAddRec->getStart())) { 2254 DstConst = tmpAddRec->getStart(); 2255 DstCoeff = tmpAddRec->getStepRecurrence(*SE); 2256 DstLoop = tmpAddRec->getLoop(); 2257 SrcConst = Src; 2258 SrcCoeff = SE->getNegativeSCEV(DstAddRec->getStepRecurrence(*SE)); 2259 SrcLoop = DstAddRec->getLoop(); 2260 } 2261 else 2262 llvm_unreachable("RDIV reached by surprising SCEVs"); 2263 } 2264 else 2265 llvm_unreachable("RDIV expected at least one AddRec"); 2266 return exactRDIVtest(SrcCoeff, DstCoeff, 2267 SrcConst, DstConst, 2268 SrcLoop, DstLoop, 2269 Result) || 2270 gcdMIVtest(Src, Dst, Result) || 2271 symbolicRDIVtest(SrcCoeff, DstCoeff, 2272 SrcConst, DstConst, 2273 SrcLoop, DstLoop); 2274 } 2275 2276 2277 // Tests the single-subscript MIV pair (Src and Dst) for dependence. 2278 // Return true if dependence disproved. 2279 // Can sometimes refine direction vectors. 2280 bool DependenceInfo::testMIV(const SCEV *Src, const SCEV *Dst, 2281 const SmallBitVector &Loops, 2282 FullDependence &Result) const { 2283 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n"); 2284 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2285 Result.Consistent = false; 2286 return gcdMIVtest(Src, Dst, Result) || 2287 banerjeeMIVtest(Src, Dst, Loops, Result); 2288 } 2289 2290 2291 // Given a product, e.g., 10*X*Y, returns the first constant operand, 2292 // in this case 10. If there is no constant part, returns NULL. 2293 static 2294 const SCEVConstant *getConstantPart(const SCEV *Expr) { 2295 if (const auto *Constant = dyn_cast<SCEVConstant>(Expr)) 2296 return Constant; 2297 else if (const auto *Product = dyn_cast<SCEVMulExpr>(Expr)) 2298 if (const auto *Constant = dyn_cast<SCEVConstant>(Product->getOperand(0))) 2299 return Constant; 2300 return nullptr; 2301 } 2302 2303 2304 //===----------------------------------------------------------------------===// 2305 // gcdMIVtest - 2306 // Tests an MIV subscript pair for dependence. 2307 // Returns true if any possible dependence is disproved. 2308 // Marks the result as inconsistent. 2309 // Can sometimes disprove the equal direction for 1 or more loops, 2310 // as discussed in Michael Wolfe's book, 2311 // High Performance Compilers for Parallel Computing, page 235. 2312 // 2313 // We spend some effort (code!) to handle cases like 2314 // [10*i + 5*N*j + 15*M + 6], where i and j are induction variables, 2315 // but M and N are just loop-invariant variables. 2316 // This should help us handle linearized subscripts; 2317 // also makes this test a useful backup to the various SIV tests. 2318 // 2319 // It occurs to me that the presence of loop-invariant variables 2320 // changes the nature of the test from "greatest common divisor" 2321 // to "a common divisor". 2322 bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst, 2323 FullDependence &Result) const { 2324 LLVM_DEBUG(dbgs() << "starting gcd\n"); 2325 ++GCDapplications; 2326 unsigned BitWidth = SE->getTypeSizeInBits(Src->getType()); 2327 APInt RunningGCD = APInt::getNullValue(BitWidth); 2328 2329 // Examine Src coefficients. 2330 // Compute running GCD and record source constant. 2331 // Because we're looking for the constant at the end of the chain, 2332 // we can't quit the loop just because the GCD == 1. 2333 const SCEV *Coefficients = Src; 2334 while (const SCEVAddRecExpr *AddRec = 2335 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2336 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2337 // If the coefficient is the product of a constant and other stuff, 2338 // we can use the constant in the GCD computation. 2339 const auto *Constant = getConstantPart(Coeff); 2340 if (!Constant) 2341 return false; 2342 APInt ConstCoeff = Constant->getAPInt(); 2343 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2344 Coefficients = AddRec->getStart(); 2345 } 2346 const SCEV *SrcConst = Coefficients; 2347 2348 // Examine Dst coefficients. 2349 // Compute running GCD and record destination constant. 2350 // Because we're looking for the constant at the end of the chain, 2351 // we can't quit the loop just because the GCD == 1. 2352 Coefficients = Dst; 2353 while (const SCEVAddRecExpr *AddRec = 2354 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2355 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2356 // If the coefficient is the product of a constant and other stuff, 2357 // we can use the constant in the GCD computation. 2358 const auto *Constant = getConstantPart(Coeff); 2359 if (!Constant) 2360 return false; 2361 APInt ConstCoeff = Constant->getAPInt(); 2362 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2363 Coefficients = AddRec->getStart(); 2364 } 2365 const SCEV *DstConst = Coefficients; 2366 2367 APInt ExtraGCD = APInt::getNullValue(BitWidth); 2368 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 2369 LLVM_DEBUG(dbgs() << " Delta = " << *Delta << "\n"); 2370 const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Delta); 2371 if (const SCEVAddExpr *Sum = dyn_cast<SCEVAddExpr>(Delta)) { 2372 // If Delta is a sum of products, we may be able to make further progress. 2373 for (unsigned Op = 0, Ops = Sum->getNumOperands(); Op < Ops; Op++) { 2374 const SCEV *Operand = Sum->getOperand(Op); 2375 if (isa<SCEVConstant>(Operand)) { 2376 assert(!Constant && "Surprised to find multiple constants"); 2377 Constant = cast<SCEVConstant>(Operand); 2378 } 2379 else if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Operand)) { 2380 // Search for constant operand to participate in GCD; 2381 // If none found; return false. 2382 const SCEVConstant *ConstOp = getConstantPart(Product); 2383 if (!ConstOp) 2384 return false; 2385 APInt ConstOpValue = ConstOp->getAPInt(); 2386 ExtraGCD = APIntOps::GreatestCommonDivisor(ExtraGCD, 2387 ConstOpValue.abs()); 2388 } 2389 else 2390 return false; 2391 } 2392 } 2393 if (!Constant) 2394 return false; 2395 APInt ConstDelta = cast<SCEVConstant>(Constant)->getAPInt(); 2396 LLVM_DEBUG(dbgs() << " ConstDelta = " << ConstDelta << "\n"); 2397 if (ConstDelta == 0) 2398 return false; 2399 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ExtraGCD); 2400 LLVM_DEBUG(dbgs() << " RunningGCD = " << RunningGCD << "\n"); 2401 APInt Remainder = ConstDelta.srem(RunningGCD); 2402 if (Remainder != 0) { 2403 ++GCDindependence; 2404 return true; 2405 } 2406 2407 // Try to disprove equal directions. 2408 // For example, given a subscript pair [3*i + 2*j] and [i' + 2*j' - 1], 2409 // the code above can't disprove the dependence because the GCD = 1. 2410 // So we consider what happen if i = i' and what happens if j = j'. 2411 // If i = i', we can simplify the subscript to [2*i + 2*j] and [2*j' - 1], 2412 // which is infeasible, so we can disallow the = direction for the i level. 2413 // Setting j = j' doesn't help matters, so we end up with a direction vector 2414 // of [<>, *] 2415 // 2416 // Given A[5*i + 10*j*M + 9*M*N] and A[15*i + 20*j*M - 21*N*M + 5], 2417 // we need to remember that the constant part is 5 and the RunningGCD should 2418 // be initialized to ExtraGCD = 30. 2419 LLVM_DEBUG(dbgs() << " ExtraGCD = " << ExtraGCD << '\n'); 2420 2421 bool Improved = false; 2422 Coefficients = Src; 2423 while (const SCEVAddRecExpr *AddRec = 2424 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2425 Coefficients = AddRec->getStart(); 2426 const Loop *CurLoop = AddRec->getLoop(); 2427 RunningGCD = ExtraGCD; 2428 const SCEV *SrcCoeff = AddRec->getStepRecurrence(*SE); 2429 const SCEV *DstCoeff = SE->getMinusSCEV(SrcCoeff, SrcCoeff); 2430 const SCEV *Inner = Src; 2431 while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { 2432 AddRec = cast<SCEVAddRecExpr>(Inner); 2433 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2434 if (CurLoop == AddRec->getLoop()) 2435 ; // SrcCoeff == Coeff 2436 else { 2437 // If the coefficient is the product of a constant and other stuff, 2438 // we can use the constant in the GCD computation. 2439 Constant = getConstantPart(Coeff); 2440 if (!Constant) 2441 return false; 2442 APInt ConstCoeff = Constant->getAPInt(); 2443 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2444 } 2445 Inner = AddRec->getStart(); 2446 } 2447 Inner = Dst; 2448 while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { 2449 AddRec = cast<SCEVAddRecExpr>(Inner); 2450 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2451 if (CurLoop == AddRec->getLoop()) 2452 DstCoeff = Coeff; 2453 else { 2454 // If the coefficient is the product of a constant and other stuff, 2455 // we can use the constant in the GCD computation. 2456 Constant = getConstantPart(Coeff); 2457 if (!Constant) 2458 return false; 2459 APInt ConstCoeff = Constant->getAPInt(); 2460 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2461 } 2462 Inner = AddRec->getStart(); 2463 } 2464 Delta = SE->getMinusSCEV(SrcCoeff, DstCoeff); 2465 // If the coefficient is the product of a constant and other stuff, 2466 // we can use the constant in the GCD computation. 2467 Constant = getConstantPart(Delta); 2468 if (!Constant) 2469 // The difference of the two coefficients might not be a product 2470 // or constant, in which case we give up on this direction. 2471 continue; 2472 APInt ConstCoeff = Constant->getAPInt(); 2473 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2474 LLVM_DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n"); 2475 if (RunningGCD != 0) { 2476 Remainder = ConstDelta.srem(RunningGCD); 2477 LLVM_DEBUG(dbgs() << "\tRemainder = " << Remainder << "\n"); 2478 if (Remainder != 0) { 2479 unsigned Level = mapSrcLoop(CurLoop); 2480 Result.DV[Level - 1].Direction &= unsigned(~Dependence::DVEntry::EQ); 2481 Improved = true; 2482 } 2483 } 2484 } 2485 if (Improved) 2486 ++GCDsuccesses; 2487 LLVM_DEBUG(dbgs() << "all done\n"); 2488 return false; 2489 } 2490 2491 2492 //===----------------------------------------------------------------------===// 2493 // banerjeeMIVtest - 2494 // Use Banerjee's Inequalities to test an MIV subscript pair. 2495 // (Wolfe, in the race-car book, calls this the Extreme Value Test.) 2496 // Generally follows the discussion in Section 2.5.2 of 2497 // 2498 // Optimizing Supercompilers for Supercomputers 2499 // Michael Wolfe 2500 // 2501 // The inequalities given on page 25 are simplified in that loops are 2502 // normalized so that the lower bound is always 0 and the stride is always 1. 2503 // For example, Wolfe gives 2504 // 2505 // LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2506 // 2507 // where A_k is the coefficient of the kth index in the source subscript, 2508 // B_k is the coefficient of the kth index in the destination subscript, 2509 // U_k is the upper bound of the kth index, L_k is the lower bound of the Kth 2510 // index, and N_k is the stride of the kth index. Since all loops are normalized 2511 // by the SCEV package, N_k = 1 and L_k = 0, allowing us to simplify the 2512 // equation to 2513 // 2514 // LB^<_k = (A^-_k - B_k)^- (U_k - 0 - 1) + (A_k - B_k)0 - B_k 1 2515 // = (A^-_k - B_k)^- (U_k - 1) - B_k 2516 // 2517 // Similar simplifications are possible for the other equations. 2518 // 2519 // When we can't determine the number of iterations for a loop, 2520 // we use NULL as an indicator for the worst case, infinity. 2521 // When computing the upper bound, NULL denotes +inf; 2522 // for the lower bound, NULL denotes -inf. 2523 // 2524 // Return true if dependence disproved. 2525 bool DependenceInfo::banerjeeMIVtest(const SCEV *Src, const SCEV *Dst, 2526 const SmallBitVector &Loops, 2527 FullDependence &Result) const { 2528 LLVM_DEBUG(dbgs() << "starting Banerjee\n"); 2529 ++BanerjeeApplications; 2530 LLVM_DEBUG(dbgs() << " Src = " << *Src << '\n'); 2531 const SCEV *A0; 2532 CoefficientInfo *A = collectCoeffInfo(Src, true, A0); 2533 LLVM_DEBUG(dbgs() << " Dst = " << *Dst << '\n'); 2534 const SCEV *B0; 2535 CoefficientInfo *B = collectCoeffInfo(Dst, false, B0); 2536 BoundInfo *Bound = new BoundInfo[MaxLevels + 1]; 2537 const SCEV *Delta = SE->getMinusSCEV(B0, A0); 2538 LLVM_DEBUG(dbgs() << "\tDelta = " << *Delta << '\n'); 2539 2540 // Compute bounds for all the * directions. 2541 LLVM_DEBUG(dbgs() << "\tBounds[*]\n"); 2542 for (unsigned K = 1; K <= MaxLevels; ++K) { 2543 Bound[K].Iterations = A[K].Iterations ? A[K].Iterations : B[K].Iterations; 2544 Bound[K].Direction = Dependence::DVEntry::ALL; 2545 Bound[K].DirSet = Dependence::DVEntry::NONE; 2546 findBoundsALL(A, B, Bound, K); 2547 #ifndef NDEBUG 2548 LLVM_DEBUG(dbgs() << "\t " << K << '\t'); 2549 if (Bound[K].Lower[Dependence::DVEntry::ALL]) 2550 LLVM_DEBUG(dbgs() << *Bound[K].Lower[Dependence::DVEntry::ALL] << '\t'); 2551 else 2552 LLVM_DEBUG(dbgs() << "-inf\t"); 2553 if (Bound[K].Upper[Dependence::DVEntry::ALL]) 2554 LLVM_DEBUG(dbgs() << *Bound[K].Upper[Dependence::DVEntry::ALL] << '\n'); 2555 else 2556 LLVM_DEBUG(dbgs() << "+inf\n"); 2557 #endif 2558 } 2559 2560 // Test the *, *, *, ... case. 2561 bool Disproved = false; 2562 if (testBounds(Dependence::DVEntry::ALL, 0, Bound, Delta)) { 2563 // Explore the direction vector hierarchy. 2564 unsigned DepthExpanded = 0; 2565 unsigned NewDeps = exploreDirections(1, A, B, Bound, 2566 Loops, DepthExpanded, Delta); 2567 if (NewDeps > 0) { 2568 bool Improved = false; 2569 for (unsigned K = 1; K <= CommonLevels; ++K) { 2570 if (Loops[K]) { 2571 unsigned Old = Result.DV[K - 1].Direction; 2572 Result.DV[K - 1].Direction = Old & Bound[K].DirSet; 2573 Improved |= Old != Result.DV[K - 1].Direction; 2574 if (!Result.DV[K - 1].Direction) { 2575 Improved = false; 2576 Disproved = true; 2577 break; 2578 } 2579 } 2580 } 2581 if (Improved) 2582 ++BanerjeeSuccesses; 2583 } 2584 else { 2585 ++BanerjeeIndependence; 2586 Disproved = true; 2587 } 2588 } 2589 else { 2590 ++BanerjeeIndependence; 2591 Disproved = true; 2592 } 2593 delete [] Bound; 2594 delete [] A; 2595 delete [] B; 2596 return Disproved; 2597 } 2598 2599 2600 // Hierarchically expands the direction vector 2601 // search space, combining the directions of discovered dependences 2602 // in the DirSet field of Bound. Returns the number of distinct 2603 // dependences discovered. If the dependence is disproved, 2604 // it will return 0. 2605 unsigned DependenceInfo::exploreDirections(unsigned Level, CoefficientInfo *A, 2606 CoefficientInfo *B, BoundInfo *Bound, 2607 const SmallBitVector &Loops, 2608 unsigned &DepthExpanded, 2609 const SCEV *Delta) const { 2610 // This algorithm has worst case complexity of O(3^n), where 'n' is the number 2611 // of common loop levels. To avoid excessive compile-time, pessimize all the 2612 // results and immediately return when the number of common levels is beyond 2613 // the given threshold. 2614 if (CommonLevels > MIVMaxLevelThreshold) { 2615 LLVM_DEBUG(dbgs() << "Number of common levels exceeded the threshold. MIV " 2616 "direction exploration is terminated.\n"); 2617 for (unsigned K = 1; K <= CommonLevels; ++K) 2618 if (Loops[K]) 2619 Bound[K].DirSet = Dependence::DVEntry::ALL; 2620 return 1; 2621 } 2622 2623 if (Level > CommonLevels) { 2624 // record result 2625 LLVM_DEBUG(dbgs() << "\t["); 2626 for (unsigned K = 1; K <= CommonLevels; ++K) { 2627 if (Loops[K]) { 2628 Bound[K].DirSet |= Bound[K].Direction; 2629 #ifndef NDEBUG 2630 switch (Bound[K].Direction) { 2631 case Dependence::DVEntry::LT: 2632 LLVM_DEBUG(dbgs() << " <"); 2633 break; 2634 case Dependence::DVEntry::EQ: 2635 LLVM_DEBUG(dbgs() << " ="); 2636 break; 2637 case Dependence::DVEntry::GT: 2638 LLVM_DEBUG(dbgs() << " >"); 2639 break; 2640 case Dependence::DVEntry::ALL: 2641 LLVM_DEBUG(dbgs() << " *"); 2642 break; 2643 default: 2644 llvm_unreachable("unexpected Bound[K].Direction"); 2645 } 2646 #endif 2647 } 2648 } 2649 LLVM_DEBUG(dbgs() << " ]\n"); 2650 return 1; 2651 } 2652 if (Loops[Level]) { 2653 if (Level > DepthExpanded) { 2654 DepthExpanded = Level; 2655 // compute bounds for <, =, > at current level 2656 findBoundsLT(A, B, Bound, Level); 2657 findBoundsGT(A, B, Bound, Level); 2658 findBoundsEQ(A, B, Bound, Level); 2659 #ifndef NDEBUG 2660 LLVM_DEBUG(dbgs() << "\tBound for level = " << Level << '\n'); 2661 LLVM_DEBUG(dbgs() << "\t <\t"); 2662 if (Bound[Level].Lower[Dependence::DVEntry::LT]) 2663 LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::LT] 2664 << '\t'); 2665 else 2666 LLVM_DEBUG(dbgs() << "-inf\t"); 2667 if (Bound[Level].Upper[Dependence::DVEntry::LT]) 2668 LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::LT] 2669 << '\n'); 2670 else 2671 LLVM_DEBUG(dbgs() << "+inf\n"); 2672 LLVM_DEBUG(dbgs() << "\t =\t"); 2673 if (Bound[Level].Lower[Dependence::DVEntry::EQ]) 2674 LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::EQ] 2675 << '\t'); 2676 else 2677 LLVM_DEBUG(dbgs() << "-inf\t"); 2678 if (Bound[Level].Upper[Dependence::DVEntry::EQ]) 2679 LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::EQ] 2680 << '\n'); 2681 else 2682 LLVM_DEBUG(dbgs() << "+inf\n"); 2683 LLVM_DEBUG(dbgs() << "\t >\t"); 2684 if (Bound[Level].Lower[Dependence::DVEntry::GT]) 2685 LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::GT] 2686 << '\t'); 2687 else 2688 LLVM_DEBUG(dbgs() << "-inf\t"); 2689 if (Bound[Level].Upper[Dependence::DVEntry::GT]) 2690 LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::GT] 2691 << '\n'); 2692 else 2693 LLVM_DEBUG(dbgs() << "+inf\n"); 2694 #endif 2695 } 2696 2697 unsigned NewDeps = 0; 2698 2699 // test bounds for <, *, *, ... 2700 if (testBounds(Dependence::DVEntry::LT, Level, Bound, Delta)) 2701 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2702 Loops, DepthExpanded, Delta); 2703 2704 // Test bounds for =, *, *, ... 2705 if (testBounds(Dependence::DVEntry::EQ, Level, Bound, Delta)) 2706 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2707 Loops, DepthExpanded, Delta); 2708 2709 // test bounds for >, *, *, ... 2710 if (testBounds(Dependence::DVEntry::GT, Level, Bound, Delta)) 2711 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2712 Loops, DepthExpanded, Delta); 2713 2714 Bound[Level].Direction = Dependence::DVEntry::ALL; 2715 return NewDeps; 2716 } 2717 else 2718 return exploreDirections(Level + 1, A, B, Bound, Loops, DepthExpanded, Delta); 2719 } 2720 2721 2722 // Returns true iff the current bounds are plausible. 2723 bool DependenceInfo::testBounds(unsigned char DirKind, unsigned Level, 2724 BoundInfo *Bound, const SCEV *Delta) const { 2725 Bound[Level].Direction = DirKind; 2726 if (const SCEV *LowerBound = getLowerBound(Bound)) 2727 if (isKnownPredicate(CmpInst::ICMP_SGT, LowerBound, Delta)) 2728 return false; 2729 if (const SCEV *UpperBound = getUpperBound(Bound)) 2730 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, UpperBound)) 2731 return false; 2732 return true; 2733 } 2734 2735 2736 // Computes the upper and lower bounds for level K 2737 // using the * direction. Records them in Bound. 2738 // Wolfe gives the equations 2739 // 2740 // LB^*_k = (A^-_k - B^+_k)(U_k - L_k) + (A_k - B_k)L_k 2741 // UB^*_k = (A^+_k - B^-_k)(U_k - L_k) + (A_k - B_k)L_k 2742 // 2743 // Since we normalize loops, we can simplify these equations to 2744 // 2745 // LB^*_k = (A^-_k - B^+_k)U_k 2746 // UB^*_k = (A^+_k - B^-_k)U_k 2747 // 2748 // We must be careful to handle the case where the upper bound is unknown. 2749 // Note that the lower bound is always <= 0 2750 // and the upper bound is always >= 0. 2751 void DependenceInfo::findBoundsALL(CoefficientInfo *A, CoefficientInfo *B, 2752 BoundInfo *Bound, unsigned K) const { 2753 Bound[K].Lower[Dependence::DVEntry::ALL] = nullptr; // Default value = -infinity. 2754 Bound[K].Upper[Dependence::DVEntry::ALL] = nullptr; // Default value = +infinity. 2755 if (Bound[K].Iterations) { 2756 Bound[K].Lower[Dependence::DVEntry::ALL] = 2757 SE->getMulExpr(SE->getMinusSCEV(A[K].NegPart, B[K].PosPart), 2758 Bound[K].Iterations); 2759 Bound[K].Upper[Dependence::DVEntry::ALL] = 2760 SE->getMulExpr(SE->getMinusSCEV(A[K].PosPart, B[K].NegPart), 2761 Bound[K].Iterations); 2762 } 2763 else { 2764 // If the difference is 0, we won't need to know the number of iterations. 2765 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].NegPart, B[K].PosPart)) 2766 Bound[K].Lower[Dependence::DVEntry::ALL] = 2767 SE->getZero(A[K].Coeff->getType()); 2768 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].PosPart, B[K].NegPart)) 2769 Bound[K].Upper[Dependence::DVEntry::ALL] = 2770 SE->getZero(A[K].Coeff->getType()); 2771 } 2772 } 2773 2774 2775 // Computes the upper and lower bounds for level K 2776 // using the = direction. Records them in Bound. 2777 // Wolfe gives the equations 2778 // 2779 // LB^=_k = (A_k - B_k)^- (U_k - L_k) + (A_k - B_k)L_k 2780 // UB^=_k = (A_k - B_k)^+ (U_k - L_k) + (A_k - B_k)L_k 2781 // 2782 // Since we normalize loops, we can simplify these equations to 2783 // 2784 // LB^=_k = (A_k - B_k)^- U_k 2785 // UB^=_k = (A_k - B_k)^+ U_k 2786 // 2787 // We must be careful to handle the case where the upper bound is unknown. 2788 // Note that the lower bound is always <= 0 2789 // and the upper bound is always >= 0. 2790 void DependenceInfo::findBoundsEQ(CoefficientInfo *A, CoefficientInfo *B, 2791 BoundInfo *Bound, unsigned K) const { 2792 Bound[K].Lower[Dependence::DVEntry::EQ] = nullptr; // Default value = -infinity. 2793 Bound[K].Upper[Dependence::DVEntry::EQ] = nullptr; // Default value = +infinity. 2794 if (Bound[K].Iterations) { 2795 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); 2796 const SCEV *NegativePart = getNegativePart(Delta); 2797 Bound[K].Lower[Dependence::DVEntry::EQ] = 2798 SE->getMulExpr(NegativePart, Bound[K].Iterations); 2799 const SCEV *PositivePart = getPositivePart(Delta); 2800 Bound[K].Upper[Dependence::DVEntry::EQ] = 2801 SE->getMulExpr(PositivePart, Bound[K].Iterations); 2802 } 2803 else { 2804 // If the positive/negative part of the difference is 0, 2805 // we won't need to know the number of iterations. 2806 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); 2807 const SCEV *NegativePart = getNegativePart(Delta); 2808 if (NegativePart->isZero()) 2809 Bound[K].Lower[Dependence::DVEntry::EQ] = NegativePart; // Zero 2810 const SCEV *PositivePart = getPositivePart(Delta); 2811 if (PositivePart->isZero()) 2812 Bound[K].Upper[Dependence::DVEntry::EQ] = PositivePart; // Zero 2813 } 2814 } 2815 2816 2817 // Computes the upper and lower bounds for level K 2818 // using the < direction. Records them in Bound. 2819 // Wolfe gives the equations 2820 // 2821 // LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2822 // UB^<_k = (A^+_k - B_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2823 // 2824 // Since we normalize loops, we can simplify these equations to 2825 // 2826 // LB^<_k = (A^-_k - B_k)^- (U_k - 1) - B_k 2827 // UB^<_k = (A^+_k - B_k)^+ (U_k - 1) - B_k 2828 // 2829 // We must be careful to handle the case where the upper bound is unknown. 2830 void DependenceInfo::findBoundsLT(CoefficientInfo *A, CoefficientInfo *B, 2831 BoundInfo *Bound, unsigned K) const { 2832 Bound[K].Lower[Dependence::DVEntry::LT] = nullptr; // Default value = -infinity. 2833 Bound[K].Upper[Dependence::DVEntry::LT] = nullptr; // Default value = +infinity. 2834 if (Bound[K].Iterations) { 2835 const SCEV *Iter_1 = SE->getMinusSCEV( 2836 Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType())); 2837 const SCEV *NegPart = 2838 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); 2839 Bound[K].Lower[Dependence::DVEntry::LT] = 2840 SE->getMinusSCEV(SE->getMulExpr(NegPart, Iter_1), B[K].Coeff); 2841 const SCEV *PosPart = 2842 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); 2843 Bound[K].Upper[Dependence::DVEntry::LT] = 2844 SE->getMinusSCEV(SE->getMulExpr(PosPart, Iter_1), B[K].Coeff); 2845 } 2846 else { 2847 // If the positive/negative part of the difference is 0, 2848 // we won't need to know the number of iterations. 2849 const SCEV *NegPart = 2850 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); 2851 if (NegPart->isZero()) 2852 Bound[K].Lower[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); 2853 const SCEV *PosPart = 2854 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); 2855 if (PosPart->isZero()) 2856 Bound[K].Upper[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); 2857 } 2858 } 2859 2860 2861 // Computes the upper and lower bounds for level K 2862 // using the > direction. Records them in Bound. 2863 // Wolfe gives the equations 2864 // 2865 // LB^>_k = (A_k - B^+_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k 2866 // UB^>_k = (A_k - B^-_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k 2867 // 2868 // Since we normalize loops, we can simplify these equations to 2869 // 2870 // LB^>_k = (A_k - B^+_k)^- (U_k - 1) + A_k 2871 // UB^>_k = (A_k - B^-_k)^+ (U_k - 1) + A_k 2872 // 2873 // We must be careful to handle the case where the upper bound is unknown. 2874 void DependenceInfo::findBoundsGT(CoefficientInfo *A, CoefficientInfo *B, 2875 BoundInfo *Bound, unsigned K) const { 2876 Bound[K].Lower[Dependence::DVEntry::GT] = nullptr; // Default value = -infinity. 2877 Bound[K].Upper[Dependence::DVEntry::GT] = nullptr; // Default value = +infinity. 2878 if (Bound[K].Iterations) { 2879 const SCEV *Iter_1 = SE->getMinusSCEV( 2880 Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType())); 2881 const SCEV *NegPart = 2882 getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); 2883 Bound[K].Lower[Dependence::DVEntry::GT] = 2884 SE->getAddExpr(SE->getMulExpr(NegPart, Iter_1), A[K].Coeff); 2885 const SCEV *PosPart = 2886 getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); 2887 Bound[K].Upper[Dependence::DVEntry::GT] = 2888 SE->getAddExpr(SE->getMulExpr(PosPart, Iter_1), A[K].Coeff); 2889 } 2890 else { 2891 // If the positive/negative part of the difference is 0, 2892 // we won't need to know the number of iterations. 2893 const SCEV *NegPart = getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); 2894 if (NegPart->isZero()) 2895 Bound[K].Lower[Dependence::DVEntry::GT] = A[K].Coeff; 2896 const SCEV *PosPart = getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); 2897 if (PosPart->isZero()) 2898 Bound[K].Upper[Dependence::DVEntry::GT] = A[K].Coeff; 2899 } 2900 } 2901 2902 2903 // X^+ = max(X, 0) 2904 const SCEV *DependenceInfo::getPositivePart(const SCEV *X) const { 2905 return SE->getSMaxExpr(X, SE->getZero(X->getType())); 2906 } 2907 2908 2909 // X^- = min(X, 0) 2910 const SCEV *DependenceInfo::getNegativePart(const SCEV *X) const { 2911 return SE->getSMinExpr(X, SE->getZero(X->getType())); 2912 } 2913 2914 2915 // Walks through the subscript, 2916 // collecting each coefficient, the associated loop bounds, 2917 // and recording its positive and negative parts for later use. 2918 DependenceInfo::CoefficientInfo * 2919 DependenceInfo::collectCoeffInfo(const SCEV *Subscript, bool SrcFlag, 2920 const SCEV *&Constant) const { 2921 const SCEV *Zero = SE->getZero(Subscript->getType()); 2922 CoefficientInfo *CI = new CoefficientInfo[MaxLevels + 1]; 2923 for (unsigned K = 1; K <= MaxLevels; ++K) { 2924 CI[K].Coeff = Zero; 2925 CI[K].PosPart = Zero; 2926 CI[K].NegPart = Zero; 2927 CI[K].Iterations = nullptr; 2928 } 2929 while (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Subscript)) { 2930 const Loop *L = AddRec->getLoop(); 2931 unsigned K = SrcFlag ? mapSrcLoop(L) : mapDstLoop(L); 2932 CI[K].Coeff = AddRec->getStepRecurrence(*SE); 2933 CI[K].PosPart = getPositivePart(CI[K].Coeff); 2934 CI[K].NegPart = getNegativePart(CI[K].Coeff); 2935 CI[K].Iterations = collectUpperBound(L, Subscript->getType()); 2936 Subscript = AddRec->getStart(); 2937 } 2938 Constant = Subscript; 2939 #ifndef NDEBUG 2940 LLVM_DEBUG(dbgs() << "\tCoefficient Info\n"); 2941 for (unsigned K = 1; K <= MaxLevels; ++K) { 2942 LLVM_DEBUG(dbgs() << "\t " << K << "\t" << *CI[K].Coeff); 2943 LLVM_DEBUG(dbgs() << "\tPos Part = "); 2944 LLVM_DEBUG(dbgs() << *CI[K].PosPart); 2945 LLVM_DEBUG(dbgs() << "\tNeg Part = "); 2946 LLVM_DEBUG(dbgs() << *CI[K].NegPart); 2947 LLVM_DEBUG(dbgs() << "\tUpper Bound = "); 2948 if (CI[K].Iterations) 2949 LLVM_DEBUG(dbgs() << *CI[K].Iterations); 2950 else 2951 LLVM_DEBUG(dbgs() << "+inf"); 2952 LLVM_DEBUG(dbgs() << '\n'); 2953 } 2954 LLVM_DEBUG(dbgs() << "\t Constant = " << *Subscript << '\n'); 2955 #endif 2956 return CI; 2957 } 2958 2959 2960 // Looks through all the bounds info and 2961 // computes the lower bound given the current direction settings 2962 // at each level. If the lower bound for any level is -inf, 2963 // the result is -inf. 2964 const SCEV *DependenceInfo::getLowerBound(BoundInfo *Bound) const { 2965 const SCEV *Sum = Bound[1].Lower[Bound[1].Direction]; 2966 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { 2967 if (Bound[K].Lower[Bound[K].Direction]) 2968 Sum = SE->getAddExpr(Sum, Bound[K].Lower[Bound[K].Direction]); 2969 else 2970 Sum = nullptr; 2971 } 2972 return Sum; 2973 } 2974 2975 2976 // Looks through all the bounds info and 2977 // computes the upper bound given the current direction settings 2978 // at each level. If the upper bound at any level is +inf, 2979 // the result is +inf. 2980 const SCEV *DependenceInfo::getUpperBound(BoundInfo *Bound) const { 2981 const SCEV *Sum = Bound[1].Upper[Bound[1].Direction]; 2982 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { 2983 if (Bound[K].Upper[Bound[K].Direction]) 2984 Sum = SE->getAddExpr(Sum, Bound[K].Upper[Bound[K].Direction]); 2985 else 2986 Sum = nullptr; 2987 } 2988 return Sum; 2989 } 2990 2991 2992 //===----------------------------------------------------------------------===// 2993 // Constraint manipulation for Delta test. 2994 2995 // Given a linear SCEV, 2996 // return the coefficient (the step) 2997 // corresponding to the specified loop. 2998 // If there isn't one, return 0. 2999 // For example, given a*i + b*j + c*k, finding the coefficient 3000 // corresponding to the j loop would yield b. 3001 const SCEV *DependenceInfo::findCoefficient(const SCEV *Expr, 3002 const Loop *TargetLoop) const { 3003 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 3004 if (!AddRec) 3005 return SE->getZero(Expr->getType()); 3006 if (AddRec->getLoop() == TargetLoop) 3007 return AddRec->getStepRecurrence(*SE); 3008 return findCoefficient(AddRec->getStart(), TargetLoop); 3009 } 3010 3011 3012 // Given a linear SCEV, 3013 // return the SCEV given by zeroing out the coefficient 3014 // corresponding to the specified loop. 3015 // For example, given a*i + b*j + c*k, zeroing the coefficient 3016 // corresponding to the j loop would yield a*i + c*k. 3017 const SCEV *DependenceInfo::zeroCoefficient(const SCEV *Expr, 3018 const Loop *TargetLoop) const { 3019 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 3020 if (!AddRec) 3021 return Expr; // ignore 3022 if (AddRec->getLoop() == TargetLoop) 3023 return AddRec->getStart(); 3024 return SE->getAddRecExpr(zeroCoefficient(AddRec->getStart(), TargetLoop), 3025 AddRec->getStepRecurrence(*SE), 3026 AddRec->getLoop(), 3027 AddRec->getNoWrapFlags()); 3028 } 3029 3030 3031 // Given a linear SCEV Expr, 3032 // return the SCEV given by adding some Value to the 3033 // coefficient corresponding to the specified TargetLoop. 3034 // For example, given a*i + b*j + c*k, adding 1 to the coefficient 3035 // corresponding to the j loop would yield a*i + (b+1)*j + c*k. 3036 const SCEV *DependenceInfo::addToCoefficient(const SCEV *Expr, 3037 const Loop *TargetLoop, 3038 const SCEV *Value) const { 3039 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 3040 if (!AddRec) // create a new addRec 3041 return SE->getAddRecExpr(Expr, 3042 Value, 3043 TargetLoop, 3044 SCEV::FlagAnyWrap); // Worst case, with no info. 3045 if (AddRec->getLoop() == TargetLoop) { 3046 const SCEV *Sum = SE->getAddExpr(AddRec->getStepRecurrence(*SE), Value); 3047 if (Sum->isZero()) 3048 return AddRec->getStart(); 3049 return SE->getAddRecExpr(AddRec->getStart(), 3050 Sum, 3051 AddRec->getLoop(), 3052 AddRec->getNoWrapFlags()); 3053 } 3054 if (SE->isLoopInvariant(AddRec, TargetLoop)) 3055 return SE->getAddRecExpr(AddRec, Value, TargetLoop, SCEV::FlagAnyWrap); 3056 return SE->getAddRecExpr( 3057 addToCoefficient(AddRec->getStart(), TargetLoop, Value), 3058 AddRec->getStepRecurrence(*SE), AddRec->getLoop(), 3059 AddRec->getNoWrapFlags()); 3060 } 3061 3062 3063 // Review the constraints, looking for opportunities 3064 // to simplify a subscript pair (Src and Dst). 3065 // Return true if some simplification occurs. 3066 // If the simplification isn't exact (that is, if it is conservative 3067 // in terms of dependence), set consistent to false. 3068 // Corresponds to Figure 5 from the paper 3069 // 3070 // Practical Dependence Testing 3071 // Goff, Kennedy, Tseng 3072 // PLDI 1991 3073 bool DependenceInfo::propagate(const SCEV *&Src, const SCEV *&Dst, 3074 SmallBitVector &Loops, 3075 SmallVectorImpl<Constraint> &Constraints, 3076 bool &Consistent) { 3077 bool Result = false; 3078 for (unsigned LI : Loops.set_bits()) { 3079 LLVM_DEBUG(dbgs() << "\t Constraint[" << LI << "] is"); 3080 LLVM_DEBUG(Constraints[LI].dump(dbgs())); 3081 if (Constraints[LI].isDistance()) 3082 Result |= propagateDistance(Src, Dst, Constraints[LI], Consistent); 3083 else if (Constraints[LI].isLine()) 3084 Result |= propagateLine(Src, Dst, Constraints[LI], Consistent); 3085 else if (Constraints[LI].isPoint()) 3086 Result |= propagatePoint(Src, Dst, Constraints[LI]); 3087 } 3088 return Result; 3089 } 3090 3091 3092 // Attempt to propagate a distance 3093 // constraint into a subscript pair (Src and Dst). 3094 // Return true if some simplification occurs. 3095 // If the simplification isn't exact (that is, if it is conservative 3096 // in terms of dependence), set consistent to false. 3097 bool DependenceInfo::propagateDistance(const SCEV *&Src, const SCEV *&Dst, 3098 Constraint &CurConstraint, 3099 bool &Consistent) { 3100 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3101 LLVM_DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); 3102 const SCEV *A_K = findCoefficient(Src, CurLoop); 3103 if (A_K->isZero()) 3104 return false; 3105 const SCEV *DA_K = SE->getMulExpr(A_K, CurConstraint.getD()); 3106 Src = SE->getMinusSCEV(Src, DA_K); 3107 Src = zeroCoefficient(Src, CurLoop); 3108 LLVM_DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); 3109 LLVM_DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); 3110 Dst = addToCoefficient(Dst, CurLoop, SE->getNegativeSCEV(A_K)); 3111 LLVM_DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); 3112 if (!findCoefficient(Dst, CurLoop)->isZero()) 3113 Consistent = false; 3114 return true; 3115 } 3116 3117 3118 // Attempt to propagate a line 3119 // constraint into a subscript pair (Src and Dst). 3120 // Return true if some simplification occurs. 3121 // If the simplification isn't exact (that is, if it is conservative 3122 // in terms of dependence), set consistent to false. 3123 bool DependenceInfo::propagateLine(const SCEV *&Src, const SCEV *&Dst, 3124 Constraint &CurConstraint, 3125 bool &Consistent) { 3126 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3127 const SCEV *A = CurConstraint.getA(); 3128 const SCEV *B = CurConstraint.getB(); 3129 const SCEV *C = CurConstraint.getC(); 3130 LLVM_DEBUG(dbgs() << "\t\tA = " << *A << ", B = " << *B << ", C = " << *C 3131 << "\n"); 3132 LLVM_DEBUG(dbgs() << "\t\tSrc = " << *Src << "\n"); 3133 LLVM_DEBUG(dbgs() << "\t\tDst = " << *Dst << "\n"); 3134 if (A->isZero()) { 3135 const SCEVConstant *Bconst = dyn_cast<SCEVConstant>(B); 3136 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3137 if (!Bconst || !Cconst) return false; 3138 APInt Beta = Bconst->getAPInt(); 3139 APInt Charlie = Cconst->getAPInt(); 3140 APInt CdivB = Charlie.sdiv(Beta); 3141 assert(Charlie.srem(Beta) == 0 && "C should be evenly divisible by B"); 3142 const SCEV *AP_K = findCoefficient(Dst, CurLoop); 3143 // Src = SE->getAddExpr(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); 3144 Src = SE->getMinusSCEV(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); 3145 Dst = zeroCoefficient(Dst, CurLoop); 3146 if (!findCoefficient(Src, CurLoop)->isZero()) 3147 Consistent = false; 3148 } 3149 else if (B->isZero()) { 3150 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); 3151 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3152 if (!Aconst || !Cconst) return false; 3153 APInt Alpha = Aconst->getAPInt(); 3154 APInt Charlie = Cconst->getAPInt(); 3155 APInt CdivA = Charlie.sdiv(Alpha); 3156 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); 3157 const SCEV *A_K = findCoefficient(Src, CurLoop); 3158 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); 3159 Src = zeroCoefficient(Src, CurLoop); 3160 if (!findCoefficient(Dst, CurLoop)->isZero()) 3161 Consistent = false; 3162 } 3163 else if (isKnownPredicate(CmpInst::ICMP_EQ, A, B)) { 3164 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); 3165 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3166 if (!Aconst || !Cconst) return false; 3167 APInt Alpha = Aconst->getAPInt(); 3168 APInt Charlie = Cconst->getAPInt(); 3169 APInt CdivA = Charlie.sdiv(Alpha); 3170 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); 3171 const SCEV *A_K = findCoefficient(Src, CurLoop); 3172 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); 3173 Src = zeroCoefficient(Src, CurLoop); 3174 Dst = addToCoefficient(Dst, CurLoop, A_K); 3175 if (!findCoefficient(Dst, CurLoop)->isZero()) 3176 Consistent = false; 3177 } 3178 else { 3179 // paper is incorrect here, or perhaps just misleading 3180 const SCEV *A_K = findCoefficient(Src, CurLoop); 3181 Src = SE->getMulExpr(Src, A); 3182 Dst = SE->getMulExpr(Dst, A); 3183 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, C)); 3184 Src = zeroCoefficient(Src, CurLoop); 3185 Dst = addToCoefficient(Dst, CurLoop, SE->getMulExpr(A_K, B)); 3186 if (!findCoefficient(Dst, CurLoop)->isZero()) 3187 Consistent = false; 3188 } 3189 LLVM_DEBUG(dbgs() << "\t\tnew Src = " << *Src << "\n"); 3190 LLVM_DEBUG(dbgs() << "\t\tnew Dst = " << *Dst << "\n"); 3191 return true; 3192 } 3193 3194 3195 // Attempt to propagate a point 3196 // constraint into a subscript pair (Src and Dst). 3197 // Return true if some simplification occurs. 3198 bool DependenceInfo::propagatePoint(const SCEV *&Src, const SCEV *&Dst, 3199 Constraint &CurConstraint) { 3200 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3201 const SCEV *A_K = findCoefficient(Src, CurLoop); 3202 const SCEV *AP_K = findCoefficient(Dst, CurLoop); 3203 const SCEV *XA_K = SE->getMulExpr(A_K, CurConstraint.getX()); 3204 const SCEV *YAP_K = SE->getMulExpr(AP_K, CurConstraint.getY()); 3205 LLVM_DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); 3206 Src = SE->getAddExpr(Src, SE->getMinusSCEV(XA_K, YAP_K)); 3207 Src = zeroCoefficient(Src, CurLoop); 3208 LLVM_DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); 3209 LLVM_DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); 3210 Dst = zeroCoefficient(Dst, CurLoop); 3211 LLVM_DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); 3212 return true; 3213 } 3214 3215 3216 // Update direction vector entry based on the current constraint. 3217 void DependenceInfo::updateDirection(Dependence::DVEntry &Level, 3218 const Constraint &CurConstraint) const { 3219 LLVM_DEBUG(dbgs() << "\tUpdate direction, constraint ="); 3220 LLVM_DEBUG(CurConstraint.dump(dbgs())); 3221 if (CurConstraint.isAny()) 3222 ; // use defaults 3223 else if (CurConstraint.isDistance()) { 3224 // this one is consistent, the others aren't 3225 Level.Scalar = false; 3226 Level.Distance = CurConstraint.getD(); 3227 unsigned NewDirection = Dependence::DVEntry::NONE; 3228 if (!SE->isKnownNonZero(Level.Distance)) // if may be zero 3229 NewDirection = Dependence::DVEntry::EQ; 3230 if (!SE->isKnownNonPositive(Level.Distance)) // if may be positive 3231 NewDirection |= Dependence::DVEntry::LT; 3232 if (!SE->isKnownNonNegative(Level.Distance)) // if may be negative 3233 NewDirection |= Dependence::DVEntry::GT; 3234 Level.Direction &= NewDirection; 3235 } 3236 else if (CurConstraint.isLine()) { 3237 Level.Scalar = false; 3238 Level.Distance = nullptr; 3239 // direction should be accurate 3240 } 3241 else if (CurConstraint.isPoint()) { 3242 Level.Scalar = false; 3243 Level.Distance = nullptr; 3244 unsigned NewDirection = Dependence::DVEntry::NONE; 3245 if (!isKnownPredicate(CmpInst::ICMP_NE, 3246 CurConstraint.getY(), 3247 CurConstraint.getX())) 3248 // if X may be = Y 3249 NewDirection |= Dependence::DVEntry::EQ; 3250 if (!isKnownPredicate(CmpInst::ICMP_SLE, 3251 CurConstraint.getY(), 3252 CurConstraint.getX())) 3253 // if Y may be > X 3254 NewDirection |= Dependence::DVEntry::LT; 3255 if (!isKnownPredicate(CmpInst::ICMP_SGE, 3256 CurConstraint.getY(), 3257 CurConstraint.getX())) 3258 // if Y may be < X 3259 NewDirection |= Dependence::DVEntry::GT; 3260 Level.Direction &= NewDirection; 3261 } 3262 else 3263 llvm_unreachable("constraint has unexpected kind"); 3264 } 3265 3266 /// Check if we can delinearize the subscripts. If the SCEVs representing the 3267 /// source and destination array references are recurrences on a nested loop, 3268 /// this function flattens the nested recurrences into separate recurrences 3269 /// for each loop level. 3270 bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst, 3271 SmallVectorImpl<Subscript> &Pair) { 3272 assert(isLoadOrStore(Src) && "instruction is not load or store"); 3273 assert(isLoadOrStore(Dst) && "instruction is not load or store"); 3274 Value *SrcPtr = getLoadStorePointerOperand(Src); 3275 Value *DstPtr = getLoadStorePointerOperand(Dst); 3276 Loop *SrcLoop = LI->getLoopFor(Src->getParent()); 3277 Loop *DstLoop = LI->getLoopFor(Dst->getParent()); 3278 const SCEV *SrcAccessFn = SE->getSCEVAtScope(SrcPtr, SrcLoop); 3279 const SCEV *DstAccessFn = SE->getSCEVAtScope(DstPtr, DstLoop); 3280 const SCEVUnknown *SrcBase = 3281 dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn)); 3282 const SCEVUnknown *DstBase = 3283 dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn)); 3284 3285 if (!SrcBase || !DstBase || SrcBase != DstBase) 3286 return false; 3287 3288 SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts; 3289 3290 if (!tryDelinearizeFixedSize(Src, Dst, SrcAccessFn, DstAccessFn, 3291 SrcSubscripts, DstSubscripts) && 3292 !tryDelinearizeParametricSize(Src, Dst, SrcAccessFn, DstAccessFn, 3293 SrcSubscripts, DstSubscripts)) 3294 return false; 3295 3296 int Size = SrcSubscripts.size(); 3297 LLVM_DEBUG({ 3298 dbgs() << "\nSrcSubscripts: "; 3299 for (int I = 0; I < Size; I++) 3300 dbgs() << *SrcSubscripts[I]; 3301 dbgs() << "\nDstSubscripts: "; 3302 for (int I = 0; I < Size; I++) 3303 dbgs() << *DstSubscripts[I]; 3304 }); 3305 3306 // The delinearization transforms a single-subscript MIV dependence test into 3307 // a multi-subscript SIV dependence test that is easier to compute. So we 3308 // resize Pair to contain as many pairs of subscripts as the delinearization 3309 // has found, and then initialize the pairs following the delinearization. 3310 Pair.resize(Size); 3311 for (int I = 0; I < Size; ++I) { 3312 Pair[I].Src = SrcSubscripts[I]; 3313 Pair[I].Dst = DstSubscripts[I]; 3314 unifySubscriptType(&Pair[I]); 3315 } 3316 3317 return true; 3318 } 3319 3320 bool DependenceInfo::tryDelinearizeFixedSize( 3321 Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn, 3322 const SCEV *DstAccessFn, SmallVectorImpl<const SCEV *> &SrcSubscripts, 3323 SmallVectorImpl<const SCEV *> &DstSubscripts) { 3324 3325 Value *SrcPtr = getLoadStorePointerOperand(Src); 3326 Value *DstPtr = getLoadStorePointerOperand(Dst); 3327 const SCEVUnknown *SrcBase = 3328 dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn)); 3329 const SCEVUnknown *DstBase = 3330 dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn)); 3331 assert(SrcBase && DstBase && SrcBase == DstBase && 3332 "expected src and dst scev unknowns to be equal"); 3333 3334 // Check the simple case where the array dimensions are fixed size. 3335 auto *SrcGEP = dyn_cast<GetElementPtrInst>(SrcPtr); 3336 auto *DstGEP = dyn_cast<GetElementPtrInst>(DstPtr); 3337 if (!SrcGEP || !DstGEP) 3338 return false; 3339 3340 SmallVector<int, 4> SrcSizes, DstSizes; 3341 SE->getIndexExpressionsFromGEP(SrcGEP, SrcSubscripts, SrcSizes); 3342 SE->getIndexExpressionsFromGEP(DstGEP, DstSubscripts, DstSizes); 3343 3344 // Check that the two size arrays are non-empty and equal in length and 3345 // value. 3346 if (SrcSizes.empty() || SrcSubscripts.size() <= 1 || 3347 SrcSizes.size() != DstSizes.size() || 3348 !std::equal(SrcSizes.begin(), SrcSizes.end(), DstSizes.begin())) { 3349 SrcSubscripts.clear(); 3350 DstSubscripts.clear(); 3351 return false; 3352 } 3353 3354 Value *SrcBasePtr = SrcGEP->getOperand(0); 3355 Value *DstBasePtr = DstGEP->getOperand(0); 3356 while (auto *PCast = dyn_cast<BitCastInst>(SrcBasePtr)) 3357 SrcBasePtr = PCast->getOperand(0); 3358 while (auto *PCast = dyn_cast<BitCastInst>(DstBasePtr)) 3359 DstBasePtr = PCast->getOperand(0); 3360 3361 // Check that for identical base pointers we do not miss index offsets 3362 // that have been added before this GEP is applied. 3363 if (SrcBasePtr != SrcBase->getValue() || DstBasePtr != DstBase->getValue()) { 3364 SrcSubscripts.clear(); 3365 DstSubscripts.clear(); 3366 return false; 3367 } 3368 3369 assert(SrcSubscripts.size() == DstSubscripts.size() && 3370 SrcSubscripts.size() == SrcSizes.size() + 1 && 3371 "Expected equal number of entries in the list of sizes and " 3372 "subscripts."); 3373 3374 // In general we cannot safely assume that the subscripts recovered from GEPs 3375 // are in the range of values defined for their corresponding array 3376 // dimensions. For example some C language usage/interpretation make it 3377 // impossible to verify this at compile-time. As such we can only delinearize 3378 // iff the subscripts are positive and are less than the range of the 3379 // dimension. 3380 if (!DisableDelinearizationChecks) { 3381 auto AllIndiciesInRange = [&](SmallVector<int, 4> &DimensionSizes, 3382 SmallVectorImpl<const SCEV *> &Subscripts, 3383 Value *Ptr) { 3384 size_t SSize = Subscripts.size(); 3385 for (size_t I = 1; I < SSize; ++I) { 3386 const SCEV *S = Subscripts[I]; 3387 if (!isKnownNonNegative(S, Ptr)) 3388 return false; 3389 if (auto *SType = dyn_cast<IntegerType>(S->getType())) { 3390 const SCEV *Range = SE->getConstant( 3391 ConstantInt::get(SType, DimensionSizes[I - 1], false)); 3392 if (!isKnownLessThan(S, Range)) 3393 return false; 3394 } 3395 } 3396 return true; 3397 }; 3398 3399 if (!AllIndiciesInRange(SrcSizes, SrcSubscripts, SrcPtr) || 3400 !AllIndiciesInRange(DstSizes, DstSubscripts, DstPtr)) { 3401 SrcSubscripts.clear(); 3402 DstSubscripts.clear(); 3403 return false; 3404 } 3405 } 3406 LLVM_DEBUG({ 3407 dbgs() << "Delinearized subscripts of fixed-size array\n" 3408 << "SrcGEP:" << *SrcGEP << "\n" 3409 << "DstGEP:" << *DstGEP << "\n"; 3410 }); 3411 return true; 3412 } 3413 3414 bool DependenceInfo::tryDelinearizeParametricSize( 3415 Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn, 3416 const SCEV *DstAccessFn, SmallVectorImpl<const SCEV *> &SrcSubscripts, 3417 SmallVectorImpl<const SCEV *> &DstSubscripts) { 3418 3419 Value *SrcPtr = getLoadStorePointerOperand(Src); 3420 Value *DstPtr = getLoadStorePointerOperand(Dst); 3421 const SCEVUnknown *SrcBase = 3422 dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn)); 3423 const SCEVUnknown *DstBase = 3424 dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn)); 3425 assert(SrcBase && DstBase && SrcBase == DstBase && 3426 "expected src and dst scev unknowns to be equal"); 3427 3428 const SCEV *ElementSize = SE->getElementSize(Src); 3429 if (ElementSize != SE->getElementSize(Dst)) 3430 return false; 3431 3432 const SCEV *SrcSCEV = SE->getMinusSCEV(SrcAccessFn, SrcBase); 3433 const SCEV *DstSCEV = SE->getMinusSCEV(DstAccessFn, DstBase); 3434 3435 const SCEVAddRecExpr *SrcAR = dyn_cast<SCEVAddRecExpr>(SrcSCEV); 3436 const SCEVAddRecExpr *DstAR = dyn_cast<SCEVAddRecExpr>(DstSCEV); 3437 if (!SrcAR || !DstAR || !SrcAR->isAffine() || !DstAR->isAffine()) 3438 return false; 3439 3440 // First step: collect parametric terms in both array references. 3441 SmallVector<const SCEV *, 4> Terms; 3442 SE->collectParametricTerms(SrcAR, Terms); 3443 SE->collectParametricTerms(DstAR, Terms); 3444 3445 // Second step: find subscript sizes. 3446 SmallVector<const SCEV *, 4> Sizes; 3447 SE->findArrayDimensions(Terms, Sizes, ElementSize); 3448 3449 // Third step: compute the access functions for each subscript. 3450 SE->computeAccessFunctions(SrcAR, SrcSubscripts, Sizes); 3451 SE->computeAccessFunctions(DstAR, DstSubscripts, Sizes); 3452 3453 // Fail when there is only a subscript: that's a linearized access function. 3454 if (SrcSubscripts.size() < 2 || DstSubscripts.size() < 2 || 3455 SrcSubscripts.size() != DstSubscripts.size()) 3456 return false; 3457 3458 size_t Size = SrcSubscripts.size(); 3459 3460 // Statically check that the array bounds are in-range. The first subscript we 3461 // don't have a size for and it cannot overflow into another subscript, so is 3462 // always safe. The others need to be 0 <= subscript[i] < bound, for both src 3463 // and dst. 3464 // FIXME: It may be better to record these sizes and add them as constraints 3465 // to the dependency checks. 3466 if (!DisableDelinearizationChecks) 3467 for (size_t I = 1; I < Size; ++I) { 3468 if (!isKnownNonNegative(SrcSubscripts[I], SrcPtr)) 3469 return false; 3470 3471 if (!isKnownLessThan(SrcSubscripts[I], Sizes[I - 1])) 3472 return false; 3473 3474 if (!isKnownNonNegative(DstSubscripts[I], DstPtr)) 3475 return false; 3476 3477 if (!isKnownLessThan(DstSubscripts[I], Sizes[I - 1])) 3478 return false; 3479 } 3480 3481 return true; 3482 } 3483 3484 //===----------------------------------------------------------------------===// 3485 3486 #ifndef NDEBUG 3487 // For debugging purposes, dump a small bit vector to dbgs(). 3488 static void dumpSmallBitVector(SmallBitVector &BV) { 3489 dbgs() << "{"; 3490 for (unsigned VI : BV.set_bits()) { 3491 dbgs() << VI; 3492 if (BV.find_next(VI) >= 0) 3493 dbgs() << ' '; 3494 } 3495 dbgs() << "}\n"; 3496 } 3497 #endif 3498 3499 bool DependenceInfo::invalidate(Function &F, const PreservedAnalyses &PA, 3500 FunctionAnalysisManager::Invalidator &Inv) { 3501 // Check if the analysis itself has been invalidated. 3502 auto PAC = PA.getChecker<DependenceAnalysis>(); 3503 if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>()) 3504 return true; 3505 3506 // Check transitive dependencies. 3507 return Inv.invalidate<AAManager>(F, PA) || 3508 Inv.invalidate<ScalarEvolutionAnalysis>(F, PA) || 3509 Inv.invalidate<LoopAnalysis>(F, PA); 3510 } 3511 3512 // depends - 3513 // Returns NULL if there is no dependence. 3514 // Otherwise, return a Dependence with as many details as possible. 3515 // Corresponds to Section 3.1 in the paper 3516 // 3517 // Practical Dependence Testing 3518 // Goff, Kennedy, Tseng 3519 // PLDI 1991 3520 // 3521 // Care is required to keep the routine below, getSplitIteration(), 3522 // up to date with respect to this routine. 3523 std::unique_ptr<Dependence> 3524 DependenceInfo::depends(Instruction *Src, Instruction *Dst, 3525 bool PossiblyLoopIndependent) { 3526 if (Src == Dst) 3527 PossiblyLoopIndependent = false; 3528 3529 if (!(Src->mayReadOrWriteMemory() && Dst->mayReadOrWriteMemory())) 3530 // if both instructions don't reference memory, there's no dependence 3531 return nullptr; 3532 3533 if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) { 3534 // can only analyze simple loads and stores, i.e., no calls, invokes, etc. 3535 LLVM_DEBUG(dbgs() << "can only handle simple loads and stores\n"); 3536 return std::make_unique<Dependence>(Src, Dst); 3537 } 3538 3539 assert(isLoadOrStore(Src) && "instruction is not load or store"); 3540 assert(isLoadOrStore(Dst) && "instruction is not load or store"); 3541 Value *SrcPtr = getLoadStorePointerOperand(Src); 3542 Value *DstPtr = getLoadStorePointerOperand(Dst); 3543 3544 switch (underlyingObjectsAlias(AA, F->getParent()->getDataLayout(), 3545 MemoryLocation::get(Dst), 3546 MemoryLocation::get(Src))) { 3547 case AliasResult::MayAlias: 3548 case AliasResult::PartialAlias: 3549 // cannot analyse objects if we don't understand their aliasing. 3550 LLVM_DEBUG(dbgs() << "can't analyze may or partial alias\n"); 3551 return std::make_unique<Dependence>(Src, Dst); 3552 case AliasResult::NoAlias: 3553 // If the objects noalias, they are distinct, accesses are independent. 3554 LLVM_DEBUG(dbgs() << "no alias\n"); 3555 return nullptr; 3556 case AliasResult::MustAlias: 3557 break; // The underlying objects alias; test accesses for dependence. 3558 } 3559 3560 // establish loop nesting levels 3561 establishNestingLevels(Src, Dst); 3562 LLVM_DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n"); 3563 LLVM_DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n"); 3564 3565 FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels); 3566 ++TotalArrayPairs; 3567 3568 unsigned Pairs = 1; 3569 SmallVector<Subscript, 2> Pair(Pairs); 3570 const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); 3571 const SCEV *DstSCEV = SE->getSCEV(DstPtr); 3572 LLVM_DEBUG(dbgs() << " SrcSCEV = " << *SrcSCEV << "\n"); 3573 LLVM_DEBUG(dbgs() << " DstSCEV = " << *DstSCEV << "\n"); 3574 if (SE->getPointerBase(SrcSCEV) != SE->getPointerBase(DstSCEV)) { 3575 // If two pointers have different bases, trying to analyze indexes won't 3576 // work; we can't compare them to each other. This can happen, for example, 3577 // if one is produced by an LCSSA PHI node. 3578 // 3579 // We check this upfront so we don't crash in cases where getMinusSCEV() 3580 // returns a SCEVCouldNotCompute. 3581 LLVM_DEBUG(dbgs() << "can't analyze SCEV with different pointer base\n"); 3582 return std::make_unique<Dependence>(Src, Dst); 3583 } 3584 Pair[0].Src = SrcSCEV; 3585 Pair[0].Dst = DstSCEV; 3586 3587 if (Delinearize) { 3588 if (tryDelinearize(Src, Dst, Pair)) { 3589 LLVM_DEBUG(dbgs() << " delinearized\n"); 3590 Pairs = Pair.size(); 3591 } 3592 } 3593 3594 for (unsigned P = 0; P < Pairs; ++P) { 3595 Pair[P].Loops.resize(MaxLevels + 1); 3596 Pair[P].GroupLoops.resize(MaxLevels + 1); 3597 Pair[P].Group.resize(Pairs); 3598 removeMatchingExtensions(&Pair[P]); 3599 Pair[P].Classification = 3600 classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), 3601 Pair[P].Dst, LI->getLoopFor(Dst->getParent()), 3602 Pair[P].Loops); 3603 Pair[P].GroupLoops = Pair[P].Loops; 3604 Pair[P].Group.set(P); 3605 LLVM_DEBUG(dbgs() << " subscript " << P << "\n"); 3606 LLVM_DEBUG(dbgs() << "\tsrc = " << *Pair[P].Src << "\n"); 3607 LLVM_DEBUG(dbgs() << "\tdst = " << *Pair[P].Dst << "\n"); 3608 LLVM_DEBUG(dbgs() << "\tclass = " << Pair[P].Classification << "\n"); 3609 LLVM_DEBUG(dbgs() << "\tloops = "); 3610 LLVM_DEBUG(dumpSmallBitVector(Pair[P].Loops)); 3611 } 3612 3613 SmallBitVector Separable(Pairs); 3614 SmallBitVector Coupled(Pairs); 3615 3616 // Partition subscripts into separable and minimally-coupled groups 3617 // Algorithm in paper is algorithmically better; 3618 // this may be faster in practice. Check someday. 3619 // 3620 // Here's an example of how it works. Consider this code: 3621 // 3622 // for (i = ...) { 3623 // for (j = ...) { 3624 // for (k = ...) { 3625 // for (l = ...) { 3626 // for (m = ...) { 3627 // A[i][j][k][m] = ...; 3628 // ... = A[0][j][l][i + j]; 3629 // } 3630 // } 3631 // } 3632 // } 3633 // } 3634 // 3635 // There are 4 subscripts here: 3636 // 0 [i] and [0] 3637 // 1 [j] and [j] 3638 // 2 [k] and [l] 3639 // 3 [m] and [i + j] 3640 // 3641 // We've already classified each subscript pair as ZIV, SIV, etc., 3642 // and collected all the loops mentioned by pair P in Pair[P].Loops. 3643 // In addition, we've initialized Pair[P].GroupLoops to Pair[P].Loops 3644 // and set Pair[P].Group = {P}. 3645 // 3646 // Src Dst Classification Loops GroupLoops Group 3647 // 0 [i] [0] SIV {1} {1} {0} 3648 // 1 [j] [j] SIV {2} {2} {1} 3649 // 2 [k] [l] RDIV {3,4} {3,4} {2} 3650 // 3 [m] [i + j] MIV {1,2,5} {1,2,5} {3} 3651 // 3652 // For each subscript SI 0 .. 3, we consider each remaining subscript, SJ. 3653 // So, 0 is compared against 1, 2, and 3; 1 is compared against 2 and 3, etc. 3654 // 3655 // We begin by comparing 0 and 1. The intersection of the GroupLoops is empty. 3656 // Next, 0 and 2. Again, the intersection of their GroupLoops is empty. 3657 // Next 0 and 3. The intersection of their GroupLoop = {1}, not empty, 3658 // so Pair[3].Group = {0,3} and Done = false (that is, 0 will not be added 3659 // to either Separable or Coupled). 3660 // 3661 // Next, we consider 1 and 2. The intersection of the GroupLoops is empty. 3662 // Next, 1 and 3. The intersection of their GroupLoops = {2}, not empty, 3663 // so Pair[3].Group = {0, 1, 3} and Done = false. 3664 // 3665 // Next, we compare 2 against 3. The intersection of the GroupLoops is empty. 3666 // Since Done remains true, we add 2 to the set of Separable pairs. 3667 // 3668 // Finally, we consider 3. There's nothing to compare it with, 3669 // so Done remains true and we add it to the Coupled set. 3670 // Pair[3].Group = {0, 1, 3} and GroupLoops = {1, 2, 5}. 3671 // 3672 // In the end, we've got 1 separable subscript and 1 coupled group. 3673 for (unsigned SI = 0; SI < Pairs; ++SI) { 3674 if (Pair[SI].Classification == Subscript::NonLinear) { 3675 // ignore these, but collect loops for later 3676 ++NonlinearSubscriptPairs; 3677 collectCommonLoops(Pair[SI].Src, 3678 LI->getLoopFor(Src->getParent()), 3679 Pair[SI].Loops); 3680 collectCommonLoops(Pair[SI].Dst, 3681 LI->getLoopFor(Dst->getParent()), 3682 Pair[SI].Loops); 3683 Result.Consistent = false; 3684 } else if (Pair[SI].Classification == Subscript::ZIV) { 3685 // always separable 3686 Separable.set(SI); 3687 } 3688 else { 3689 // SIV, RDIV, or MIV, so check for coupled group 3690 bool Done = true; 3691 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { 3692 SmallBitVector Intersection = Pair[SI].GroupLoops; 3693 Intersection &= Pair[SJ].GroupLoops; 3694 if (Intersection.any()) { 3695 // accumulate set of all the loops in group 3696 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; 3697 // accumulate set of all subscripts in group 3698 Pair[SJ].Group |= Pair[SI].Group; 3699 Done = false; 3700 } 3701 } 3702 if (Done) { 3703 if (Pair[SI].Group.count() == 1) { 3704 Separable.set(SI); 3705 ++SeparableSubscriptPairs; 3706 } 3707 else { 3708 Coupled.set(SI); 3709 ++CoupledSubscriptPairs; 3710 } 3711 } 3712 } 3713 } 3714 3715 LLVM_DEBUG(dbgs() << " Separable = "); 3716 LLVM_DEBUG(dumpSmallBitVector(Separable)); 3717 LLVM_DEBUG(dbgs() << " Coupled = "); 3718 LLVM_DEBUG(dumpSmallBitVector(Coupled)); 3719 3720 Constraint NewConstraint; 3721 NewConstraint.setAny(SE); 3722 3723 // test separable subscripts 3724 for (unsigned SI : Separable.set_bits()) { 3725 LLVM_DEBUG(dbgs() << "testing subscript " << SI); 3726 switch (Pair[SI].Classification) { 3727 case Subscript::ZIV: 3728 LLVM_DEBUG(dbgs() << ", ZIV\n"); 3729 if (testZIV(Pair[SI].Src, Pair[SI].Dst, Result)) 3730 return nullptr; 3731 break; 3732 case Subscript::SIV: { 3733 LLVM_DEBUG(dbgs() << ", SIV\n"); 3734 unsigned Level; 3735 const SCEV *SplitIter = nullptr; 3736 if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level, Result, NewConstraint, 3737 SplitIter)) 3738 return nullptr; 3739 break; 3740 } 3741 case Subscript::RDIV: 3742 LLVM_DEBUG(dbgs() << ", RDIV\n"); 3743 if (testRDIV(Pair[SI].Src, Pair[SI].Dst, Result)) 3744 return nullptr; 3745 break; 3746 case Subscript::MIV: 3747 LLVM_DEBUG(dbgs() << ", MIV\n"); 3748 if (testMIV(Pair[SI].Src, Pair[SI].Dst, Pair[SI].Loops, Result)) 3749 return nullptr; 3750 break; 3751 default: 3752 llvm_unreachable("subscript has unexpected classification"); 3753 } 3754 } 3755 3756 if (Coupled.count()) { 3757 // test coupled subscript groups 3758 LLVM_DEBUG(dbgs() << "starting on coupled subscripts\n"); 3759 LLVM_DEBUG(dbgs() << "MaxLevels + 1 = " << MaxLevels + 1 << "\n"); 3760 SmallVector<Constraint, 4> Constraints(MaxLevels + 1); 3761 for (unsigned II = 0; II <= MaxLevels; ++II) 3762 Constraints[II].setAny(SE); 3763 for (unsigned SI : Coupled.set_bits()) { 3764 LLVM_DEBUG(dbgs() << "testing subscript group " << SI << " { "); 3765 SmallBitVector Group(Pair[SI].Group); 3766 SmallBitVector Sivs(Pairs); 3767 SmallBitVector Mivs(Pairs); 3768 SmallBitVector ConstrainedLevels(MaxLevels + 1); 3769 SmallVector<Subscript *, 4> PairsInGroup; 3770 for (unsigned SJ : Group.set_bits()) { 3771 LLVM_DEBUG(dbgs() << SJ << " "); 3772 if (Pair[SJ].Classification == Subscript::SIV) 3773 Sivs.set(SJ); 3774 else 3775 Mivs.set(SJ); 3776 PairsInGroup.push_back(&Pair[SJ]); 3777 } 3778 unifySubscriptType(PairsInGroup); 3779 LLVM_DEBUG(dbgs() << "}\n"); 3780 while (Sivs.any()) { 3781 bool Changed = false; 3782 for (unsigned SJ : Sivs.set_bits()) { 3783 LLVM_DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n"); 3784 // SJ is an SIV subscript that's part of the current coupled group 3785 unsigned Level; 3786 const SCEV *SplitIter = nullptr; 3787 LLVM_DEBUG(dbgs() << "SIV\n"); 3788 if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, Result, NewConstraint, 3789 SplitIter)) 3790 return nullptr; 3791 ConstrainedLevels.set(Level); 3792 if (intersectConstraints(&Constraints[Level], &NewConstraint)) { 3793 if (Constraints[Level].isEmpty()) { 3794 ++DeltaIndependence; 3795 return nullptr; 3796 } 3797 Changed = true; 3798 } 3799 Sivs.reset(SJ); 3800 } 3801 if (Changed) { 3802 // propagate, possibly creating new SIVs and ZIVs 3803 LLVM_DEBUG(dbgs() << " propagating\n"); 3804 LLVM_DEBUG(dbgs() << "\tMivs = "); 3805 LLVM_DEBUG(dumpSmallBitVector(Mivs)); 3806 for (unsigned SJ : Mivs.set_bits()) { 3807 // SJ is an MIV subscript that's part of the current coupled group 3808 LLVM_DEBUG(dbgs() << "\tSJ = " << SJ << "\n"); 3809 if (propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, 3810 Constraints, Result.Consistent)) { 3811 LLVM_DEBUG(dbgs() << "\t Changed\n"); 3812 ++DeltaPropagations; 3813 Pair[SJ].Classification = 3814 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), 3815 Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), 3816 Pair[SJ].Loops); 3817 switch (Pair[SJ].Classification) { 3818 case Subscript::ZIV: 3819 LLVM_DEBUG(dbgs() << "ZIV\n"); 3820 if (testZIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) 3821 return nullptr; 3822 Mivs.reset(SJ); 3823 break; 3824 case Subscript::SIV: 3825 Sivs.set(SJ); 3826 Mivs.reset(SJ); 3827 break; 3828 case Subscript::RDIV: 3829 case Subscript::MIV: 3830 break; 3831 default: 3832 llvm_unreachable("bad subscript classification"); 3833 } 3834 } 3835 } 3836 } 3837 } 3838 3839 // test & propagate remaining RDIVs 3840 for (unsigned SJ : Mivs.set_bits()) { 3841 if (Pair[SJ].Classification == Subscript::RDIV) { 3842 LLVM_DEBUG(dbgs() << "RDIV test\n"); 3843 if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) 3844 return nullptr; 3845 // I don't yet understand how to propagate RDIV results 3846 Mivs.reset(SJ); 3847 } 3848 } 3849 3850 // test remaining MIVs 3851 // This code is temporary. 3852 // Better to somehow test all remaining subscripts simultaneously. 3853 for (unsigned SJ : Mivs.set_bits()) { 3854 if (Pair[SJ].Classification == Subscript::MIV) { 3855 LLVM_DEBUG(dbgs() << "MIV test\n"); 3856 if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Result)) 3857 return nullptr; 3858 } 3859 else 3860 llvm_unreachable("expected only MIV subscripts at this point"); 3861 } 3862 3863 // update Result.DV from constraint vector 3864 LLVM_DEBUG(dbgs() << " updating\n"); 3865 for (unsigned SJ : ConstrainedLevels.set_bits()) { 3866 if (SJ > CommonLevels) 3867 break; 3868 updateDirection(Result.DV[SJ - 1], Constraints[SJ]); 3869 if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE) 3870 return nullptr; 3871 } 3872 } 3873 } 3874 3875 // Make sure the Scalar flags are set correctly. 3876 SmallBitVector CompleteLoops(MaxLevels + 1); 3877 for (unsigned SI = 0; SI < Pairs; ++SI) 3878 CompleteLoops |= Pair[SI].Loops; 3879 for (unsigned II = 1; II <= CommonLevels; ++II) 3880 if (CompleteLoops[II]) 3881 Result.DV[II - 1].Scalar = false; 3882 3883 if (PossiblyLoopIndependent) { 3884 // Make sure the LoopIndependent flag is set correctly. 3885 // All directions must include equal, otherwise no 3886 // loop-independent dependence is possible. 3887 for (unsigned II = 1; II <= CommonLevels; ++II) { 3888 if (!(Result.getDirection(II) & Dependence::DVEntry::EQ)) { 3889 Result.LoopIndependent = false; 3890 break; 3891 } 3892 } 3893 } 3894 else { 3895 // On the other hand, if all directions are equal and there's no 3896 // loop-independent dependence possible, then no dependence exists. 3897 bool AllEqual = true; 3898 for (unsigned II = 1; II <= CommonLevels; ++II) { 3899 if (Result.getDirection(II) != Dependence::DVEntry::EQ) { 3900 AllEqual = false; 3901 break; 3902 } 3903 } 3904 if (AllEqual) 3905 return nullptr; 3906 } 3907 3908 return std::make_unique<FullDependence>(std::move(Result)); 3909 } 3910 3911 //===----------------------------------------------------------------------===// 3912 // getSplitIteration - 3913 // Rather than spend rarely-used space recording the splitting iteration 3914 // during the Weak-Crossing SIV test, we re-compute it on demand. 3915 // The re-computation is basically a repeat of the entire dependence test, 3916 // though simplified since we know that the dependence exists. 3917 // It's tedious, since we must go through all propagations, etc. 3918 // 3919 // Care is required to keep this code up to date with respect to the routine 3920 // above, depends(). 3921 // 3922 // Generally, the dependence analyzer will be used to build 3923 // a dependence graph for a function (basically a map from instructions 3924 // to dependences). Looking for cycles in the graph shows us loops 3925 // that cannot be trivially vectorized/parallelized. 3926 // 3927 // We can try to improve the situation by examining all the dependences 3928 // that make up the cycle, looking for ones we can break. 3929 // Sometimes, peeling the first or last iteration of a loop will break 3930 // dependences, and we've got flags for those possibilities. 3931 // Sometimes, splitting a loop at some other iteration will do the trick, 3932 // and we've got a flag for that case. Rather than waste the space to 3933 // record the exact iteration (since we rarely know), we provide 3934 // a method that calculates the iteration. It's a drag that it must work 3935 // from scratch, but wonderful in that it's possible. 3936 // 3937 // Here's an example: 3938 // 3939 // for (i = 0; i < 10; i++) 3940 // A[i] = ... 3941 // ... = A[11 - i] 3942 // 3943 // There's a loop-carried flow dependence from the store to the load, 3944 // found by the weak-crossing SIV test. The dependence will have a flag, 3945 // indicating that the dependence can be broken by splitting the loop. 3946 // Calling getSplitIteration will return 5. 3947 // Splitting the loop breaks the dependence, like so: 3948 // 3949 // for (i = 0; i <= 5; i++) 3950 // A[i] = ... 3951 // ... = A[11 - i] 3952 // for (i = 6; i < 10; i++) 3953 // A[i] = ... 3954 // ... = A[11 - i] 3955 // 3956 // breaks the dependence and allows us to vectorize/parallelize 3957 // both loops. 3958 const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep, 3959 unsigned SplitLevel) { 3960 assert(Dep.isSplitable(SplitLevel) && 3961 "Dep should be splitable at SplitLevel"); 3962 Instruction *Src = Dep.getSrc(); 3963 Instruction *Dst = Dep.getDst(); 3964 assert(Src->mayReadFromMemory() || Src->mayWriteToMemory()); 3965 assert(Dst->mayReadFromMemory() || Dst->mayWriteToMemory()); 3966 assert(isLoadOrStore(Src)); 3967 assert(isLoadOrStore(Dst)); 3968 Value *SrcPtr = getLoadStorePointerOperand(Src); 3969 Value *DstPtr = getLoadStorePointerOperand(Dst); 3970 assert(underlyingObjectsAlias( 3971 AA, F->getParent()->getDataLayout(), MemoryLocation::get(Dst), 3972 MemoryLocation::get(Src)) == AliasResult::MustAlias); 3973 3974 // establish loop nesting levels 3975 establishNestingLevels(Src, Dst); 3976 3977 FullDependence Result(Src, Dst, false, CommonLevels); 3978 3979 unsigned Pairs = 1; 3980 SmallVector<Subscript, 2> Pair(Pairs); 3981 const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); 3982 const SCEV *DstSCEV = SE->getSCEV(DstPtr); 3983 Pair[0].Src = SrcSCEV; 3984 Pair[0].Dst = DstSCEV; 3985 3986 if (Delinearize) { 3987 if (tryDelinearize(Src, Dst, Pair)) { 3988 LLVM_DEBUG(dbgs() << " delinearized\n"); 3989 Pairs = Pair.size(); 3990 } 3991 } 3992 3993 for (unsigned P = 0; P < Pairs; ++P) { 3994 Pair[P].Loops.resize(MaxLevels + 1); 3995 Pair[P].GroupLoops.resize(MaxLevels + 1); 3996 Pair[P].Group.resize(Pairs); 3997 removeMatchingExtensions(&Pair[P]); 3998 Pair[P].Classification = 3999 classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), 4000 Pair[P].Dst, LI->getLoopFor(Dst->getParent()), 4001 Pair[P].Loops); 4002 Pair[P].GroupLoops = Pair[P].Loops; 4003 Pair[P].Group.set(P); 4004 } 4005 4006 SmallBitVector Separable(Pairs); 4007 SmallBitVector Coupled(Pairs); 4008 4009 // partition subscripts into separable and minimally-coupled groups 4010 for (unsigned SI = 0; SI < Pairs; ++SI) { 4011 if (Pair[SI].Classification == Subscript::NonLinear) { 4012 // ignore these, but collect loops for later 4013 collectCommonLoops(Pair[SI].Src, 4014 LI->getLoopFor(Src->getParent()), 4015 Pair[SI].Loops); 4016 collectCommonLoops(Pair[SI].Dst, 4017 LI->getLoopFor(Dst->getParent()), 4018 Pair[SI].Loops); 4019 Result.Consistent = false; 4020 } 4021 else if (Pair[SI].Classification == Subscript::ZIV) 4022 Separable.set(SI); 4023 else { 4024 // SIV, RDIV, or MIV, so check for coupled group 4025 bool Done = true; 4026 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { 4027 SmallBitVector Intersection = Pair[SI].GroupLoops; 4028 Intersection &= Pair[SJ].GroupLoops; 4029 if (Intersection.any()) { 4030 // accumulate set of all the loops in group 4031 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; 4032 // accumulate set of all subscripts in group 4033 Pair[SJ].Group |= Pair[SI].Group; 4034 Done = false; 4035 } 4036 } 4037 if (Done) { 4038 if (Pair[SI].Group.count() == 1) 4039 Separable.set(SI); 4040 else 4041 Coupled.set(SI); 4042 } 4043 } 4044 } 4045 4046 Constraint NewConstraint; 4047 NewConstraint.setAny(SE); 4048 4049 // test separable subscripts 4050 for (unsigned SI : Separable.set_bits()) { 4051 switch (Pair[SI].Classification) { 4052 case Subscript::SIV: { 4053 unsigned Level; 4054 const SCEV *SplitIter = nullptr; 4055 (void) testSIV(Pair[SI].Src, Pair[SI].Dst, Level, 4056 Result, NewConstraint, SplitIter); 4057 if (Level == SplitLevel) { 4058 assert(SplitIter != nullptr); 4059 return SplitIter; 4060 } 4061 break; 4062 } 4063 case Subscript::ZIV: 4064 case Subscript::RDIV: 4065 case Subscript::MIV: 4066 break; 4067 default: 4068 llvm_unreachable("subscript has unexpected classification"); 4069 } 4070 } 4071 4072 if (Coupled.count()) { 4073 // test coupled subscript groups 4074 SmallVector<Constraint, 4> Constraints(MaxLevels + 1); 4075 for (unsigned II = 0; II <= MaxLevels; ++II) 4076 Constraints[II].setAny(SE); 4077 for (unsigned SI : Coupled.set_bits()) { 4078 SmallBitVector Group(Pair[SI].Group); 4079 SmallBitVector Sivs(Pairs); 4080 SmallBitVector Mivs(Pairs); 4081 SmallBitVector ConstrainedLevels(MaxLevels + 1); 4082 for (unsigned SJ : Group.set_bits()) { 4083 if (Pair[SJ].Classification == Subscript::SIV) 4084 Sivs.set(SJ); 4085 else 4086 Mivs.set(SJ); 4087 } 4088 while (Sivs.any()) { 4089 bool Changed = false; 4090 for (unsigned SJ : Sivs.set_bits()) { 4091 // SJ is an SIV subscript that's part of the current coupled group 4092 unsigned Level; 4093 const SCEV *SplitIter = nullptr; 4094 (void) testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, 4095 Result, NewConstraint, SplitIter); 4096 if (Level == SplitLevel && SplitIter) 4097 return SplitIter; 4098 ConstrainedLevels.set(Level); 4099 if (intersectConstraints(&Constraints[Level], &NewConstraint)) 4100 Changed = true; 4101 Sivs.reset(SJ); 4102 } 4103 if (Changed) { 4104 // propagate, possibly creating new SIVs and ZIVs 4105 for (unsigned SJ : Mivs.set_bits()) { 4106 // SJ is an MIV subscript that's part of the current coupled group 4107 if (propagate(Pair[SJ].Src, Pair[SJ].Dst, 4108 Pair[SJ].Loops, Constraints, Result.Consistent)) { 4109 Pair[SJ].Classification = 4110 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), 4111 Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), 4112 Pair[SJ].Loops); 4113 switch (Pair[SJ].Classification) { 4114 case Subscript::ZIV: 4115 Mivs.reset(SJ); 4116 break; 4117 case Subscript::SIV: 4118 Sivs.set(SJ); 4119 Mivs.reset(SJ); 4120 break; 4121 case Subscript::RDIV: 4122 case Subscript::MIV: 4123 break; 4124 default: 4125 llvm_unreachable("bad subscript classification"); 4126 } 4127 } 4128 } 4129 } 4130 } 4131 } 4132 } 4133 llvm_unreachable("somehow reached end of routine"); 4134 return nullptr; 4135 } 4136