1 //===-- DependenceAnalysis.cpp - DA Implementation --------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // DependenceAnalysis is an LLVM pass that analyses dependences between memory 11 // accesses. Currently, it is an (incomplete) implementation of the approach 12 // described in 13 // 14 // Practical Dependence Testing 15 // Goff, Kennedy, Tseng 16 // PLDI 1991 17 // 18 // There's a single entry point that analyzes the dependence between a pair 19 // of memory references in a function, returning either NULL, for no dependence, 20 // or a more-or-less detailed description of the dependence between them. 21 // 22 // Currently, the implementation cannot propagate constraints between 23 // coupled RDIV subscripts and lacks a multi-subscript MIV test. 24 // Both of these are conservative weaknesses; 25 // that is, not a source of correctness problems. 26 // 27 // The implementation depends on the GEP instruction to 28 // differentiate subscripts. Since Clang linearizes subscripts 29 // for most arrays, we give up some precision (though the existing MIV tests 30 // will help). We trust that the GEP instruction will eventually be extended. 31 // In the meantime, we should explore Maslov's ideas about delinearization. 32 // 33 // We should pay some careful attention to the possibility of integer overflow 34 // in the implementation of the various tests. This could happen with Add, 35 // Subtract, or Multiply, with both APInt's and SCEV's. 36 // 37 // Some non-linear subscript pairs can be handled by the GCD test 38 // (and perhaps other tests). 39 // Should explore how often these things occur. 40 // 41 // Finally, it seems like certain test cases expose weaknesses in the SCEV 42 // simplification, especially in the handling of sign and zero extensions. 43 // It could be useful to spend time exploring these. 44 // 45 // Please note that this is work in progress and the interface is subject to 46 // change. 47 // 48 //===----------------------------------------------------------------------===// 49 // // 50 // In memory of Ken Kennedy, 1945 - 2007 // 51 // // 52 //===----------------------------------------------------------------------===// 53 54 #define DEBUG_TYPE "da" 55 56 #include "llvm/Analysis/DependenceAnalysis.h" 57 #include "llvm/ADT/Statistic.h" 58 #include "llvm/Instructions.h" 59 #include "llvm/Operator.h" 60 #include "llvm/Analysis/ValueTracking.h" 61 #include "llvm/Support/Debug.h" 62 #include "llvm/Support/ErrorHandling.h" 63 #include "llvm/Support/InstIterator.h" 64 65 using namespace llvm; 66 67 //===----------------------------------------------------------------------===// 68 // statistics 69 70 STATISTIC(TotalArrayPairs, "Array pairs tested"); 71 STATISTIC(SeparableSubscriptPairs, "Separable subscript pairs"); 72 STATISTIC(CoupledSubscriptPairs, "Coupled subscript pairs"); 73 STATISTIC(NonlinearSubscriptPairs, "Nonlinear subscript pairs"); 74 STATISTIC(ZIVapplications, "ZIV applications"); 75 STATISTIC(ZIVindependence, "ZIV independence"); 76 STATISTIC(StrongSIVapplications, "Strong SIV applications"); 77 STATISTIC(StrongSIVsuccesses, "Strong SIV successes"); 78 STATISTIC(StrongSIVindependence, "Strong SIV independence"); 79 STATISTIC(WeakCrossingSIVapplications, "Weak-Crossing SIV applications"); 80 STATISTIC(WeakCrossingSIVsuccesses, "Weak-Crossing SIV successes"); 81 STATISTIC(WeakCrossingSIVindependence, "Weak-Crossing SIV independence"); 82 STATISTIC(ExactSIVapplications, "Exact SIV applications"); 83 STATISTIC(ExactSIVsuccesses, "Exact SIV successes"); 84 STATISTIC(ExactSIVindependence, "Exact SIV independence"); 85 STATISTIC(WeakZeroSIVapplications, "Weak-Zero SIV applications"); 86 STATISTIC(WeakZeroSIVsuccesses, "Weak-Zero SIV successes"); 87 STATISTIC(WeakZeroSIVindependence, "Weak-Zero SIV independence"); 88 STATISTIC(ExactRDIVapplications, "Exact RDIV applications"); 89 STATISTIC(ExactRDIVindependence, "Exact RDIV independence"); 90 STATISTIC(SymbolicRDIVapplications, "Symbolic RDIV applications"); 91 STATISTIC(SymbolicRDIVindependence, "Symbolic RDIV independence"); 92 STATISTIC(DeltaApplications, "Delta applications"); 93 STATISTIC(DeltaSuccesses, "Delta successes"); 94 STATISTIC(DeltaIndependence, "Delta independence"); 95 STATISTIC(DeltaPropagations, "Delta propagations"); 96 STATISTIC(GCDapplications, "GCD applications"); 97 STATISTIC(GCDsuccesses, "GCD successes"); 98 STATISTIC(GCDindependence, "GCD independence"); 99 STATISTIC(BanerjeeApplications, "Banerjee applications"); 100 STATISTIC(BanerjeeIndependence, "Banerjee independence"); 101 STATISTIC(BanerjeeSuccesses, "Banerjee successes"); 102 103 //===----------------------------------------------------------------------===// 104 // basics 105 106 INITIALIZE_PASS_BEGIN(DependenceAnalysis, "da", 107 "Dependence Analysis", true, true) 108 INITIALIZE_PASS_DEPENDENCY(LoopInfo) 109 INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) 110 INITIALIZE_AG_DEPENDENCY(AliasAnalysis) 111 INITIALIZE_PASS_END(DependenceAnalysis, "da", 112 "Dependence Analysis", true, true) 113 114 char DependenceAnalysis::ID = 0; 115 116 117 FunctionPass *llvm::createDependenceAnalysisPass() { 118 return new DependenceAnalysis(); 119 } 120 121 122 bool DependenceAnalysis::runOnFunction(Function &F) { 123 this->F = &F; 124 AA = &getAnalysis<AliasAnalysis>(); 125 SE = &getAnalysis<ScalarEvolution>(); 126 LI = &getAnalysis<LoopInfo>(); 127 return false; 128 } 129 130 131 void DependenceAnalysis::releaseMemory() { 132 } 133 134 135 void DependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { 136 AU.setPreservesAll(); 137 AU.addRequiredTransitive<AliasAnalysis>(); 138 AU.addRequiredTransitive<ScalarEvolution>(); 139 AU.addRequiredTransitive<LoopInfo>(); 140 } 141 142 143 // Used to test the dependence analyzer. 144 // Looks through the function, noting the first store instruction 145 // and the first load instruction 146 // (which always follows the first load in our tests). 147 // Calls depends() and prints out the result. 148 // Ignores all other instructions. 149 static 150 void dumpExampleDependence(raw_ostream &OS, Function *F, 151 DependenceAnalysis *DA) { 152 for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F); 153 SrcI != SrcE; ++SrcI) { 154 if (const StoreInst *Src = dyn_cast<StoreInst>(&*SrcI)) { 155 for (inst_iterator DstI = SrcI, DstE = inst_end(F); 156 DstI != DstE; ++DstI) { 157 if (const LoadInst *Dst = dyn_cast<LoadInst>(&*DstI)) { 158 OS << "da analyze - "; 159 if (Dependence *D = DA->depends(Src, Dst, true)) { 160 D->dump(OS); 161 for (unsigned Level = 1; Level <= D->getLevels(); Level++) { 162 if (D->isSplitable(Level)) { 163 OS << "da analyze - split level = " << Level; 164 OS << ", iteration = " << *DA->getSplitIteration(D, Level); 165 OS << "!\n"; 166 } 167 } 168 delete D; 169 } 170 else 171 OS << "none!\n"; 172 return; 173 } 174 } 175 } 176 } 177 } 178 179 180 void DependenceAnalysis::print(raw_ostream &OS, const Module*) const { 181 dumpExampleDependence(OS, F, const_cast<DependenceAnalysis *>(this)); 182 } 183 184 //===----------------------------------------------------------------------===// 185 // Dependence methods 186 187 // Returns true if this is an input dependence. 188 bool Dependence::isInput() const { 189 return Src->mayReadFromMemory() && Dst->mayReadFromMemory(); 190 } 191 192 193 // Returns true if this is an output dependence. 194 bool Dependence::isOutput() const { 195 return Src->mayWriteToMemory() && Dst->mayWriteToMemory(); 196 } 197 198 199 // Returns true if this is an flow (aka true) dependence. 200 bool Dependence::isFlow() const { 201 return Src->mayWriteToMemory() && Dst->mayReadFromMemory(); 202 } 203 204 205 // Returns true if this is an anti dependence. 206 bool Dependence::isAnti() const { 207 return Src->mayReadFromMemory() && Dst->mayWriteToMemory(); 208 } 209 210 211 // Returns true if a particular level is scalar; that is, 212 // if no subscript in the source or destination mention the induction 213 // variable associated with the loop at this level. 214 // Leave this out of line, so it will serve as a virtual method anchor 215 bool Dependence::isScalar(unsigned level) const { 216 return false; 217 } 218 219 220 //===----------------------------------------------------------------------===// 221 // FullDependence methods 222 223 FullDependence::FullDependence(const Instruction *Source, 224 const Instruction *Destination, 225 bool PossiblyLoopIndependent, 226 unsigned CommonLevels) : 227 Dependence(Source, Destination), 228 Levels(CommonLevels), 229 LoopIndependent(PossiblyLoopIndependent) { 230 Consistent = true; 231 DV = CommonLevels ? new DVEntry[CommonLevels] : NULL; 232 } 233 234 // The rest are simple getters that hide the implementation. 235 236 // getDirection - Returns the direction associated with a particular level. 237 unsigned FullDependence::getDirection(unsigned Level) const { 238 assert(0 < Level && Level <= Levels && "Level out of range"); 239 return DV[Level - 1].Direction; 240 } 241 242 243 // Returns the distance (or NULL) associated with a particular level. 244 const SCEV *FullDependence::getDistance(unsigned Level) const { 245 assert(0 < Level && Level <= Levels && "Level out of range"); 246 return DV[Level - 1].Distance; 247 } 248 249 250 // Returns true if a particular level is scalar; that is, 251 // if no subscript in the source or destination mention the induction 252 // variable associated with the loop at this level. 253 bool FullDependence::isScalar(unsigned Level) const { 254 assert(0 < Level && Level <= Levels && "Level out of range"); 255 return DV[Level - 1].Scalar; 256 } 257 258 259 // Returns true if peeling the first iteration from this loop 260 // will break this dependence. 261 bool FullDependence::isPeelFirst(unsigned Level) const { 262 assert(0 < Level && Level <= Levels && "Level out of range"); 263 return DV[Level - 1].PeelFirst; 264 } 265 266 267 // Returns true if peeling the last iteration from this loop 268 // will break this dependence. 269 bool FullDependence::isPeelLast(unsigned Level) const { 270 assert(0 < Level && Level <= Levels && "Level out of range"); 271 return DV[Level - 1].PeelLast; 272 } 273 274 275 // Returns true if splitting this loop will break the dependence. 276 bool FullDependence::isSplitable(unsigned Level) const { 277 assert(0 < Level && Level <= Levels && "Level out of range"); 278 return DV[Level - 1].Splitable; 279 } 280 281 282 //===----------------------------------------------------------------------===// 283 // DependenceAnalysis::Constraint methods 284 285 // If constraint is a point <X, Y>, returns X. 286 // Otherwise assert. 287 const SCEV *DependenceAnalysis::Constraint::getX() const { 288 assert(Kind == Point && "Kind should be Point"); 289 return A; 290 } 291 292 293 // If constraint is a point <X, Y>, returns Y. 294 // Otherwise assert. 295 const SCEV *DependenceAnalysis::Constraint::getY() const { 296 assert(Kind == Point && "Kind should be Point"); 297 return B; 298 } 299 300 301 // If constraint is a line AX + BY = C, returns A. 302 // Otherwise assert. 303 const SCEV *DependenceAnalysis::Constraint::getA() const { 304 assert((Kind == Line || Kind == Distance) && 305 "Kind should be Line (or Distance)"); 306 return A; 307 } 308 309 310 // If constraint is a line AX + BY = C, returns B. 311 // Otherwise assert. 312 const SCEV *DependenceAnalysis::Constraint::getB() const { 313 assert((Kind == Line || Kind == Distance) && 314 "Kind should be Line (or Distance)"); 315 return B; 316 } 317 318 319 // If constraint is a line AX + BY = C, returns C. 320 // Otherwise assert. 321 const SCEV *DependenceAnalysis::Constraint::getC() const { 322 assert((Kind == Line || Kind == Distance) && 323 "Kind should be Line (or Distance)"); 324 return C; 325 } 326 327 328 // If constraint is a distance, returns D. 329 // Otherwise assert. 330 const SCEV *DependenceAnalysis::Constraint::getD() const { 331 assert(Kind == Distance && "Kind should be Distance"); 332 return SE->getNegativeSCEV(C); 333 } 334 335 336 // Returns the loop associated with this constraint. 337 const Loop *DependenceAnalysis::Constraint::getAssociatedLoop() const { 338 assert((Kind == Distance || Kind == Line || Kind == Point) && 339 "Kind should be Distance, Line, or Point"); 340 return AssociatedLoop; 341 } 342 343 344 void DependenceAnalysis::Constraint::setPoint(const SCEV *X, 345 const SCEV *Y, 346 const Loop *CurLoop) { 347 Kind = Point; 348 A = X; 349 B = Y; 350 AssociatedLoop = CurLoop; 351 } 352 353 354 void DependenceAnalysis::Constraint::setLine(const SCEV *AA, 355 const SCEV *BB, 356 const SCEV *CC, 357 const Loop *CurLoop) { 358 Kind = Line; 359 A = AA; 360 B = BB; 361 C = CC; 362 AssociatedLoop = CurLoop; 363 } 364 365 366 void DependenceAnalysis::Constraint::setDistance(const SCEV *D, 367 const Loop *CurLoop) { 368 Kind = Distance; 369 A = SE->getConstant(D->getType(), 1); 370 B = SE->getNegativeSCEV(A); 371 C = SE->getNegativeSCEV(D); 372 AssociatedLoop = CurLoop; 373 } 374 375 376 void DependenceAnalysis::Constraint::setEmpty() { 377 Kind = Empty; 378 } 379 380 381 void DependenceAnalysis::Constraint::setAny(ScalarEvolution *NewSE) { 382 SE = NewSE; 383 Kind = Any; 384 } 385 386 387 // For debugging purposes. Dumps the constraint out to OS. 388 void DependenceAnalysis::Constraint::dump(raw_ostream &OS) const { 389 if (isEmpty()) 390 OS << " Empty\n"; 391 else if (isAny()) 392 OS << " Any\n"; 393 else if (isPoint()) 394 OS << " Point is <" << *getX() << ", " << *getY() << ">\n"; 395 else if (isDistance()) 396 OS << " Distance is " << *getD() << 397 " (" << *getA() << "*X + " << *getB() << "*Y = " << *getC() << ")\n"; 398 else if (isLine()) 399 OS << " Line is " << *getA() << "*X + " << 400 *getB() << "*Y = " << *getC() << "\n"; 401 else 402 llvm_unreachable("unknown constraint type in Constraint::dump"); 403 } 404 405 406 // Updates X with the intersection 407 // of the Constraints X and Y. Returns true if X has changed. 408 // Corresponds to Figure 4 from the paper 409 // 410 // Practical Dependence Testing 411 // Goff, Kennedy, Tseng 412 // PLDI 1991 413 bool DependenceAnalysis::intersectConstraints(Constraint *X, 414 const Constraint *Y) { 415 ++DeltaApplications; 416 DEBUG(dbgs() << "\tintersect constraints\n"); 417 DEBUG(dbgs() << "\t X ="; X->dump(dbgs())); 418 DEBUG(dbgs() << "\t Y ="; Y->dump(dbgs())); 419 assert(!Y->isPoint() && "Y must not be a Point"); 420 if (X->isAny()) { 421 if (Y->isAny()) 422 return false; 423 *X = *Y; 424 return true; 425 } 426 if (X->isEmpty()) 427 return false; 428 if (Y->isEmpty()) { 429 X->setEmpty(); 430 return true; 431 } 432 433 if (X->isDistance() && Y->isDistance()) { 434 DEBUG(dbgs() << "\t intersect 2 distances\n"); 435 if (isKnownPredicate(CmpInst::ICMP_EQ, X->getD(), Y->getD())) 436 return false; 437 if (isKnownPredicate(CmpInst::ICMP_NE, X->getD(), Y->getD())) { 438 X->setEmpty(); 439 ++DeltaSuccesses; 440 return true; 441 } 442 // Hmmm, interesting situation. 443 // I guess if either is constant, keep it and ignore the other. 444 if (isa<SCEVConstant>(Y->getD())) { 445 *X = *Y; 446 return true; 447 } 448 return false; 449 } 450 451 // At this point, the pseudo-code in Figure 4 of the paper 452 // checks if (X->isPoint() && Y->isPoint()). 453 // This case can't occur in our implementation, 454 // since a Point can only arise as the result of intersecting 455 // two Line constraints, and the right-hand value, Y, is never 456 // the result of an intersection. 457 assert(!(X->isPoint() && Y->isPoint()) && 458 "We shouldn't ever see X->isPoint() && Y->isPoint()"); 459 460 if (X->isLine() && Y->isLine()) { 461 DEBUG(dbgs() << "\t intersect 2 lines\n"); 462 const SCEV *Prod1 = SE->getMulExpr(X->getA(), Y->getB()); 463 const SCEV *Prod2 = SE->getMulExpr(X->getB(), Y->getA()); 464 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) { 465 // slopes are equal, so lines are parallel 466 DEBUG(dbgs() << "\t\tsame slope\n"); 467 Prod1 = SE->getMulExpr(X->getC(), Y->getB()); 468 Prod2 = SE->getMulExpr(X->getB(), Y->getC()); 469 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) 470 return false; 471 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { 472 X->setEmpty(); 473 ++DeltaSuccesses; 474 return true; 475 } 476 return false; 477 } 478 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { 479 // slopes differ, so lines intersect 480 DEBUG(dbgs() << "\t\tdifferent slopes\n"); 481 const SCEV *C1B2 = SE->getMulExpr(X->getC(), Y->getB()); 482 const SCEV *C1A2 = SE->getMulExpr(X->getC(), Y->getA()); 483 const SCEV *C2B1 = SE->getMulExpr(Y->getC(), X->getB()); 484 const SCEV *C2A1 = SE->getMulExpr(Y->getC(), X->getA()); 485 const SCEV *A1B2 = SE->getMulExpr(X->getA(), Y->getB()); 486 const SCEV *A2B1 = SE->getMulExpr(Y->getA(), X->getB()); 487 const SCEVConstant *C1A2_C2A1 = 488 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1A2, C2A1)); 489 const SCEVConstant *C1B2_C2B1 = 490 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1B2, C2B1)); 491 const SCEVConstant *A1B2_A2B1 = 492 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A1B2, A2B1)); 493 const SCEVConstant *A2B1_A1B2 = 494 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A2B1, A1B2)); 495 if (!C1B2_C2B1 || !C1A2_C2A1 || 496 !A1B2_A2B1 || !A2B1_A1B2) 497 return false; 498 APInt Xtop = C1B2_C2B1->getValue()->getValue(); 499 APInt Xbot = A1B2_A2B1->getValue()->getValue(); 500 APInt Ytop = C1A2_C2A1->getValue()->getValue(); 501 APInt Ybot = A2B1_A1B2->getValue()->getValue(); 502 DEBUG(dbgs() << "\t\tXtop = " << Xtop << "\n"); 503 DEBUG(dbgs() << "\t\tXbot = " << Xbot << "\n"); 504 DEBUG(dbgs() << "\t\tYtop = " << Ytop << "\n"); 505 DEBUG(dbgs() << "\t\tYbot = " << Ybot << "\n"); 506 APInt Xq = Xtop; // these need to be initialized, even 507 APInt Xr = Xtop; // though they're just going to be overwritten 508 APInt::sdivrem(Xtop, Xbot, Xq, Xr); 509 APInt Yq = Ytop; 510 APInt Yr = Ytop;; 511 APInt::sdivrem(Ytop, Ybot, Yq, Yr); 512 if (Xr != 0 || Yr != 0) { 513 X->setEmpty(); 514 ++DeltaSuccesses; 515 return true; 516 } 517 DEBUG(dbgs() << "\t\tX = " << Xq << ", Y = " << Yq << "\n"); 518 if (Xq.slt(0) || Yq.slt(0)) { 519 X->setEmpty(); 520 ++DeltaSuccesses; 521 return true; 522 } 523 if (const SCEVConstant *CUB = 524 collectConstantUpperBound(X->getAssociatedLoop(), Prod1->getType())) { 525 APInt UpperBound = CUB->getValue()->getValue(); 526 DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n"); 527 if (Xq.sgt(UpperBound) || Yq.sgt(UpperBound)) { 528 X->setEmpty(); 529 ++DeltaSuccesses; 530 return true; 531 } 532 } 533 X->setPoint(SE->getConstant(Xq), 534 SE->getConstant(Yq), 535 X->getAssociatedLoop()); 536 ++DeltaSuccesses; 537 return true; 538 } 539 return false; 540 } 541 542 // if (X->isLine() && Y->isPoint()) This case can't occur. 543 assert(!(X->isLine() && Y->isPoint()) && "This case should never occur"); 544 545 if (X->isPoint() && Y->isLine()) { 546 DEBUG(dbgs() << "\t intersect Point and Line\n"); 547 const SCEV *A1X1 = SE->getMulExpr(Y->getA(), X->getX()); 548 const SCEV *B1Y1 = SE->getMulExpr(Y->getB(), X->getY()); 549 const SCEV *Sum = SE->getAddExpr(A1X1, B1Y1); 550 if (isKnownPredicate(CmpInst::ICMP_EQ, Sum, Y->getC())) 551 return false; 552 if (isKnownPredicate(CmpInst::ICMP_NE, Sum, Y->getC())) { 553 X->setEmpty(); 554 ++DeltaSuccesses; 555 return true; 556 } 557 return false; 558 } 559 560 llvm_unreachable("shouldn't reach the end of Constraint intersection"); 561 return false; 562 } 563 564 565 //===----------------------------------------------------------------------===// 566 // DependenceAnalysis methods 567 568 // For debugging purposes. Dumps a dependence to OS. 569 void Dependence::dump(raw_ostream &OS) const { 570 bool Splitable = false; 571 if (isConfused()) 572 OS << "confused"; 573 else { 574 if (isConsistent()) 575 OS << "consistent "; 576 if (isFlow()) 577 OS << "flow"; 578 else if (isOutput()) 579 OS << "output"; 580 else if (isAnti()) 581 OS << "anti"; 582 else if (isInput()) 583 OS << "input"; 584 unsigned Levels = getLevels(); 585 if (Levels) { 586 OS << " ["; 587 for (unsigned II = 1; II <= Levels; ++II) { 588 if (isSplitable(II)) 589 Splitable = true; 590 if (isPeelFirst(II)) 591 OS << 'p'; 592 const SCEV *Distance = getDistance(II); 593 if (Distance) 594 OS << *Distance; 595 else if (isScalar(II)) 596 OS << "S"; 597 else { 598 unsigned Direction = getDirection(II); 599 if (Direction == DVEntry::ALL) 600 OS << "*"; 601 else { 602 if (Direction & DVEntry::LT) 603 OS << "<"; 604 if (Direction & DVEntry::EQ) 605 OS << "="; 606 if (Direction & DVEntry::GT) 607 OS << ">"; 608 } 609 } 610 if (isPeelLast(II)) 611 OS << 'p'; 612 if (II < Levels) 613 OS << " "; 614 } 615 if (isLoopIndependent()) 616 OS << "|<"; 617 OS << "]"; 618 if (Splitable) 619 OS << " splitable"; 620 } 621 } 622 OS << "!\n"; 623 } 624 625 626 627 static 628 AliasAnalysis::AliasResult underlyingObjectsAlias(AliasAnalysis *AA, 629 const Value *A, 630 const Value *B) { 631 const Value *AObj = GetUnderlyingObject(A); 632 const Value *BObj = GetUnderlyingObject(B); 633 return AA->alias(AObj, AA->getTypeStoreSize(AObj->getType()), 634 BObj, AA->getTypeStoreSize(BObj->getType())); 635 } 636 637 638 // Returns true if the load or store can be analyzed. Atomic and volatile 639 // operations have properties which this analysis does not understand. 640 static 641 bool isLoadOrStore(const Instruction *I) { 642 if (const LoadInst *LI = dyn_cast<LoadInst>(I)) 643 return LI->isUnordered(); 644 else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) 645 return SI->isUnordered(); 646 return false; 647 } 648 649 650 static 651 const Value *getPointerOperand(const Instruction *I) { 652 if (const LoadInst *LI = dyn_cast<LoadInst>(I)) 653 return LI->getPointerOperand(); 654 if (const StoreInst *SI = dyn_cast<StoreInst>(I)) 655 return SI->getPointerOperand(); 656 llvm_unreachable("Value is not load or store instruction"); 657 return 0; 658 } 659 660 661 // Examines the loop nesting of the Src and Dst 662 // instructions and establishes their shared loops. Sets the variables 663 // CommonLevels, SrcLevels, and MaxLevels. 664 // The source and destination instructions needn't be contained in the same 665 // loop. The routine establishNestingLevels finds the level of most deeply 666 // nested loop that contains them both, CommonLevels. An instruction that's 667 // not contained in a loop is at level = 0. MaxLevels is equal to the level 668 // of the source plus the level of the destination, minus CommonLevels. 669 // This lets us allocate vectors MaxLevels in length, with room for every 670 // distinct loop referenced in both the source and destination subscripts. 671 // The variable SrcLevels is the nesting depth of the source instruction. 672 // It's used to help calculate distinct loops referenced by the destination. 673 // Here's the map from loops to levels: 674 // 0 - unused 675 // 1 - outermost common loop 676 // ... - other common loops 677 // CommonLevels - innermost common loop 678 // ... - loops containing Src but not Dst 679 // SrcLevels - innermost loop containing Src but not Dst 680 // ... - loops containing Dst but not Src 681 // MaxLevels - innermost loops containing Dst but not Src 682 // Consider the follow code fragment: 683 // for (a = ...) { 684 // for (b = ...) { 685 // for (c = ...) { 686 // for (d = ...) { 687 // A[] = ...; 688 // } 689 // } 690 // for (e = ...) { 691 // for (f = ...) { 692 // for (g = ...) { 693 // ... = A[]; 694 // } 695 // } 696 // } 697 // } 698 // } 699 // If we're looking at the possibility of a dependence between the store 700 // to A (the Src) and the load from A (the Dst), we'll note that they 701 // have 2 loops in common, so CommonLevels will equal 2 and the direction 702 // vector for Result will have 2 entries. SrcLevels = 4 and MaxLevels = 7. 703 // A map from loop names to loop numbers would look like 704 // a - 1 705 // b - 2 = CommonLevels 706 // c - 3 707 // d - 4 = SrcLevels 708 // e - 5 709 // f - 6 710 // g - 7 = MaxLevels 711 void DependenceAnalysis::establishNestingLevels(const Instruction *Src, 712 const Instruction *Dst) { 713 const BasicBlock *SrcBlock = Src->getParent(); 714 const BasicBlock *DstBlock = Dst->getParent(); 715 unsigned SrcLevel = LI->getLoopDepth(SrcBlock); 716 unsigned DstLevel = LI->getLoopDepth(DstBlock); 717 const Loop *SrcLoop = LI->getLoopFor(SrcBlock); 718 const Loop *DstLoop = LI->getLoopFor(DstBlock); 719 SrcLevels = SrcLevel; 720 MaxLevels = SrcLevel + DstLevel; 721 while (SrcLevel > DstLevel) { 722 SrcLoop = SrcLoop->getParentLoop(); 723 SrcLevel--; 724 } 725 while (DstLevel > SrcLevel) { 726 DstLoop = DstLoop->getParentLoop(); 727 DstLevel--; 728 } 729 while (SrcLoop != DstLoop) { 730 SrcLoop = SrcLoop->getParentLoop(); 731 DstLoop = DstLoop->getParentLoop(); 732 SrcLevel--; 733 } 734 CommonLevels = SrcLevel; 735 MaxLevels -= CommonLevels; 736 } 737 738 739 // Given one of the loops containing the source, return 740 // its level index in our numbering scheme. 741 unsigned DependenceAnalysis::mapSrcLoop(const Loop *SrcLoop) const { 742 return SrcLoop->getLoopDepth(); 743 } 744 745 746 // Given one of the loops containing the destination, 747 // return its level index in our numbering scheme. 748 unsigned DependenceAnalysis::mapDstLoop(const Loop *DstLoop) const { 749 unsigned D = DstLoop->getLoopDepth(); 750 if (D > CommonLevels) 751 return D - CommonLevels + SrcLevels; 752 else 753 return D; 754 } 755 756 757 // Returns true if Expression is loop invariant in LoopNest. 758 bool DependenceAnalysis::isLoopInvariant(const SCEV *Expression, 759 const Loop *LoopNest) const { 760 if (!LoopNest) 761 return true; 762 return SE->isLoopInvariant(Expression, LoopNest) && 763 isLoopInvariant(Expression, LoopNest->getParentLoop()); 764 } 765 766 767 768 // Finds the set of loops from the LoopNest that 769 // have a level <= CommonLevels and are referred to by the SCEV Expression. 770 void DependenceAnalysis::collectCommonLoops(const SCEV *Expression, 771 const Loop *LoopNest, 772 SmallBitVector &Loops) const { 773 while (LoopNest) { 774 unsigned Level = LoopNest->getLoopDepth(); 775 if (Level <= CommonLevels && !SE->isLoopInvariant(Expression, LoopNest)) 776 Loops.set(Level); 777 LoopNest = LoopNest->getParentLoop(); 778 } 779 } 780 781 782 // removeMatchingExtensions - Examines a subscript pair. 783 // If the source and destination are identically sign (or zero) 784 // extended, it strips off the extension in an effect to simplify 785 // the actual analysis. 786 void DependenceAnalysis::removeMatchingExtensions(Subscript *Pair) { 787 const SCEV *Src = Pair->Src; 788 const SCEV *Dst = Pair->Dst; 789 if ((isa<SCEVZeroExtendExpr>(Src) && isa<SCEVZeroExtendExpr>(Dst)) || 790 (isa<SCEVSignExtendExpr>(Src) && isa<SCEVSignExtendExpr>(Dst))) { 791 const SCEVCastExpr *SrcCast = cast<SCEVCastExpr>(Src); 792 const SCEVCastExpr *DstCast = cast<SCEVCastExpr>(Dst); 793 if (SrcCast->getType() == DstCast->getType()) { 794 Pair->Src = SrcCast->getOperand(); 795 Pair->Dst = DstCast->getOperand(); 796 } 797 } 798 } 799 800 801 // Examine the scev and return true iff it's linear. 802 // Collect any loops mentioned in the set of "Loops". 803 bool DependenceAnalysis::checkSrcSubscript(const SCEV *Src, 804 const Loop *LoopNest, 805 SmallBitVector &Loops) { 806 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Src); 807 if (!AddRec) 808 return isLoopInvariant(Src, LoopNest); 809 const SCEV *Start = AddRec->getStart(); 810 const SCEV *Step = AddRec->getStepRecurrence(*SE); 811 if (!isLoopInvariant(Step, LoopNest)) 812 return false; 813 Loops.set(mapSrcLoop(AddRec->getLoop())); 814 return checkSrcSubscript(Start, LoopNest, Loops); 815 } 816 817 818 819 // Examine the scev and return true iff it's linear. 820 // Collect any loops mentioned in the set of "Loops". 821 bool DependenceAnalysis::checkDstSubscript(const SCEV *Dst, 822 const Loop *LoopNest, 823 SmallBitVector &Loops) { 824 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Dst); 825 if (!AddRec) 826 return isLoopInvariant(Dst, LoopNest); 827 const SCEV *Start = AddRec->getStart(); 828 const SCEV *Step = AddRec->getStepRecurrence(*SE); 829 if (!isLoopInvariant(Step, LoopNest)) 830 return false; 831 Loops.set(mapDstLoop(AddRec->getLoop())); 832 return checkDstSubscript(Start, LoopNest, Loops); 833 } 834 835 836 // Examines the subscript pair (the Src and Dst SCEVs) 837 // and classifies it as either ZIV, SIV, RDIV, MIV, or Nonlinear. 838 // Collects the associated loops in a set. 839 DependenceAnalysis::Subscript::ClassificationKind 840 DependenceAnalysis::classifyPair(const SCEV *Src, const Loop *SrcLoopNest, 841 const SCEV *Dst, const Loop *DstLoopNest, 842 SmallBitVector &Loops) { 843 SmallBitVector SrcLoops(MaxLevels + 1); 844 SmallBitVector DstLoops(MaxLevels + 1); 845 if (!checkSrcSubscript(Src, SrcLoopNest, SrcLoops)) 846 return Subscript::NonLinear; 847 if (!checkDstSubscript(Dst, DstLoopNest, DstLoops)) 848 return Subscript::NonLinear; 849 Loops = SrcLoops; 850 Loops |= DstLoops; 851 unsigned N = Loops.count(); 852 if (N == 0) 853 return Subscript::ZIV; 854 if (N == 1) 855 return Subscript::SIV; 856 if (N == 2 && (SrcLoops.count() == 0 || 857 DstLoops.count() == 0 || 858 (SrcLoops.count() == 1 && DstLoops.count() == 1))) 859 return Subscript::RDIV; 860 return Subscript::MIV; 861 } 862 863 864 // A wrapper around SCEV::isKnownPredicate. 865 // Looks for cases where we're interested in comparing for equality. 866 // If both X and Y have been identically sign or zero extended, 867 // it strips off the (confusing) extensions before invoking 868 // SCEV::isKnownPredicate. Perhaps, someday, the ScalarEvolution package 869 // will be similarly updated. 870 // 871 // If SCEV::isKnownPredicate can't prove the predicate, 872 // we try simple subtraction, which seems to help in some cases 873 // involving symbolics. 874 bool DependenceAnalysis::isKnownPredicate(ICmpInst::Predicate Pred, 875 const SCEV *X, 876 const SCEV *Y) const { 877 if (Pred == CmpInst::ICMP_EQ || 878 Pred == CmpInst::ICMP_NE) { 879 if ((isa<SCEVSignExtendExpr>(X) && 880 isa<SCEVSignExtendExpr>(Y)) || 881 (isa<SCEVZeroExtendExpr>(X) && 882 isa<SCEVZeroExtendExpr>(Y))) { 883 const SCEVCastExpr *CX = cast<SCEVCastExpr>(X); 884 const SCEVCastExpr *CY = cast<SCEVCastExpr>(Y); 885 const SCEV *Xop = CX->getOperand(); 886 const SCEV *Yop = CY->getOperand(); 887 if (Xop->getType() == Yop->getType()) { 888 X = Xop; 889 Y = Yop; 890 } 891 } 892 } 893 if (SE->isKnownPredicate(Pred, X, Y)) 894 return true; 895 // If SE->isKnownPredicate can't prove the condition, 896 // we try the brute-force approach of subtracting 897 // and testing the difference. 898 // By testing with SE->isKnownPredicate first, we avoid 899 // the possibility of overflow when the arguments are constants. 900 const SCEV *Delta = SE->getMinusSCEV(X, Y); 901 switch (Pred) { 902 case CmpInst::ICMP_EQ: 903 return Delta->isZero(); 904 case CmpInst::ICMP_NE: 905 return SE->isKnownNonZero(Delta); 906 case CmpInst::ICMP_SGE: 907 return SE->isKnownNonNegative(Delta); 908 case CmpInst::ICMP_SLE: 909 return SE->isKnownNonPositive(Delta); 910 case CmpInst::ICMP_SGT: 911 return SE->isKnownPositive(Delta); 912 case CmpInst::ICMP_SLT: 913 return SE->isKnownNegative(Delta); 914 default: 915 llvm_unreachable("unexpected predicate in isKnownPredicate"); 916 } 917 } 918 919 920 // All subscripts are all the same type. 921 // Loop bound may be smaller (e.g., a char). 922 // Should zero extend loop bound, since it's always >= 0. 923 // This routine collects upper bound and extends if needed. 924 // Return null if no bound available. 925 const SCEV *DependenceAnalysis::collectUpperBound(const Loop *L, 926 Type *T) const { 927 if (SE->hasLoopInvariantBackedgeTakenCount(L)) { 928 const SCEV *UB = SE->getBackedgeTakenCount(L); 929 return SE->getNoopOrZeroExtend(UB, T); 930 } 931 return NULL; 932 } 933 934 935 // Calls collectUpperBound(), then attempts to cast it to SCEVConstant. 936 // If the cast fails, returns NULL. 937 const SCEVConstant *DependenceAnalysis::collectConstantUpperBound(const Loop *L, 938 Type *T 939 ) const { 940 if (const SCEV *UB = collectUpperBound(L, T)) 941 return dyn_cast<SCEVConstant>(UB); 942 return NULL; 943 } 944 945 946 // testZIV - 947 // When we have a pair of subscripts of the form [c1] and [c2], 948 // where c1 and c2 are both loop invariant, we attack it using 949 // the ZIV test. Basically, we test by comparing the two values, 950 // but there are actually three possible results: 951 // 1) the values are equal, so there's a dependence 952 // 2) the values are different, so there's no dependence 953 // 3) the values might be equal, so we have to assume a dependence. 954 // 955 // Return true if dependence disproved. 956 bool DependenceAnalysis::testZIV(const SCEV *Src, 957 const SCEV *Dst, 958 FullDependence &Result) const { 959 DEBUG(dbgs() << " src = " << *Src << "\n"); 960 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 961 ++ZIVapplications; 962 if (isKnownPredicate(CmpInst::ICMP_EQ, Src, Dst)) { 963 DEBUG(dbgs() << " provably dependent\n"); 964 return false; // provably dependent 965 } 966 if (isKnownPredicate(CmpInst::ICMP_NE, Src, Dst)) { 967 DEBUG(dbgs() << " provably independent\n"); 968 ++ZIVindependence; 969 return true; // provably independent 970 } 971 DEBUG(dbgs() << " possibly dependent\n"); 972 Result.Consistent = false; 973 return false; // possibly dependent 974 } 975 976 977 // strongSIVtest - 978 // From the paper, Practical Dependence Testing, Section 4.2.1 979 // 980 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 + a*i], 981 // where i is an induction variable, c1 and c2 are loop invariant, 982 // and a is a constant, we can solve it exactly using the Strong SIV test. 983 // 984 // Can prove independence. Failing that, can compute distance (and direction). 985 // In the presence of symbolic terms, we can sometimes make progress. 986 // 987 // If there's a dependence, 988 // 989 // c1 + a*i = c2 + a*i' 990 // 991 // The dependence distance is 992 // 993 // d = i' - i = (c1 - c2)/a 994 // 995 // A dependence only exists if d is an integer and abs(d) <= U, where U is the 996 // loop's upper bound. If a dependence exists, the dependence direction is 997 // defined as 998 // 999 // { < if d > 0 1000 // direction = { = if d = 0 1001 // { > if d < 0 1002 // 1003 // Return true if dependence disproved. 1004 bool DependenceAnalysis::strongSIVtest(const SCEV *Coeff, 1005 const SCEV *SrcConst, 1006 const SCEV *DstConst, 1007 const Loop *CurLoop, 1008 unsigned Level, 1009 FullDependence &Result, 1010 Constraint &NewConstraint) const { 1011 DEBUG(dbgs() << "\tStrong SIV test\n"); 1012 DEBUG(dbgs() << "\t Coeff = " << *Coeff); 1013 DEBUG(dbgs() << ", " << *Coeff->getType() << "\n"); 1014 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst); 1015 DEBUG(dbgs() << ", " << *SrcConst->getType() << "\n"); 1016 DEBUG(dbgs() << "\t DstConst = " << *DstConst); 1017 DEBUG(dbgs() << ", " << *DstConst->getType() << "\n"); 1018 ++StrongSIVapplications; 1019 assert(0 < Level && Level <= CommonLevels && "level out of range"); 1020 Level--; 1021 1022 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); 1023 DEBUG(dbgs() << "\t Delta = " << *Delta); 1024 DEBUG(dbgs() << ", " << *Delta->getType() << "\n"); 1025 1026 // check that |Delta| < iteration count 1027 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1028 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound); 1029 DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n"); 1030 const SCEV *AbsDelta = 1031 SE->isKnownNonNegative(Delta) ? Delta : SE->getNegativeSCEV(Delta); 1032 const SCEV *AbsCoeff = 1033 SE->isKnownNonNegative(Coeff) ? Coeff : SE->getNegativeSCEV(Coeff); 1034 const SCEV *Product = SE->getMulExpr(UpperBound, AbsCoeff); 1035 if (isKnownPredicate(CmpInst::ICMP_SGT, AbsDelta, Product)) { 1036 // Distance greater than trip count - no dependence 1037 ++StrongSIVindependence; 1038 ++StrongSIVsuccesses; 1039 return true; 1040 } 1041 } 1042 1043 // Can we compute distance? 1044 if (isa<SCEVConstant>(Delta) && isa<SCEVConstant>(Coeff)) { 1045 APInt ConstDelta = cast<SCEVConstant>(Delta)->getValue()->getValue(); 1046 APInt ConstCoeff = cast<SCEVConstant>(Coeff)->getValue()->getValue(); 1047 APInt Distance = ConstDelta; // these need to be initialized 1048 APInt Remainder = ConstDelta; 1049 APInt::sdivrem(ConstDelta, ConstCoeff, Distance, Remainder); 1050 DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); 1051 DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1052 // Make sure Coeff divides Delta exactly 1053 if (Remainder != 0) { 1054 // Coeff doesn't divide Distance, no dependence 1055 ++StrongSIVindependence; 1056 ++StrongSIVsuccesses; 1057 return true; 1058 } 1059 Result.DV[Level].Distance = SE->getConstant(Distance); 1060 NewConstraint.setDistance(SE->getConstant(Distance), CurLoop); 1061 if (Distance.sgt(0)) 1062 Result.DV[Level].Direction &= Dependence::DVEntry::LT; 1063 else if (Distance.slt(0)) 1064 Result.DV[Level].Direction &= Dependence::DVEntry::GT; 1065 else 1066 Result.DV[Level].Direction &= Dependence::DVEntry::EQ; 1067 ++StrongSIVsuccesses; 1068 } 1069 else if (Delta->isZero()) { 1070 // since 0/X == 0 1071 Result.DV[Level].Distance = Delta; 1072 NewConstraint.setDistance(Delta, CurLoop); 1073 Result.DV[Level].Direction &= Dependence::DVEntry::EQ; 1074 ++StrongSIVsuccesses; 1075 } 1076 else { 1077 if (Coeff->isOne()) { 1078 DEBUG(dbgs() << "\t Distance = " << *Delta << "\n"); 1079 Result.DV[Level].Distance = Delta; // since X/1 == X 1080 NewConstraint.setDistance(Delta, CurLoop); 1081 } 1082 else { 1083 Result.Consistent = false; 1084 NewConstraint.setLine(Coeff, 1085 SE->getNegativeSCEV(Coeff), 1086 SE->getNegativeSCEV(Delta), CurLoop); 1087 } 1088 1089 // maybe we can get a useful direction 1090 bool DeltaMaybeZero = !SE->isKnownNonZero(Delta); 1091 bool DeltaMaybePositive = !SE->isKnownNonPositive(Delta); 1092 bool DeltaMaybeNegative = !SE->isKnownNonNegative(Delta); 1093 bool CoeffMaybePositive = !SE->isKnownNonPositive(Coeff); 1094 bool CoeffMaybeNegative = !SE->isKnownNonNegative(Coeff); 1095 // The double negatives above are confusing. 1096 // It helps to read !SE->isKnownNonZero(Delta) 1097 // as "Delta might be Zero" 1098 unsigned NewDirection = Dependence::DVEntry::NONE; 1099 if ((DeltaMaybePositive && CoeffMaybePositive) || 1100 (DeltaMaybeNegative && CoeffMaybeNegative)) 1101 NewDirection = Dependence::DVEntry::LT; 1102 if (DeltaMaybeZero) 1103 NewDirection |= Dependence::DVEntry::EQ; 1104 if ((DeltaMaybeNegative && CoeffMaybePositive) || 1105 (DeltaMaybePositive && CoeffMaybeNegative)) 1106 NewDirection |= Dependence::DVEntry::GT; 1107 if (NewDirection < Result.DV[Level].Direction) 1108 ++StrongSIVsuccesses; 1109 Result.DV[Level].Direction &= NewDirection; 1110 } 1111 return false; 1112 } 1113 1114 1115 // weakCrossingSIVtest - 1116 // From the paper, Practical Dependence Testing, Section 4.2.2 1117 // 1118 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 - a*i], 1119 // where i is an induction variable, c1 and c2 are loop invariant, 1120 // and a is a constant, we can solve it exactly using the 1121 // Weak-Crossing SIV test. 1122 // 1123 // Given c1 + a*i = c2 - a*i', we can look for the intersection of 1124 // the two lines, where i = i', yielding 1125 // 1126 // c1 + a*i = c2 - a*i 1127 // 2a*i = c2 - c1 1128 // i = (c2 - c1)/2a 1129 // 1130 // If i < 0, there is no dependence. 1131 // If i > upperbound, there is no dependence. 1132 // If i = 0 (i.e., if c1 = c2), there's a dependence with distance = 0. 1133 // If i = upperbound, there's a dependence with distance = 0. 1134 // If i is integral, there's a dependence (all directions). 1135 // If the non-integer part = 1/2, there's a dependence (<> directions). 1136 // Otherwise, there's no dependence. 1137 // 1138 // Can prove independence. Failing that, 1139 // can sometimes refine the directions. 1140 // Can determine iteration for splitting. 1141 // 1142 // Return true if dependence disproved. 1143 bool DependenceAnalysis::weakCrossingSIVtest(const SCEV *Coeff, 1144 const SCEV *SrcConst, 1145 const SCEV *DstConst, 1146 const Loop *CurLoop, 1147 unsigned Level, 1148 FullDependence &Result, 1149 Constraint &NewConstraint, 1150 const SCEV *&SplitIter) const { 1151 DEBUG(dbgs() << "\tWeak-Crossing SIV test\n"); 1152 DEBUG(dbgs() << "\t Coeff = " << *Coeff << "\n"); 1153 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1154 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1155 ++WeakCrossingSIVapplications; 1156 assert(0 < Level && Level <= CommonLevels && "Level out of range"); 1157 Level--; 1158 Result.Consistent = false; 1159 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1160 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1161 NewConstraint.setLine(Coeff, Coeff, Delta, CurLoop); 1162 if (Delta->isZero()) { 1163 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT); 1164 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT); 1165 ++WeakCrossingSIVsuccesses; 1166 if (!Result.DV[Level].Direction) { 1167 ++WeakCrossingSIVindependence; 1168 return true; 1169 } 1170 Result.DV[Level].Distance = Delta; // = 0 1171 return false; 1172 } 1173 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Coeff); 1174 if (!ConstCoeff) 1175 return false; 1176 1177 Result.DV[Level].Splitable = true; 1178 if (SE->isKnownNegative(ConstCoeff)) { 1179 ConstCoeff = dyn_cast<SCEVConstant>(SE->getNegativeSCEV(ConstCoeff)); 1180 assert(ConstCoeff && 1181 "dynamic cast of negative of ConstCoeff should yield constant"); 1182 Delta = SE->getNegativeSCEV(Delta); 1183 } 1184 assert(SE->isKnownPositive(ConstCoeff) && "ConstCoeff should be positive"); 1185 1186 // compute SplitIter for use by DependenceAnalysis::getSplitIteration() 1187 SplitIter = 1188 SE->getUDivExpr(SE->getSMaxExpr(SE->getConstant(Delta->getType(), 0), 1189 Delta), 1190 SE->getMulExpr(SE->getConstant(Delta->getType(), 2), 1191 ConstCoeff)); 1192 DEBUG(dbgs() << "\t Split iter = " << *SplitIter << "\n"); 1193 1194 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1195 if (!ConstDelta) 1196 return false; 1197 1198 // We're certain that ConstCoeff > 0; therefore, 1199 // if Delta < 0, then no dependence. 1200 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1201 DEBUG(dbgs() << "\t ConstCoeff = " << *ConstCoeff << "\n"); 1202 if (SE->isKnownNegative(Delta)) { 1203 // No dependence, Delta < 0 1204 ++WeakCrossingSIVindependence; 1205 ++WeakCrossingSIVsuccesses; 1206 return true; 1207 } 1208 1209 // We're certain that Delta > 0 and ConstCoeff > 0. 1210 // Check Delta/(2*ConstCoeff) against upper loop bound 1211 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1212 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1213 const SCEV *ConstantTwo = SE->getConstant(UpperBound->getType(), 2); 1214 const SCEV *ML = SE->getMulExpr(SE->getMulExpr(ConstCoeff, UpperBound), 1215 ConstantTwo); 1216 DEBUG(dbgs() << "\t ML = " << *ML << "\n"); 1217 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, ML)) { 1218 // Delta too big, no dependence 1219 ++WeakCrossingSIVindependence; 1220 ++WeakCrossingSIVsuccesses; 1221 return true; 1222 } 1223 if (isKnownPredicate(CmpInst::ICMP_EQ, Delta, ML)) { 1224 // i = i' = UB 1225 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT); 1226 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT); 1227 ++WeakCrossingSIVsuccesses; 1228 if (!Result.DV[Level].Direction) { 1229 ++WeakCrossingSIVindependence; 1230 return true; 1231 } 1232 Result.DV[Level].Splitable = false; 1233 Result.DV[Level].Distance = SE->getConstant(Delta->getType(), 0); 1234 return false; 1235 } 1236 } 1237 1238 // check that Coeff divides Delta 1239 APInt APDelta = ConstDelta->getValue()->getValue(); 1240 APInt APCoeff = ConstCoeff->getValue()->getValue(); 1241 APInt Distance = APDelta; // these need to be initialzed 1242 APInt Remainder = APDelta; 1243 APInt::sdivrem(APDelta, APCoeff, Distance, Remainder); 1244 DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1245 if (Remainder != 0) { 1246 // Coeff doesn't divide Delta, no dependence 1247 ++WeakCrossingSIVindependence; 1248 ++WeakCrossingSIVsuccesses; 1249 return true; 1250 } 1251 DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); 1252 1253 // if 2*Coeff doesn't divide Delta, then the equal direction isn't possible 1254 APInt Two = APInt(Distance.getBitWidth(), 2, true); 1255 Remainder = Distance.srem(Two); 1256 DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1257 if (Remainder != 0) { 1258 // Equal direction isn't possible 1259 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::EQ); 1260 ++WeakCrossingSIVsuccesses; 1261 } 1262 return false; 1263 } 1264 1265 1266 // Kirch's algorithm, from 1267 // 1268 // Optimizing Supercompilers for Supercomputers 1269 // Michael Wolfe 1270 // MIT Press, 1989 1271 // 1272 // Program 2.1, page 29. 1273 // Computes the GCD of AM and BM. 1274 // Also finds a solution to the equation ax - by = gdc(a, b). 1275 // Returns true iff the gcd divides Delta. 1276 static 1277 bool findGCD(unsigned Bits, APInt AM, APInt BM, APInt Delta, 1278 APInt &G, APInt &X, APInt &Y) { 1279 APInt A0(Bits, 1, true), A1(Bits, 0, true); 1280 APInt B0(Bits, 0, true), B1(Bits, 1, true); 1281 APInt G0 = AM.abs(); 1282 APInt G1 = BM.abs(); 1283 APInt Q = G0; // these need to be initialized 1284 APInt R = G0; 1285 APInt::sdivrem(G0, G1, Q, R); 1286 while (R != 0) { 1287 APInt A2 = A0 - Q*A1; A0 = A1; A1 = A2; 1288 APInt B2 = B0 - Q*B1; B0 = B1; B1 = B2; 1289 G0 = G1; G1 = R; 1290 APInt::sdivrem(G0, G1, Q, R); 1291 } 1292 G = G1; 1293 DEBUG(dbgs() << "\t GCD = " << G << "\n"); 1294 X = AM.slt(0) ? -A1 : A1; 1295 Y = BM.slt(0) ? B1 : -B1; 1296 1297 // make sure gcd divides Delta 1298 R = Delta.srem(G); 1299 if (R != 0) 1300 return true; // gcd doesn't divide Delta, no dependence 1301 Q = Delta.sdiv(G); 1302 X *= Q; 1303 Y *= Q; 1304 return false; 1305 } 1306 1307 1308 static 1309 APInt floorOfQuotient(APInt A, APInt B) { 1310 APInt Q = A; // these need to be initialized 1311 APInt R = A; 1312 APInt::sdivrem(A, B, Q, R); 1313 if (R == 0) 1314 return Q; 1315 if ((A.sgt(0) && B.sgt(0)) || 1316 (A.slt(0) && B.slt(0))) 1317 return Q; 1318 else 1319 return Q - 1; 1320 } 1321 1322 1323 static 1324 APInt ceilingOfQuotient(APInt A, APInt B) { 1325 APInt Q = A; // these need to be initialized 1326 APInt R = A; 1327 APInt::sdivrem(A, B, Q, R); 1328 if (R == 0) 1329 return Q; 1330 if ((A.sgt(0) && B.sgt(0)) || 1331 (A.slt(0) && B.slt(0))) 1332 return Q + 1; 1333 else 1334 return Q; 1335 } 1336 1337 1338 static 1339 APInt maxAPInt(APInt A, APInt B) { 1340 return A.sgt(B) ? A : B; 1341 } 1342 1343 1344 static 1345 APInt minAPInt(APInt A, APInt B) { 1346 return A.slt(B) ? A : B; 1347 } 1348 1349 1350 // exactSIVtest - 1351 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i], 1352 // where i is an induction variable, c1 and c2 are loop invariant, and a1 1353 // and a2 are constant, we can solve it exactly using an algorithm developed 1354 // by Banerjee and Wolfe. See Section 2.5.3 in 1355 // 1356 // Optimizing Supercompilers for Supercomputers 1357 // Michael Wolfe 1358 // MIT Press, 1989 1359 // 1360 // It's slower than the specialized tests (strong SIV, weak-zero SIV, etc), 1361 // so use them if possible. They're also a bit better with symbolics and, 1362 // in the case of the strong SIV test, can compute Distances. 1363 // 1364 // Return true if dependence disproved. 1365 bool DependenceAnalysis::exactSIVtest(const SCEV *SrcCoeff, 1366 const SCEV *DstCoeff, 1367 const SCEV *SrcConst, 1368 const SCEV *DstConst, 1369 const Loop *CurLoop, 1370 unsigned Level, 1371 FullDependence &Result, 1372 Constraint &NewConstraint) const { 1373 DEBUG(dbgs() << "\tExact SIV test\n"); 1374 DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); 1375 DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); 1376 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1377 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1378 ++ExactSIVapplications; 1379 assert(0 < Level && Level <= CommonLevels && "Level out of range"); 1380 Level--; 1381 Result.Consistent = false; 1382 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1383 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1384 NewConstraint.setLine(SrcCoeff, SE->getNegativeSCEV(DstCoeff), 1385 Delta, CurLoop); 1386 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1387 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1388 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1389 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) 1390 return false; 1391 1392 // find gcd 1393 APInt G, X, Y; 1394 APInt AM = ConstSrcCoeff->getValue()->getValue(); 1395 APInt BM = ConstDstCoeff->getValue()->getValue(); 1396 unsigned Bits = AM.getBitWidth(); 1397 if (findGCD(Bits, AM, BM, ConstDelta->getValue()->getValue(), G, X, Y)) { 1398 // gcd doesn't divide Delta, no dependence 1399 ++ExactSIVindependence; 1400 ++ExactSIVsuccesses; 1401 return true; 1402 } 1403 1404 DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); 1405 1406 // since SCEV construction normalizes, LM = 0 1407 APInt UM(Bits, 1, true); 1408 bool UMvalid = false; 1409 // UM is perhaps unavailable, let's check 1410 if (const SCEVConstant *CUB = 1411 collectConstantUpperBound(CurLoop, Delta->getType())) { 1412 UM = CUB->getValue()->getValue(); 1413 DEBUG(dbgs() << "\t UM = " << UM << "\n"); 1414 UMvalid = true; 1415 } 1416 1417 APInt TU(APInt::getSignedMaxValue(Bits)); 1418 APInt TL(APInt::getSignedMinValue(Bits)); 1419 1420 // test(BM/G, LM-X) and test(-BM/G, X-UM) 1421 APInt TMUL = BM.sdiv(G); 1422 if (TMUL.sgt(0)) { 1423 TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL)); 1424 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1425 if (UMvalid) { 1426 TU = minAPInt(TU, floorOfQuotient(UM - X, TMUL)); 1427 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1428 } 1429 } 1430 else { 1431 TU = minAPInt(TU, floorOfQuotient(-X, TMUL)); 1432 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1433 if (UMvalid) { 1434 TL = maxAPInt(TL, ceilingOfQuotient(UM - X, TMUL)); 1435 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1436 } 1437 } 1438 1439 // test(AM/G, LM-Y) and test(-AM/G, Y-UM) 1440 TMUL = AM.sdiv(G); 1441 if (TMUL.sgt(0)) { 1442 TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL)); 1443 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1444 if (UMvalid) { 1445 TU = minAPInt(TU, floorOfQuotient(UM - Y, TMUL)); 1446 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1447 } 1448 } 1449 else { 1450 TU = minAPInt(TU, floorOfQuotient(-Y, TMUL)); 1451 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1452 if (UMvalid) { 1453 TL = maxAPInt(TL, ceilingOfQuotient(UM - Y, TMUL)); 1454 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1455 } 1456 } 1457 if (TL.sgt(TU)) { 1458 ++ExactSIVindependence; 1459 ++ExactSIVsuccesses; 1460 return true; 1461 } 1462 1463 // explore directions 1464 unsigned NewDirection = Dependence::DVEntry::NONE; 1465 1466 // less than 1467 APInt SaveTU(TU); // save these 1468 APInt SaveTL(TL); 1469 DEBUG(dbgs() << "\t exploring LT direction\n"); 1470 TMUL = AM - BM; 1471 if (TMUL.sgt(0)) { 1472 TL = maxAPInt(TL, ceilingOfQuotient(X - Y + 1, TMUL)); 1473 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1474 } 1475 else { 1476 TU = minAPInt(TU, floorOfQuotient(X - Y + 1, TMUL)); 1477 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1478 } 1479 if (TL.sle(TU)) { 1480 NewDirection |= Dependence::DVEntry::LT; 1481 ++ExactSIVsuccesses; 1482 } 1483 1484 // equal 1485 TU = SaveTU; // restore 1486 TL = SaveTL; 1487 DEBUG(dbgs() << "\t exploring EQ direction\n"); 1488 if (TMUL.sgt(0)) { 1489 TL = maxAPInt(TL, ceilingOfQuotient(X - Y, TMUL)); 1490 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1491 } 1492 else { 1493 TU = minAPInt(TU, floorOfQuotient(X - Y, TMUL)); 1494 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1495 } 1496 TMUL = BM - AM; 1497 if (TMUL.sgt(0)) { 1498 TL = maxAPInt(TL, ceilingOfQuotient(Y - X, TMUL)); 1499 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1500 } 1501 else { 1502 TU = minAPInt(TU, floorOfQuotient(Y - X, TMUL)); 1503 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1504 } 1505 if (TL.sle(TU)) { 1506 NewDirection |= Dependence::DVEntry::EQ; 1507 ++ExactSIVsuccesses; 1508 } 1509 1510 // greater than 1511 TU = SaveTU; // restore 1512 TL = SaveTL; 1513 DEBUG(dbgs() << "\t exploring GT direction\n"); 1514 if (TMUL.sgt(0)) { 1515 TL = maxAPInt(TL, ceilingOfQuotient(Y - X + 1, TMUL)); 1516 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1517 } 1518 else { 1519 TU = minAPInt(TU, floorOfQuotient(Y - X + 1, TMUL)); 1520 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1521 } 1522 if (TL.sle(TU)) { 1523 NewDirection |= Dependence::DVEntry::GT; 1524 ++ExactSIVsuccesses; 1525 } 1526 1527 // finished 1528 Result.DV[Level].Direction &= NewDirection; 1529 if (Result.DV[Level].Direction == Dependence::DVEntry::NONE) 1530 ++ExactSIVindependence; 1531 return Result.DV[Level].Direction == Dependence::DVEntry::NONE; 1532 } 1533 1534 1535 1536 // Return true if the divisor evenly divides the dividend. 1537 static 1538 bool isRemainderZero(const SCEVConstant *Dividend, 1539 const SCEVConstant *Divisor) { 1540 APInt ConstDividend = Dividend->getValue()->getValue(); 1541 APInt ConstDivisor = Divisor->getValue()->getValue(); 1542 return ConstDividend.srem(ConstDivisor) == 0; 1543 } 1544 1545 1546 // weakZeroSrcSIVtest - 1547 // From the paper, Practical Dependence Testing, Section 4.2.2 1548 // 1549 // When we have a pair of subscripts of the form [c1] and [c2 + a*i], 1550 // where i is an induction variable, c1 and c2 are loop invariant, 1551 // and a is a constant, we can solve it exactly using the 1552 // Weak-Zero SIV test. 1553 // 1554 // Given 1555 // 1556 // c1 = c2 + a*i 1557 // 1558 // we get 1559 // 1560 // (c1 - c2)/a = i 1561 // 1562 // If i is not an integer, there's no dependence. 1563 // If i < 0 or > UB, there's no dependence. 1564 // If i = 0, the direction is <= and peeling the 1565 // 1st iteration will break the dependence. 1566 // If i = UB, the direction is >= and peeling the 1567 // last iteration will break the dependence. 1568 // Otherwise, the direction is *. 1569 // 1570 // Can prove independence. Failing that, we can sometimes refine 1571 // the directions. Can sometimes show that first or last 1572 // iteration carries all the dependences (so worth peeling). 1573 // 1574 // (see also weakZeroDstSIVtest) 1575 // 1576 // Return true if dependence disproved. 1577 bool DependenceAnalysis::weakZeroSrcSIVtest(const SCEV *DstCoeff, 1578 const SCEV *SrcConst, 1579 const SCEV *DstConst, 1580 const Loop *CurLoop, 1581 unsigned Level, 1582 FullDependence &Result, 1583 Constraint &NewConstraint) const { 1584 // For the WeakSIV test, it's possible the loop isn't common to 1585 // the Src and Dst loops. If it isn't, then there's no need to 1586 // record a direction. 1587 DEBUG(dbgs() << "\tWeak-Zero (src) SIV test\n"); 1588 DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << "\n"); 1589 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1590 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1591 ++WeakZeroSIVapplications; 1592 assert(0 < Level && Level <= MaxLevels && "Level out of range"); 1593 Level--; 1594 Result.Consistent = false; 1595 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); 1596 NewConstraint.setLine(SE->getConstant(Delta->getType(), 0), 1597 DstCoeff, Delta, CurLoop); 1598 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1599 if (isKnownPredicate(CmpInst::ICMP_EQ, SrcConst, DstConst)) { 1600 if (Level < CommonLevels) { 1601 Result.DV[Level].Direction &= Dependence::DVEntry::LE; 1602 Result.DV[Level].PeelFirst = true; 1603 ++WeakZeroSIVsuccesses; 1604 } 1605 return false; // dependences caused by first iteration 1606 } 1607 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1608 if (!ConstCoeff) 1609 return false; 1610 const SCEV *AbsCoeff = 1611 SE->isKnownNegative(ConstCoeff) ? 1612 SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; 1613 const SCEV *NewDelta = 1614 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta; 1615 1616 // check that Delta/SrcCoeff < iteration count 1617 // really check NewDelta < count*AbsCoeff 1618 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1619 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1620 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); 1621 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { 1622 ++WeakZeroSIVindependence; 1623 ++WeakZeroSIVsuccesses; 1624 return true; 1625 } 1626 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { 1627 // dependences caused by last iteration 1628 if (Level < CommonLevels) { 1629 Result.DV[Level].Direction &= Dependence::DVEntry::GE; 1630 Result.DV[Level].PeelLast = true; 1631 ++WeakZeroSIVsuccesses; 1632 } 1633 return false; 1634 } 1635 } 1636 1637 // check that Delta/SrcCoeff >= 0 1638 // really check that NewDelta >= 0 1639 if (SE->isKnownNegative(NewDelta)) { 1640 // No dependence, newDelta < 0 1641 ++WeakZeroSIVindependence; 1642 ++WeakZeroSIVsuccesses; 1643 return true; 1644 } 1645 1646 // if SrcCoeff doesn't divide Delta, then no dependence 1647 if (isa<SCEVConstant>(Delta) && 1648 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { 1649 ++WeakZeroSIVindependence; 1650 ++WeakZeroSIVsuccesses; 1651 return true; 1652 } 1653 return false; 1654 } 1655 1656 1657 // weakZeroDstSIVtest - 1658 // From the paper, Practical Dependence Testing, Section 4.2.2 1659 // 1660 // When we have a pair of subscripts of the form [c1 + a*i] and [c2], 1661 // where i is an induction variable, c1 and c2 are loop invariant, 1662 // and a is a constant, we can solve it exactly using the 1663 // Weak-Zero SIV test. 1664 // 1665 // Given 1666 // 1667 // c1 + a*i = c2 1668 // 1669 // we get 1670 // 1671 // i = (c2 - c1)/a 1672 // 1673 // If i is not an integer, there's no dependence. 1674 // If i < 0 or > UB, there's no dependence. 1675 // If i = 0, the direction is <= and peeling the 1676 // 1st iteration will break the dependence. 1677 // If i = UB, the direction is >= and peeling the 1678 // last iteration will break the dependence. 1679 // Otherwise, the direction is *. 1680 // 1681 // Can prove independence. Failing that, we can sometimes refine 1682 // the directions. Can sometimes show that first or last 1683 // iteration carries all the dependences (so worth peeling). 1684 // 1685 // (see also weakZeroSrcSIVtest) 1686 // 1687 // Return true if dependence disproved. 1688 bool DependenceAnalysis::weakZeroDstSIVtest(const SCEV *SrcCoeff, 1689 const SCEV *SrcConst, 1690 const SCEV *DstConst, 1691 const Loop *CurLoop, 1692 unsigned Level, 1693 FullDependence &Result, 1694 Constraint &NewConstraint) const { 1695 // For the WeakSIV test, it's possible the loop isn't common to the 1696 // Src and Dst loops. If it isn't, then there's no need to record a direction. 1697 DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n"); 1698 DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << "\n"); 1699 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1700 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1701 ++WeakZeroSIVapplications; 1702 assert(0 < Level && Level <= SrcLevels && "Level out of range"); 1703 Level--; 1704 Result.Consistent = false; 1705 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1706 NewConstraint.setLine(SrcCoeff, SE->getConstant(Delta->getType(), 0), 1707 Delta, CurLoop); 1708 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1709 if (isKnownPredicate(CmpInst::ICMP_EQ, DstConst, SrcConst)) { 1710 if (Level < CommonLevels) { 1711 Result.DV[Level].Direction &= Dependence::DVEntry::LE; 1712 Result.DV[Level].PeelFirst = true; 1713 ++WeakZeroSIVsuccesses; 1714 } 1715 return false; // dependences caused by first iteration 1716 } 1717 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1718 if (!ConstCoeff) 1719 return false; 1720 const SCEV *AbsCoeff = 1721 SE->isKnownNegative(ConstCoeff) ? 1722 SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; 1723 const SCEV *NewDelta = 1724 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta; 1725 1726 // check that Delta/SrcCoeff < iteration count 1727 // really check NewDelta < count*AbsCoeff 1728 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1729 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1730 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); 1731 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { 1732 ++WeakZeroSIVindependence; 1733 ++WeakZeroSIVsuccesses; 1734 return true; 1735 } 1736 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { 1737 // dependences caused by last iteration 1738 if (Level < CommonLevels) { 1739 Result.DV[Level].Direction &= Dependence::DVEntry::GE; 1740 Result.DV[Level].PeelLast = true; 1741 ++WeakZeroSIVsuccesses; 1742 } 1743 return false; 1744 } 1745 } 1746 1747 // check that Delta/SrcCoeff >= 0 1748 // really check that NewDelta >= 0 1749 if (SE->isKnownNegative(NewDelta)) { 1750 // No dependence, newDelta < 0 1751 ++WeakZeroSIVindependence; 1752 ++WeakZeroSIVsuccesses; 1753 return true; 1754 } 1755 1756 // if SrcCoeff doesn't divide Delta, then no dependence 1757 if (isa<SCEVConstant>(Delta) && 1758 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { 1759 ++WeakZeroSIVindependence; 1760 ++WeakZeroSIVsuccesses; 1761 return true; 1762 } 1763 return false; 1764 } 1765 1766 1767 // exactRDIVtest - Tests the RDIV subscript pair for dependence. 1768 // Things of the form [c1 + a*i] and [c2 + b*j], 1769 // where i and j are induction variable, c1 and c2 are loop invariant, 1770 // and a and b are constants. 1771 // Returns true if any possible dependence is disproved. 1772 // Marks the result as inconsistant. 1773 // Works in some cases that symbolicRDIVtest doesn't, and vice versa. 1774 bool DependenceAnalysis::exactRDIVtest(const SCEV *SrcCoeff, 1775 const SCEV *DstCoeff, 1776 const SCEV *SrcConst, 1777 const SCEV *DstConst, 1778 const Loop *SrcLoop, 1779 const Loop *DstLoop, 1780 FullDependence &Result) const { 1781 DEBUG(dbgs() << "\tExact RDIV test\n"); 1782 DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); 1783 DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); 1784 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1785 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1786 ++ExactRDIVapplications; 1787 Result.Consistent = false; 1788 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1789 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1790 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1791 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1792 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1793 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) 1794 return false; 1795 1796 // find gcd 1797 APInt G, X, Y; 1798 APInt AM = ConstSrcCoeff->getValue()->getValue(); 1799 APInt BM = ConstDstCoeff->getValue()->getValue(); 1800 unsigned Bits = AM.getBitWidth(); 1801 if (findGCD(Bits, AM, BM, ConstDelta->getValue()->getValue(), G, X, Y)) { 1802 // gcd doesn't divide Delta, no dependence 1803 ++ExactRDIVindependence; 1804 return true; 1805 } 1806 1807 DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); 1808 1809 // since SCEV construction seems to normalize, LM = 0 1810 APInt SrcUM(Bits, 1, true); 1811 bool SrcUMvalid = false; 1812 // SrcUM is perhaps unavailable, let's check 1813 if (const SCEVConstant *UpperBound = 1814 collectConstantUpperBound(SrcLoop, Delta->getType())) { 1815 SrcUM = UpperBound->getValue()->getValue(); 1816 DEBUG(dbgs() << "\t SrcUM = " << SrcUM << "\n"); 1817 SrcUMvalid = true; 1818 } 1819 1820 APInt DstUM(Bits, 1, true); 1821 bool DstUMvalid = false; 1822 // UM is perhaps unavailable, let's check 1823 if (const SCEVConstant *UpperBound = 1824 collectConstantUpperBound(DstLoop, Delta->getType())) { 1825 DstUM = UpperBound->getValue()->getValue(); 1826 DEBUG(dbgs() << "\t DstUM = " << DstUM << "\n"); 1827 DstUMvalid = true; 1828 } 1829 1830 APInt TU(APInt::getSignedMaxValue(Bits)); 1831 APInt TL(APInt::getSignedMinValue(Bits)); 1832 1833 // test(BM/G, LM-X) and test(-BM/G, X-UM) 1834 APInt TMUL = BM.sdiv(G); 1835 if (TMUL.sgt(0)) { 1836 TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL)); 1837 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1838 if (SrcUMvalid) { 1839 TU = minAPInt(TU, floorOfQuotient(SrcUM - X, TMUL)); 1840 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1841 } 1842 } 1843 else { 1844 TU = minAPInt(TU, floorOfQuotient(-X, TMUL)); 1845 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1846 if (SrcUMvalid) { 1847 TL = maxAPInt(TL, ceilingOfQuotient(SrcUM - X, TMUL)); 1848 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1849 } 1850 } 1851 1852 // test(AM/G, LM-Y) and test(-AM/G, Y-UM) 1853 TMUL = AM.sdiv(G); 1854 if (TMUL.sgt(0)) { 1855 TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL)); 1856 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1857 if (DstUMvalid) { 1858 TU = minAPInt(TU, floorOfQuotient(DstUM - Y, TMUL)); 1859 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1860 } 1861 } 1862 else { 1863 TU = minAPInt(TU, floorOfQuotient(-Y, TMUL)); 1864 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1865 if (DstUMvalid) { 1866 TL = maxAPInt(TL, ceilingOfQuotient(DstUM - Y, TMUL)); 1867 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1868 } 1869 } 1870 if (TL.sgt(TU)) 1871 ++ExactRDIVindependence; 1872 return TL.sgt(TU); 1873 } 1874 1875 1876 // symbolicRDIVtest - 1877 // In Section 4.5 of the Practical Dependence Testing paper,the authors 1878 // introduce a special case of Banerjee's Inequalities (also called the 1879 // Extreme-Value Test) that can handle some of the SIV and RDIV cases, 1880 // particularly cases with symbolics. Since it's only able to disprove 1881 // dependence (not compute distances or directions), we'll use it as a 1882 // fall back for the other tests. 1883 // 1884 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] 1885 // where i and j are induction variables and c1 and c2 are loop invariants, 1886 // we can use the symbolic tests to disprove some dependences, serving as a 1887 // backup for the RDIV test. Note that i and j can be the same variable, 1888 // letting this test serve as a backup for the various SIV tests. 1889 // 1890 // For a dependence to exist, c1 + a1*i must equal c2 + a2*j for some 1891 // 0 <= i <= N1 and some 0 <= j <= N2, where N1 and N2 are the (normalized) 1892 // loop bounds for the i and j loops, respectively. So, ... 1893 // 1894 // c1 + a1*i = c2 + a2*j 1895 // a1*i - a2*j = c2 - c1 1896 // 1897 // To test for a dependence, we compute c2 - c1 and make sure it's in the 1898 // range of the maximum and minimum possible values of a1*i - a2*j. 1899 // Considering the signs of a1 and a2, we have 4 possible cases: 1900 // 1901 // 1) If a1 >= 0 and a2 >= 0, then 1902 // a1*0 - a2*N2 <= c2 - c1 <= a1*N1 - a2*0 1903 // -a2*N2 <= c2 - c1 <= a1*N1 1904 // 1905 // 2) If a1 >= 0 and a2 <= 0, then 1906 // a1*0 - a2*0 <= c2 - c1 <= a1*N1 - a2*N2 1907 // 0 <= c2 - c1 <= a1*N1 - a2*N2 1908 // 1909 // 3) If a1 <= 0 and a2 >= 0, then 1910 // a1*N1 - a2*N2 <= c2 - c1 <= a1*0 - a2*0 1911 // a1*N1 - a2*N2 <= c2 - c1 <= 0 1912 // 1913 // 4) If a1 <= 0 and a2 <= 0, then 1914 // a1*N1 - a2*0 <= c2 - c1 <= a1*0 - a2*N2 1915 // a1*N1 <= c2 - c1 <= -a2*N2 1916 // 1917 // return true if dependence disproved 1918 bool DependenceAnalysis::symbolicRDIVtest(const SCEV *A1, 1919 const SCEV *A2, 1920 const SCEV *C1, 1921 const SCEV *C2, 1922 const Loop *Loop1, 1923 const Loop *Loop2) const { 1924 ++SymbolicRDIVapplications; 1925 DEBUG(dbgs() << "\ttry symbolic RDIV test\n"); 1926 DEBUG(dbgs() << "\t A1 = " << *A1); 1927 DEBUG(dbgs() << ", type = " << *A1->getType() << "\n"); 1928 DEBUG(dbgs() << "\t A2 = " << *A2 << "\n"); 1929 DEBUG(dbgs() << "\t C1 = " << *C1 << "\n"); 1930 DEBUG(dbgs() << "\t C2 = " << *C2 << "\n"); 1931 const SCEV *N1 = collectUpperBound(Loop1, A1->getType()); 1932 const SCEV *N2 = collectUpperBound(Loop2, A1->getType()); 1933 DEBUG(if (N1) dbgs() << "\t N1 = " << *N1 << "\n"); 1934 DEBUG(if (N2) dbgs() << "\t N2 = " << *N2 << "\n"); 1935 const SCEV *C2_C1 = SE->getMinusSCEV(C2, C1); 1936 const SCEV *C1_C2 = SE->getMinusSCEV(C1, C2); 1937 DEBUG(dbgs() << "\t C2 - C1 = " << *C2_C1 << "\n"); 1938 DEBUG(dbgs() << "\t C1 - C2 = " << *C1_C2 << "\n"); 1939 if (SE->isKnownNonNegative(A1)) { 1940 if (SE->isKnownNonNegative(A2)) { 1941 // A1 >= 0 && A2 >= 0 1942 if (N1) { 1943 // make sure that c2 - c1 <= a1*N1 1944 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 1945 DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); 1946 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1)) { 1947 ++SymbolicRDIVindependence; 1948 return true; 1949 } 1950 } 1951 if (N2) { 1952 // make sure that -a2*N2 <= c2 - c1, or a2*N2 >= c1 - c2 1953 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 1954 DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); 1955 if (isKnownPredicate(CmpInst::ICMP_SLT, A2N2, C1_C2)) { 1956 ++SymbolicRDIVindependence; 1957 return true; 1958 } 1959 } 1960 } 1961 else if (SE->isKnownNonPositive(A2)) { 1962 // a1 >= 0 && a2 <= 0 1963 if (N1 && N2) { 1964 // make sure that c2 - c1 <= a1*N1 - a2*N2 1965 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 1966 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 1967 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); 1968 DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); 1969 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1_A2N2)) { 1970 ++SymbolicRDIVindependence; 1971 return true; 1972 } 1973 } 1974 // make sure that 0 <= c2 - c1 1975 if (SE->isKnownNegative(C2_C1)) { 1976 ++SymbolicRDIVindependence; 1977 return true; 1978 } 1979 } 1980 } 1981 else if (SE->isKnownNonPositive(A1)) { 1982 if (SE->isKnownNonNegative(A2)) { 1983 // a1 <= 0 && a2 >= 0 1984 if (N1 && N2) { 1985 // make sure that a1*N1 - a2*N2 <= c2 - c1 1986 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 1987 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 1988 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); 1989 DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); 1990 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1_A2N2, C2_C1)) { 1991 ++SymbolicRDIVindependence; 1992 return true; 1993 } 1994 } 1995 // make sure that c2 - c1 <= 0 1996 if (SE->isKnownPositive(C2_C1)) { 1997 ++SymbolicRDIVindependence; 1998 return true; 1999 } 2000 } 2001 else if (SE->isKnownNonPositive(A2)) { 2002 // a1 <= 0 && a2 <= 0 2003 if (N1) { 2004 // make sure that a1*N1 <= c2 - c1 2005 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2006 DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); 2007 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1, C2_C1)) { 2008 ++SymbolicRDIVindependence; 2009 return true; 2010 } 2011 } 2012 if (N2) { 2013 // make sure that c2 - c1 <= -a2*N2, or c1 - c2 >= a2*N2 2014 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2015 DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); 2016 if (isKnownPredicate(CmpInst::ICMP_SLT, C1_C2, A2N2)) { 2017 ++SymbolicRDIVindependence; 2018 return true; 2019 } 2020 } 2021 } 2022 } 2023 return false; 2024 } 2025 2026 2027 // testSIV - 2028 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 - a2*i] 2029 // where i is an induction variable, c1 and c2 are loop invariant, and a1 and 2030 // a2 are constant, we attack it with an SIV test. While they can all be 2031 // solved with the Exact SIV test, it's worthwhile to use simpler tests when 2032 // they apply; they're cheaper and sometimes more precise. 2033 // 2034 // Return true if dependence disproved. 2035 bool DependenceAnalysis::testSIV(const SCEV *Src, 2036 const SCEV *Dst, 2037 unsigned &Level, 2038 FullDependence &Result, 2039 Constraint &NewConstraint, 2040 const SCEV *&SplitIter) const { 2041 DEBUG(dbgs() << " src = " << *Src << "\n"); 2042 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2043 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); 2044 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); 2045 if (SrcAddRec && DstAddRec) { 2046 const SCEV *SrcConst = SrcAddRec->getStart(); 2047 const SCEV *DstConst = DstAddRec->getStart(); 2048 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2049 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); 2050 const Loop *CurLoop = SrcAddRec->getLoop(); 2051 assert(CurLoop == DstAddRec->getLoop() && 2052 "both loops in SIV should be same"); 2053 Level = mapSrcLoop(CurLoop); 2054 bool disproven; 2055 if (SrcCoeff == DstCoeff) 2056 disproven = strongSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2057 Level, Result, NewConstraint); 2058 else if (SrcCoeff == SE->getNegativeSCEV(DstCoeff)) 2059 disproven = weakCrossingSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2060 Level, Result, NewConstraint, SplitIter); 2061 else 2062 disproven = exactSIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, 2063 Level, Result, NewConstraint); 2064 return disproven || 2065 gcdMIVtest(Src, Dst, Result) || 2066 symbolicRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, CurLoop); 2067 } 2068 if (SrcAddRec) { 2069 const SCEV *SrcConst = SrcAddRec->getStart(); 2070 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2071 const SCEV *DstConst = Dst; 2072 const Loop *CurLoop = SrcAddRec->getLoop(); 2073 Level = mapSrcLoop(CurLoop); 2074 return weakZeroDstSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2075 Level, Result, NewConstraint) || 2076 gcdMIVtest(Src, Dst, Result); 2077 } 2078 if (DstAddRec) { 2079 const SCEV *DstConst = DstAddRec->getStart(); 2080 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); 2081 const SCEV *SrcConst = Src; 2082 const Loop *CurLoop = DstAddRec->getLoop(); 2083 Level = mapDstLoop(CurLoop); 2084 return weakZeroSrcSIVtest(DstCoeff, SrcConst, DstConst, 2085 CurLoop, Level, Result, NewConstraint) || 2086 gcdMIVtest(Src, Dst, Result); 2087 } 2088 llvm_unreachable("SIV test expected at least one AddRec"); 2089 return false; 2090 } 2091 2092 2093 // testRDIV - 2094 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] 2095 // where i and j are induction variables, c1 and c2 are loop invariant, 2096 // and a1 and a2 are constant, we can solve it exactly with an easy adaptation 2097 // of the Exact SIV test, the Restricted Double Index Variable (RDIV) test. 2098 // It doesn't make sense to talk about distance or direction in this case, 2099 // so there's no point in making special versions of the Strong SIV test or 2100 // the Weak-crossing SIV test. 2101 // 2102 // With minor algebra, this test can also be used for things like 2103 // [c1 + a1*i + a2*j][c2]. 2104 // 2105 // Return true if dependence disproved. 2106 bool DependenceAnalysis::testRDIV(const SCEV *Src, 2107 const SCEV *Dst, 2108 FullDependence &Result) const { 2109 // we have 3 possible situations here: 2110 // 1) [a*i + b] and [c*j + d] 2111 // 2) [a*i + c*j + b] and [d] 2112 // 3) [b] and [a*i + c*j + d] 2113 // We need to find what we've got and get organized 2114 2115 const SCEV *SrcConst, *DstConst; 2116 const SCEV *SrcCoeff, *DstCoeff; 2117 const Loop *SrcLoop, *DstLoop; 2118 2119 DEBUG(dbgs() << " src = " << *Src << "\n"); 2120 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2121 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); 2122 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); 2123 if (SrcAddRec && DstAddRec) { 2124 SrcConst = SrcAddRec->getStart(); 2125 SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2126 SrcLoop = SrcAddRec->getLoop(); 2127 DstConst = DstAddRec->getStart(); 2128 DstCoeff = DstAddRec->getStepRecurrence(*SE); 2129 DstLoop = DstAddRec->getLoop(); 2130 } 2131 else if (SrcAddRec) { 2132 if (const SCEVAddRecExpr *tmpAddRec = 2133 dyn_cast<SCEVAddRecExpr>(SrcAddRec->getStart())) { 2134 SrcConst = tmpAddRec->getStart(); 2135 SrcCoeff = tmpAddRec->getStepRecurrence(*SE); 2136 SrcLoop = tmpAddRec->getLoop(); 2137 DstConst = Dst; 2138 DstCoeff = SE->getNegativeSCEV(SrcAddRec->getStepRecurrence(*SE)); 2139 DstLoop = SrcAddRec->getLoop(); 2140 } 2141 else 2142 llvm_unreachable("RDIV reached by surprising SCEVs"); 2143 } 2144 else if (DstAddRec) { 2145 if (const SCEVAddRecExpr *tmpAddRec = 2146 dyn_cast<SCEVAddRecExpr>(DstAddRec->getStart())) { 2147 DstConst = tmpAddRec->getStart(); 2148 DstCoeff = tmpAddRec->getStepRecurrence(*SE); 2149 DstLoop = tmpAddRec->getLoop(); 2150 SrcConst = Src; 2151 SrcCoeff = SE->getNegativeSCEV(DstAddRec->getStepRecurrence(*SE)); 2152 SrcLoop = DstAddRec->getLoop(); 2153 } 2154 else 2155 llvm_unreachable("RDIV reached by surprising SCEVs"); 2156 } 2157 else 2158 llvm_unreachable("RDIV expected at least one AddRec"); 2159 return exactRDIVtest(SrcCoeff, DstCoeff, 2160 SrcConst, DstConst, 2161 SrcLoop, DstLoop, 2162 Result) || 2163 gcdMIVtest(Src, Dst, Result) || 2164 symbolicRDIVtest(SrcCoeff, DstCoeff, 2165 SrcConst, DstConst, 2166 SrcLoop, DstLoop); 2167 } 2168 2169 2170 // Tests the single-subscript MIV pair (Src and Dst) for dependence. 2171 // Return true if dependence disproved. 2172 // Can sometimes refine direction vectors. 2173 bool DependenceAnalysis::testMIV(const SCEV *Src, 2174 const SCEV *Dst, 2175 const SmallBitVector &Loops, 2176 FullDependence &Result) const { 2177 DEBUG(dbgs() << " src = " << *Src << "\n"); 2178 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2179 Result.Consistent = false; 2180 return gcdMIVtest(Src, Dst, Result) || 2181 banerjeeMIVtest(Src, Dst, Loops, Result); 2182 } 2183 2184 2185 // Given a product, e.g., 10*X*Y, returns the first constant operand, 2186 // in this case 10. If there is no constant part, returns NULL. 2187 static 2188 const SCEVConstant *getConstantPart(const SCEVMulExpr *Product) { 2189 for (unsigned Op = 0, Ops = Product->getNumOperands(); Op < Ops; Op++) { 2190 if (const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Product->getOperand(Op))) 2191 return Constant; 2192 } 2193 return NULL; 2194 } 2195 2196 2197 //===----------------------------------------------------------------------===// 2198 // gcdMIVtest - 2199 // Tests an MIV subscript pair for dependence. 2200 // Returns true if any possible dependence is disproved. 2201 // Marks the result as inconsistant. 2202 // Can sometimes disprove the equal direction for 1 or more loops, 2203 // as discussed in Michael Wolfe's book, 2204 // High Performance Compilers for Parallel Computing, page 235. 2205 // 2206 // We spend some effort (code!) to handle cases like 2207 // [10*i + 5*N*j + 15*M + 6], where i and j are induction variables, 2208 // but M and N are just loop-invariant variables. 2209 // This should help us handle linearized subscripts; 2210 // also makes this test a useful backup to the various SIV tests. 2211 // 2212 // It occurs to me that the presence of loop-invariant variables 2213 // changes the nature of the test from "greatest common divisor" 2214 // to "a common divisor!" 2215 bool DependenceAnalysis::gcdMIVtest(const SCEV *Src, 2216 const SCEV *Dst, 2217 FullDependence &Result) const { 2218 DEBUG(dbgs() << "starting gcd\n"); 2219 ++GCDapplications; 2220 unsigned BitWidth = Src->getType()->getIntegerBitWidth(); 2221 APInt RunningGCD = APInt::getNullValue(BitWidth); 2222 2223 // Examine Src coefficients. 2224 // Compute running GCD and record source constant. 2225 // Because we're looking for the constant at the end of the chain, 2226 // we can't quit the loop just because the GCD == 1. 2227 const SCEV *Coefficients = Src; 2228 while (const SCEVAddRecExpr *AddRec = 2229 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2230 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2231 const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Coeff); 2232 if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff)) 2233 // If the coefficient is the product of a constant and other stuff, 2234 // we can use the constant in the GCD computation. 2235 Constant = getConstantPart(Product); 2236 if (!Constant) 2237 return false; 2238 APInt ConstCoeff = Constant->getValue()->getValue(); 2239 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2240 Coefficients = AddRec->getStart(); 2241 } 2242 const SCEV *SrcConst = Coefficients; 2243 2244 // Examine Dst coefficients. 2245 // Compute running GCD and record destination constant. 2246 // Because we're looking for the constant at the end of the chain, 2247 // we can't quit the loop just because the GCD == 1. 2248 Coefficients = Dst; 2249 while (const SCEVAddRecExpr *AddRec = 2250 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2251 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2252 const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Coeff); 2253 if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff)) 2254 // If the coefficient is the product of a constant and other stuff, 2255 // we can use the constant in the GCD computation. 2256 Constant = getConstantPart(Product); 2257 if (!Constant) 2258 return false; 2259 APInt ConstCoeff = Constant->getValue()->getValue(); 2260 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2261 Coefficients = AddRec->getStart(); 2262 } 2263 const SCEV *DstConst = Coefficients; 2264 2265 APInt ExtraGCD = APInt::getNullValue(BitWidth); 2266 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 2267 DEBUG(dbgs() << " Delta = " << *Delta << "\n"); 2268 const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Delta); 2269 if (const SCEVAddExpr *Sum = dyn_cast<SCEVAddExpr>(Delta)) { 2270 // If Delta is a sum of products, we may be able to make further progress. 2271 for (unsigned Op = 0, Ops = Sum->getNumOperands(); Op < Ops; Op++) { 2272 const SCEV *Operand = Sum->getOperand(Op); 2273 if (isa<SCEVConstant>(Operand)) { 2274 assert(!Constant && "Surprised to find multiple constants"); 2275 Constant = cast<SCEVConstant>(Operand); 2276 } 2277 else if (isa<SCEVMulExpr>(Operand)) { 2278 // Search for constant operand to participate in GCD; 2279 // If none found; return false. 2280 const SCEVConstant *ConstOp = 2281 getConstantPart(cast<SCEVMulExpr>(Operand)); 2282 APInt ConstOpValue = ConstOp->getValue()->getValue(); 2283 ExtraGCD = APIntOps::GreatestCommonDivisor(ExtraGCD, 2284 ConstOpValue.abs()); 2285 } 2286 else 2287 return false; 2288 } 2289 } 2290 if (!Constant) 2291 return false; 2292 APInt ConstDelta = cast<SCEVConstant>(Constant)->getValue()->getValue(); 2293 DEBUG(dbgs() << " ConstDelta = " << ConstDelta << "\n"); 2294 if (ConstDelta == 0) 2295 return false; 2296 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ExtraGCD); 2297 DEBUG(dbgs() << " RunningGCD = " << RunningGCD << "\n"); 2298 APInt Remainder = ConstDelta.srem(RunningGCD); 2299 if (Remainder != 0) { 2300 ++GCDindependence; 2301 return true; 2302 } 2303 2304 // Try to disprove equal directions. 2305 // For example, given a subscript pair [3*i + 2*j] and [i' + 2*j' - 1], 2306 // the code above can't disprove the dependence because the GCD = 1. 2307 // So we consider what happen if i = i' and what happens if j = j'. 2308 // If i = i', we can simplify the subscript to [2*i + 2*j] and [2*j' - 1], 2309 // which is infeasible, so we can disallow the = direction for the i level. 2310 // Setting j = j' doesn't help matters, so we end up with a direction vector 2311 // of [<>, *] 2312 // 2313 // Given A[5*i + 10*j*M + 9*M*N] and A[15*i + 20*j*M - 21*N*M + 5], 2314 // we need to remember that the constant part is 5 and the RunningGCD should 2315 // be initialized to ExtraGCD = 30. 2316 DEBUG(dbgs() << " ExtraGCD = " << ExtraGCD << '\n'); 2317 2318 bool Improved = false; 2319 Coefficients = Src; 2320 while (const SCEVAddRecExpr *AddRec = 2321 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2322 Coefficients = AddRec->getStart(); 2323 const Loop *CurLoop = AddRec->getLoop(); 2324 RunningGCD = ExtraGCD; 2325 const SCEV *SrcCoeff = AddRec->getStepRecurrence(*SE); 2326 const SCEV *DstCoeff = SE->getMinusSCEV(SrcCoeff, SrcCoeff); 2327 const SCEV *Inner = Src; 2328 while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { 2329 AddRec = cast<SCEVAddRecExpr>(Inner); 2330 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2331 if (CurLoop == AddRec->getLoop()) 2332 ; // SrcCoeff == Coeff 2333 else { 2334 if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff)) 2335 // If the coefficient is the product of a constant and other stuff, 2336 // we can use the constant in the GCD computation. 2337 Constant = getConstantPart(Product); 2338 else 2339 Constant = cast<SCEVConstant>(Coeff); 2340 APInt ConstCoeff = Constant->getValue()->getValue(); 2341 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2342 } 2343 Inner = AddRec->getStart(); 2344 } 2345 Inner = Dst; 2346 while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { 2347 AddRec = cast<SCEVAddRecExpr>(Inner); 2348 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2349 if (CurLoop == AddRec->getLoop()) 2350 DstCoeff = Coeff; 2351 else { 2352 if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff)) 2353 // If the coefficient is the product of a constant and other stuff, 2354 // we can use the constant in the GCD computation. 2355 Constant = getConstantPart(Product); 2356 else 2357 Constant = cast<SCEVConstant>(Coeff); 2358 APInt ConstCoeff = Constant->getValue()->getValue(); 2359 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2360 } 2361 Inner = AddRec->getStart(); 2362 } 2363 Delta = SE->getMinusSCEV(SrcCoeff, DstCoeff); 2364 if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Delta)) 2365 // If the coefficient is the product of a constant and other stuff, 2366 // we can use the constant in the GCD computation. 2367 Constant = getConstantPart(Product); 2368 else if (isa<SCEVConstant>(Delta)) 2369 Constant = cast<SCEVConstant>(Delta); 2370 else { 2371 // The difference of the two coefficients might not be a product 2372 // or constant, in which case we give up on this direction. 2373 continue; 2374 } 2375 APInt ConstCoeff = Constant->getValue()->getValue(); 2376 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2377 DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n"); 2378 if (RunningGCD != 0) { 2379 Remainder = ConstDelta.srem(RunningGCD); 2380 DEBUG(dbgs() << "\tRemainder = " << Remainder << "\n"); 2381 if (Remainder != 0) { 2382 unsigned Level = mapSrcLoop(CurLoop); 2383 Result.DV[Level - 1].Direction &= unsigned(~Dependence::DVEntry::EQ); 2384 Improved = true; 2385 } 2386 } 2387 } 2388 if (Improved) 2389 ++GCDsuccesses; 2390 DEBUG(dbgs() << "all done\n"); 2391 return false; 2392 } 2393 2394 2395 //===----------------------------------------------------------------------===// 2396 // banerjeeMIVtest - 2397 // Use Banerjee's Inequalities to test an MIV subscript pair. 2398 // (Wolfe, in the race-car book, calls this the Extreme Value Test.) 2399 // Generally follows the discussion in Section 2.5.2 of 2400 // 2401 // Optimizing Supercompilers for Supercomputers 2402 // Michael Wolfe 2403 // 2404 // The inequalities given on page 25 are simplified in that loops are 2405 // normalized so that the lower bound is always 0 and the stride is always 1. 2406 // For example, Wolfe gives 2407 // 2408 // LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2409 // 2410 // where A_k is the coefficient of the kth index in the source subscript, 2411 // B_k is the coefficient of the kth index in the destination subscript, 2412 // U_k is the upper bound of the kth index, L_k is the lower bound of the Kth 2413 // index, and N_k is the stride of the kth index. Since all loops are normalized 2414 // by the SCEV package, N_k = 1 and L_k = 0, allowing us to simplify the 2415 // equation to 2416 // 2417 // LB^<_k = (A^-_k - B_k)^- (U_k - 0 - 1) + (A_k - B_k)0 - B_k 1 2418 // = (A^-_k - B_k)^- (U_k - 1) - B_k 2419 // 2420 // Similar simplifications are possible for the other equations. 2421 // 2422 // When we can't determine the number of iterations for a loop, 2423 // we use NULL as an indicator for the worst case, infinity. 2424 // When computing the upper bound, NULL denotes +inf; 2425 // for the lower bound, NULL denotes -inf. 2426 // 2427 // Return true if dependence disproved. 2428 bool DependenceAnalysis::banerjeeMIVtest(const SCEV *Src, 2429 const SCEV *Dst, 2430 const SmallBitVector &Loops, 2431 FullDependence &Result) const { 2432 DEBUG(dbgs() << "starting Banerjee\n"); 2433 ++BanerjeeApplications; 2434 DEBUG(dbgs() << " Src = " << *Src << '\n'); 2435 const SCEV *A0; 2436 CoefficientInfo *A = collectCoeffInfo(Src, true, A0); 2437 DEBUG(dbgs() << " Dst = " << *Dst << '\n'); 2438 const SCEV *B0; 2439 CoefficientInfo *B = collectCoeffInfo(Dst, false, B0); 2440 BoundInfo *Bound = new BoundInfo[MaxLevels + 1]; 2441 const SCEV *Delta = SE->getMinusSCEV(B0, A0); 2442 DEBUG(dbgs() << "\tDelta = " << *Delta << '\n'); 2443 2444 // Compute bounds for all the * directions. 2445 DEBUG(dbgs() << "\tBounds[*]\n"); 2446 for (unsigned K = 1; K <= MaxLevels; ++K) { 2447 Bound[K].Iterations = A[K].Iterations ? A[K].Iterations : B[K].Iterations; 2448 Bound[K].Direction = Dependence::DVEntry::ALL; 2449 Bound[K].DirSet = Dependence::DVEntry::NONE; 2450 findBoundsALL(A, B, Bound, K); 2451 #ifndef NDEBUG 2452 DEBUG(dbgs() << "\t " << K << '\t'); 2453 if (Bound[K].Lower[Dependence::DVEntry::ALL]) 2454 DEBUG(dbgs() << *Bound[K].Lower[Dependence::DVEntry::ALL] << '\t'); 2455 else 2456 DEBUG(dbgs() << "-inf\t"); 2457 if (Bound[K].Upper[Dependence::DVEntry::ALL]) 2458 DEBUG(dbgs() << *Bound[K].Upper[Dependence::DVEntry::ALL] << '\n'); 2459 else 2460 DEBUG(dbgs() << "+inf\n"); 2461 #endif 2462 } 2463 2464 // Test the *, *, *, ... case. 2465 bool Disproved = false; 2466 if (testBounds(Dependence::DVEntry::ALL, 0, Bound, Delta)) { 2467 // Explore the direction vector hierarchy. 2468 unsigned DepthExpanded = 0; 2469 unsigned NewDeps = exploreDirections(1, A, B, Bound, 2470 Loops, DepthExpanded, Delta); 2471 if (NewDeps > 0) { 2472 bool Improved = false; 2473 for (unsigned K = 1; K <= CommonLevels; ++K) { 2474 if (Loops[K]) { 2475 unsigned Old = Result.DV[K - 1].Direction; 2476 Result.DV[K - 1].Direction = Old & Bound[K].DirSet; 2477 Improved |= Old != Result.DV[K - 1].Direction; 2478 if (!Result.DV[K - 1].Direction) { 2479 Improved = false; 2480 Disproved = true; 2481 break; 2482 } 2483 } 2484 } 2485 if (Improved) 2486 ++BanerjeeSuccesses; 2487 } 2488 else { 2489 ++BanerjeeIndependence; 2490 Disproved = true; 2491 } 2492 } 2493 else { 2494 ++BanerjeeIndependence; 2495 Disproved = true; 2496 } 2497 delete [] Bound; 2498 delete [] A; 2499 delete [] B; 2500 return Disproved; 2501 } 2502 2503 2504 // Hierarchically expands the direction vector 2505 // search space, combining the directions of discovered dependences 2506 // in the DirSet field of Bound. Returns the number of distinct 2507 // dependences discovered. If the dependence is disproved, 2508 // it will return 0. 2509 unsigned DependenceAnalysis::exploreDirections(unsigned Level, 2510 CoefficientInfo *A, 2511 CoefficientInfo *B, 2512 BoundInfo *Bound, 2513 const SmallBitVector &Loops, 2514 unsigned &DepthExpanded, 2515 const SCEV *Delta) const { 2516 if (Level > CommonLevels) { 2517 // record result 2518 DEBUG(dbgs() << "\t["); 2519 for (unsigned K = 1; K <= CommonLevels; ++K) { 2520 if (Loops[K]) { 2521 Bound[K].DirSet |= Bound[K].Direction; 2522 #ifndef NDEBUG 2523 switch (Bound[K].Direction) { 2524 case Dependence::DVEntry::LT: 2525 DEBUG(dbgs() << " <"); 2526 break; 2527 case Dependence::DVEntry::EQ: 2528 DEBUG(dbgs() << " ="); 2529 break; 2530 case Dependence::DVEntry::GT: 2531 DEBUG(dbgs() << " >"); 2532 break; 2533 case Dependence::DVEntry::ALL: 2534 DEBUG(dbgs() << " *"); 2535 break; 2536 default: 2537 llvm_unreachable("unexpected Bound[K].Direction"); 2538 } 2539 #endif 2540 } 2541 } 2542 DEBUG(dbgs() << " ]\n"); 2543 return 1; 2544 } 2545 if (Loops[Level]) { 2546 if (Level > DepthExpanded) { 2547 DepthExpanded = Level; 2548 // compute bounds for <, =, > at current level 2549 findBoundsLT(A, B, Bound, Level); 2550 findBoundsGT(A, B, Bound, Level); 2551 findBoundsEQ(A, B, Bound, Level); 2552 #ifndef NDEBUG 2553 DEBUG(dbgs() << "\tBound for level = " << Level << '\n'); 2554 DEBUG(dbgs() << "\t <\t"); 2555 if (Bound[Level].Lower[Dependence::DVEntry::LT]) 2556 DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::LT] << '\t'); 2557 else 2558 DEBUG(dbgs() << "-inf\t"); 2559 if (Bound[Level].Upper[Dependence::DVEntry::LT]) 2560 DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::LT] << '\n'); 2561 else 2562 DEBUG(dbgs() << "+inf\n"); 2563 DEBUG(dbgs() << "\t =\t"); 2564 if (Bound[Level].Lower[Dependence::DVEntry::EQ]) 2565 DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::EQ] << '\t'); 2566 else 2567 DEBUG(dbgs() << "-inf\t"); 2568 if (Bound[Level].Upper[Dependence::DVEntry::EQ]) 2569 DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::EQ] << '\n'); 2570 else 2571 DEBUG(dbgs() << "+inf\n"); 2572 DEBUG(dbgs() << "\t >\t"); 2573 if (Bound[Level].Lower[Dependence::DVEntry::GT]) 2574 DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::GT] << '\t'); 2575 else 2576 DEBUG(dbgs() << "-inf\t"); 2577 if (Bound[Level].Upper[Dependence::DVEntry::GT]) 2578 DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::GT] << '\n'); 2579 else 2580 DEBUG(dbgs() << "+inf\n"); 2581 #endif 2582 } 2583 2584 unsigned NewDeps = 0; 2585 2586 // test bounds for <, *, *, ... 2587 if (testBounds(Dependence::DVEntry::LT, Level, Bound, Delta)) 2588 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2589 Loops, DepthExpanded, Delta); 2590 2591 // Test bounds for =, *, *, ... 2592 if (testBounds(Dependence::DVEntry::EQ, Level, Bound, Delta)) 2593 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2594 Loops, DepthExpanded, Delta); 2595 2596 // test bounds for >, *, *, ... 2597 if (testBounds(Dependence::DVEntry::GT, Level, Bound, Delta)) 2598 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2599 Loops, DepthExpanded, Delta); 2600 2601 Bound[Level].Direction = Dependence::DVEntry::ALL; 2602 return NewDeps; 2603 } 2604 else 2605 return exploreDirections(Level + 1, A, B, Bound, Loops, DepthExpanded, Delta); 2606 } 2607 2608 2609 // Returns true iff the current bounds are plausible. 2610 bool DependenceAnalysis::testBounds(unsigned char DirKind, 2611 unsigned Level, 2612 BoundInfo *Bound, 2613 const SCEV *Delta) const { 2614 Bound[Level].Direction = DirKind; 2615 if (const SCEV *LowerBound = getLowerBound(Bound)) 2616 if (isKnownPredicate(CmpInst::ICMP_SGT, LowerBound, Delta)) 2617 return false; 2618 if (const SCEV *UpperBound = getUpperBound(Bound)) 2619 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, UpperBound)) 2620 return false; 2621 return true; 2622 } 2623 2624 2625 // Computes the upper and lower bounds for level K 2626 // using the * direction. Records them in Bound. 2627 // Wolfe gives the equations 2628 // 2629 // LB^*_k = (A^-_k - B^+_k)(U_k - L_k) + (A_k - B_k)L_k 2630 // UB^*_k = (A^+_k - B^-_k)(U_k - L_k) + (A_k - B_k)L_k 2631 // 2632 // Since we normalize loops, we can simplify these equations to 2633 // 2634 // LB^*_k = (A^-_k - B^+_k)U_k 2635 // UB^*_k = (A^+_k - B^-_k)U_k 2636 // 2637 // We must be careful to handle the case where the upper bound is unknown. 2638 // Note that the lower bound is always <= 0 2639 // and the upper bound is always >= 0. 2640 void DependenceAnalysis::findBoundsALL(CoefficientInfo *A, 2641 CoefficientInfo *B, 2642 BoundInfo *Bound, 2643 unsigned K) const { 2644 Bound[K].Lower[Dependence::DVEntry::ALL] = NULL; // Default value = -infinity. 2645 Bound[K].Upper[Dependence::DVEntry::ALL] = NULL; // Default value = +infinity. 2646 if (Bound[K].Iterations) { 2647 Bound[K].Lower[Dependence::DVEntry::ALL] = 2648 SE->getMulExpr(SE->getMinusSCEV(A[K].NegPart, B[K].PosPart), 2649 Bound[K].Iterations); 2650 Bound[K].Upper[Dependence::DVEntry::ALL] = 2651 SE->getMulExpr(SE->getMinusSCEV(A[K].PosPart, B[K].NegPart), 2652 Bound[K].Iterations); 2653 } 2654 else { 2655 // If the difference is 0, we won't need to know the number of iterations. 2656 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].NegPart, B[K].PosPart)) 2657 Bound[K].Lower[Dependence::DVEntry::ALL] = 2658 SE->getConstant(A[K].Coeff->getType(), 0); 2659 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].PosPart, B[K].NegPart)) 2660 Bound[K].Upper[Dependence::DVEntry::ALL] = 2661 SE->getConstant(A[K].Coeff->getType(), 0); 2662 } 2663 } 2664 2665 2666 // Computes the upper and lower bounds for level K 2667 // using the = direction. Records them in Bound. 2668 // Wolfe gives the equations 2669 // 2670 // LB^=_k = (A_k - B_k)^- (U_k - L_k) + (A_k - B_k)L_k 2671 // UB^=_k = (A_k - B_k)^+ (U_k - L_k) + (A_k - B_k)L_k 2672 // 2673 // Since we normalize loops, we can simplify these equations to 2674 // 2675 // LB^=_k = (A_k - B_k)^- U_k 2676 // UB^=_k = (A_k - B_k)^+ U_k 2677 // 2678 // We must be careful to handle the case where the upper bound is unknown. 2679 // Note that the lower bound is always <= 0 2680 // and the upper bound is always >= 0. 2681 void DependenceAnalysis::findBoundsEQ(CoefficientInfo *A, 2682 CoefficientInfo *B, 2683 BoundInfo *Bound, 2684 unsigned K) const { 2685 Bound[K].Lower[Dependence::DVEntry::EQ] = NULL; // Default value = -infinity. 2686 Bound[K].Upper[Dependence::DVEntry::EQ] = NULL; // Default value = +infinity. 2687 if (Bound[K].Iterations) { 2688 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); 2689 const SCEV *NegativePart = getNegativePart(Delta); 2690 Bound[K].Lower[Dependence::DVEntry::EQ] = 2691 SE->getMulExpr(NegativePart, Bound[K].Iterations); 2692 const SCEV *PositivePart = getPositivePart(Delta); 2693 Bound[K].Upper[Dependence::DVEntry::EQ] = 2694 SE->getMulExpr(PositivePart, Bound[K].Iterations); 2695 } 2696 else { 2697 // If the positive/negative part of the difference is 0, 2698 // we won't need to know the number of iterations. 2699 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); 2700 const SCEV *NegativePart = getNegativePart(Delta); 2701 if (NegativePart->isZero()) 2702 Bound[K].Lower[Dependence::DVEntry::EQ] = NegativePart; // Zero 2703 const SCEV *PositivePart = getPositivePart(Delta); 2704 if (PositivePart->isZero()) 2705 Bound[K].Upper[Dependence::DVEntry::EQ] = PositivePart; // Zero 2706 } 2707 } 2708 2709 2710 // Computes the upper and lower bounds for level K 2711 // using the < direction. Records them in Bound. 2712 // Wolfe gives the equations 2713 // 2714 // LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2715 // UB^<_k = (A^+_k - B_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2716 // 2717 // Since we normalize loops, we can simplify these equations to 2718 // 2719 // LB^<_k = (A^-_k - B_k)^- (U_k - 1) - B_k 2720 // UB^<_k = (A^+_k - B_k)^+ (U_k - 1) - B_k 2721 // 2722 // We must be careful to handle the case where the upper bound is unknown. 2723 void DependenceAnalysis::findBoundsLT(CoefficientInfo *A, 2724 CoefficientInfo *B, 2725 BoundInfo *Bound, 2726 unsigned K) const { 2727 Bound[K].Lower[Dependence::DVEntry::LT] = NULL; // Default value = -infinity. 2728 Bound[K].Upper[Dependence::DVEntry::LT] = NULL; // Default value = +infinity. 2729 if (Bound[K].Iterations) { 2730 const SCEV *Iter_1 = 2731 SE->getMinusSCEV(Bound[K].Iterations, 2732 SE->getConstant(Bound[K].Iterations->getType(), 1)); 2733 const SCEV *NegPart = 2734 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); 2735 Bound[K].Lower[Dependence::DVEntry::LT] = 2736 SE->getMinusSCEV(SE->getMulExpr(NegPart, Iter_1), B[K].Coeff); 2737 const SCEV *PosPart = 2738 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); 2739 Bound[K].Upper[Dependence::DVEntry::LT] = 2740 SE->getMinusSCEV(SE->getMulExpr(PosPart, Iter_1), B[K].Coeff); 2741 } 2742 else { 2743 // If the positive/negative part of the difference is 0, 2744 // we won't need to know the number of iterations. 2745 const SCEV *NegPart = 2746 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); 2747 if (NegPart->isZero()) 2748 Bound[K].Lower[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); 2749 const SCEV *PosPart = 2750 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); 2751 if (PosPart->isZero()) 2752 Bound[K].Upper[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); 2753 } 2754 } 2755 2756 2757 // Computes the upper and lower bounds for level K 2758 // using the > direction. Records them in Bound. 2759 // Wolfe gives the equations 2760 // 2761 // LB^>_k = (A_k - B^+_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k 2762 // UB^>_k = (A_k - B^-_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k 2763 // 2764 // Since we normalize loops, we can simplify these equations to 2765 // 2766 // LB^>_k = (A_k - B^+_k)^- (U_k - 1) + A_k 2767 // UB^>_k = (A_k - B^-_k)^+ (U_k - 1) + A_k 2768 // 2769 // We must be careful to handle the case where the upper bound is unknown. 2770 void DependenceAnalysis::findBoundsGT(CoefficientInfo *A, 2771 CoefficientInfo *B, 2772 BoundInfo *Bound, 2773 unsigned K) const { 2774 Bound[K].Lower[Dependence::DVEntry::GT] = NULL; // Default value = -infinity. 2775 Bound[K].Upper[Dependence::DVEntry::GT] = NULL; // Default value = +infinity. 2776 if (Bound[K].Iterations) { 2777 const SCEV *Iter_1 = 2778 SE->getMinusSCEV(Bound[K].Iterations, 2779 SE->getConstant(Bound[K].Iterations->getType(), 1)); 2780 const SCEV *NegPart = 2781 getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); 2782 Bound[K].Lower[Dependence::DVEntry::GT] = 2783 SE->getAddExpr(SE->getMulExpr(NegPart, Iter_1), A[K].Coeff); 2784 const SCEV *PosPart = 2785 getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); 2786 Bound[K].Upper[Dependence::DVEntry::GT] = 2787 SE->getAddExpr(SE->getMulExpr(PosPart, Iter_1), A[K].Coeff); 2788 } 2789 else { 2790 // If the positive/negative part of the difference is 0, 2791 // we won't need to know the number of iterations. 2792 const SCEV *NegPart = getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); 2793 if (NegPart->isZero()) 2794 Bound[K].Lower[Dependence::DVEntry::GT] = A[K].Coeff; 2795 const SCEV *PosPart = getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); 2796 if (PosPart->isZero()) 2797 Bound[K].Upper[Dependence::DVEntry::GT] = A[K].Coeff; 2798 } 2799 } 2800 2801 2802 // X^+ = max(X, 0) 2803 const SCEV *DependenceAnalysis::getPositivePart(const SCEV *X) const { 2804 return SE->getSMaxExpr(X, SE->getConstant(X->getType(), 0)); 2805 } 2806 2807 2808 // X^- = min(X, 0) 2809 const SCEV *DependenceAnalysis::getNegativePart(const SCEV *X) const { 2810 return SE->getSMinExpr(X, SE->getConstant(X->getType(), 0)); 2811 } 2812 2813 2814 // Walks through the subscript, 2815 // collecting each coefficient, the associated loop bounds, 2816 // and recording its positive and negative parts for later use. 2817 DependenceAnalysis::CoefficientInfo * 2818 DependenceAnalysis::collectCoeffInfo(const SCEV *Subscript, 2819 bool SrcFlag, 2820 const SCEV *&Constant) const { 2821 const SCEV *Zero = SE->getConstant(Subscript->getType(), 0); 2822 CoefficientInfo *CI = new CoefficientInfo[MaxLevels + 1]; 2823 for (unsigned K = 1; K <= MaxLevels; ++K) { 2824 CI[K].Coeff = Zero; 2825 CI[K].PosPart = Zero; 2826 CI[K].NegPart = Zero; 2827 CI[K].Iterations = NULL; 2828 } 2829 while (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Subscript)) { 2830 const Loop *L = AddRec->getLoop(); 2831 unsigned K = SrcFlag ? mapSrcLoop(L) : mapDstLoop(L); 2832 CI[K].Coeff = AddRec->getStepRecurrence(*SE); 2833 CI[K].PosPart = getPositivePart(CI[K].Coeff); 2834 CI[K].NegPart = getNegativePart(CI[K].Coeff); 2835 CI[K].Iterations = collectUpperBound(L, Subscript->getType()); 2836 Subscript = AddRec->getStart(); 2837 } 2838 Constant = Subscript; 2839 #ifndef NDEBUG 2840 DEBUG(dbgs() << "\tCoefficient Info\n"); 2841 for (unsigned K = 1; K <= MaxLevels; ++K) { 2842 DEBUG(dbgs() << "\t " << K << "\t" << *CI[K].Coeff); 2843 DEBUG(dbgs() << "\tPos Part = "); 2844 DEBUG(dbgs() << *CI[K].PosPart); 2845 DEBUG(dbgs() << "\tNeg Part = "); 2846 DEBUG(dbgs() << *CI[K].NegPart); 2847 DEBUG(dbgs() << "\tUpper Bound = "); 2848 if (CI[K].Iterations) 2849 DEBUG(dbgs() << *CI[K].Iterations); 2850 else 2851 DEBUG(dbgs() << "+inf"); 2852 DEBUG(dbgs() << '\n'); 2853 } 2854 DEBUG(dbgs() << "\t Constant = " << *Subscript << '\n'); 2855 #endif 2856 return CI; 2857 } 2858 2859 2860 // Looks through all the bounds info and 2861 // computes the lower bound given the current direction settings 2862 // at each level. If the lower bound for any level is -inf, 2863 // the result is -inf. 2864 const SCEV *DependenceAnalysis::getLowerBound(BoundInfo *Bound) const { 2865 const SCEV *Sum = Bound[1].Lower[Bound[1].Direction]; 2866 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { 2867 if (Bound[K].Lower[Bound[K].Direction]) 2868 Sum = SE->getAddExpr(Sum, Bound[K].Lower[Bound[K].Direction]); 2869 else 2870 Sum = NULL; 2871 } 2872 return Sum; 2873 } 2874 2875 2876 // Looks through all the bounds info and 2877 // computes the upper bound given the current direction settings 2878 // at each level. If the upper bound at any level is +inf, 2879 // the result is +inf. 2880 const SCEV *DependenceAnalysis::getUpperBound(BoundInfo *Bound) const { 2881 const SCEV *Sum = Bound[1].Upper[Bound[1].Direction]; 2882 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { 2883 if (Bound[K].Upper[Bound[K].Direction]) 2884 Sum = SE->getAddExpr(Sum, Bound[K].Upper[Bound[K].Direction]); 2885 else 2886 Sum = NULL; 2887 } 2888 return Sum; 2889 } 2890 2891 2892 //===----------------------------------------------------------------------===// 2893 // Constraint manipulation for Delta test. 2894 2895 // Given a linear SCEV, 2896 // return the coefficient (the step) 2897 // corresponding to the specified loop. 2898 // If there isn't one, return 0. 2899 // For example, given a*i + b*j + c*k, zeroing the coefficient 2900 // corresponding to the j loop would yield b. 2901 const SCEV *DependenceAnalysis::findCoefficient(const SCEV *Expr, 2902 const Loop *TargetLoop) const { 2903 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 2904 if (!AddRec) 2905 return SE->getConstant(Expr->getType(), 0); 2906 if (AddRec->getLoop() == TargetLoop) 2907 return AddRec->getStepRecurrence(*SE); 2908 return findCoefficient(AddRec->getStart(), TargetLoop); 2909 } 2910 2911 2912 // Given a linear SCEV, 2913 // return the SCEV given by zeroing out the coefficient 2914 // corresponding to the specified loop. 2915 // For example, given a*i + b*j + c*k, zeroing the coefficient 2916 // corresponding to the j loop would yield a*i + c*k. 2917 const SCEV *DependenceAnalysis::zeroCoefficient(const SCEV *Expr, 2918 const Loop *TargetLoop) const { 2919 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 2920 if (!AddRec) 2921 return Expr; // ignore 2922 if (AddRec->getLoop() == TargetLoop) 2923 return AddRec->getStart(); 2924 return SE->getAddRecExpr(zeroCoefficient(AddRec->getStart(), TargetLoop), 2925 AddRec->getStepRecurrence(*SE), 2926 AddRec->getLoop(), 2927 AddRec->getNoWrapFlags()); 2928 } 2929 2930 2931 // Given a linear SCEV Expr, 2932 // return the SCEV given by adding some Value to the 2933 // coefficient corresponding to the specified TargetLoop. 2934 // For example, given a*i + b*j + c*k, adding 1 to the coefficient 2935 // corresponding to the j loop would yield a*i + (b+1)*j + c*k. 2936 const SCEV *DependenceAnalysis::addToCoefficient(const SCEV *Expr, 2937 const Loop *TargetLoop, 2938 const SCEV *Value) const { 2939 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 2940 if (!AddRec) // create a new addRec 2941 return SE->getAddRecExpr(Expr, 2942 Value, 2943 TargetLoop, 2944 SCEV::FlagAnyWrap); // Worst case, with no info. 2945 if (AddRec->getLoop() == TargetLoop) { 2946 const SCEV *Sum = SE->getAddExpr(AddRec->getStepRecurrence(*SE), Value); 2947 if (Sum->isZero()) 2948 return AddRec->getStart(); 2949 return SE->getAddRecExpr(AddRec->getStart(), 2950 Sum, 2951 AddRec->getLoop(), 2952 AddRec->getNoWrapFlags()); 2953 } 2954 return SE->getAddRecExpr(addToCoefficient(AddRec->getStart(), 2955 TargetLoop, Value), 2956 AddRec->getStepRecurrence(*SE), 2957 AddRec->getLoop(), 2958 AddRec->getNoWrapFlags()); 2959 } 2960 2961 2962 // Review the constraints, looking for opportunities 2963 // to simplify a subscript pair (Src and Dst). 2964 // Return true if some simplification occurs. 2965 // If the simplification isn't exact (that is, if it is conservative 2966 // in terms of dependence), set consistent to false. 2967 // Corresponds to Figure 5 from the paper 2968 // 2969 // Practical Dependence Testing 2970 // Goff, Kennedy, Tseng 2971 // PLDI 1991 2972 bool DependenceAnalysis::propagate(const SCEV *&Src, 2973 const SCEV *&Dst, 2974 SmallBitVector &Loops, 2975 SmallVector<Constraint, 4> &Constraints, 2976 bool &Consistent) { 2977 bool Result = false; 2978 for (int LI = Loops.find_first(); LI >= 0; LI = Loops.find_next(LI)) { 2979 DEBUG(dbgs() << "\t Constraint[" << LI << "] is"); 2980 DEBUG(Constraints[LI].dump(dbgs())); 2981 if (Constraints[LI].isDistance()) 2982 Result |= propagateDistance(Src, Dst, Constraints[LI], Consistent); 2983 else if (Constraints[LI].isLine()) 2984 Result |= propagateLine(Src, Dst, Constraints[LI], Consistent); 2985 else if (Constraints[LI].isPoint()) 2986 Result |= propagatePoint(Src, Dst, Constraints[LI]); 2987 } 2988 return Result; 2989 } 2990 2991 2992 // Attempt to propagate a distance 2993 // constraint into a subscript pair (Src and Dst). 2994 // Return true if some simplification occurs. 2995 // If the simplification isn't exact (that is, if it is conservative 2996 // in terms of dependence), set consistent to false. 2997 bool DependenceAnalysis::propagateDistance(const SCEV *&Src, 2998 const SCEV *&Dst, 2999 Constraint &CurConstraint, 3000 bool &Consistent) { 3001 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3002 DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); 3003 const SCEV *A_K = findCoefficient(Src, CurLoop); 3004 if (A_K->isZero()) 3005 return false; 3006 const SCEV *DA_K = SE->getMulExpr(A_K, CurConstraint.getD()); 3007 Src = SE->getMinusSCEV(Src, DA_K); 3008 Src = zeroCoefficient(Src, CurLoop); 3009 DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); 3010 DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); 3011 Dst = addToCoefficient(Dst, CurLoop, SE->getNegativeSCEV(A_K)); 3012 DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); 3013 if (!findCoefficient(Dst, CurLoop)->isZero()) 3014 Consistent = false; 3015 return true; 3016 } 3017 3018 3019 // Attempt to propagate a line 3020 // constraint into a subscript pair (Src and Dst). 3021 // Return true if some simplification occurs. 3022 // If the simplification isn't exact (that is, if it is conservative 3023 // in terms of dependence), set consistent to false. 3024 bool DependenceAnalysis::propagateLine(const SCEV *&Src, 3025 const SCEV *&Dst, 3026 Constraint &CurConstraint, 3027 bool &Consistent) { 3028 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3029 const SCEV *A = CurConstraint.getA(); 3030 const SCEV *B = CurConstraint.getB(); 3031 const SCEV *C = CurConstraint.getC(); 3032 DEBUG(dbgs() << "\t\tA = " << *A << ", B = " << *B << ", C = " << *C << "\n"); 3033 DEBUG(dbgs() << "\t\tSrc = " << *Src << "\n"); 3034 DEBUG(dbgs() << "\t\tDst = " << *Dst << "\n"); 3035 if (A->isZero()) { 3036 const SCEVConstant *Bconst = dyn_cast<SCEVConstant>(B); 3037 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3038 if (!Bconst || !Cconst) return false; 3039 APInt Beta = Bconst->getValue()->getValue(); 3040 APInt Charlie = Cconst->getValue()->getValue(); 3041 APInt CdivB = Charlie.sdiv(Beta); 3042 assert(Charlie.srem(Beta) == 0 && "C should be evenly divisible by B"); 3043 const SCEV *AP_K = findCoefficient(Dst, CurLoop); 3044 // Src = SE->getAddExpr(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); 3045 Src = SE->getMinusSCEV(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); 3046 Dst = zeroCoefficient(Dst, CurLoop); 3047 if (!findCoefficient(Src, CurLoop)->isZero()) 3048 Consistent = false; 3049 } 3050 else if (B->isZero()) { 3051 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); 3052 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3053 if (!Aconst || !Cconst) return false; 3054 APInt Alpha = Aconst->getValue()->getValue(); 3055 APInt Charlie = Cconst->getValue()->getValue(); 3056 APInt CdivA = Charlie.sdiv(Alpha); 3057 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); 3058 const SCEV *A_K = findCoefficient(Src, CurLoop); 3059 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); 3060 Src = zeroCoefficient(Src, CurLoop); 3061 if (!findCoefficient(Dst, CurLoop)->isZero()) 3062 Consistent = false; 3063 } 3064 else if (isKnownPredicate(CmpInst::ICMP_EQ, A, B)) { 3065 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); 3066 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3067 if (!Aconst || !Cconst) return false; 3068 APInt Alpha = Aconst->getValue()->getValue(); 3069 APInt Charlie = Cconst->getValue()->getValue(); 3070 APInt CdivA = Charlie.sdiv(Alpha); 3071 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); 3072 const SCEV *A_K = findCoefficient(Src, CurLoop); 3073 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); 3074 Src = zeroCoefficient(Src, CurLoop); 3075 Dst = addToCoefficient(Dst, CurLoop, A_K); 3076 if (!findCoefficient(Dst, CurLoop)->isZero()) 3077 Consistent = false; 3078 } 3079 else { 3080 // paper is incorrect here, or perhaps just misleading 3081 const SCEV *A_K = findCoefficient(Src, CurLoop); 3082 Src = SE->getMulExpr(Src, A); 3083 Dst = SE->getMulExpr(Dst, A); 3084 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, C)); 3085 Src = zeroCoefficient(Src, CurLoop); 3086 Dst = addToCoefficient(Dst, CurLoop, SE->getMulExpr(A_K, B)); 3087 if (!findCoefficient(Dst, CurLoop)->isZero()) 3088 Consistent = false; 3089 } 3090 DEBUG(dbgs() << "\t\tnew Src = " << *Src << "\n"); 3091 DEBUG(dbgs() << "\t\tnew Dst = " << *Dst << "\n"); 3092 return true; 3093 } 3094 3095 3096 // Attempt to propagate a point 3097 // constraint into a subscript pair (Src and Dst). 3098 // Return true if some simplification occurs. 3099 bool DependenceAnalysis::propagatePoint(const SCEV *&Src, 3100 const SCEV *&Dst, 3101 Constraint &CurConstraint) { 3102 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3103 const SCEV *A_K = findCoefficient(Src, CurLoop); 3104 const SCEV *AP_K = findCoefficient(Dst, CurLoop); 3105 const SCEV *XA_K = SE->getMulExpr(A_K, CurConstraint.getX()); 3106 const SCEV *YAP_K = SE->getMulExpr(AP_K, CurConstraint.getY()); 3107 DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); 3108 Src = SE->getAddExpr(Src, SE->getMinusSCEV(XA_K, YAP_K)); 3109 Src = zeroCoefficient(Src, CurLoop); 3110 DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); 3111 DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); 3112 Dst = zeroCoefficient(Dst, CurLoop); 3113 DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); 3114 return true; 3115 } 3116 3117 3118 // Update direction vector entry based on the current constraint. 3119 void DependenceAnalysis::updateDirection(Dependence::DVEntry &Level, 3120 const Constraint &CurConstraint 3121 ) const { 3122 DEBUG(dbgs() << "\tUpdate direction, constraint ="); 3123 DEBUG(CurConstraint.dump(dbgs())); 3124 if (CurConstraint.isAny()) 3125 ; // use defaults 3126 else if (CurConstraint.isDistance()) { 3127 // this one is consistent, the others aren't 3128 Level.Scalar = false; 3129 Level.Distance = CurConstraint.getD(); 3130 unsigned NewDirection = Dependence::DVEntry::NONE; 3131 if (!SE->isKnownNonZero(Level.Distance)) // if may be zero 3132 NewDirection = Dependence::DVEntry::EQ; 3133 if (!SE->isKnownNonPositive(Level.Distance)) // if may be positive 3134 NewDirection |= Dependence::DVEntry::LT; 3135 if (!SE->isKnownNonNegative(Level.Distance)) // if may be negative 3136 NewDirection |= Dependence::DVEntry::GT; 3137 Level.Direction &= NewDirection; 3138 } 3139 else if (CurConstraint.isLine()) { 3140 Level.Scalar = false; 3141 Level.Distance = NULL; 3142 // direction should be accurate 3143 } 3144 else if (CurConstraint.isPoint()) { 3145 Level.Scalar = false; 3146 Level.Distance = NULL; 3147 unsigned NewDirection = Dependence::DVEntry::NONE; 3148 if (!isKnownPredicate(CmpInst::ICMP_NE, 3149 CurConstraint.getY(), 3150 CurConstraint.getX())) 3151 // if X may be = Y 3152 NewDirection |= Dependence::DVEntry::EQ; 3153 if (!isKnownPredicate(CmpInst::ICMP_SLE, 3154 CurConstraint.getY(), 3155 CurConstraint.getX())) 3156 // if Y may be > X 3157 NewDirection |= Dependence::DVEntry::LT; 3158 if (!isKnownPredicate(CmpInst::ICMP_SGE, 3159 CurConstraint.getY(), 3160 CurConstraint.getX())) 3161 // if Y may be < X 3162 NewDirection |= Dependence::DVEntry::GT; 3163 Level.Direction &= NewDirection; 3164 } 3165 else 3166 llvm_unreachable("constraint has unexpected kind"); 3167 } 3168 3169 3170 //===----------------------------------------------------------------------===// 3171 3172 #ifndef NDEBUG 3173 // For debugging purposes, dump a small bit vector to dbgs(). 3174 static void dumpSmallBitVector(SmallBitVector &BV) { 3175 dbgs() << "{"; 3176 for (int VI = BV.find_first(); VI >= 0; VI = BV.find_next(VI)) { 3177 dbgs() << VI; 3178 if (BV.find_next(VI) >= 0) 3179 dbgs() << ' '; 3180 } 3181 dbgs() << "}\n"; 3182 } 3183 #endif 3184 3185 3186 // depends - 3187 // Returns NULL if there is no dependence. 3188 // Otherwise, return a Dependence with as many details as possible. 3189 // Corresponds to Section 3.1 in the paper 3190 // 3191 // Practical Dependence Testing 3192 // Goff, Kennedy, Tseng 3193 // PLDI 1991 3194 // 3195 // Care is required to keep the code below up to date w.r.t. this routine. 3196 Dependence *DependenceAnalysis::depends(const Instruction *Src, 3197 const Instruction *Dst, 3198 bool PossiblyLoopIndependent) { 3199 if ((!Src->mayReadFromMemory() && !Src->mayWriteToMemory()) || 3200 (!Dst->mayReadFromMemory() && !Dst->mayWriteToMemory())) 3201 // if both instructions don't reference memory, there's no dependence 3202 return NULL; 3203 3204 if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) 3205 // can only analyze simple loads and stores, i.e., no calls, invokes, etc. 3206 return new Dependence(Src, Dst); 3207 3208 const Value *SrcPtr = getPointerOperand(Src); 3209 const Value *DstPtr = getPointerOperand(Dst); 3210 3211 switch (underlyingObjectsAlias(AA, DstPtr, SrcPtr)) { 3212 case AliasAnalysis::MayAlias: 3213 case AliasAnalysis::PartialAlias: 3214 // cannot analyse objects if we don't understand their aliasing. 3215 return new Dependence(Src, Dst); 3216 case AliasAnalysis::NoAlias: 3217 // If the objects noalias, they are distinct, accesses are independent. 3218 return NULL; 3219 case AliasAnalysis::MustAlias: 3220 break; // The underlying objects alias; test accesses for dependence. 3221 } 3222 3223 const GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr); 3224 const GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr); 3225 if (!SrcGEP || !DstGEP) 3226 return new Dependence(Src, Dst); // missing GEP, assume dependence 3227 3228 if (SrcGEP->getPointerOperandType() != DstGEP->getPointerOperandType()) 3229 return new Dependence(Src, Dst); // different types, assume dependence 3230 3231 // establish loop nesting levels 3232 establishNestingLevels(Src, Dst); 3233 DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n"); 3234 DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n"); 3235 3236 FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels); 3237 ++TotalArrayPairs; 3238 3239 // classify subscript pairs 3240 unsigned Pairs = SrcGEP->idx_end() - SrcGEP->idx_begin(); 3241 SmallVector<Subscript, 4> Pair(Pairs); 3242 for (unsigned SI = 0; SI < Pairs; ++SI) { 3243 Pair[SI].Loops.resize(MaxLevels + 1); 3244 Pair[SI].GroupLoops.resize(MaxLevels + 1); 3245 Pair[SI].Group.resize(Pairs); 3246 } 3247 Pairs = 0; 3248 for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(), 3249 SrcEnd = SrcGEP->idx_end(), 3250 DstIdx = DstGEP->idx_begin(), 3251 DstEnd = DstGEP->idx_end(); 3252 SrcIdx != SrcEnd && DstIdx != DstEnd; 3253 ++SrcIdx, ++DstIdx, ++Pairs) { 3254 Pair[Pairs].Src = SE->getSCEV(*SrcIdx); 3255 Pair[Pairs].Dst = SE->getSCEV(*DstIdx); 3256 removeMatchingExtensions(&Pair[Pairs]); 3257 Pair[Pairs].Classification = 3258 classifyPair(Pair[Pairs].Src, LI->getLoopFor(Src->getParent()), 3259 Pair[Pairs].Dst, LI->getLoopFor(Dst->getParent()), 3260 Pair[Pairs].Loops); 3261 Pair[Pairs].GroupLoops = Pair[Pairs].Loops; 3262 Pair[Pairs].Group.set(Pairs); 3263 DEBUG(dbgs() << " subscript " << Pairs << "\n"); 3264 DEBUG(dbgs() << "\tsrc = " << *Pair[Pairs].Src << "\n"); 3265 DEBUG(dbgs() << "\tdst = " << *Pair[Pairs].Dst << "\n"); 3266 DEBUG(dbgs() << "\tclass = " << Pair[Pairs].Classification << "\n"); 3267 DEBUG(dbgs() << "\tloops = "); 3268 DEBUG(dumpSmallBitVector(Pair[Pairs].Loops)); 3269 } 3270 3271 SmallBitVector Separable(Pairs); 3272 SmallBitVector Coupled(Pairs); 3273 3274 // Partition subscripts into separable and minimally-coupled groups 3275 // Algorithm in paper is algorithmically better; 3276 // this may be faster in practice. Check someday. 3277 // 3278 // Here's an example of how it works. Consider this code: 3279 // 3280 // for (i = ...) { 3281 // for (j = ...) { 3282 // for (k = ...) { 3283 // for (l = ...) { 3284 // for (m = ...) { 3285 // A[i][j][k][m] = ...; 3286 // ... = A[0][j][l][i + j]; 3287 // } 3288 // } 3289 // } 3290 // } 3291 // } 3292 // 3293 // There are 4 subscripts here: 3294 // 0 [i] and [0] 3295 // 1 [j] and [j] 3296 // 2 [k] and [l] 3297 // 3 [m] and [i + j] 3298 // 3299 // We've already classified each subscript pair as ZIV, SIV, etc., 3300 // and collected all the loops mentioned by pair P in Pair[P].Loops. 3301 // In addition, we've initialized Pair[P].GroupLoops to Pair[P].Loops 3302 // and set Pair[P].Group = {P}. 3303 // 3304 // Src Dst Classification Loops GroupLoops Group 3305 // 0 [i] [0] SIV {1} {1} {0} 3306 // 1 [j] [j] SIV {2} {2} {1} 3307 // 2 [k] [l] RDIV {3,4} {3,4} {2} 3308 // 3 [m] [i + j] MIV {1,2,5} {1,2,5} {3} 3309 // 3310 // For each subscript SI 0 .. 3, we consider each remaining subscript, SJ. 3311 // So, 0 is compared against 1, 2, and 3; 1 is compared against 2 and 3, etc. 3312 // 3313 // We begin by comparing 0 and 1. The intersection of the GroupLoops is empty. 3314 // Next, 0 and 2. Again, the intersection of their GroupLoops is empty. 3315 // Next 0 and 3. The intersection of their GroupLoop = {1}, not empty, 3316 // so Pair[3].Group = {0,3} and Done = false (that is, 0 will not be added 3317 // to either Separable or Coupled). 3318 // 3319 // Next, we consider 1 and 2. The intersection of the GroupLoops is empty. 3320 // Next, 1 and 3. The intersectionof their GroupLoops = {2}, not empty, 3321 // so Pair[3].Group = {0, 1, 3} and Done = false. 3322 // 3323 // Next, we compare 2 against 3. The intersection of the GroupLoops is empty. 3324 // Since Done remains true, we add 2 to the set of Separable pairs. 3325 // 3326 // Finally, we consider 3. There's nothing to compare it with, 3327 // so Done remains true and we add it to the Coupled set. 3328 // Pair[3].Group = {0, 1, 3} and GroupLoops = {1, 2, 5}. 3329 // 3330 // In the end, we've got 1 separable subscript and 1 coupled group. 3331 for (unsigned SI = 0; SI < Pairs; ++SI) { 3332 if (Pair[SI].Classification == Subscript::NonLinear) { 3333 // ignore these, but collect loops for later 3334 ++NonlinearSubscriptPairs; 3335 collectCommonLoops(Pair[SI].Src, 3336 LI->getLoopFor(Src->getParent()), 3337 Pair[SI].Loops); 3338 collectCommonLoops(Pair[SI].Dst, 3339 LI->getLoopFor(Dst->getParent()), 3340 Pair[SI].Loops); 3341 Result.Consistent = false; 3342 } 3343 else if (Pair[SI].Classification == Subscript::ZIV) { 3344 // always separable 3345 Separable.set(SI); 3346 } 3347 else { 3348 // SIV, RDIV, or MIV, so check for coupled group 3349 bool Done = true; 3350 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { 3351 SmallBitVector Intersection = Pair[SI].GroupLoops; 3352 Intersection &= Pair[SJ].GroupLoops; 3353 if (Intersection.any()) { 3354 // accumulate set of all the loops in group 3355 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; 3356 // accumulate set of all subscripts in group 3357 Pair[SJ].Group |= Pair[SI].Group; 3358 Done = false; 3359 } 3360 } 3361 if (Done) { 3362 if (Pair[SI].Group.count() == 1) { 3363 Separable.set(SI); 3364 ++SeparableSubscriptPairs; 3365 } 3366 else { 3367 Coupled.set(SI); 3368 ++CoupledSubscriptPairs; 3369 } 3370 } 3371 } 3372 } 3373 3374 DEBUG(dbgs() << " Separable = "); 3375 DEBUG(dumpSmallBitVector(Separable)); 3376 DEBUG(dbgs() << " Coupled = "); 3377 DEBUG(dumpSmallBitVector(Coupled)); 3378 3379 Constraint NewConstraint; 3380 NewConstraint.setAny(SE); 3381 3382 // test separable subscripts 3383 for (int SI = Separable.find_first(); SI >= 0; SI = Separable.find_next(SI)) { 3384 DEBUG(dbgs() << "testing subscript " << SI); 3385 switch (Pair[SI].Classification) { 3386 case Subscript::ZIV: 3387 DEBUG(dbgs() << ", ZIV\n"); 3388 if (testZIV(Pair[SI].Src, Pair[SI].Dst, Result)) 3389 return NULL; 3390 break; 3391 case Subscript::SIV: { 3392 DEBUG(dbgs() << ", SIV\n"); 3393 unsigned Level; 3394 const SCEV *SplitIter = NULL; 3395 if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level, 3396 Result, NewConstraint, SplitIter)) 3397 return NULL; 3398 break; 3399 } 3400 case Subscript::RDIV: 3401 DEBUG(dbgs() << ", RDIV\n"); 3402 if (testRDIV(Pair[SI].Src, Pair[SI].Dst, Result)) 3403 return NULL; 3404 break; 3405 case Subscript::MIV: 3406 DEBUG(dbgs() << ", MIV\n"); 3407 if (testMIV(Pair[SI].Src, Pair[SI].Dst, Pair[SI].Loops, Result)) 3408 return NULL; 3409 break; 3410 default: 3411 llvm_unreachable("subscript has unexpected classification"); 3412 } 3413 } 3414 3415 if (Coupled.count()) { 3416 // test coupled subscript groups 3417 DEBUG(dbgs() << "starting on coupled subscripts\n"); 3418 DEBUG(dbgs() << "MaxLevels + 1 = " << MaxLevels + 1 << "\n"); 3419 SmallVector<Constraint, 4> Constraints(MaxLevels + 1); 3420 for (unsigned II = 0; II <= MaxLevels; ++II) 3421 Constraints[II].setAny(SE); 3422 for (int SI = Coupled.find_first(); SI >= 0; SI = Coupled.find_next(SI)) { 3423 DEBUG(dbgs() << "testing subscript group " << SI << " { "); 3424 SmallBitVector Group(Pair[SI].Group); 3425 SmallBitVector Sivs(Pairs); 3426 SmallBitVector Mivs(Pairs); 3427 SmallBitVector ConstrainedLevels(MaxLevels + 1); 3428 for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) { 3429 DEBUG(dbgs() << SJ << " "); 3430 if (Pair[SJ].Classification == Subscript::SIV) 3431 Sivs.set(SJ); 3432 else 3433 Mivs.set(SJ); 3434 } 3435 DEBUG(dbgs() << "}\n"); 3436 while (Sivs.any()) { 3437 bool Changed = false; 3438 for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) { 3439 DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n"); 3440 // SJ is an SIV subscript that's part of the current coupled group 3441 unsigned Level; 3442 const SCEV *SplitIter = NULL; 3443 DEBUG(dbgs() << "SIV\n"); 3444 if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, 3445 Result, NewConstraint, SplitIter)) 3446 return NULL; 3447 ConstrainedLevels.set(Level); 3448 if (intersectConstraints(&Constraints[Level], &NewConstraint)) { 3449 if (Constraints[Level].isEmpty()) { 3450 ++DeltaIndependence; 3451 return NULL; 3452 } 3453 Changed = true; 3454 } 3455 Sivs.reset(SJ); 3456 } 3457 if (Changed) { 3458 // propagate, possibly creating new SIVs and ZIVs 3459 DEBUG(dbgs() << " propagating\n"); 3460 DEBUG(dbgs() << "\tMivs = "); 3461 DEBUG(dumpSmallBitVector(Mivs)); 3462 for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { 3463 // SJ is an MIV subscript that's part of the current coupled group 3464 DEBUG(dbgs() << "\tSJ = " << SJ << "\n"); 3465 if (propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, 3466 Constraints, Result.Consistent)) { 3467 DEBUG(dbgs() << "\t Changed\n"); 3468 ++DeltaPropagations; 3469 Pair[SJ].Classification = 3470 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), 3471 Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), 3472 Pair[SJ].Loops); 3473 switch (Pair[SJ].Classification) { 3474 case Subscript::ZIV: 3475 DEBUG(dbgs() << "ZIV\n"); 3476 if (testZIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) 3477 return NULL; 3478 Mivs.reset(SJ); 3479 break; 3480 case Subscript::SIV: 3481 Sivs.set(SJ); 3482 Mivs.reset(SJ); 3483 break; 3484 case Subscript::RDIV: 3485 case Subscript::MIV: 3486 break; 3487 default: 3488 llvm_unreachable("bad subscript classification"); 3489 } 3490 } 3491 } 3492 } 3493 } 3494 3495 // test & propagate remaining RDIVs 3496 for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { 3497 if (Pair[SJ].Classification == Subscript::RDIV) { 3498 DEBUG(dbgs() << "RDIV test\n"); 3499 if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) 3500 return NULL; 3501 // I don't yet understand how to propagate RDIV results 3502 Mivs.reset(SJ); 3503 } 3504 } 3505 3506 // test remaining MIVs 3507 // This code is temporary. 3508 // Better to somehow test all remaining subscripts simultaneously. 3509 for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { 3510 if (Pair[SJ].Classification == Subscript::MIV) { 3511 DEBUG(dbgs() << "MIV test\n"); 3512 if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Result)) 3513 return NULL; 3514 } 3515 else 3516 llvm_unreachable("expected only MIV subscripts at this point"); 3517 } 3518 3519 // update Result.DV from constraint vector 3520 DEBUG(dbgs() << " updating\n"); 3521 for (int SJ = ConstrainedLevels.find_first(); 3522 SJ >= 0; SJ = ConstrainedLevels.find_next(SJ)) { 3523 updateDirection(Result.DV[SJ - 1], Constraints[SJ]); 3524 if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE) 3525 return NULL; 3526 } 3527 } 3528 } 3529 3530 // make sure Scalar flags are set correctly 3531 SmallBitVector CompleteLoops(MaxLevels + 1); 3532 for (unsigned SI = 0; SI < Pairs; ++SI) 3533 CompleteLoops |= Pair[SI].Loops; 3534 for (unsigned II = 1; II <= CommonLevels; ++II) 3535 if (CompleteLoops[II]) 3536 Result.DV[II - 1].Scalar = false; 3537 3538 // make sure loopIndepent flag is set correctly 3539 if (PossiblyLoopIndependent) { 3540 for (unsigned II = 1; II <= CommonLevels; ++II) { 3541 if (!(Result.getDirection(II) & Dependence::DVEntry::EQ)) { 3542 Result.LoopIndependent = false; 3543 break; 3544 } 3545 } 3546 } 3547 3548 FullDependence *Final = new FullDependence(Result); 3549 Result.DV = NULL; 3550 return Final; 3551 } 3552 3553 3554 3555 //===----------------------------------------------------------------------===// 3556 // getSplitIteration - 3557 // Rather than spend rarely-used space recording the splitting iteration 3558 // during the Weak-Crossing SIV test, we re-compute it on demand. 3559 // The re-computation is basically a repeat of the entire dependence test, 3560 // though simplified since we know that the dependence exists. 3561 // It's tedious, since we must go through all propagations, etc. 3562 // 3563 // Care is required to keep this code up to date w.r.t. the code above. 3564 // 3565 // Generally, the dependence analyzer will be used to build 3566 // a dependence graph for a function (basically a map from instructions 3567 // to dependences). Looking for cycles in the graph shows us loops 3568 // that cannot be trivially vectorized/parallelized. 3569 // 3570 // We can try to improve the situation by examining all the dependences 3571 // that make up the cycle, looking for ones we can break. 3572 // Sometimes, peeling the first or last iteration of a loop will break 3573 // dependences, and we've got flags for those possibilities. 3574 // Sometimes, splitting a loop at some other iteration will do the trick, 3575 // and we've got a flag for that case. Rather than waste the space to 3576 // record the exact iteration (since we rarely know), we provide 3577 // a method that calculates the iteration. It's a drag that it must work 3578 // from scratch, but wonderful in that it's possible. 3579 // 3580 // Here's an example: 3581 // 3582 // for (i = 0; i < 10; i++) 3583 // A[i] = ... 3584 // ... = A[11 - i] 3585 // 3586 // There's a loop-carried flow dependence from the store to the load, 3587 // found by the weak-crossing SIV test. The dependence will have a flag, 3588 // indicating that the dependence can be broken by splitting the loop. 3589 // Calling getSplitIteration will return 5. 3590 // Splitting the loop breaks the dependence, like so: 3591 // 3592 // for (i = 0; i <= 5; i++) 3593 // A[i] = ... 3594 // ... = A[11 - i] 3595 // for (i = 6; i < 10; i++) 3596 // A[i] = ... 3597 // ... = A[11 - i] 3598 // 3599 // breaks the dependence and allows us to vectorize/parallelize 3600 // both loops. 3601 const SCEV *DependenceAnalysis::getSplitIteration(const Dependence *Dep, 3602 unsigned SplitLevel) { 3603 assert(Dep && "expected a pointer to a Dependence"); 3604 assert(Dep->isSplitable(SplitLevel) && 3605 "Dep should be splitable at SplitLevel"); 3606 const Instruction *Src = Dep->getSrc(); 3607 const Instruction *Dst = Dep->getDst(); 3608 assert(Src->mayReadFromMemory() || Src->mayWriteToMemory()); 3609 assert(Dst->mayReadFromMemory() || Dst->mayWriteToMemory()); 3610 assert(isLoadOrStore(Src)); 3611 assert(isLoadOrStore(Dst)); 3612 const Value *SrcPtr = getPointerOperand(Src); 3613 const Value *DstPtr = getPointerOperand(Dst); 3614 assert(underlyingObjectsAlias(AA, DstPtr, SrcPtr) == 3615 AliasAnalysis::MustAlias); 3616 const GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr); 3617 const GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr); 3618 assert(SrcGEP); 3619 assert(DstGEP); 3620 assert(SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()); 3621 3622 // establish loop nesting levels 3623 establishNestingLevels(Src, Dst); 3624 3625 FullDependence Result(Src, Dst, false, CommonLevels); 3626 3627 // classify subscript pairs 3628 unsigned Pairs = SrcGEP->idx_end() - SrcGEP->idx_begin(); 3629 SmallVector<Subscript, 4> Pair(Pairs); 3630 for (unsigned SI = 0; SI < Pairs; ++SI) { 3631 Pair[SI].Loops.resize(MaxLevels + 1); 3632 Pair[SI].GroupLoops.resize(MaxLevels + 1); 3633 Pair[SI].Group.resize(Pairs); 3634 } 3635 Pairs = 0; 3636 for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(), 3637 SrcEnd = SrcGEP->idx_end(), 3638 DstIdx = DstGEP->idx_begin(), 3639 DstEnd = DstGEP->idx_end(); 3640 SrcIdx != SrcEnd && DstIdx != DstEnd; 3641 ++SrcIdx, ++DstIdx, ++Pairs) { 3642 Pair[Pairs].Src = SE->getSCEV(*SrcIdx); 3643 Pair[Pairs].Dst = SE->getSCEV(*DstIdx); 3644 Pair[Pairs].Classification = 3645 classifyPair(Pair[Pairs].Src, LI->getLoopFor(Src->getParent()), 3646 Pair[Pairs].Dst, LI->getLoopFor(Dst->getParent()), 3647 Pair[Pairs].Loops); 3648 Pair[Pairs].GroupLoops = Pair[Pairs].Loops; 3649 Pair[Pairs].Group.set(Pairs); 3650 } 3651 3652 SmallBitVector Separable(Pairs); 3653 SmallBitVector Coupled(Pairs); 3654 3655 // partition subscripts into separable and minimally-coupled groups 3656 for (unsigned SI = 0; SI < Pairs; ++SI) { 3657 if (Pair[SI].Classification == Subscript::NonLinear) { 3658 // ignore these, but collect loops for later 3659 collectCommonLoops(Pair[SI].Src, 3660 LI->getLoopFor(Src->getParent()), 3661 Pair[SI].Loops); 3662 collectCommonLoops(Pair[SI].Dst, 3663 LI->getLoopFor(Dst->getParent()), 3664 Pair[SI].Loops); 3665 Result.Consistent = false; 3666 } 3667 else if (Pair[SI].Classification == Subscript::ZIV) 3668 Separable.set(SI); 3669 else { 3670 // SIV, RDIV, or MIV, so check for coupled group 3671 bool Done = true; 3672 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { 3673 SmallBitVector Intersection = Pair[SI].GroupLoops; 3674 Intersection &= Pair[SJ].GroupLoops; 3675 if (Intersection.any()) { 3676 // accumulate set of all the loops in group 3677 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; 3678 // accumulate set of all subscripts in group 3679 Pair[SJ].Group |= Pair[SI].Group; 3680 Done = false; 3681 } 3682 } 3683 if (Done) { 3684 if (Pair[SI].Group.count() == 1) 3685 Separable.set(SI); 3686 else 3687 Coupled.set(SI); 3688 } 3689 } 3690 } 3691 3692 Constraint NewConstraint; 3693 NewConstraint.setAny(SE); 3694 3695 // test separable subscripts 3696 for (int SI = Separable.find_first(); SI >= 0; SI = Separable.find_next(SI)) { 3697 switch (Pair[SI].Classification) { 3698 case Subscript::SIV: { 3699 unsigned Level; 3700 const SCEV *SplitIter = NULL; 3701 (void) testSIV(Pair[SI].Src, Pair[SI].Dst, Level, 3702 Result, NewConstraint, SplitIter); 3703 if (Level == SplitLevel) { 3704 assert(SplitIter != NULL); 3705 return SplitIter; 3706 } 3707 break; 3708 } 3709 case Subscript::ZIV: 3710 case Subscript::RDIV: 3711 case Subscript::MIV: 3712 break; 3713 default: 3714 llvm_unreachable("subscript has unexpected classification"); 3715 } 3716 } 3717 3718 if (Coupled.count()) { 3719 // test coupled subscript groups 3720 SmallVector<Constraint, 4> Constraints(MaxLevels + 1); 3721 for (unsigned II = 0; II <= MaxLevels; ++II) 3722 Constraints[II].setAny(SE); 3723 for (int SI = Coupled.find_first(); SI >= 0; SI = Coupled.find_next(SI)) { 3724 SmallBitVector Group(Pair[SI].Group); 3725 SmallBitVector Sivs(Pairs); 3726 SmallBitVector Mivs(Pairs); 3727 SmallBitVector ConstrainedLevels(MaxLevels + 1); 3728 for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) { 3729 if (Pair[SJ].Classification == Subscript::SIV) 3730 Sivs.set(SJ); 3731 else 3732 Mivs.set(SJ); 3733 } 3734 while (Sivs.any()) { 3735 bool Changed = false; 3736 for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) { 3737 // SJ is an SIV subscript that's part of the current coupled group 3738 unsigned Level; 3739 const SCEV *SplitIter = NULL; 3740 (void) testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, 3741 Result, NewConstraint, SplitIter); 3742 if (Level == SplitLevel && SplitIter) 3743 return SplitIter; 3744 ConstrainedLevels.set(Level); 3745 if (intersectConstraints(&Constraints[Level], &NewConstraint)) 3746 Changed = true; 3747 Sivs.reset(SJ); 3748 } 3749 if (Changed) { 3750 // propagate, possibly creating new SIVs and ZIVs 3751 for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { 3752 // SJ is an MIV subscript that's part of the current coupled group 3753 if (propagate(Pair[SJ].Src, Pair[SJ].Dst, 3754 Pair[SJ].Loops, Constraints, Result.Consistent)) { 3755 Pair[SJ].Classification = 3756 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), 3757 Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), 3758 Pair[SJ].Loops); 3759 switch (Pair[SJ].Classification) { 3760 case Subscript::ZIV: 3761 Mivs.reset(SJ); 3762 break; 3763 case Subscript::SIV: 3764 Sivs.set(SJ); 3765 Mivs.reset(SJ); 3766 break; 3767 case Subscript::RDIV: 3768 case Subscript::MIV: 3769 break; 3770 default: 3771 llvm_unreachable("bad subscript classification"); 3772 } 3773 } 3774 } 3775 } 3776 } 3777 } 3778 } 3779 llvm_unreachable("somehow reached end of routine"); 3780 return NULL; 3781 } 3782