1 //===-- DependenceAnalysis.cpp - DA Implementation --------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // DependenceAnalysis is an LLVM pass that analyses dependences between memory 11 // accesses. Currently, it is an (incomplete) implementation of the approach 12 // described in 13 // 14 // Practical Dependence Testing 15 // Goff, Kennedy, Tseng 16 // PLDI 1991 17 // 18 // There's a single entry point that analyzes the dependence between a pair 19 // of memory references in a function, returning either NULL, for no dependence, 20 // or a more-or-less detailed description of the dependence between them. 21 // 22 // Currently, the implementation cannot propagate constraints between 23 // coupled RDIV subscripts and lacks a multi-subscript MIV test. 24 // Both of these are conservative weaknesses; 25 // that is, not a source of correctness problems. 26 // 27 // The implementation depends on the GEP instruction to differentiate 28 // subscripts. Since Clang linearizes some array subscripts, the dependence 29 // analysis is using SCEV->delinearize to recover the representation of multiple 30 // subscripts, and thus avoid the more expensive and less precise MIV tests. The 31 // delinearization is controlled by the flag -da-delinearize. 32 // 33 // We should pay some careful attention to the possibility of integer overflow 34 // in the implementation of the various tests. This could happen with Add, 35 // Subtract, or Multiply, with both APInt's and SCEV's. 36 // 37 // Some non-linear subscript pairs can be handled by the GCD test 38 // (and perhaps other tests). 39 // Should explore how often these things occur. 40 // 41 // Finally, it seems like certain test cases expose weaknesses in the SCEV 42 // simplification, especially in the handling of sign and zero extensions. 43 // It could be useful to spend time exploring these. 44 // 45 // Please note that this is work in progress and the interface is subject to 46 // change. 47 // 48 //===----------------------------------------------------------------------===// 49 // // 50 // In memory of Ken Kennedy, 1945 - 2007 // 51 // // 52 //===----------------------------------------------------------------------===// 53 54 #include "llvm/Analysis/DependenceAnalysis.h" 55 #include "llvm/ADT/STLExtras.h" 56 #include "llvm/ADT/Statistic.h" 57 #include "llvm/Analysis/AliasAnalysis.h" 58 #include "llvm/Analysis/LoopInfo.h" 59 #include "llvm/Analysis/ScalarEvolution.h" 60 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 61 #include "llvm/Analysis/ValueTracking.h" 62 #include "llvm/IR/InstIterator.h" 63 #include "llvm/IR/Operator.h" 64 #include "llvm/Support/CommandLine.h" 65 #include "llvm/Support/Debug.h" 66 #include "llvm/Support/ErrorHandling.h" 67 #include "llvm/Support/raw_ostream.h" 68 69 using namespace llvm; 70 71 #define DEBUG_TYPE "da" 72 73 //===----------------------------------------------------------------------===// 74 // statistics 75 76 STATISTIC(TotalArrayPairs, "Array pairs tested"); 77 STATISTIC(SeparableSubscriptPairs, "Separable subscript pairs"); 78 STATISTIC(CoupledSubscriptPairs, "Coupled subscript pairs"); 79 STATISTIC(NonlinearSubscriptPairs, "Nonlinear subscript pairs"); 80 STATISTIC(ZIVapplications, "ZIV applications"); 81 STATISTIC(ZIVindependence, "ZIV independence"); 82 STATISTIC(StrongSIVapplications, "Strong SIV applications"); 83 STATISTIC(StrongSIVsuccesses, "Strong SIV successes"); 84 STATISTIC(StrongSIVindependence, "Strong SIV independence"); 85 STATISTIC(WeakCrossingSIVapplications, "Weak-Crossing SIV applications"); 86 STATISTIC(WeakCrossingSIVsuccesses, "Weak-Crossing SIV successes"); 87 STATISTIC(WeakCrossingSIVindependence, "Weak-Crossing SIV independence"); 88 STATISTIC(ExactSIVapplications, "Exact SIV applications"); 89 STATISTIC(ExactSIVsuccesses, "Exact SIV successes"); 90 STATISTIC(ExactSIVindependence, "Exact SIV independence"); 91 STATISTIC(WeakZeroSIVapplications, "Weak-Zero SIV applications"); 92 STATISTIC(WeakZeroSIVsuccesses, "Weak-Zero SIV successes"); 93 STATISTIC(WeakZeroSIVindependence, "Weak-Zero SIV independence"); 94 STATISTIC(ExactRDIVapplications, "Exact RDIV applications"); 95 STATISTIC(ExactRDIVindependence, "Exact RDIV independence"); 96 STATISTIC(SymbolicRDIVapplications, "Symbolic RDIV applications"); 97 STATISTIC(SymbolicRDIVindependence, "Symbolic RDIV independence"); 98 STATISTIC(DeltaApplications, "Delta applications"); 99 STATISTIC(DeltaSuccesses, "Delta successes"); 100 STATISTIC(DeltaIndependence, "Delta independence"); 101 STATISTIC(DeltaPropagations, "Delta propagations"); 102 STATISTIC(GCDapplications, "GCD applications"); 103 STATISTIC(GCDsuccesses, "GCD successes"); 104 STATISTIC(GCDindependence, "GCD independence"); 105 STATISTIC(BanerjeeApplications, "Banerjee applications"); 106 STATISTIC(BanerjeeIndependence, "Banerjee independence"); 107 STATISTIC(BanerjeeSuccesses, "Banerjee successes"); 108 109 static cl::opt<bool> 110 Delinearize("da-delinearize", cl::init(false), cl::Hidden, cl::ZeroOrMore, 111 cl::desc("Try to delinearize array references.")); 112 113 //===----------------------------------------------------------------------===// 114 // basics 115 116 INITIALIZE_PASS_BEGIN(DependenceAnalysis, "da", 117 "Dependence Analysis", true, true) 118 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 119 INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) 120 INITIALIZE_AG_DEPENDENCY(AliasAnalysis) 121 INITIALIZE_PASS_END(DependenceAnalysis, "da", 122 "Dependence Analysis", true, true) 123 124 char DependenceAnalysis::ID = 0; 125 126 127 FunctionPass *llvm::createDependenceAnalysisPass() { 128 return new DependenceAnalysis(); 129 } 130 131 132 bool DependenceAnalysis::runOnFunction(Function &F) { 133 this->F = &F; 134 AA = &getAnalysis<AliasAnalysis>(); 135 SE = &getAnalysis<ScalarEvolution>(); 136 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 137 return false; 138 } 139 140 141 void DependenceAnalysis::releaseMemory() { 142 } 143 144 145 void DependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { 146 AU.setPreservesAll(); 147 AU.addRequiredTransitive<AliasAnalysis>(); 148 AU.addRequiredTransitive<ScalarEvolution>(); 149 AU.addRequiredTransitive<LoopInfoWrapperPass>(); 150 } 151 152 153 // Used to test the dependence analyzer. 154 // Looks through the function, noting loads and stores. 155 // Calls depends() on every possible pair and prints out the result. 156 // Ignores all other instructions. 157 static 158 void dumpExampleDependence(raw_ostream &OS, Function *F, 159 DependenceAnalysis *DA) { 160 for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F); 161 SrcI != SrcE; ++SrcI) { 162 if (isa<StoreInst>(*SrcI) || isa<LoadInst>(*SrcI)) { 163 for (inst_iterator DstI = SrcI, DstE = inst_end(F); 164 DstI != DstE; ++DstI) { 165 if (isa<StoreInst>(*DstI) || isa<LoadInst>(*DstI)) { 166 OS << "da analyze - "; 167 if (auto D = DA->depends(&*SrcI, &*DstI, true)) { 168 D->dump(OS); 169 for (unsigned Level = 1; Level <= D->getLevels(); Level++) { 170 if (D->isSplitable(Level)) { 171 OS << "da analyze - split level = " << Level; 172 OS << ", iteration = " << *DA->getSplitIteration(*D, Level); 173 OS << "!\n"; 174 } 175 } 176 } 177 else 178 OS << "none!\n"; 179 } 180 } 181 } 182 } 183 } 184 185 186 void DependenceAnalysis::print(raw_ostream &OS, const Module*) const { 187 dumpExampleDependence(OS, F, const_cast<DependenceAnalysis *>(this)); 188 } 189 190 //===----------------------------------------------------------------------===// 191 // Dependence methods 192 193 // Returns true if this is an input dependence. 194 bool Dependence::isInput() const { 195 return Src->mayReadFromMemory() && Dst->mayReadFromMemory(); 196 } 197 198 199 // Returns true if this is an output dependence. 200 bool Dependence::isOutput() const { 201 return Src->mayWriteToMemory() && Dst->mayWriteToMemory(); 202 } 203 204 205 // Returns true if this is an flow (aka true) dependence. 206 bool Dependence::isFlow() const { 207 return Src->mayWriteToMemory() && Dst->mayReadFromMemory(); 208 } 209 210 211 // Returns true if this is an anti dependence. 212 bool Dependence::isAnti() const { 213 return Src->mayReadFromMemory() && Dst->mayWriteToMemory(); 214 } 215 216 217 // Returns true if a particular level is scalar; that is, 218 // if no subscript in the source or destination mention the induction 219 // variable associated with the loop at this level. 220 // Leave this out of line, so it will serve as a virtual method anchor 221 bool Dependence::isScalar(unsigned level) const { 222 return false; 223 } 224 225 226 //===----------------------------------------------------------------------===// 227 // FullDependence methods 228 229 FullDependence::FullDependence(Instruction *Source, 230 Instruction *Destination, 231 bool PossiblyLoopIndependent, 232 unsigned CommonLevels) : 233 Dependence(Source, Destination), 234 Levels(CommonLevels), 235 LoopIndependent(PossiblyLoopIndependent) { 236 Consistent = true; 237 DV = CommonLevels ? new DVEntry[CommonLevels] : nullptr; 238 } 239 240 // The rest are simple getters that hide the implementation. 241 242 // getDirection - Returns the direction associated with a particular level. 243 unsigned FullDependence::getDirection(unsigned Level) const { 244 assert(0 < Level && Level <= Levels && "Level out of range"); 245 return DV[Level - 1].Direction; 246 } 247 248 249 // Returns the distance (or NULL) associated with a particular level. 250 const SCEV *FullDependence::getDistance(unsigned Level) const { 251 assert(0 < Level && Level <= Levels && "Level out of range"); 252 return DV[Level - 1].Distance; 253 } 254 255 256 // Returns true if a particular level is scalar; that is, 257 // if no subscript in the source or destination mention the induction 258 // variable associated with the loop at this level. 259 bool FullDependence::isScalar(unsigned Level) const { 260 assert(0 < Level && Level <= Levels && "Level out of range"); 261 return DV[Level - 1].Scalar; 262 } 263 264 265 // Returns true if peeling the first iteration from this loop 266 // will break this dependence. 267 bool FullDependence::isPeelFirst(unsigned Level) const { 268 assert(0 < Level && Level <= Levels && "Level out of range"); 269 return DV[Level - 1].PeelFirst; 270 } 271 272 273 // Returns true if peeling the last iteration from this loop 274 // will break this dependence. 275 bool FullDependence::isPeelLast(unsigned Level) const { 276 assert(0 < Level && Level <= Levels && "Level out of range"); 277 return DV[Level - 1].PeelLast; 278 } 279 280 281 // Returns true if splitting this loop will break the dependence. 282 bool FullDependence::isSplitable(unsigned Level) const { 283 assert(0 < Level && Level <= Levels && "Level out of range"); 284 return DV[Level - 1].Splitable; 285 } 286 287 288 //===----------------------------------------------------------------------===// 289 // DependenceAnalysis::Constraint methods 290 291 // If constraint is a point <X, Y>, returns X. 292 // Otherwise assert. 293 const SCEV *DependenceAnalysis::Constraint::getX() const { 294 assert(Kind == Point && "Kind should be Point"); 295 return A; 296 } 297 298 299 // If constraint is a point <X, Y>, returns Y. 300 // Otherwise assert. 301 const SCEV *DependenceAnalysis::Constraint::getY() const { 302 assert(Kind == Point && "Kind should be Point"); 303 return B; 304 } 305 306 307 // If constraint is a line AX + BY = C, returns A. 308 // Otherwise assert. 309 const SCEV *DependenceAnalysis::Constraint::getA() const { 310 assert((Kind == Line || Kind == Distance) && 311 "Kind should be Line (or Distance)"); 312 return A; 313 } 314 315 316 // If constraint is a line AX + BY = C, returns B. 317 // Otherwise assert. 318 const SCEV *DependenceAnalysis::Constraint::getB() const { 319 assert((Kind == Line || Kind == Distance) && 320 "Kind should be Line (or Distance)"); 321 return B; 322 } 323 324 325 // If constraint is a line AX + BY = C, returns C. 326 // Otherwise assert. 327 const SCEV *DependenceAnalysis::Constraint::getC() const { 328 assert((Kind == Line || Kind == Distance) && 329 "Kind should be Line (or Distance)"); 330 return C; 331 } 332 333 334 // If constraint is a distance, returns D. 335 // Otherwise assert. 336 const SCEV *DependenceAnalysis::Constraint::getD() const { 337 assert(Kind == Distance && "Kind should be Distance"); 338 return SE->getNegativeSCEV(C); 339 } 340 341 342 // Returns the loop associated with this constraint. 343 const Loop *DependenceAnalysis::Constraint::getAssociatedLoop() const { 344 assert((Kind == Distance || Kind == Line || Kind == Point) && 345 "Kind should be Distance, Line, or Point"); 346 return AssociatedLoop; 347 } 348 349 350 void DependenceAnalysis::Constraint::setPoint(const SCEV *X, 351 const SCEV *Y, 352 const Loop *CurLoop) { 353 Kind = Point; 354 A = X; 355 B = Y; 356 AssociatedLoop = CurLoop; 357 } 358 359 360 void DependenceAnalysis::Constraint::setLine(const SCEV *AA, 361 const SCEV *BB, 362 const SCEV *CC, 363 const Loop *CurLoop) { 364 Kind = Line; 365 A = AA; 366 B = BB; 367 C = CC; 368 AssociatedLoop = CurLoop; 369 } 370 371 372 void DependenceAnalysis::Constraint::setDistance(const SCEV *D, 373 const Loop *CurLoop) { 374 Kind = Distance; 375 A = SE->getConstant(D->getType(), 1); 376 B = SE->getNegativeSCEV(A); 377 C = SE->getNegativeSCEV(D); 378 AssociatedLoop = CurLoop; 379 } 380 381 382 void DependenceAnalysis::Constraint::setEmpty() { 383 Kind = Empty; 384 } 385 386 387 void DependenceAnalysis::Constraint::setAny(ScalarEvolution *NewSE) { 388 SE = NewSE; 389 Kind = Any; 390 } 391 392 393 // For debugging purposes. Dumps the constraint out to OS. 394 void DependenceAnalysis::Constraint::dump(raw_ostream &OS) const { 395 if (isEmpty()) 396 OS << " Empty\n"; 397 else if (isAny()) 398 OS << " Any\n"; 399 else if (isPoint()) 400 OS << " Point is <" << *getX() << ", " << *getY() << ">\n"; 401 else if (isDistance()) 402 OS << " Distance is " << *getD() << 403 " (" << *getA() << "*X + " << *getB() << "*Y = " << *getC() << ")\n"; 404 else if (isLine()) 405 OS << " Line is " << *getA() << "*X + " << 406 *getB() << "*Y = " << *getC() << "\n"; 407 else 408 llvm_unreachable("unknown constraint type in Constraint::dump"); 409 } 410 411 412 // Updates X with the intersection 413 // of the Constraints X and Y. Returns true if X has changed. 414 // Corresponds to Figure 4 from the paper 415 // 416 // Practical Dependence Testing 417 // Goff, Kennedy, Tseng 418 // PLDI 1991 419 bool DependenceAnalysis::intersectConstraints(Constraint *X, 420 const Constraint *Y) { 421 ++DeltaApplications; 422 DEBUG(dbgs() << "\tintersect constraints\n"); 423 DEBUG(dbgs() << "\t X ="; X->dump(dbgs())); 424 DEBUG(dbgs() << "\t Y ="; Y->dump(dbgs())); 425 assert(!Y->isPoint() && "Y must not be a Point"); 426 if (X->isAny()) { 427 if (Y->isAny()) 428 return false; 429 *X = *Y; 430 return true; 431 } 432 if (X->isEmpty()) 433 return false; 434 if (Y->isEmpty()) { 435 X->setEmpty(); 436 return true; 437 } 438 439 if (X->isDistance() && Y->isDistance()) { 440 DEBUG(dbgs() << "\t intersect 2 distances\n"); 441 if (isKnownPredicate(CmpInst::ICMP_EQ, X->getD(), Y->getD())) 442 return false; 443 if (isKnownPredicate(CmpInst::ICMP_NE, X->getD(), Y->getD())) { 444 X->setEmpty(); 445 ++DeltaSuccesses; 446 return true; 447 } 448 // Hmmm, interesting situation. 449 // I guess if either is constant, keep it and ignore the other. 450 if (isa<SCEVConstant>(Y->getD())) { 451 *X = *Y; 452 return true; 453 } 454 return false; 455 } 456 457 // At this point, the pseudo-code in Figure 4 of the paper 458 // checks if (X->isPoint() && Y->isPoint()). 459 // This case can't occur in our implementation, 460 // since a Point can only arise as the result of intersecting 461 // two Line constraints, and the right-hand value, Y, is never 462 // the result of an intersection. 463 assert(!(X->isPoint() && Y->isPoint()) && 464 "We shouldn't ever see X->isPoint() && Y->isPoint()"); 465 466 if (X->isLine() && Y->isLine()) { 467 DEBUG(dbgs() << "\t intersect 2 lines\n"); 468 const SCEV *Prod1 = SE->getMulExpr(X->getA(), Y->getB()); 469 const SCEV *Prod2 = SE->getMulExpr(X->getB(), Y->getA()); 470 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) { 471 // slopes are equal, so lines are parallel 472 DEBUG(dbgs() << "\t\tsame slope\n"); 473 Prod1 = SE->getMulExpr(X->getC(), Y->getB()); 474 Prod2 = SE->getMulExpr(X->getB(), Y->getC()); 475 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) 476 return false; 477 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { 478 X->setEmpty(); 479 ++DeltaSuccesses; 480 return true; 481 } 482 return false; 483 } 484 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { 485 // slopes differ, so lines intersect 486 DEBUG(dbgs() << "\t\tdifferent slopes\n"); 487 const SCEV *C1B2 = SE->getMulExpr(X->getC(), Y->getB()); 488 const SCEV *C1A2 = SE->getMulExpr(X->getC(), Y->getA()); 489 const SCEV *C2B1 = SE->getMulExpr(Y->getC(), X->getB()); 490 const SCEV *C2A1 = SE->getMulExpr(Y->getC(), X->getA()); 491 const SCEV *A1B2 = SE->getMulExpr(X->getA(), Y->getB()); 492 const SCEV *A2B1 = SE->getMulExpr(Y->getA(), X->getB()); 493 const SCEVConstant *C1A2_C2A1 = 494 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1A2, C2A1)); 495 const SCEVConstant *C1B2_C2B1 = 496 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1B2, C2B1)); 497 const SCEVConstant *A1B2_A2B1 = 498 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A1B2, A2B1)); 499 const SCEVConstant *A2B1_A1B2 = 500 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A2B1, A1B2)); 501 if (!C1B2_C2B1 || !C1A2_C2A1 || 502 !A1B2_A2B1 || !A2B1_A1B2) 503 return false; 504 APInt Xtop = C1B2_C2B1->getValue()->getValue(); 505 APInt Xbot = A1B2_A2B1->getValue()->getValue(); 506 APInt Ytop = C1A2_C2A1->getValue()->getValue(); 507 APInt Ybot = A2B1_A1B2->getValue()->getValue(); 508 DEBUG(dbgs() << "\t\tXtop = " << Xtop << "\n"); 509 DEBUG(dbgs() << "\t\tXbot = " << Xbot << "\n"); 510 DEBUG(dbgs() << "\t\tYtop = " << Ytop << "\n"); 511 DEBUG(dbgs() << "\t\tYbot = " << Ybot << "\n"); 512 APInt Xq = Xtop; // these need to be initialized, even 513 APInt Xr = Xtop; // though they're just going to be overwritten 514 APInt::sdivrem(Xtop, Xbot, Xq, Xr); 515 APInt Yq = Ytop; 516 APInt Yr = Ytop; 517 APInt::sdivrem(Ytop, Ybot, Yq, Yr); 518 if (Xr != 0 || Yr != 0) { 519 X->setEmpty(); 520 ++DeltaSuccesses; 521 return true; 522 } 523 DEBUG(dbgs() << "\t\tX = " << Xq << ", Y = " << Yq << "\n"); 524 if (Xq.slt(0) || Yq.slt(0)) { 525 X->setEmpty(); 526 ++DeltaSuccesses; 527 return true; 528 } 529 if (const SCEVConstant *CUB = 530 collectConstantUpperBound(X->getAssociatedLoop(), Prod1->getType())) { 531 APInt UpperBound = CUB->getValue()->getValue(); 532 DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n"); 533 if (Xq.sgt(UpperBound) || Yq.sgt(UpperBound)) { 534 X->setEmpty(); 535 ++DeltaSuccesses; 536 return true; 537 } 538 } 539 X->setPoint(SE->getConstant(Xq), 540 SE->getConstant(Yq), 541 X->getAssociatedLoop()); 542 ++DeltaSuccesses; 543 return true; 544 } 545 return false; 546 } 547 548 // if (X->isLine() && Y->isPoint()) This case can't occur. 549 assert(!(X->isLine() && Y->isPoint()) && "This case should never occur"); 550 551 if (X->isPoint() && Y->isLine()) { 552 DEBUG(dbgs() << "\t intersect Point and Line\n"); 553 const SCEV *A1X1 = SE->getMulExpr(Y->getA(), X->getX()); 554 const SCEV *B1Y1 = SE->getMulExpr(Y->getB(), X->getY()); 555 const SCEV *Sum = SE->getAddExpr(A1X1, B1Y1); 556 if (isKnownPredicate(CmpInst::ICMP_EQ, Sum, Y->getC())) 557 return false; 558 if (isKnownPredicate(CmpInst::ICMP_NE, Sum, Y->getC())) { 559 X->setEmpty(); 560 ++DeltaSuccesses; 561 return true; 562 } 563 return false; 564 } 565 566 llvm_unreachable("shouldn't reach the end of Constraint intersection"); 567 return false; 568 } 569 570 571 //===----------------------------------------------------------------------===// 572 // DependenceAnalysis methods 573 574 // For debugging purposes. Dumps a dependence to OS. 575 void Dependence::dump(raw_ostream &OS) const { 576 bool Splitable = false; 577 if (isConfused()) 578 OS << "confused"; 579 else { 580 if (isConsistent()) 581 OS << "consistent "; 582 if (isFlow()) 583 OS << "flow"; 584 else if (isOutput()) 585 OS << "output"; 586 else if (isAnti()) 587 OS << "anti"; 588 else if (isInput()) 589 OS << "input"; 590 unsigned Levels = getLevels(); 591 OS << " ["; 592 for (unsigned II = 1; II <= Levels; ++II) { 593 if (isSplitable(II)) 594 Splitable = true; 595 if (isPeelFirst(II)) 596 OS << 'p'; 597 const SCEV *Distance = getDistance(II); 598 if (Distance) 599 OS << *Distance; 600 else if (isScalar(II)) 601 OS << "S"; 602 else { 603 unsigned Direction = getDirection(II); 604 if (Direction == DVEntry::ALL) 605 OS << "*"; 606 else { 607 if (Direction & DVEntry::LT) 608 OS << "<"; 609 if (Direction & DVEntry::EQ) 610 OS << "="; 611 if (Direction & DVEntry::GT) 612 OS << ">"; 613 } 614 } 615 if (isPeelLast(II)) 616 OS << 'p'; 617 if (II < Levels) 618 OS << " "; 619 } 620 if (isLoopIndependent()) 621 OS << "|<"; 622 OS << "]"; 623 if (Splitable) 624 OS << " splitable"; 625 } 626 OS << "!\n"; 627 } 628 629 630 631 static 632 AliasAnalysis::AliasResult underlyingObjectsAlias(AliasAnalysis *AA, 633 const Value *A, 634 const Value *B) { 635 const Value *AObj = GetUnderlyingObject(A); 636 const Value *BObj = GetUnderlyingObject(B); 637 return AA->alias(AObj, AA->getTypeStoreSize(AObj->getType()), 638 BObj, AA->getTypeStoreSize(BObj->getType())); 639 } 640 641 642 // Returns true if the load or store can be analyzed. Atomic and volatile 643 // operations have properties which this analysis does not understand. 644 static 645 bool isLoadOrStore(const Instruction *I) { 646 if (const LoadInst *LI = dyn_cast<LoadInst>(I)) 647 return LI->isUnordered(); 648 else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) 649 return SI->isUnordered(); 650 return false; 651 } 652 653 654 static 655 Value *getPointerOperand(Instruction *I) { 656 if (LoadInst *LI = dyn_cast<LoadInst>(I)) 657 return LI->getPointerOperand(); 658 if (StoreInst *SI = dyn_cast<StoreInst>(I)) 659 return SI->getPointerOperand(); 660 llvm_unreachable("Value is not load or store instruction"); 661 return nullptr; 662 } 663 664 665 // Examines the loop nesting of the Src and Dst 666 // instructions and establishes their shared loops. Sets the variables 667 // CommonLevels, SrcLevels, and MaxLevels. 668 // The source and destination instructions needn't be contained in the same 669 // loop. The routine establishNestingLevels finds the level of most deeply 670 // nested loop that contains them both, CommonLevels. An instruction that's 671 // not contained in a loop is at level = 0. MaxLevels is equal to the level 672 // of the source plus the level of the destination, minus CommonLevels. 673 // This lets us allocate vectors MaxLevels in length, with room for every 674 // distinct loop referenced in both the source and destination subscripts. 675 // The variable SrcLevels is the nesting depth of the source instruction. 676 // It's used to help calculate distinct loops referenced by the destination. 677 // Here's the map from loops to levels: 678 // 0 - unused 679 // 1 - outermost common loop 680 // ... - other common loops 681 // CommonLevels - innermost common loop 682 // ... - loops containing Src but not Dst 683 // SrcLevels - innermost loop containing Src but not Dst 684 // ... - loops containing Dst but not Src 685 // MaxLevels - innermost loops containing Dst but not Src 686 // Consider the follow code fragment: 687 // for (a = ...) { 688 // for (b = ...) { 689 // for (c = ...) { 690 // for (d = ...) { 691 // A[] = ...; 692 // } 693 // } 694 // for (e = ...) { 695 // for (f = ...) { 696 // for (g = ...) { 697 // ... = A[]; 698 // } 699 // } 700 // } 701 // } 702 // } 703 // If we're looking at the possibility of a dependence between the store 704 // to A (the Src) and the load from A (the Dst), we'll note that they 705 // have 2 loops in common, so CommonLevels will equal 2 and the direction 706 // vector for Result will have 2 entries. SrcLevels = 4 and MaxLevels = 7. 707 // A map from loop names to loop numbers would look like 708 // a - 1 709 // b - 2 = CommonLevels 710 // c - 3 711 // d - 4 = SrcLevels 712 // e - 5 713 // f - 6 714 // g - 7 = MaxLevels 715 void DependenceAnalysis::establishNestingLevels(const Instruction *Src, 716 const Instruction *Dst) { 717 const BasicBlock *SrcBlock = Src->getParent(); 718 const BasicBlock *DstBlock = Dst->getParent(); 719 unsigned SrcLevel = LI->getLoopDepth(SrcBlock); 720 unsigned DstLevel = LI->getLoopDepth(DstBlock); 721 const Loop *SrcLoop = LI->getLoopFor(SrcBlock); 722 const Loop *DstLoop = LI->getLoopFor(DstBlock); 723 SrcLevels = SrcLevel; 724 MaxLevels = SrcLevel + DstLevel; 725 while (SrcLevel > DstLevel) { 726 SrcLoop = SrcLoop->getParentLoop(); 727 SrcLevel--; 728 } 729 while (DstLevel > SrcLevel) { 730 DstLoop = DstLoop->getParentLoop(); 731 DstLevel--; 732 } 733 while (SrcLoop != DstLoop) { 734 SrcLoop = SrcLoop->getParentLoop(); 735 DstLoop = DstLoop->getParentLoop(); 736 SrcLevel--; 737 } 738 CommonLevels = SrcLevel; 739 MaxLevels -= CommonLevels; 740 } 741 742 743 // Given one of the loops containing the source, return 744 // its level index in our numbering scheme. 745 unsigned DependenceAnalysis::mapSrcLoop(const Loop *SrcLoop) const { 746 return SrcLoop->getLoopDepth(); 747 } 748 749 750 // Given one of the loops containing the destination, 751 // return its level index in our numbering scheme. 752 unsigned DependenceAnalysis::mapDstLoop(const Loop *DstLoop) const { 753 unsigned D = DstLoop->getLoopDepth(); 754 if (D > CommonLevels) 755 return D - CommonLevels + SrcLevels; 756 else 757 return D; 758 } 759 760 761 // Returns true if Expression is loop invariant in LoopNest. 762 bool DependenceAnalysis::isLoopInvariant(const SCEV *Expression, 763 const Loop *LoopNest) const { 764 if (!LoopNest) 765 return true; 766 return SE->isLoopInvariant(Expression, LoopNest) && 767 isLoopInvariant(Expression, LoopNest->getParentLoop()); 768 } 769 770 771 772 // Finds the set of loops from the LoopNest that 773 // have a level <= CommonLevels and are referred to by the SCEV Expression. 774 void DependenceAnalysis::collectCommonLoops(const SCEV *Expression, 775 const Loop *LoopNest, 776 SmallBitVector &Loops) const { 777 while (LoopNest) { 778 unsigned Level = LoopNest->getLoopDepth(); 779 if (Level <= CommonLevels && !SE->isLoopInvariant(Expression, LoopNest)) 780 Loops.set(Level); 781 LoopNest = LoopNest->getParentLoop(); 782 } 783 } 784 785 void DependenceAnalysis::unifySubscriptType(Subscript *Pair) { 786 const SCEV *Src = Pair->Src; 787 const SCEV *Dst = Pair->Dst; 788 IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType()); 789 IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType()); 790 if (SrcTy == nullptr || DstTy == nullptr) { 791 assert(SrcTy == DstTy && "This function only unify integer types and " 792 "expect Src and Dst share the same type " 793 "otherwise."); 794 return; 795 } 796 if (SrcTy->getBitWidth() > DstTy->getBitWidth()) { 797 // Sign-extend Dst to typeof(Src) if typeof(Src) is wider than typeof(Dst). 798 Pair->Dst = SE->getSignExtendExpr(Dst, SrcTy); 799 } else if (SrcTy->getBitWidth() < DstTy->getBitWidth()) { 800 // Sign-extend Src to typeof(Dst) if typeof(Dst) is wider than typeof(Src). 801 Pair->Src = SE->getSignExtendExpr(Src, DstTy); 802 } 803 } 804 805 // removeMatchingExtensions - Examines a subscript pair. 806 // If the source and destination are identically sign (or zero) 807 // extended, it strips off the extension in an effect to simplify 808 // the actual analysis. 809 void DependenceAnalysis::removeMatchingExtensions(Subscript *Pair) { 810 const SCEV *Src = Pair->Src; 811 const SCEV *Dst = Pair->Dst; 812 if ((isa<SCEVZeroExtendExpr>(Src) && isa<SCEVZeroExtendExpr>(Dst)) || 813 (isa<SCEVSignExtendExpr>(Src) && isa<SCEVSignExtendExpr>(Dst))) { 814 const SCEVCastExpr *SrcCast = cast<SCEVCastExpr>(Src); 815 const SCEVCastExpr *DstCast = cast<SCEVCastExpr>(Dst); 816 const SCEV *SrcCastOp = SrcCast->getOperand(); 817 const SCEV *DstCastOp = DstCast->getOperand(); 818 if (SrcCastOp->getType() == DstCastOp->getType()) { 819 Pair->Src = SrcCastOp; 820 Pair->Dst = DstCastOp; 821 } 822 } 823 } 824 825 826 // Examine the scev and return true iff it's linear. 827 // Collect any loops mentioned in the set of "Loops". 828 bool DependenceAnalysis::checkSrcSubscript(const SCEV *Src, 829 const Loop *LoopNest, 830 SmallBitVector &Loops) { 831 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Src); 832 if (!AddRec) 833 return isLoopInvariant(Src, LoopNest); 834 const SCEV *Start = AddRec->getStart(); 835 const SCEV *Step = AddRec->getStepRecurrence(*SE); 836 if (!isLoopInvariant(Step, LoopNest)) 837 return false; 838 Loops.set(mapSrcLoop(AddRec->getLoop())); 839 return checkSrcSubscript(Start, LoopNest, Loops); 840 } 841 842 843 844 // Examine the scev and return true iff it's linear. 845 // Collect any loops mentioned in the set of "Loops". 846 bool DependenceAnalysis::checkDstSubscript(const SCEV *Dst, 847 const Loop *LoopNest, 848 SmallBitVector &Loops) { 849 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Dst); 850 if (!AddRec) 851 return isLoopInvariant(Dst, LoopNest); 852 const SCEV *Start = AddRec->getStart(); 853 const SCEV *Step = AddRec->getStepRecurrence(*SE); 854 if (!isLoopInvariant(Step, LoopNest)) 855 return false; 856 Loops.set(mapDstLoop(AddRec->getLoop())); 857 return checkDstSubscript(Start, LoopNest, Loops); 858 } 859 860 861 // Examines the subscript pair (the Src and Dst SCEVs) 862 // and classifies it as either ZIV, SIV, RDIV, MIV, or Nonlinear. 863 // Collects the associated loops in a set. 864 DependenceAnalysis::Subscript::ClassificationKind 865 DependenceAnalysis::classifyPair(const SCEV *Src, const Loop *SrcLoopNest, 866 const SCEV *Dst, const Loop *DstLoopNest, 867 SmallBitVector &Loops) { 868 SmallBitVector SrcLoops(MaxLevels + 1); 869 SmallBitVector DstLoops(MaxLevels + 1); 870 if (!checkSrcSubscript(Src, SrcLoopNest, SrcLoops)) 871 return Subscript::NonLinear; 872 if (!checkDstSubscript(Dst, DstLoopNest, DstLoops)) 873 return Subscript::NonLinear; 874 Loops = SrcLoops; 875 Loops |= DstLoops; 876 unsigned N = Loops.count(); 877 if (N == 0) 878 return Subscript::ZIV; 879 if (N == 1) 880 return Subscript::SIV; 881 if (N == 2 && (SrcLoops.count() == 0 || 882 DstLoops.count() == 0 || 883 (SrcLoops.count() == 1 && DstLoops.count() == 1))) 884 return Subscript::RDIV; 885 return Subscript::MIV; 886 } 887 888 889 // A wrapper around SCEV::isKnownPredicate. 890 // Looks for cases where we're interested in comparing for equality. 891 // If both X and Y have been identically sign or zero extended, 892 // it strips off the (confusing) extensions before invoking 893 // SCEV::isKnownPredicate. Perhaps, someday, the ScalarEvolution package 894 // will be similarly updated. 895 // 896 // If SCEV::isKnownPredicate can't prove the predicate, 897 // we try simple subtraction, which seems to help in some cases 898 // involving symbolics. 899 bool DependenceAnalysis::isKnownPredicate(ICmpInst::Predicate Pred, 900 const SCEV *X, 901 const SCEV *Y) const { 902 if (Pred == CmpInst::ICMP_EQ || 903 Pred == CmpInst::ICMP_NE) { 904 if ((isa<SCEVSignExtendExpr>(X) && 905 isa<SCEVSignExtendExpr>(Y)) || 906 (isa<SCEVZeroExtendExpr>(X) && 907 isa<SCEVZeroExtendExpr>(Y))) { 908 const SCEVCastExpr *CX = cast<SCEVCastExpr>(X); 909 const SCEVCastExpr *CY = cast<SCEVCastExpr>(Y); 910 const SCEV *Xop = CX->getOperand(); 911 const SCEV *Yop = CY->getOperand(); 912 if (Xop->getType() == Yop->getType()) { 913 X = Xop; 914 Y = Yop; 915 } 916 } 917 } 918 if (SE->isKnownPredicate(Pred, X, Y)) 919 return true; 920 // If SE->isKnownPredicate can't prove the condition, 921 // we try the brute-force approach of subtracting 922 // and testing the difference. 923 // By testing with SE->isKnownPredicate first, we avoid 924 // the possibility of overflow when the arguments are constants. 925 const SCEV *Delta = SE->getMinusSCEV(X, Y); 926 switch (Pred) { 927 case CmpInst::ICMP_EQ: 928 return Delta->isZero(); 929 case CmpInst::ICMP_NE: 930 return SE->isKnownNonZero(Delta); 931 case CmpInst::ICMP_SGE: 932 return SE->isKnownNonNegative(Delta); 933 case CmpInst::ICMP_SLE: 934 return SE->isKnownNonPositive(Delta); 935 case CmpInst::ICMP_SGT: 936 return SE->isKnownPositive(Delta); 937 case CmpInst::ICMP_SLT: 938 return SE->isKnownNegative(Delta); 939 default: 940 llvm_unreachable("unexpected predicate in isKnownPredicate"); 941 } 942 } 943 944 945 // All subscripts are all the same type. 946 // Loop bound may be smaller (e.g., a char). 947 // Should zero extend loop bound, since it's always >= 0. 948 // This routine collects upper bound and extends if needed. 949 // Return null if no bound available. 950 const SCEV *DependenceAnalysis::collectUpperBound(const Loop *L, 951 Type *T) const { 952 if (SE->hasLoopInvariantBackedgeTakenCount(L)) { 953 const SCEV *UB = SE->getBackedgeTakenCount(L); 954 return SE->getNoopOrZeroExtend(UB, T); 955 } 956 return nullptr; 957 } 958 959 960 // Calls collectUpperBound(), then attempts to cast it to SCEVConstant. 961 // If the cast fails, returns NULL. 962 const SCEVConstant *DependenceAnalysis::collectConstantUpperBound(const Loop *L, 963 Type *T 964 ) const { 965 if (const SCEV *UB = collectUpperBound(L, T)) 966 return dyn_cast<SCEVConstant>(UB); 967 return nullptr; 968 } 969 970 971 // testZIV - 972 // When we have a pair of subscripts of the form [c1] and [c2], 973 // where c1 and c2 are both loop invariant, we attack it using 974 // the ZIV test. Basically, we test by comparing the two values, 975 // but there are actually three possible results: 976 // 1) the values are equal, so there's a dependence 977 // 2) the values are different, so there's no dependence 978 // 3) the values might be equal, so we have to assume a dependence. 979 // 980 // Return true if dependence disproved. 981 bool DependenceAnalysis::testZIV(const SCEV *Src, 982 const SCEV *Dst, 983 FullDependence &Result) const { 984 DEBUG(dbgs() << " src = " << *Src << "\n"); 985 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 986 ++ZIVapplications; 987 if (isKnownPredicate(CmpInst::ICMP_EQ, Src, Dst)) { 988 DEBUG(dbgs() << " provably dependent\n"); 989 return false; // provably dependent 990 } 991 if (isKnownPredicate(CmpInst::ICMP_NE, Src, Dst)) { 992 DEBUG(dbgs() << " provably independent\n"); 993 ++ZIVindependence; 994 return true; // provably independent 995 } 996 DEBUG(dbgs() << " possibly dependent\n"); 997 Result.Consistent = false; 998 return false; // possibly dependent 999 } 1000 1001 1002 // strongSIVtest - 1003 // From the paper, Practical Dependence Testing, Section 4.2.1 1004 // 1005 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 + a*i], 1006 // where i is an induction variable, c1 and c2 are loop invariant, 1007 // and a is a constant, we can solve it exactly using the Strong SIV test. 1008 // 1009 // Can prove independence. Failing that, can compute distance (and direction). 1010 // In the presence of symbolic terms, we can sometimes make progress. 1011 // 1012 // If there's a dependence, 1013 // 1014 // c1 + a*i = c2 + a*i' 1015 // 1016 // The dependence distance is 1017 // 1018 // d = i' - i = (c1 - c2)/a 1019 // 1020 // A dependence only exists if d is an integer and abs(d) <= U, where U is the 1021 // loop's upper bound. If a dependence exists, the dependence direction is 1022 // defined as 1023 // 1024 // { < if d > 0 1025 // direction = { = if d = 0 1026 // { > if d < 0 1027 // 1028 // Return true if dependence disproved. 1029 bool DependenceAnalysis::strongSIVtest(const SCEV *Coeff, 1030 const SCEV *SrcConst, 1031 const SCEV *DstConst, 1032 const Loop *CurLoop, 1033 unsigned Level, 1034 FullDependence &Result, 1035 Constraint &NewConstraint) const { 1036 DEBUG(dbgs() << "\tStrong SIV test\n"); 1037 DEBUG(dbgs() << "\t Coeff = " << *Coeff); 1038 DEBUG(dbgs() << ", " << *Coeff->getType() << "\n"); 1039 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst); 1040 DEBUG(dbgs() << ", " << *SrcConst->getType() << "\n"); 1041 DEBUG(dbgs() << "\t DstConst = " << *DstConst); 1042 DEBUG(dbgs() << ", " << *DstConst->getType() << "\n"); 1043 ++StrongSIVapplications; 1044 assert(0 < Level && Level <= CommonLevels && "level out of range"); 1045 Level--; 1046 1047 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); 1048 DEBUG(dbgs() << "\t Delta = " << *Delta); 1049 DEBUG(dbgs() << ", " << *Delta->getType() << "\n"); 1050 1051 // check that |Delta| < iteration count 1052 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1053 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound); 1054 DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n"); 1055 const SCEV *AbsDelta = 1056 SE->isKnownNonNegative(Delta) ? Delta : SE->getNegativeSCEV(Delta); 1057 const SCEV *AbsCoeff = 1058 SE->isKnownNonNegative(Coeff) ? Coeff : SE->getNegativeSCEV(Coeff); 1059 const SCEV *Product = SE->getMulExpr(UpperBound, AbsCoeff); 1060 if (isKnownPredicate(CmpInst::ICMP_SGT, AbsDelta, Product)) { 1061 // Distance greater than trip count - no dependence 1062 ++StrongSIVindependence; 1063 ++StrongSIVsuccesses; 1064 return true; 1065 } 1066 } 1067 1068 // Can we compute distance? 1069 if (isa<SCEVConstant>(Delta) && isa<SCEVConstant>(Coeff)) { 1070 APInt ConstDelta = cast<SCEVConstant>(Delta)->getValue()->getValue(); 1071 APInt ConstCoeff = cast<SCEVConstant>(Coeff)->getValue()->getValue(); 1072 APInt Distance = ConstDelta; // these need to be initialized 1073 APInt Remainder = ConstDelta; 1074 APInt::sdivrem(ConstDelta, ConstCoeff, Distance, Remainder); 1075 DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); 1076 DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1077 // Make sure Coeff divides Delta exactly 1078 if (Remainder != 0) { 1079 // Coeff doesn't divide Distance, no dependence 1080 ++StrongSIVindependence; 1081 ++StrongSIVsuccesses; 1082 return true; 1083 } 1084 Result.DV[Level].Distance = SE->getConstant(Distance); 1085 NewConstraint.setDistance(SE->getConstant(Distance), CurLoop); 1086 if (Distance.sgt(0)) 1087 Result.DV[Level].Direction &= Dependence::DVEntry::LT; 1088 else if (Distance.slt(0)) 1089 Result.DV[Level].Direction &= Dependence::DVEntry::GT; 1090 else 1091 Result.DV[Level].Direction &= Dependence::DVEntry::EQ; 1092 ++StrongSIVsuccesses; 1093 } 1094 else if (Delta->isZero()) { 1095 // since 0/X == 0 1096 Result.DV[Level].Distance = Delta; 1097 NewConstraint.setDistance(Delta, CurLoop); 1098 Result.DV[Level].Direction &= Dependence::DVEntry::EQ; 1099 ++StrongSIVsuccesses; 1100 } 1101 else { 1102 if (Coeff->isOne()) { 1103 DEBUG(dbgs() << "\t Distance = " << *Delta << "\n"); 1104 Result.DV[Level].Distance = Delta; // since X/1 == X 1105 NewConstraint.setDistance(Delta, CurLoop); 1106 } 1107 else { 1108 Result.Consistent = false; 1109 NewConstraint.setLine(Coeff, 1110 SE->getNegativeSCEV(Coeff), 1111 SE->getNegativeSCEV(Delta), CurLoop); 1112 } 1113 1114 // maybe we can get a useful direction 1115 bool DeltaMaybeZero = !SE->isKnownNonZero(Delta); 1116 bool DeltaMaybePositive = !SE->isKnownNonPositive(Delta); 1117 bool DeltaMaybeNegative = !SE->isKnownNonNegative(Delta); 1118 bool CoeffMaybePositive = !SE->isKnownNonPositive(Coeff); 1119 bool CoeffMaybeNegative = !SE->isKnownNonNegative(Coeff); 1120 // The double negatives above are confusing. 1121 // It helps to read !SE->isKnownNonZero(Delta) 1122 // as "Delta might be Zero" 1123 unsigned NewDirection = Dependence::DVEntry::NONE; 1124 if ((DeltaMaybePositive && CoeffMaybePositive) || 1125 (DeltaMaybeNegative && CoeffMaybeNegative)) 1126 NewDirection = Dependence::DVEntry::LT; 1127 if (DeltaMaybeZero) 1128 NewDirection |= Dependence::DVEntry::EQ; 1129 if ((DeltaMaybeNegative && CoeffMaybePositive) || 1130 (DeltaMaybePositive && CoeffMaybeNegative)) 1131 NewDirection |= Dependence::DVEntry::GT; 1132 if (NewDirection < Result.DV[Level].Direction) 1133 ++StrongSIVsuccesses; 1134 Result.DV[Level].Direction &= NewDirection; 1135 } 1136 return false; 1137 } 1138 1139 1140 // weakCrossingSIVtest - 1141 // From the paper, Practical Dependence Testing, Section 4.2.2 1142 // 1143 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 - a*i], 1144 // where i is an induction variable, c1 and c2 are loop invariant, 1145 // and a is a constant, we can solve it exactly using the 1146 // Weak-Crossing SIV test. 1147 // 1148 // Given c1 + a*i = c2 - a*i', we can look for the intersection of 1149 // the two lines, where i = i', yielding 1150 // 1151 // c1 + a*i = c2 - a*i 1152 // 2a*i = c2 - c1 1153 // i = (c2 - c1)/2a 1154 // 1155 // If i < 0, there is no dependence. 1156 // If i > upperbound, there is no dependence. 1157 // If i = 0 (i.e., if c1 = c2), there's a dependence with distance = 0. 1158 // If i = upperbound, there's a dependence with distance = 0. 1159 // If i is integral, there's a dependence (all directions). 1160 // If the non-integer part = 1/2, there's a dependence (<> directions). 1161 // Otherwise, there's no dependence. 1162 // 1163 // Can prove independence. Failing that, 1164 // can sometimes refine the directions. 1165 // Can determine iteration for splitting. 1166 // 1167 // Return true if dependence disproved. 1168 bool DependenceAnalysis::weakCrossingSIVtest(const SCEV *Coeff, 1169 const SCEV *SrcConst, 1170 const SCEV *DstConst, 1171 const Loop *CurLoop, 1172 unsigned Level, 1173 FullDependence &Result, 1174 Constraint &NewConstraint, 1175 const SCEV *&SplitIter) const { 1176 DEBUG(dbgs() << "\tWeak-Crossing SIV test\n"); 1177 DEBUG(dbgs() << "\t Coeff = " << *Coeff << "\n"); 1178 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1179 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1180 ++WeakCrossingSIVapplications; 1181 assert(0 < Level && Level <= CommonLevels && "Level out of range"); 1182 Level--; 1183 Result.Consistent = false; 1184 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1185 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1186 NewConstraint.setLine(Coeff, Coeff, Delta, CurLoop); 1187 if (Delta->isZero()) { 1188 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT); 1189 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT); 1190 ++WeakCrossingSIVsuccesses; 1191 if (!Result.DV[Level].Direction) { 1192 ++WeakCrossingSIVindependence; 1193 return true; 1194 } 1195 Result.DV[Level].Distance = Delta; // = 0 1196 return false; 1197 } 1198 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Coeff); 1199 if (!ConstCoeff) 1200 return false; 1201 1202 Result.DV[Level].Splitable = true; 1203 if (SE->isKnownNegative(ConstCoeff)) { 1204 ConstCoeff = dyn_cast<SCEVConstant>(SE->getNegativeSCEV(ConstCoeff)); 1205 assert(ConstCoeff && 1206 "dynamic cast of negative of ConstCoeff should yield constant"); 1207 Delta = SE->getNegativeSCEV(Delta); 1208 } 1209 assert(SE->isKnownPositive(ConstCoeff) && "ConstCoeff should be positive"); 1210 1211 // compute SplitIter for use by DependenceAnalysis::getSplitIteration() 1212 SplitIter = 1213 SE->getUDivExpr(SE->getSMaxExpr(SE->getConstant(Delta->getType(), 0), 1214 Delta), 1215 SE->getMulExpr(SE->getConstant(Delta->getType(), 2), 1216 ConstCoeff)); 1217 DEBUG(dbgs() << "\t Split iter = " << *SplitIter << "\n"); 1218 1219 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1220 if (!ConstDelta) 1221 return false; 1222 1223 // We're certain that ConstCoeff > 0; therefore, 1224 // if Delta < 0, then no dependence. 1225 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1226 DEBUG(dbgs() << "\t ConstCoeff = " << *ConstCoeff << "\n"); 1227 if (SE->isKnownNegative(Delta)) { 1228 // No dependence, Delta < 0 1229 ++WeakCrossingSIVindependence; 1230 ++WeakCrossingSIVsuccesses; 1231 return true; 1232 } 1233 1234 // We're certain that Delta > 0 and ConstCoeff > 0. 1235 // Check Delta/(2*ConstCoeff) against upper loop bound 1236 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1237 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1238 const SCEV *ConstantTwo = SE->getConstant(UpperBound->getType(), 2); 1239 const SCEV *ML = SE->getMulExpr(SE->getMulExpr(ConstCoeff, UpperBound), 1240 ConstantTwo); 1241 DEBUG(dbgs() << "\t ML = " << *ML << "\n"); 1242 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, ML)) { 1243 // Delta too big, no dependence 1244 ++WeakCrossingSIVindependence; 1245 ++WeakCrossingSIVsuccesses; 1246 return true; 1247 } 1248 if (isKnownPredicate(CmpInst::ICMP_EQ, Delta, ML)) { 1249 // i = i' = UB 1250 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT); 1251 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT); 1252 ++WeakCrossingSIVsuccesses; 1253 if (!Result.DV[Level].Direction) { 1254 ++WeakCrossingSIVindependence; 1255 return true; 1256 } 1257 Result.DV[Level].Splitable = false; 1258 Result.DV[Level].Distance = SE->getConstant(Delta->getType(), 0); 1259 return false; 1260 } 1261 } 1262 1263 // check that Coeff divides Delta 1264 APInt APDelta = ConstDelta->getValue()->getValue(); 1265 APInt APCoeff = ConstCoeff->getValue()->getValue(); 1266 APInt Distance = APDelta; // these need to be initialzed 1267 APInt Remainder = APDelta; 1268 APInt::sdivrem(APDelta, APCoeff, Distance, Remainder); 1269 DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1270 if (Remainder != 0) { 1271 // Coeff doesn't divide Delta, no dependence 1272 ++WeakCrossingSIVindependence; 1273 ++WeakCrossingSIVsuccesses; 1274 return true; 1275 } 1276 DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); 1277 1278 // if 2*Coeff doesn't divide Delta, then the equal direction isn't possible 1279 APInt Two = APInt(Distance.getBitWidth(), 2, true); 1280 Remainder = Distance.srem(Two); 1281 DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1282 if (Remainder != 0) { 1283 // Equal direction isn't possible 1284 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::EQ); 1285 ++WeakCrossingSIVsuccesses; 1286 } 1287 return false; 1288 } 1289 1290 1291 // Kirch's algorithm, from 1292 // 1293 // Optimizing Supercompilers for Supercomputers 1294 // Michael Wolfe 1295 // MIT Press, 1989 1296 // 1297 // Program 2.1, page 29. 1298 // Computes the GCD of AM and BM. 1299 // Also finds a solution to the equation ax - by = gcd(a, b). 1300 // Returns true if dependence disproved; i.e., gcd does not divide Delta. 1301 static 1302 bool findGCD(unsigned Bits, APInt AM, APInt BM, APInt Delta, 1303 APInt &G, APInt &X, APInt &Y) { 1304 APInt A0(Bits, 1, true), A1(Bits, 0, true); 1305 APInt B0(Bits, 0, true), B1(Bits, 1, true); 1306 APInt G0 = AM.abs(); 1307 APInt G1 = BM.abs(); 1308 APInt Q = G0; // these need to be initialized 1309 APInt R = G0; 1310 APInt::sdivrem(G0, G1, Q, R); 1311 while (R != 0) { 1312 APInt A2 = A0 - Q*A1; A0 = A1; A1 = A2; 1313 APInt B2 = B0 - Q*B1; B0 = B1; B1 = B2; 1314 G0 = G1; G1 = R; 1315 APInt::sdivrem(G0, G1, Q, R); 1316 } 1317 G = G1; 1318 DEBUG(dbgs() << "\t GCD = " << G << "\n"); 1319 X = AM.slt(0) ? -A1 : A1; 1320 Y = BM.slt(0) ? B1 : -B1; 1321 1322 // make sure gcd divides Delta 1323 R = Delta.srem(G); 1324 if (R != 0) 1325 return true; // gcd doesn't divide Delta, no dependence 1326 Q = Delta.sdiv(G); 1327 X *= Q; 1328 Y *= Q; 1329 return false; 1330 } 1331 1332 1333 static 1334 APInt floorOfQuotient(APInt A, APInt B) { 1335 APInt Q = A; // these need to be initialized 1336 APInt R = A; 1337 APInt::sdivrem(A, B, Q, R); 1338 if (R == 0) 1339 return Q; 1340 if ((A.sgt(0) && B.sgt(0)) || 1341 (A.slt(0) && B.slt(0))) 1342 return Q; 1343 else 1344 return Q - 1; 1345 } 1346 1347 1348 static 1349 APInt ceilingOfQuotient(APInt A, APInt B) { 1350 APInt Q = A; // these need to be initialized 1351 APInt R = A; 1352 APInt::sdivrem(A, B, Q, R); 1353 if (R == 0) 1354 return Q; 1355 if ((A.sgt(0) && B.sgt(0)) || 1356 (A.slt(0) && B.slt(0))) 1357 return Q + 1; 1358 else 1359 return Q; 1360 } 1361 1362 1363 static 1364 APInt maxAPInt(APInt A, APInt B) { 1365 return A.sgt(B) ? A : B; 1366 } 1367 1368 1369 static 1370 APInt minAPInt(APInt A, APInt B) { 1371 return A.slt(B) ? A : B; 1372 } 1373 1374 1375 // exactSIVtest - 1376 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i], 1377 // where i is an induction variable, c1 and c2 are loop invariant, and a1 1378 // and a2 are constant, we can solve it exactly using an algorithm developed 1379 // by Banerjee and Wolfe. See Section 2.5.3 in 1380 // 1381 // Optimizing Supercompilers for Supercomputers 1382 // Michael Wolfe 1383 // MIT Press, 1989 1384 // 1385 // It's slower than the specialized tests (strong SIV, weak-zero SIV, etc), 1386 // so use them if possible. They're also a bit better with symbolics and, 1387 // in the case of the strong SIV test, can compute Distances. 1388 // 1389 // Return true if dependence disproved. 1390 bool DependenceAnalysis::exactSIVtest(const SCEV *SrcCoeff, 1391 const SCEV *DstCoeff, 1392 const SCEV *SrcConst, 1393 const SCEV *DstConst, 1394 const Loop *CurLoop, 1395 unsigned Level, 1396 FullDependence &Result, 1397 Constraint &NewConstraint) const { 1398 DEBUG(dbgs() << "\tExact SIV test\n"); 1399 DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); 1400 DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); 1401 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1402 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1403 ++ExactSIVapplications; 1404 assert(0 < Level && Level <= CommonLevels && "Level out of range"); 1405 Level--; 1406 Result.Consistent = false; 1407 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1408 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1409 NewConstraint.setLine(SrcCoeff, SE->getNegativeSCEV(DstCoeff), 1410 Delta, CurLoop); 1411 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1412 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1413 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1414 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) 1415 return false; 1416 1417 // find gcd 1418 APInt G, X, Y; 1419 APInt AM = ConstSrcCoeff->getValue()->getValue(); 1420 APInt BM = ConstDstCoeff->getValue()->getValue(); 1421 unsigned Bits = AM.getBitWidth(); 1422 if (findGCD(Bits, AM, BM, ConstDelta->getValue()->getValue(), G, X, Y)) { 1423 // gcd doesn't divide Delta, no dependence 1424 ++ExactSIVindependence; 1425 ++ExactSIVsuccesses; 1426 return true; 1427 } 1428 1429 DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); 1430 1431 // since SCEV construction normalizes, LM = 0 1432 APInt UM(Bits, 1, true); 1433 bool UMvalid = false; 1434 // UM is perhaps unavailable, let's check 1435 if (const SCEVConstant *CUB = 1436 collectConstantUpperBound(CurLoop, Delta->getType())) { 1437 UM = CUB->getValue()->getValue(); 1438 DEBUG(dbgs() << "\t UM = " << UM << "\n"); 1439 UMvalid = true; 1440 } 1441 1442 APInt TU(APInt::getSignedMaxValue(Bits)); 1443 APInt TL(APInt::getSignedMinValue(Bits)); 1444 1445 // test(BM/G, LM-X) and test(-BM/G, X-UM) 1446 APInt TMUL = BM.sdiv(G); 1447 if (TMUL.sgt(0)) { 1448 TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL)); 1449 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1450 if (UMvalid) { 1451 TU = minAPInt(TU, floorOfQuotient(UM - X, TMUL)); 1452 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1453 } 1454 } 1455 else { 1456 TU = minAPInt(TU, floorOfQuotient(-X, TMUL)); 1457 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1458 if (UMvalid) { 1459 TL = maxAPInt(TL, ceilingOfQuotient(UM - X, TMUL)); 1460 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1461 } 1462 } 1463 1464 // test(AM/G, LM-Y) and test(-AM/G, Y-UM) 1465 TMUL = AM.sdiv(G); 1466 if (TMUL.sgt(0)) { 1467 TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL)); 1468 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1469 if (UMvalid) { 1470 TU = minAPInt(TU, floorOfQuotient(UM - Y, TMUL)); 1471 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1472 } 1473 } 1474 else { 1475 TU = minAPInt(TU, floorOfQuotient(-Y, TMUL)); 1476 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1477 if (UMvalid) { 1478 TL = maxAPInt(TL, ceilingOfQuotient(UM - Y, TMUL)); 1479 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1480 } 1481 } 1482 if (TL.sgt(TU)) { 1483 ++ExactSIVindependence; 1484 ++ExactSIVsuccesses; 1485 return true; 1486 } 1487 1488 // explore directions 1489 unsigned NewDirection = Dependence::DVEntry::NONE; 1490 1491 // less than 1492 APInt SaveTU(TU); // save these 1493 APInt SaveTL(TL); 1494 DEBUG(dbgs() << "\t exploring LT direction\n"); 1495 TMUL = AM - BM; 1496 if (TMUL.sgt(0)) { 1497 TL = maxAPInt(TL, ceilingOfQuotient(X - Y + 1, TMUL)); 1498 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1499 } 1500 else { 1501 TU = minAPInt(TU, floorOfQuotient(X - Y + 1, TMUL)); 1502 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1503 } 1504 if (TL.sle(TU)) { 1505 NewDirection |= Dependence::DVEntry::LT; 1506 ++ExactSIVsuccesses; 1507 } 1508 1509 // equal 1510 TU = SaveTU; // restore 1511 TL = SaveTL; 1512 DEBUG(dbgs() << "\t exploring EQ direction\n"); 1513 if (TMUL.sgt(0)) { 1514 TL = maxAPInt(TL, ceilingOfQuotient(X - Y, TMUL)); 1515 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1516 } 1517 else { 1518 TU = minAPInt(TU, floorOfQuotient(X - Y, TMUL)); 1519 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1520 } 1521 TMUL = BM - AM; 1522 if (TMUL.sgt(0)) { 1523 TL = maxAPInt(TL, ceilingOfQuotient(Y - X, TMUL)); 1524 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1525 } 1526 else { 1527 TU = minAPInt(TU, floorOfQuotient(Y - X, TMUL)); 1528 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1529 } 1530 if (TL.sle(TU)) { 1531 NewDirection |= Dependence::DVEntry::EQ; 1532 ++ExactSIVsuccesses; 1533 } 1534 1535 // greater than 1536 TU = SaveTU; // restore 1537 TL = SaveTL; 1538 DEBUG(dbgs() << "\t exploring GT direction\n"); 1539 if (TMUL.sgt(0)) { 1540 TL = maxAPInt(TL, ceilingOfQuotient(Y - X + 1, TMUL)); 1541 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1542 } 1543 else { 1544 TU = minAPInt(TU, floorOfQuotient(Y - X + 1, TMUL)); 1545 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1546 } 1547 if (TL.sle(TU)) { 1548 NewDirection |= Dependence::DVEntry::GT; 1549 ++ExactSIVsuccesses; 1550 } 1551 1552 // finished 1553 Result.DV[Level].Direction &= NewDirection; 1554 if (Result.DV[Level].Direction == Dependence::DVEntry::NONE) 1555 ++ExactSIVindependence; 1556 return Result.DV[Level].Direction == Dependence::DVEntry::NONE; 1557 } 1558 1559 1560 1561 // Return true if the divisor evenly divides the dividend. 1562 static 1563 bool isRemainderZero(const SCEVConstant *Dividend, 1564 const SCEVConstant *Divisor) { 1565 APInt ConstDividend = Dividend->getValue()->getValue(); 1566 APInt ConstDivisor = Divisor->getValue()->getValue(); 1567 return ConstDividend.srem(ConstDivisor) == 0; 1568 } 1569 1570 1571 // weakZeroSrcSIVtest - 1572 // From the paper, Practical Dependence Testing, Section 4.2.2 1573 // 1574 // When we have a pair of subscripts of the form [c1] and [c2 + a*i], 1575 // where i is an induction variable, c1 and c2 are loop invariant, 1576 // and a is a constant, we can solve it exactly using the 1577 // Weak-Zero SIV test. 1578 // 1579 // Given 1580 // 1581 // c1 = c2 + a*i 1582 // 1583 // we get 1584 // 1585 // (c1 - c2)/a = i 1586 // 1587 // If i is not an integer, there's no dependence. 1588 // If i < 0 or > UB, there's no dependence. 1589 // If i = 0, the direction is <= and peeling the 1590 // 1st iteration will break the dependence. 1591 // If i = UB, the direction is >= and peeling the 1592 // last iteration will break the dependence. 1593 // Otherwise, the direction is *. 1594 // 1595 // Can prove independence. Failing that, we can sometimes refine 1596 // the directions. Can sometimes show that first or last 1597 // iteration carries all the dependences (so worth peeling). 1598 // 1599 // (see also weakZeroDstSIVtest) 1600 // 1601 // Return true if dependence disproved. 1602 bool DependenceAnalysis::weakZeroSrcSIVtest(const SCEV *DstCoeff, 1603 const SCEV *SrcConst, 1604 const SCEV *DstConst, 1605 const Loop *CurLoop, 1606 unsigned Level, 1607 FullDependence &Result, 1608 Constraint &NewConstraint) const { 1609 // For the WeakSIV test, it's possible the loop isn't common to 1610 // the Src and Dst loops. If it isn't, then there's no need to 1611 // record a direction. 1612 DEBUG(dbgs() << "\tWeak-Zero (src) SIV test\n"); 1613 DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << "\n"); 1614 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1615 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1616 ++WeakZeroSIVapplications; 1617 assert(0 < Level && Level <= MaxLevels && "Level out of range"); 1618 Level--; 1619 Result.Consistent = false; 1620 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); 1621 NewConstraint.setLine(SE->getConstant(Delta->getType(), 0), 1622 DstCoeff, Delta, CurLoop); 1623 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1624 if (isKnownPredicate(CmpInst::ICMP_EQ, SrcConst, DstConst)) { 1625 if (Level < CommonLevels) { 1626 Result.DV[Level].Direction &= Dependence::DVEntry::LE; 1627 Result.DV[Level].PeelFirst = true; 1628 ++WeakZeroSIVsuccesses; 1629 } 1630 return false; // dependences caused by first iteration 1631 } 1632 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1633 if (!ConstCoeff) 1634 return false; 1635 const SCEV *AbsCoeff = 1636 SE->isKnownNegative(ConstCoeff) ? 1637 SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; 1638 const SCEV *NewDelta = 1639 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta; 1640 1641 // check that Delta/SrcCoeff < iteration count 1642 // really check NewDelta < count*AbsCoeff 1643 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1644 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1645 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); 1646 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { 1647 ++WeakZeroSIVindependence; 1648 ++WeakZeroSIVsuccesses; 1649 return true; 1650 } 1651 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { 1652 // dependences caused by last iteration 1653 if (Level < CommonLevels) { 1654 Result.DV[Level].Direction &= Dependence::DVEntry::GE; 1655 Result.DV[Level].PeelLast = true; 1656 ++WeakZeroSIVsuccesses; 1657 } 1658 return false; 1659 } 1660 } 1661 1662 // check that Delta/SrcCoeff >= 0 1663 // really check that NewDelta >= 0 1664 if (SE->isKnownNegative(NewDelta)) { 1665 // No dependence, newDelta < 0 1666 ++WeakZeroSIVindependence; 1667 ++WeakZeroSIVsuccesses; 1668 return true; 1669 } 1670 1671 // if SrcCoeff doesn't divide Delta, then no dependence 1672 if (isa<SCEVConstant>(Delta) && 1673 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { 1674 ++WeakZeroSIVindependence; 1675 ++WeakZeroSIVsuccesses; 1676 return true; 1677 } 1678 return false; 1679 } 1680 1681 1682 // weakZeroDstSIVtest - 1683 // From the paper, Practical Dependence Testing, Section 4.2.2 1684 // 1685 // When we have a pair of subscripts of the form [c1 + a*i] and [c2], 1686 // where i is an induction variable, c1 and c2 are loop invariant, 1687 // and a is a constant, we can solve it exactly using the 1688 // Weak-Zero SIV test. 1689 // 1690 // Given 1691 // 1692 // c1 + a*i = c2 1693 // 1694 // we get 1695 // 1696 // i = (c2 - c1)/a 1697 // 1698 // If i is not an integer, there's no dependence. 1699 // If i < 0 or > UB, there's no dependence. 1700 // If i = 0, the direction is <= and peeling the 1701 // 1st iteration will break the dependence. 1702 // If i = UB, the direction is >= and peeling the 1703 // last iteration will break the dependence. 1704 // Otherwise, the direction is *. 1705 // 1706 // Can prove independence. Failing that, we can sometimes refine 1707 // the directions. Can sometimes show that first or last 1708 // iteration carries all the dependences (so worth peeling). 1709 // 1710 // (see also weakZeroSrcSIVtest) 1711 // 1712 // Return true if dependence disproved. 1713 bool DependenceAnalysis::weakZeroDstSIVtest(const SCEV *SrcCoeff, 1714 const SCEV *SrcConst, 1715 const SCEV *DstConst, 1716 const Loop *CurLoop, 1717 unsigned Level, 1718 FullDependence &Result, 1719 Constraint &NewConstraint) const { 1720 // For the WeakSIV test, it's possible the loop isn't common to the 1721 // Src and Dst loops. If it isn't, then there's no need to record a direction. 1722 DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n"); 1723 DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << "\n"); 1724 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1725 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1726 ++WeakZeroSIVapplications; 1727 assert(0 < Level && Level <= SrcLevels && "Level out of range"); 1728 Level--; 1729 Result.Consistent = false; 1730 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1731 NewConstraint.setLine(SrcCoeff, SE->getConstant(Delta->getType(), 0), 1732 Delta, CurLoop); 1733 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1734 if (isKnownPredicate(CmpInst::ICMP_EQ, DstConst, SrcConst)) { 1735 if (Level < CommonLevels) { 1736 Result.DV[Level].Direction &= Dependence::DVEntry::LE; 1737 Result.DV[Level].PeelFirst = true; 1738 ++WeakZeroSIVsuccesses; 1739 } 1740 return false; // dependences caused by first iteration 1741 } 1742 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1743 if (!ConstCoeff) 1744 return false; 1745 const SCEV *AbsCoeff = 1746 SE->isKnownNegative(ConstCoeff) ? 1747 SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; 1748 const SCEV *NewDelta = 1749 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta; 1750 1751 // check that Delta/SrcCoeff < iteration count 1752 // really check NewDelta < count*AbsCoeff 1753 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1754 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1755 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); 1756 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { 1757 ++WeakZeroSIVindependence; 1758 ++WeakZeroSIVsuccesses; 1759 return true; 1760 } 1761 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { 1762 // dependences caused by last iteration 1763 if (Level < CommonLevels) { 1764 Result.DV[Level].Direction &= Dependence::DVEntry::GE; 1765 Result.DV[Level].PeelLast = true; 1766 ++WeakZeroSIVsuccesses; 1767 } 1768 return false; 1769 } 1770 } 1771 1772 // check that Delta/SrcCoeff >= 0 1773 // really check that NewDelta >= 0 1774 if (SE->isKnownNegative(NewDelta)) { 1775 // No dependence, newDelta < 0 1776 ++WeakZeroSIVindependence; 1777 ++WeakZeroSIVsuccesses; 1778 return true; 1779 } 1780 1781 // if SrcCoeff doesn't divide Delta, then no dependence 1782 if (isa<SCEVConstant>(Delta) && 1783 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { 1784 ++WeakZeroSIVindependence; 1785 ++WeakZeroSIVsuccesses; 1786 return true; 1787 } 1788 return false; 1789 } 1790 1791 1792 // exactRDIVtest - Tests the RDIV subscript pair for dependence. 1793 // Things of the form [c1 + a*i] and [c2 + b*j], 1794 // where i and j are induction variable, c1 and c2 are loop invariant, 1795 // and a and b are constants. 1796 // Returns true if any possible dependence is disproved. 1797 // Marks the result as inconsistent. 1798 // Works in some cases that symbolicRDIVtest doesn't, and vice versa. 1799 bool DependenceAnalysis::exactRDIVtest(const SCEV *SrcCoeff, 1800 const SCEV *DstCoeff, 1801 const SCEV *SrcConst, 1802 const SCEV *DstConst, 1803 const Loop *SrcLoop, 1804 const Loop *DstLoop, 1805 FullDependence &Result) const { 1806 DEBUG(dbgs() << "\tExact RDIV test\n"); 1807 DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); 1808 DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); 1809 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1810 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1811 ++ExactRDIVapplications; 1812 Result.Consistent = false; 1813 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1814 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1815 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1816 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1817 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1818 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) 1819 return false; 1820 1821 // find gcd 1822 APInt G, X, Y; 1823 APInt AM = ConstSrcCoeff->getValue()->getValue(); 1824 APInt BM = ConstDstCoeff->getValue()->getValue(); 1825 unsigned Bits = AM.getBitWidth(); 1826 if (findGCD(Bits, AM, BM, ConstDelta->getValue()->getValue(), G, X, Y)) { 1827 // gcd doesn't divide Delta, no dependence 1828 ++ExactRDIVindependence; 1829 return true; 1830 } 1831 1832 DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); 1833 1834 // since SCEV construction seems to normalize, LM = 0 1835 APInt SrcUM(Bits, 1, true); 1836 bool SrcUMvalid = false; 1837 // SrcUM is perhaps unavailable, let's check 1838 if (const SCEVConstant *UpperBound = 1839 collectConstantUpperBound(SrcLoop, Delta->getType())) { 1840 SrcUM = UpperBound->getValue()->getValue(); 1841 DEBUG(dbgs() << "\t SrcUM = " << SrcUM << "\n"); 1842 SrcUMvalid = true; 1843 } 1844 1845 APInt DstUM(Bits, 1, true); 1846 bool DstUMvalid = false; 1847 // UM is perhaps unavailable, let's check 1848 if (const SCEVConstant *UpperBound = 1849 collectConstantUpperBound(DstLoop, Delta->getType())) { 1850 DstUM = UpperBound->getValue()->getValue(); 1851 DEBUG(dbgs() << "\t DstUM = " << DstUM << "\n"); 1852 DstUMvalid = true; 1853 } 1854 1855 APInt TU(APInt::getSignedMaxValue(Bits)); 1856 APInt TL(APInt::getSignedMinValue(Bits)); 1857 1858 // test(BM/G, LM-X) and test(-BM/G, X-UM) 1859 APInt TMUL = BM.sdiv(G); 1860 if (TMUL.sgt(0)) { 1861 TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL)); 1862 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1863 if (SrcUMvalid) { 1864 TU = minAPInt(TU, floorOfQuotient(SrcUM - X, TMUL)); 1865 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1866 } 1867 } 1868 else { 1869 TU = minAPInt(TU, floorOfQuotient(-X, TMUL)); 1870 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1871 if (SrcUMvalid) { 1872 TL = maxAPInt(TL, ceilingOfQuotient(SrcUM - X, TMUL)); 1873 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1874 } 1875 } 1876 1877 // test(AM/G, LM-Y) and test(-AM/G, Y-UM) 1878 TMUL = AM.sdiv(G); 1879 if (TMUL.sgt(0)) { 1880 TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL)); 1881 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1882 if (DstUMvalid) { 1883 TU = minAPInt(TU, floorOfQuotient(DstUM - Y, TMUL)); 1884 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1885 } 1886 } 1887 else { 1888 TU = minAPInt(TU, floorOfQuotient(-Y, TMUL)); 1889 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1890 if (DstUMvalid) { 1891 TL = maxAPInt(TL, ceilingOfQuotient(DstUM - Y, TMUL)); 1892 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1893 } 1894 } 1895 if (TL.sgt(TU)) 1896 ++ExactRDIVindependence; 1897 return TL.sgt(TU); 1898 } 1899 1900 1901 // symbolicRDIVtest - 1902 // In Section 4.5 of the Practical Dependence Testing paper,the authors 1903 // introduce a special case of Banerjee's Inequalities (also called the 1904 // Extreme-Value Test) that can handle some of the SIV and RDIV cases, 1905 // particularly cases with symbolics. Since it's only able to disprove 1906 // dependence (not compute distances or directions), we'll use it as a 1907 // fall back for the other tests. 1908 // 1909 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] 1910 // where i and j are induction variables and c1 and c2 are loop invariants, 1911 // we can use the symbolic tests to disprove some dependences, serving as a 1912 // backup for the RDIV test. Note that i and j can be the same variable, 1913 // letting this test serve as a backup for the various SIV tests. 1914 // 1915 // For a dependence to exist, c1 + a1*i must equal c2 + a2*j for some 1916 // 0 <= i <= N1 and some 0 <= j <= N2, where N1 and N2 are the (normalized) 1917 // loop bounds for the i and j loops, respectively. So, ... 1918 // 1919 // c1 + a1*i = c2 + a2*j 1920 // a1*i - a2*j = c2 - c1 1921 // 1922 // To test for a dependence, we compute c2 - c1 and make sure it's in the 1923 // range of the maximum and minimum possible values of a1*i - a2*j. 1924 // Considering the signs of a1 and a2, we have 4 possible cases: 1925 // 1926 // 1) If a1 >= 0 and a2 >= 0, then 1927 // a1*0 - a2*N2 <= c2 - c1 <= a1*N1 - a2*0 1928 // -a2*N2 <= c2 - c1 <= a1*N1 1929 // 1930 // 2) If a1 >= 0 and a2 <= 0, then 1931 // a1*0 - a2*0 <= c2 - c1 <= a1*N1 - a2*N2 1932 // 0 <= c2 - c1 <= a1*N1 - a2*N2 1933 // 1934 // 3) If a1 <= 0 and a2 >= 0, then 1935 // a1*N1 - a2*N2 <= c2 - c1 <= a1*0 - a2*0 1936 // a1*N1 - a2*N2 <= c2 - c1 <= 0 1937 // 1938 // 4) If a1 <= 0 and a2 <= 0, then 1939 // a1*N1 - a2*0 <= c2 - c1 <= a1*0 - a2*N2 1940 // a1*N1 <= c2 - c1 <= -a2*N2 1941 // 1942 // return true if dependence disproved 1943 bool DependenceAnalysis::symbolicRDIVtest(const SCEV *A1, 1944 const SCEV *A2, 1945 const SCEV *C1, 1946 const SCEV *C2, 1947 const Loop *Loop1, 1948 const Loop *Loop2) const { 1949 ++SymbolicRDIVapplications; 1950 DEBUG(dbgs() << "\ttry symbolic RDIV test\n"); 1951 DEBUG(dbgs() << "\t A1 = " << *A1); 1952 DEBUG(dbgs() << ", type = " << *A1->getType() << "\n"); 1953 DEBUG(dbgs() << "\t A2 = " << *A2 << "\n"); 1954 DEBUG(dbgs() << "\t C1 = " << *C1 << "\n"); 1955 DEBUG(dbgs() << "\t C2 = " << *C2 << "\n"); 1956 const SCEV *N1 = collectUpperBound(Loop1, A1->getType()); 1957 const SCEV *N2 = collectUpperBound(Loop2, A1->getType()); 1958 DEBUG(if (N1) dbgs() << "\t N1 = " << *N1 << "\n"); 1959 DEBUG(if (N2) dbgs() << "\t N2 = " << *N2 << "\n"); 1960 const SCEV *C2_C1 = SE->getMinusSCEV(C2, C1); 1961 const SCEV *C1_C2 = SE->getMinusSCEV(C1, C2); 1962 DEBUG(dbgs() << "\t C2 - C1 = " << *C2_C1 << "\n"); 1963 DEBUG(dbgs() << "\t C1 - C2 = " << *C1_C2 << "\n"); 1964 if (SE->isKnownNonNegative(A1)) { 1965 if (SE->isKnownNonNegative(A2)) { 1966 // A1 >= 0 && A2 >= 0 1967 if (N1) { 1968 // make sure that c2 - c1 <= a1*N1 1969 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 1970 DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); 1971 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1)) { 1972 ++SymbolicRDIVindependence; 1973 return true; 1974 } 1975 } 1976 if (N2) { 1977 // make sure that -a2*N2 <= c2 - c1, or a2*N2 >= c1 - c2 1978 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 1979 DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); 1980 if (isKnownPredicate(CmpInst::ICMP_SLT, A2N2, C1_C2)) { 1981 ++SymbolicRDIVindependence; 1982 return true; 1983 } 1984 } 1985 } 1986 else if (SE->isKnownNonPositive(A2)) { 1987 // a1 >= 0 && a2 <= 0 1988 if (N1 && N2) { 1989 // make sure that c2 - c1 <= a1*N1 - a2*N2 1990 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 1991 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 1992 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); 1993 DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); 1994 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1_A2N2)) { 1995 ++SymbolicRDIVindependence; 1996 return true; 1997 } 1998 } 1999 // make sure that 0 <= c2 - c1 2000 if (SE->isKnownNegative(C2_C1)) { 2001 ++SymbolicRDIVindependence; 2002 return true; 2003 } 2004 } 2005 } 2006 else if (SE->isKnownNonPositive(A1)) { 2007 if (SE->isKnownNonNegative(A2)) { 2008 // a1 <= 0 && a2 >= 0 2009 if (N1 && N2) { 2010 // make sure that a1*N1 - a2*N2 <= c2 - c1 2011 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2012 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2013 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); 2014 DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); 2015 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1_A2N2, C2_C1)) { 2016 ++SymbolicRDIVindependence; 2017 return true; 2018 } 2019 } 2020 // make sure that c2 - c1 <= 0 2021 if (SE->isKnownPositive(C2_C1)) { 2022 ++SymbolicRDIVindependence; 2023 return true; 2024 } 2025 } 2026 else if (SE->isKnownNonPositive(A2)) { 2027 // a1 <= 0 && a2 <= 0 2028 if (N1) { 2029 // make sure that a1*N1 <= c2 - c1 2030 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2031 DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); 2032 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1, C2_C1)) { 2033 ++SymbolicRDIVindependence; 2034 return true; 2035 } 2036 } 2037 if (N2) { 2038 // make sure that c2 - c1 <= -a2*N2, or c1 - c2 >= a2*N2 2039 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2040 DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); 2041 if (isKnownPredicate(CmpInst::ICMP_SLT, C1_C2, A2N2)) { 2042 ++SymbolicRDIVindependence; 2043 return true; 2044 } 2045 } 2046 } 2047 } 2048 return false; 2049 } 2050 2051 2052 // testSIV - 2053 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 - a2*i] 2054 // where i is an induction variable, c1 and c2 are loop invariant, and a1 and 2055 // a2 are constant, we attack it with an SIV test. While they can all be 2056 // solved with the Exact SIV test, it's worthwhile to use simpler tests when 2057 // they apply; they're cheaper and sometimes more precise. 2058 // 2059 // Return true if dependence disproved. 2060 bool DependenceAnalysis::testSIV(const SCEV *Src, 2061 const SCEV *Dst, 2062 unsigned &Level, 2063 FullDependence &Result, 2064 Constraint &NewConstraint, 2065 const SCEV *&SplitIter) const { 2066 DEBUG(dbgs() << " src = " << *Src << "\n"); 2067 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2068 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); 2069 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); 2070 if (SrcAddRec && DstAddRec) { 2071 const SCEV *SrcConst = SrcAddRec->getStart(); 2072 const SCEV *DstConst = DstAddRec->getStart(); 2073 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2074 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); 2075 const Loop *CurLoop = SrcAddRec->getLoop(); 2076 assert(CurLoop == DstAddRec->getLoop() && 2077 "both loops in SIV should be same"); 2078 Level = mapSrcLoop(CurLoop); 2079 bool disproven; 2080 if (SrcCoeff == DstCoeff) 2081 disproven = strongSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2082 Level, Result, NewConstraint); 2083 else if (SrcCoeff == SE->getNegativeSCEV(DstCoeff)) 2084 disproven = weakCrossingSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2085 Level, Result, NewConstraint, SplitIter); 2086 else 2087 disproven = exactSIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, 2088 Level, Result, NewConstraint); 2089 return disproven || 2090 gcdMIVtest(Src, Dst, Result) || 2091 symbolicRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, CurLoop); 2092 } 2093 if (SrcAddRec) { 2094 const SCEV *SrcConst = SrcAddRec->getStart(); 2095 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2096 const SCEV *DstConst = Dst; 2097 const Loop *CurLoop = SrcAddRec->getLoop(); 2098 Level = mapSrcLoop(CurLoop); 2099 return weakZeroDstSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2100 Level, Result, NewConstraint) || 2101 gcdMIVtest(Src, Dst, Result); 2102 } 2103 if (DstAddRec) { 2104 const SCEV *DstConst = DstAddRec->getStart(); 2105 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); 2106 const SCEV *SrcConst = Src; 2107 const Loop *CurLoop = DstAddRec->getLoop(); 2108 Level = mapDstLoop(CurLoop); 2109 return weakZeroSrcSIVtest(DstCoeff, SrcConst, DstConst, 2110 CurLoop, Level, Result, NewConstraint) || 2111 gcdMIVtest(Src, Dst, Result); 2112 } 2113 llvm_unreachable("SIV test expected at least one AddRec"); 2114 return false; 2115 } 2116 2117 2118 // testRDIV - 2119 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] 2120 // where i and j are induction variables, c1 and c2 are loop invariant, 2121 // and a1 and a2 are constant, we can solve it exactly with an easy adaptation 2122 // of the Exact SIV test, the Restricted Double Index Variable (RDIV) test. 2123 // It doesn't make sense to talk about distance or direction in this case, 2124 // so there's no point in making special versions of the Strong SIV test or 2125 // the Weak-crossing SIV test. 2126 // 2127 // With minor algebra, this test can also be used for things like 2128 // [c1 + a1*i + a2*j][c2]. 2129 // 2130 // Return true if dependence disproved. 2131 bool DependenceAnalysis::testRDIV(const SCEV *Src, 2132 const SCEV *Dst, 2133 FullDependence &Result) const { 2134 // we have 3 possible situations here: 2135 // 1) [a*i + b] and [c*j + d] 2136 // 2) [a*i + c*j + b] and [d] 2137 // 3) [b] and [a*i + c*j + d] 2138 // We need to find what we've got and get organized 2139 2140 const SCEV *SrcConst, *DstConst; 2141 const SCEV *SrcCoeff, *DstCoeff; 2142 const Loop *SrcLoop, *DstLoop; 2143 2144 DEBUG(dbgs() << " src = " << *Src << "\n"); 2145 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2146 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); 2147 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); 2148 if (SrcAddRec && DstAddRec) { 2149 SrcConst = SrcAddRec->getStart(); 2150 SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2151 SrcLoop = SrcAddRec->getLoop(); 2152 DstConst = DstAddRec->getStart(); 2153 DstCoeff = DstAddRec->getStepRecurrence(*SE); 2154 DstLoop = DstAddRec->getLoop(); 2155 } 2156 else if (SrcAddRec) { 2157 if (const SCEVAddRecExpr *tmpAddRec = 2158 dyn_cast<SCEVAddRecExpr>(SrcAddRec->getStart())) { 2159 SrcConst = tmpAddRec->getStart(); 2160 SrcCoeff = tmpAddRec->getStepRecurrence(*SE); 2161 SrcLoop = tmpAddRec->getLoop(); 2162 DstConst = Dst; 2163 DstCoeff = SE->getNegativeSCEV(SrcAddRec->getStepRecurrence(*SE)); 2164 DstLoop = SrcAddRec->getLoop(); 2165 } 2166 else 2167 llvm_unreachable("RDIV reached by surprising SCEVs"); 2168 } 2169 else if (DstAddRec) { 2170 if (const SCEVAddRecExpr *tmpAddRec = 2171 dyn_cast<SCEVAddRecExpr>(DstAddRec->getStart())) { 2172 DstConst = tmpAddRec->getStart(); 2173 DstCoeff = tmpAddRec->getStepRecurrence(*SE); 2174 DstLoop = tmpAddRec->getLoop(); 2175 SrcConst = Src; 2176 SrcCoeff = SE->getNegativeSCEV(DstAddRec->getStepRecurrence(*SE)); 2177 SrcLoop = DstAddRec->getLoop(); 2178 } 2179 else 2180 llvm_unreachable("RDIV reached by surprising SCEVs"); 2181 } 2182 else 2183 llvm_unreachable("RDIV expected at least one AddRec"); 2184 return exactRDIVtest(SrcCoeff, DstCoeff, 2185 SrcConst, DstConst, 2186 SrcLoop, DstLoop, 2187 Result) || 2188 gcdMIVtest(Src, Dst, Result) || 2189 symbolicRDIVtest(SrcCoeff, DstCoeff, 2190 SrcConst, DstConst, 2191 SrcLoop, DstLoop); 2192 } 2193 2194 2195 // Tests the single-subscript MIV pair (Src and Dst) for dependence. 2196 // Return true if dependence disproved. 2197 // Can sometimes refine direction vectors. 2198 bool DependenceAnalysis::testMIV(const SCEV *Src, 2199 const SCEV *Dst, 2200 const SmallBitVector &Loops, 2201 FullDependence &Result) const { 2202 DEBUG(dbgs() << " src = " << *Src << "\n"); 2203 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2204 Result.Consistent = false; 2205 return gcdMIVtest(Src, Dst, Result) || 2206 banerjeeMIVtest(Src, Dst, Loops, Result); 2207 } 2208 2209 2210 // Given a product, e.g., 10*X*Y, returns the first constant operand, 2211 // in this case 10. If there is no constant part, returns NULL. 2212 static 2213 const SCEVConstant *getConstantPart(const SCEVMulExpr *Product) { 2214 for (unsigned Op = 0, Ops = Product->getNumOperands(); Op < Ops; Op++) { 2215 if (const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Product->getOperand(Op))) 2216 return Constant; 2217 } 2218 return nullptr; 2219 } 2220 2221 2222 //===----------------------------------------------------------------------===// 2223 // gcdMIVtest - 2224 // Tests an MIV subscript pair for dependence. 2225 // Returns true if any possible dependence is disproved. 2226 // Marks the result as inconsistent. 2227 // Can sometimes disprove the equal direction for 1 or more loops, 2228 // as discussed in Michael Wolfe's book, 2229 // High Performance Compilers for Parallel Computing, page 235. 2230 // 2231 // We spend some effort (code!) to handle cases like 2232 // [10*i + 5*N*j + 15*M + 6], where i and j are induction variables, 2233 // but M and N are just loop-invariant variables. 2234 // This should help us handle linearized subscripts; 2235 // also makes this test a useful backup to the various SIV tests. 2236 // 2237 // It occurs to me that the presence of loop-invariant variables 2238 // changes the nature of the test from "greatest common divisor" 2239 // to "a common divisor". 2240 bool DependenceAnalysis::gcdMIVtest(const SCEV *Src, 2241 const SCEV *Dst, 2242 FullDependence &Result) const { 2243 DEBUG(dbgs() << "starting gcd\n"); 2244 ++GCDapplications; 2245 unsigned BitWidth = SE->getTypeSizeInBits(Src->getType()); 2246 APInt RunningGCD = APInt::getNullValue(BitWidth); 2247 2248 // Examine Src coefficients. 2249 // Compute running GCD and record source constant. 2250 // Because we're looking for the constant at the end of the chain, 2251 // we can't quit the loop just because the GCD == 1. 2252 const SCEV *Coefficients = Src; 2253 while (const SCEVAddRecExpr *AddRec = 2254 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2255 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2256 const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Coeff); 2257 if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff)) 2258 // If the coefficient is the product of a constant and other stuff, 2259 // we can use the constant in the GCD computation. 2260 Constant = getConstantPart(Product); 2261 if (!Constant) 2262 return false; 2263 APInt ConstCoeff = Constant->getValue()->getValue(); 2264 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2265 Coefficients = AddRec->getStart(); 2266 } 2267 const SCEV *SrcConst = Coefficients; 2268 2269 // Examine Dst coefficients. 2270 // Compute running GCD and record destination constant. 2271 // Because we're looking for the constant at the end of the chain, 2272 // we can't quit the loop just because the GCD == 1. 2273 Coefficients = Dst; 2274 while (const SCEVAddRecExpr *AddRec = 2275 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2276 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2277 const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Coeff); 2278 if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff)) 2279 // If the coefficient is the product of a constant and other stuff, 2280 // we can use the constant in the GCD computation. 2281 Constant = getConstantPart(Product); 2282 if (!Constant) 2283 return false; 2284 APInt ConstCoeff = Constant->getValue()->getValue(); 2285 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2286 Coefficients = AddRec->getStart(); 2287 } 2288 const SCEV *DstConst = Coefficients; 2289 2290 APInt ExtraGCD = APInt::getNullValue(BitWidth); 2291 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 2292 DEBUG(dbgs() << " Delta = " << *Delta << "\n"); 2293 const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Delta); 2294 if (const SCEVAddExpr *Sum = dyn_cast<SCEVAddExpr>(Delta)) { 2295 // If Delta is a sum of products, we may be able to make further progress. 2296 for (unsigned Op = 0, Ops = Sum->getNumOperands(); Op < Ops; Op++) { 2297 const SCEV *Operand = Sum->getOperand(Op); 2298 if (isa<SCEVConstant>(Operand)) { 2299 assert(!Constant && "Surprised to find multiple constants"); 2300 Constant = cast<SCEVConstant>(Operand); 2301 } 2302 else if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Operand)) { 2303 // Search for constant operand to participate in GCD; 2304 // If none found; return false. 2305 const SCEVConstant *ConstOp = getConstantPart(Product); 2306 if (!ConstOp) 2307 return false; 2308 APInt ConstOpValue = ConstOp->getValue()->getValue(); 2309 ExtraGCD = APIntOps::GreatestCommonDivisor(ExtraGCD, 2310 ConstOpValue.abs()); 2311 } 2312 else 2313 return false; 2314 } 2315 } 2316 if (!Constant) 2317 return false; 2318 APInt ConstDelta = cast<SCEVConstant>(Constant)->getValue()->getValue(); 2319 DEBUG(dbgs() << " ConstDelta = " << ConstDelta << "\n"); 2320 if (ConstDelta == 0) 2321 return false; 2322 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ExtraGCD); 2323 DEBUG(dbgs() << " RunningGCD = " << RunningGCD << "\n"); 2324 APInt Remainder = ConstDelta.srem(RunningGCD); 2325 if (Remainder != 0) { 2326 ++GCDindependence; 2327 return true; 2328 } 2329 2330 // Try to disprove equal directions. 2331 // For example, given a subscript pair [3*i + 2*j] and [i' + 2*j' - 1], 2332 // the code above can't disprove the dependence because the GCD = 1. 2333 // So we consider what happen if i = i' and what happens if j = j'. 2334 // If i = i', we can simplify the subscript to [2*i + 2*j] and [2*j' - 1], 2335 // which is infeasible, so we can disallow the = direction for the i level. 2336 // Setting j = j' doesn't help matters, so we end up with a direction vector 2337 // of [<>, *] 2338 // 2339 // Given A[5*i + 10*j*M + 9*M*N] and A[15*i + 20*j*M - 21*N*M + 5], 2340 // we need to remember that the constant part is 5 and the RunningGCD should 2341 // be initialized to ExtraGCD = 30. 2342 DEBUG(dbgs() << " ExtraGCD = " << ExtraGCD << '\n'); 2343 2344 bool Improved = false; 2345 Coefficients = Src; 2346 while (const SCEVAddRecExpr *AddRec = 2347 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2348 Coefficients = AddRec->getStart(); 2349 const Loop *CurLoop = AddRec->getLoop(); 2350 RunningGCD = ExtraGCD; 2351 const SCEV *SrcCoeff = AddRec->getStepRecurrence(*SE); 2352 const SCEV *DstCoeff = SE->getMinusSCEV(SrcCoeff, SrcCoeff); 2353 const SCEV *Inner = Src; 2354 while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { 2355 AddRec = cast<SCEVAddRecExpr>(Inner); 2356 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2357 if (CurLoop == AddRec->getLoop()) 2358 ; // SrcCoeff == Coeff 2359 else { 2360 if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff)) 2361 // If the coefficient is the product of a constant and other stuff, 2362 // we can use the constant in the GCD computation. 2363 Constant = getConstantPart(Product); 2364 else 2365 Constant = cast<SCEVConstant>(Coeff); 2366 APInt ConstCoeff = Constant->getValue()->getValue(); 2367 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2368 } 2369 Inner = AddRec->getStart(); 2370 } 2371 Inner = Dst; 2372 while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { 2373 AddRec = cast<SCEVAddRecExpr>(Inner); 2374 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2375 if (CurLoop == AddRec->getLoop()) 2376 DstCoeff = Coeff; 2377 else { 2378 if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff)) 2379 // If the coefficient is the product of a constant and other stuff, 2380 // we can use the constant in the GCD computation. 2381 Constant = getConstantPart(Product); 2382 else 2383 Constant = cast<SCEVConstant>(Coeff); 2384 APInt ConstCoeff = Constant->getValue()->getValue(); 2385 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2386 } 2387 Inner = AddRec->getStart(); 2388 } 2389 Delta = SE->getMinusSCEV(SrcCoeff, DstCoeff); 2390 if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Delta)) 2391 // If the coefficient is the product of a constant and other stuff, 2392 // we can use the constant in the GCD computation. 2393 Constant = getConstantPart(Product); 2394 else if (isa<SCEVConstant>(Delta)) 2395 Constant = cast<SCEVConstant>(Delta); 2396 else { 2397 // The difference of the two coefficients might not be a product 2398 // or constant, in which case we give up on this direction. 2399 continue; 2400 } 2401 APInt ConstCoeff = Constant->getValue()->getValue(); 2402 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2403 DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n"); 2404 if (RunningGCD != 0) { 2405 Remainder = ConstDelta.srem(RunningGCD); 2406 DEBUG(dbgs() << "\tRemainder = " << Remainder << "\n"); 2407 if (Remainder != 0) { 2408 unsigned Level = mapSrcLoop(CurLoop); 2409 Result.DV[Level - 1].Direction &= unsigned(~Dependence::DVEntry::EQ); 2410 Improved = true; 2411 } 2412 } 2413 } 2414 if (Improved) 2415 ++GCDsuccesses; 2416 DEBUG(dbgs() << "all done\n"); 2417 return false; 2418 } 2419 2420 2421 //===----------------------------------------------------------------------===// 2422 // banerjeeMIVtest - 2423 // Use Banerjee's Inequalities to test an MIV subscript pair. 2424 // (Wolfe, in the race-car book, calls this the Extreme Value Test.) 2425 // Generally follows the discussion in Section 2.5.2 of 2426 // 2427 // Optimizing Supercompilers for Supercomputers 2428 // Michael Wolfe 2429 // 2430 // The inequalities given on page 25 are simplified in that loops are 2431 // normalized so that the lower bound is always 0 and the stride is always 1. 2432 // For example, Wolfe gives 2433 // 2434 // LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2435 // 2436 // where A_k is the coefficient of the kth index in the source subscript, 2437 // B_k is the coefficient of the kth index in the destination subscript, 2438 // U_k is the upper bound of the kth index, L_k is the lower bound of the Kth 2439 // index, and N_k is the stride of the kth index. Since all loops are normalized 2440 // by the SCEV package, N_k = 1 and L_k = 0, allowing us to simplify the 2441 // equation to 2442 // 2443 // LB^<_k = (A^-_k - B_k)^- (U_k - 0 - 1) + (A_k - B_k)0 - B_k 1 2444 // = (A^-_k - B_k)^- (U_k - 1) - B_k 2445 // 2446 // Similar simplifications are possible for the other equations. 2447 // 2448 // When we can't determine the number of iterations for a loop, 2449 // we use NULL as an indicator for the worst case, infinity. 2450 // When computing the upper bound, NULL denotes +inf; 2451 // for the lower bound, NULL denotes -inf. 2452 // 2453 // Return true if dependence disproved. 2454 bool DependenceAnalysis::banerjeeMIVtest(const SCEV *Src, 2455 const SCEV *Dst, 2456 const SmallBitVector &Loops, 2457 FullDependence &Result) const { 2458 DEBUG(dbgs() << "starting Banerjee\n"); 2459 ++BanerjeeApplications; 2460 DEBUG(dbgs() << " Src = " << *Src << '\n'); 2461 const SCEV *A0; 2462 CoefficientInfo *A = collectCoeffInfo(Src, true, A0); 2463 DEBUG(dbgs() << " Dst = " << *Dst << '\n'); 2464 const SCEV *B0; 2465 CoefficientInfo *B = collectCoeffInfo(Dst, false, B0); 2466 BoundInfo *Bound = new BoundInfo[MaxLevels + 1]; 2467 const SCEV *Delta = SE->getMinusSCEV(B0, A0); 2468 DEBUG(dbgs() << "\tDelta = " << *Delta << '\n'); 2469 2470 // Compute bounds for all the * directions. 2471 DEBUG(dbgs() << "\tBounds[*]\n"); 2472 for (unsigned K = 1; K <= MaxLevels; ++K) { 2473 Bound[K].Iterations = A[K].Iterations ? A[K].Iterations : B[K].Iterations; 2474 Bound[K].Direction = Dependence::DVEntry::ALL; 2475 Bound[K].DirSet = Dependence::DVEntry::NONE; 2476 findBoundsALL(A, B, Bound, K); 2477 #ifndef NDEBUG 2478 DEBUG(dbgs() << "\t " << K << '\t'); 2479 if (Bound[K].Lower[Dependence::DVEntry::ALL]) 2480 DEBUG(dbgs() << *Bound[K].Lower[Dependence::DVEntry::ALL] << '\t'); 2481 else 2482 DEBUG(dbgs() << "-inf\t"); 2483 if (Bound[K].Upper[Dependence::DVEntry::ALL]) 2484 DEBUG(dbgs() << *Bound[K].Upper[Dependence::DVEntry::ALL] << '\n'); 2485 else 2486 DEBUG(dbgs() << "+inf\n"); 2487 #endif 2488 } 2489 2490 // Test the *, *, *, ... case. 2491 bool Disproved = false; 2492 if (testBounds(Dependence::DVEntry::ALL, 0, Bound, Delta)) { 2493 // Explore the direction vector hierarchy. 2494 unsigned DepthExpanded = 0; 2495 unsigned NewDeps = exploreDirections(1, A, B, Bound, 2496 Loops, DepthExpanded, Delta); 2497 if (NewDeps > 0) { 2498 bool Improved = false; 2499 for (unsigned K = 1; K <= CommonLevels; ++K) { 2500 if (Loops[K]) { 2501 unsigned Old = Result.DV[K - 1].Direction; 2502 Result.DV[K - 1].Direction = Old & Bound[K].DirSet; 2503 Improved |= Old != Result.DV[K - 1].Direction; 2504 if (!Result.DV[K - 1].Direction) { 2505 Improved = false; 2506 Disproved = true; 2507 break; 2508 } 2509 } 2510 } 2511 if (Improved) 2512 ++BanerjeeSuccesses; 2513 } 2514 else { 2515 ++BanerjeeIndependence; 2516 Disproved = true; 2517 } 2518 } 2519 else { 2520 ++BanerjeeIndependence; 2521 Disproved = true; 2522 } 2523 delete [] Bound; 2524 delete [] A; 2525 delete [] B; 2526 return Disproved; 2527 } 2528 2529 2530 // Hierarchically expands the direction vector 2531 // search space, combining the directions of discovered dependences 2532 // in the DirSet field of Bound. Returns the number of distinct 2533 // dependences discovered. If the dependence is disproved, 2534 // it will return 0. 2535 unsigned DependenceAnalysis::exploreDirections(unsigned Level, 2536 CoefficientInfo *A, 2537 CoefficientInfo *B, 2538 BoundInfo *Bound, 2539 const SmallBitVector &Loops, 2540 unsigned &DepthExpanded, 2541 const SCEV *Delta) const { 2542 if (Level > CommonLevels) { 2543 // record result 2544 DEBUG(dbgs() << "\t["); 2545 for (unsigned K = 1; K <= CommonLevels; ++K) { 2546 if (Loops[K]) { 2547 Bound[K].DirSet |= Bound[K].Direction; 2548 #ifndef NDEBUG 2549 switch (Bound[K].Direction) { 2550 case Dependence::DVEntry::LT: 2551 DEBUG(dbgs() << " <"); 2552 break; 2553 case Dependence::DVEntry::EQ: 2554 DEBUG(dbgs() << " ="); 2555 break; 2556 case Dependence::DVEntry::GT: 2557 DEBUG(dbgs() << " >"); 2558 break; 2559 case Dependence::DVEntry::ALL: 2560 DEBUG(dbgs() << " *"); 2561 break; 2562 default: 2563 llvm_unreachable("unexpected Bound[K].Direction"); 2564 } 2565 #endif 2566 } 2567 } 2568 DEBUG(dbgs() << " ]\n"); 2569 return 1; 2570 } 2571 if (Loops[Level]) { 2572 if (Level > DepthExpanded) { 2573 DepthExpanded = Level; 2574 // compute bounds for <, =, > at current level 2575 findBoundsLT(A, B, Bound, Level); 2576 findBoundsGT(A, B, Bound, Level); 2577 findBoundsEQ(A, B, Bound, Level); 2578 #ifndef NDEBUG 2579 DEBUG(dbgs() << "\tBound for level = " << Level << '\n'); 2580 DEBUG(dbgs() << "\t <\t"); 2581 if (Bound[Level].Lower[Dependence::DVEntry::LT]) 2582 DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::LT] << '\t'); 2583 else 2584 DEBUG(dbgs() << "-inf\t"); 2585 if (Bound[Level].Upper[Dependence::DVEntry::LT]) 2586 DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::LT] << '\n'); 2587 else 2588 DEBUG(dbgs() << "+inf\n"); 2589 DEBUG(dbgs() << "\t =\t"); 2590 if (Bound[Level].Lower[Dependence::DVEntry::EQ]) 2591 DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::EQ] << '\t'); 2592 else 2593 DEBUG(dbgs() << "-inf\t"); 2594 if (Bound[Level].Upper[Dependence::DVEntry::EQ]) 2595 DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::EQ] << '\n'); 2596 else 2597 DEBUG(dbgs() << "+inf\n"); 2598 DEBUG(dbgs() << "\t >\t"); 2599 if (Bound[Level].Lower[Dependence::DVEntry::GT]) 2600 DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::GT] << '\t'); 2601 else 2602 DEBUG(dbgs() << "-inf\t"); 2603 if (Bound[Level].Upper[Dependence::DVEntry::GT]) 2604 DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::GT] << '\n'); 2605 else 2606 DEBUG(dbgs() << "+inf\n"); 2607 #endif 2608 } 2609 2610 unsigned NewDeps = 0; 2611 2612 // test bounds for <, *, *, ... 2613 if (testBounds(Dependence::DVEntry::LT, Level, Bound, Delta)) 2614 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2615 Loops, DepthExpanded, Delta); 2616 2617 // Test bounds for =, *, *, ... 2618 if (testBounds(Dependence::DVEntry::EQ, Level, Bound, Delta)) 2619 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2620 Loops, DepthExpanded, Delta); 2621 2622 // test bounds for >, *, *, ... 2623 if (testBounds(Dependence::DVEntry::GT, Level, Bound, Delta)) 2624 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2625 Loops, DepthExpanded, Delta); 2626 2627 Bound[Level].Direction = Dependence::DVEntry::ALL; 2628 return NewDeps; 2629 } 2630 else 2631 return exploreDirections(Level + 1, A, B, Bound, Loops, DepthExpanded, Delta); 2632 } 2633 2634 2635 // Returns true iff the current bounds are plausible. 2636 bool DependenceAnalysis::testBounds(unsigned char DirKind, 2637 unsigned Level, 2638 BoundInfo *Bound, 2639 const SCEV *Delta) const { 2640 Bound[Level].Direction = DirKind; 2641 if (const SCEV *LowerBound = getLowerBound(Bound)) 2642 if (isKnownPredicate(CmpInst::ICMP_SGT, LowerBound, Delta)) 2643 return false; 2644 if (const SCEV *UpperBound = getUpperBound(Bound)) 2645 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, UpperBound)) 2646 return false; 2647 return true; 2648 } 2649 2650 2651 // Computes the upper and lower bounds for level K 2652 // using the * direction. Records them in Bound. 2653 // Wolfe gives the equations 2654 // 2655 // LB^*_k = (A^-_k - B^+_k)(U_k - L_k) + (A_k - B_k)L_k 2656 // UB^*_k = (A^+_k - B^-_k)(U_k - L_k) + (A_k - B_k)L_k 2657 // 2658 // Since we normalize loops, we can simplify these equations to 2659 // 2660 // LB^*_k = (A^-_k - B^+_k)U_k 2661 // UB^*_k = (A^+_k - B^-_k)U_k 2662 // 2663 // We must be careful to handle the case where the upper bound is unknown. 2664 // Note that the lower bound is always <= 0 2665 // and the upper bound is always >= 0. 2666 void DependenceAnalysis::findBoundsALL(CoefficientInfo *A, 2667 CoefficientInfo *B, 2668 BoundInfo *Bound, 2669 unsigned K) const { 2670 Bound[K].Lower[Dependence::DVEntry::ALL] = nullptr; // Default value = -infinity. 2671 Bound[K].Upper[Dependence::DVEntry::ALL] = nullptr; // Default value = +infinity. 2672 if (Bound[K].Iterations) { 2673 Bound[K].Lower[Dependence::DVEntry::ALL] = 2674 SE->getMulExpr(SE->getMinusSCEV(A[K].NegPart, B[K].PosPart), 2675 Bound[K].Iterations); 2676 Bound[K].Upper[Dependence::DVEntry::ALL] = 2677 SE->getMulExpr(SE->getMinusSCEV(A[K].PosPart, B[K].NegPart), 2678 Bound[K].Iterations); 2679 } 2680 else { 2681 // If the difference is 0, we won't need to know the number of iterations. 2682 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].NegPart, B[K].PosPart)) 2683 Bound[K].Lower[Dependence::DVEntry::ALL] = 2684 SE->getConstant(A[K].Coeff->getType(), 0); 2685 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].PosPart, B[K].NegPart)) 2686 Bound[K].Upper[Dependence::DVEntry::ALL] = 2687 SE->getConstant(A[K].Coeff->getType(), 0); 2688 } 2689 } 2690 2691 2692 // Computes the upper and lower bounds for level K 2693 // using the = direction. Records them in Bound. 2694 // Wolfe gives the equations 2695 // 2696 // LB^=_k = (A_k - B_k)^- (U_k - L_k) + (A_k - B_k)L_k 2697 // UB^=_k = (A_k - B_k)^+ (U_k - L_k) + (A_k - B_k)L_k 2698 // 2699 // Since we normalize loops, we can simplify these equations to 2700 // 2701 // LB^=_k = (A_k - B_k)^- U_k 2702 // UB^=_k = (A_k - B_k)^+ U_k 2703 // 2704 // We must be careful to handle the case where the upper bound is unknown. 2705 // Note that the lower bound is always <= 0 2706 // and the upper bound is always >= 0. 2707 void DependenceAnalysis::findBoundsEQ(CoefficientInfo *A, 2708 CoefficientInfo *B, 2709 BoundInfo *Bound, 2710 unsigned K) const { 2711 Bound[K].Lower[Dependence::DVEntry::EQ] = nullptr; // Default value = -infinity. 2712 Bound[K].Upper[Dependence::DVEntry::EQ] = nullptr; // Default value = +infinity. 2713 if (Bound[K].Iterations) { 2714 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); 2715 const SCEV *NegativePart = getNegativePart(Delta); 2716 Bound[K].Lower[Dependence::DVEntry::EQ] = 2717 SE->getMulExpr(NegativePart, Bound[K].Iterations); 2718 const SCEV *PositivePart = getPositivePart(Delta); 2719 Bound[K].Upper[Dependence::DVEntry::EQ] = 2720 SE->getMulExpr(PositivePart, Bound[K].Iterations); 2721 } 2722 else { 2723 // If the positive/negative part of the difference is 0, 2724 // we won't need to know the number of iterations. 2725 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); 2726 const SCEV *NegativePart = getNegativePart(Delta); 2727 if (NegativePart->isZero()) 2728 Bound[K].Lower[Dependence::DVEntry::EQ] = NegativePart; // Zero 2729 const SCEV *PositivePart = getPositivePart(Delta); 2730 if (PositivePart->isZero()) 2731 Bound[K].Upper[Dependence::DVEntry::EQ] = PositivePart; // Zero 2732 } 2733 } 2734 2735 2736 // Computes the upper and lower bounds for level K 2737 // using the < direction. Records them in Bound. 2738 // Wolfe gives the equations 2739 // 2740 // LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2741 // UB^<_k = (A^+_k - B_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2742 // 2743 // Since we normalize loops, we can simplify these equations to 2744 // 2745 // LB^<_k = (A^-_k - B_k)^- (U_k - 1) - B_k 2746 // UB^<_k = (A^+_k - B_k)^+ (U_k - 1) - B_k 2747 // 2748 // We must be careful to handle the case where the upper bound is unknown. 2749 void DependenceAnalysis::findBoundsLT(CoefficientInfo *A, 2750 CoefficientInfo *B, 2751 BoundInfo *Bound, 2752 unsigned K) const { 2753 Bound[K].Lower[Dependence::DVEntry::LT] = nullptr; // Default value = -infinity. 2754 Bound[K].Upper[Dependence::DVEntry::LT] = nullptr; // Default value = +infinity. 2755 if (Bound[K].Iterations) { 2756 const SCEV *Iter_1 = 2757 SE->getMinusSCEV(Bound[K].Iterations, 2758 SE->getConstant(Bound[K].Iterations->getType(), 1)); 2759 const SCEV *NegPart = 2760 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); 2761 Bound[K].Lower[Dependence::DVEntry::LT] = 2762 SE->getMinusSCEV(SE->getMulExpr(NegPart, Iter_1), B[K].Coeff); 2763 const SCEV *PosPart = 2764 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); 2765 Bound[K].Upper[Dependence::DVEntry::LT] = 2766 SE->getMinusSCEV(SE->getMulExpr(PosPart, Iter_1), B[K].Coeff); 2767 } 2768 else { 2769 // If the positive/negative part of the difference is 0, 2770 // we won't need to know the number of iterations. 2771 const SCEV *NegPart = 2772 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); 2773 if (NegPart->isZero()) 2774 Bound[K].Lower[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); 2775 const SCEV *PosPart = 2776 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); 2777 if (PosPart->isZero()) 2778 Bound[K].Upper[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); 2779 } 2780 } 2781 2782 2783 // Computes the upper and lower bounds for level K 2784 // using the > direction. Records them in Bound. 2785 // Wolfe gives the equations 2786 // 2787 // LB^>_k = (A_k - B^+_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k 2788 // UB^>_k = (A_k - B^-_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k 2789 // 2790 // Since we normalize loops, we can simplify these equations to 2791 // 2792 // LB^>_k = (A_k - B^+_k)^- (U_k - 1) + A_k 2793 // UB^>_k = (A_k - B^-_k)^+ (U_k - 1) + A_k 2794 // 2795 // We must be careful to handle the case where the upper bound is unknown. 2796 void DependenceAnalysis::findBoundsGT(CoefficientInfo *A, 2797 CoefficientInfo *B, 2798 BoundInfo *Bound, 2799 unsigned K) const { 2800 Bound[K].Lower[Dependence::DVEntry::GT] = nullptr; // Default value = -infinity. 2801 Bound[K].Upper[Dependence::DVEntry::GT] = nullptr; // Default value = +infinity. 2802 if (Bound[K].Iterations) { 2803 const SCEV *Iter_1 = 2804 SE->getMinusSCEV(Bound[K].Iterations, 2805 SE->getConstant(Bound[K].Iterations->getType(), 1)); 2806 const SCEV *NegPart = 2807 getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); 2808 Bound[K].Lower[Dependence::DVEntry::GT] = 2809 SE->getAddExpr(SE->getMulExpr(NegPart, Iter_1), A[K].Coeff); 2810 const SCEV *PosPart = 2811 getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); 2812 Bound[K].Upper[Dependence::DVEntry::GT] = 2813 SE->getAddExpr(SE->getMulExpr(PosPart, Iter_1), A[K].Coeff); 2814 } 2815 else { 2816 // If the positive/negative part of the difference is 0, 2817 // we won't need to know the number of iterations. 2818 const SCEV *NegPart = getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); 2819 if (NegPart->isZero()) 2820 Bound[K].Lower[Dependence::DVEntry::GT] = A[K].Coeff; 2821 const SCEV *PosPart = getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); 2822 if (PosPart->isZero()) 2823 Bound[K].Upper[Dependence::DVEntry::GT] = A[K].Coeff; 2824 } 2825 } 2826 2827 2828 // X^+ = max(X, 0) 2829 const SCEV *DependenceAnalysis::getPositivePart(const SCEV *X) const { 2830 return SE->getSMaxExpr(X, SE->getConstant(X->getType(), 0)); 2831 } 2832 2833 2834 // X^- = min(X, 0) 2835 const SCEV *DependenceAnalysis::getNegativePart(const SCEV *X) const { 2836 return SE->getSMinExpr(X, SE->getConstant(X->getType(), 0)); 2837 } 2838 2839 2840 // Walks through the subscript, 2841 // collecting each coefficient, the associated loop bounds, 2842 // and recording its positive and negative parts for later use. 2843 DependenceAnalysis::CoefficientInfo * 2844 DependenceAnalysis::collectCoeffInfo(const SCEV *Subscript, 2845 bool SrcFlag, 2846 const SCEV *&Constant) const { 2847 const SCEV *Zero = SE->getConstant(Subscript->getType(), 0); 2848 CoefficientInfo *CI = new CoefficientInfo[MaxLevels + 1]; 2849 for (unsigned K = 1; K <= MaxLevels; ++K) { 2850 CI[K].Coeff = Zero; 2851 CI[K].PosPart = Zero; 2852 CI[K].NegPart = Zero; 2853 CI[K].Iterations = nullptr; 2854 } 2855 while (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Subscript)) { 2856 const Loop *L = AddRec->getLoop(); 2857 unsigned K = SrcFlag ? mapSrcLoop(L) : mapDstLoop(L); 2858 CI[K].Coeff = AddRec->getStepRecurrence(*SE); 2859 CI[K].PosPart = getPositivePart(CI[K].Coeff); 2860 CI[K].NegPart = getNegativePart(CI[K].Coeff); 2861 CI[K].Iterations = collectUpperBound(L, Subscript->getType()); 2862 Subscript = AddRec->getStart(); 2863 } 2864 Constant = Subscript; 2865 #ifndef NDEBUG 2866 DEBUG(dbgs() << "\tCoefficient Info\n"); 2867 for (unsigned K = 1; K <= MaxLevels; ++K) { 2868 DEBUG(dbgs() << "\t " << K << "\t" << *CI[K].Coeff); 2869 DEBUG(dbgs() << "\tPos Part = "); 2870 DEBUG(dbgs() << *CI[K].PosPart); 2871 DEBUG(dbgs() << "\tNeg Part = "); 2872 DEBUG(dbgs() << *CI[K].NegPart); 2873 DEBUG(dbgs() << "\tUpper Bound = "); 2874 if (CI[K].Iterations) 2875 DEBUG(dbgs() << *CI[K].Iterations); 2876 else 2877 DEBUG(dbgs() << "+inf"); 2878 DEBUG(dbgs() << '\n'); 2879 } 2880 DEBUG(dbgs() << "\t Constant = " << *Subscript << '\n'); 2881 #endif 2882 return CI; 2883 } 2884 2885 2886 // Looks through all the bounds info and 2887 // computes the lower bound given the current direction settings 2888 // at each level. If the lower bound for any level is -inf, 2889 // the result is -inf. 2890 const SCEV *DependenceAnalysis::getLowerBound(BoundInfo *Bound) const { 2891 const SCEV *Sum = Bound[1].Lower[Bound[1].Direction]; 2892 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { 2893 if (Bound[K].Lower[Bound[K].Direction]) 2894 Sum = SE->getAddExpr(Sum, Bound[K].Lower[Bound[K].Direction]); 2895 else 2896 Sum = nullptr; 2897 } 2898 return Sum; 2899 } 2900 2901 2902 // Looks through all the bounds info and 2903 // computes the upper bound given the current direction settings 2904 // at each level. If the upper bound at any level is +inf, 2905 // the result is +inf. 2906 const SCEV *DependenceAnalysis::getUpperBound(BoundInfo *Bound) const { 2907 const SCEV *Sum = Bound[1].Upper[Bound[1].Direction]; 2908 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { 2909 if (Bound[K].Upper[Bound[K].Direction]) 2910 Sum = SE->getAddExpr(Sum, Bound[K].Upper[Bound[K].Direction]); 2911 else 2912 Sum = nullptr; 2913 } 2914 return Sum; 2915 } 2916 2917 2918 //===----------------------------------------------------------------------===// 2919 // Constraint manipulation for Delta test. 2920 2921 // Given a linear SCEV, 2922 // return the coefficient (the step) 2923 // corresponding to the specified loop. 2924 // If there isn't one, return 0. 2925 // For example, given a*i + b*j + c*k, zeroing the coefficient 2926 // corresponding to the j loop would yield b. 2927 const SCEV *DependenceAnalysis::findCoefficient(const SCEV *Expr, 2928 const Loop *TargetLoop) const { 2929 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 2930 if (!AddRec) 2931 return SE->getConstant(Expr->getType(), 0); 2932 if (AddRec->getLoop() == TargetLoop) 2933 return AddRec->getStepRecurrence(*SE); 2934 return findCoefficient(AddRec->getStart(), TargetLoop); 2935 } 2936 2937 2938 // Given a linear SCEV, 2939 // return the SCEV given by zeroing out the coefficient 2940 // corresponding to the specified loop. 2941 // For example, given a*i + b*j + c*k, zeroing the coefficient 2942 // corresponding to the j loop would yield a*i + c*k. 2943 const SCEV *DependenceAnalysis::zeroCoefficient(const SCEV *Expr, 2944 const Loop *TargetLoop) const { 2945 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 2946 if (!AddRec) 2947 return Expr; // ignore 2948 if (AddRec->getLoop() == TargetLoop) 2949 return AddRec->getStart(); 2950 return SE->getAddRecExpr(zeroCoefficient(AddRec->getStart(), TargetLoop), 2951 AddRec->getStepRecurrence(*SE), 2952 AddRec->getLoop(), 2953 AddRec->getNoWrapFlags()); 2954 } 2955 2956 2957 // Given a linear SCEV Expr, 2958 // return the SCEV given by adding some Value to the 2959 // coefficient corresponding to the specified TargetLoop. 2960 // For example, given a*i + b*j + c*k, adding 1 to the coefficient 2961 // corresponding to the j loop would yield a*i + (b+1)*j + c*k. 2962 const SCEV *DependenceAnalysis::addToCoefficient(const SCEV *Expr, 2963 const Loop *TargetLoop, 2964 const SCEV *Value) const { 2965 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 2966 if (!AddRec) // create a new addRec 2967 return SE->getAddRecExpr(Expr, 2968 Value, 2969 TargetLoop, 2970 SCEV::FlagAnyWrap); // Worst case, with no info. 2971 if (AddRec->getLoop() == TargetLoop) { 2972 const SCEV *Sum = SE->getAddExpr(AddRec->getStepRecurrence(*SE), Value); 2973 if (Sum->isZero()) 2974 return AddRec->getStart(); 2975 return SE->getAddRecExpr(AddRec->getStart(), 2976 Sum, 2977 AddRec->getLoop(), 2978 AddRec->getNoWrapFlags()); 2979 } 2980 if (SE->isLoopInvariant(AddRec, TargetLoop)) 2981 return SE->getAddRecExpr(AddRec, Value, TargetLoop, SCEV::FlagAnyWrap); 2982 return SE->getAddRecExpr( 2983 addToCoefficient(AddRec->getStart(), TargetLoop, Value), 2984 AddRec->getStepRecurrence(*SE), AddRec->getLoop(), 2985 AddRec->getNoWrapFlags()); 2986 } 2987 2988 2989 // Review the constraints, looking for opportunities 2990 // to simplify a subscript pair (Src and Dst). 2991 // Return true if some simplification occurs. 2992 // If the simplification isn't exact (that is, if it is conservative 2993 // in terms of dependence), set consistent to false. 2994 // Corresponds to Figure 5 from the paper 2995 // 2996 // Practical Dependence Testing 2997 // Goff, Kennedy, Tseng 2998 // PLDI 1991 2999 bool DependenceAnalysis::propagate(const SCEV *&Src, 3000 const SCEV *&Dst, 3001 SmallBitVector &Loops, 3002 SmallVectorImpl<Constraint> &Constraints, 3003 bool &Consistent) { 3004 bool Result = false; 3005 for (int LI = Loops.find_first(); LI >= 0; LI = Loops.find_next(LI)) { 3006 DEBUG(dbgs() << "\t Constraint[" << LI << "] is"); 3007 DEBUG(Constraints[LI].dump(dbgs())); 3008 if (Constraints[LI].isDistance()) 3009 Result |= propagateDistance(Src, Dst, Constraints[LI], Consistent); 3010 else if (Constraints[LI].isLine()) 3011 Result |= propagateLine(Src, Dst, Constraints[LI], Consistent); 3012 else if (Constraints[LI].isPoint()) 3013 Result |= propagatePoint(Src, Dst, Constraints[LI]); 3014 } 3015 return Result; 3016 } 3017 3018 3019 // Attempt to propagate a distance 3020 // constraint into a subscript pair (Src and Dst). 3021 // Return true if some simplification occurs. 3022 // If the simplification isn't exact (that is, if it is conservative 3023 // in terms of dependence), set consistent to false. 3024 bool DependenceAnalysis::propagateDistance(const SCEV *&Src, 3025 const SCEV *&Dst, 3026 Constraint &CurConstraint, 3027 bool &Consistent) { 3028 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3029 DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); 3030 const SCEV *A_K = findCoefficient(Src, CurLoop); 3031 if (A_K->isZero()) 3032 return false; 3033 const SCEV *DA_K = SE->getMulExpr(A_K, CurConstraint.getD()); 3034 Src = SE->getMinusSCEV(Src, DA_K); 3035 Src = zeroCoefficient(Src, CurLoop); 3036 DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); 3037 DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); 3038 Dst = addToCoefficient(Dst, CurLoop, SE->getNegativeSCEV(A_K)); 3039 DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); 3040 if (!findCoefficient(Dst, CurLoop)->isZero()) 3041 Consistent = false; 3042 return true; 3043 } 3044 3045 3046 // Attempt to propagate a line 3047 // constraint into a subscript pair (Src and Dst). 3048 // Return true if some simplification occurs. 3049 // If the simplification isn't exact (that is, if it is conservative 3050 // in terms of dependence), set consistent to false. 3051 bool DependenceAnalysis::propagateLine(const SCEV *&Src, 3052 const SCEV *&Dst, 3053 Constraint &CurConstraint, 3054 bool &Consistent) { 3055 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3056 const SCEV *A = CurConstraint.getA(); 3057 const SCEV *B = CurConstraint.getB(); 3058 const SCEV *C = CurConstraint.getC(); 3059 DEBUG(dbgs() << "\t\tA = " << *A << ", B = " << *B << ", C = " << *C << "\n"); 3060 DEBUG(dbgs() << "\t\tSrc = " << *Src << "\n"); 3061 DEBUG(dbgs() << "\t\tDst = " << *Dst << "\n"); 3062 if (A->isZero()) { 3063 const SCEVConstant *Bconst = dyn_cast<SCEVConstant>(B); 3064 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3065 if (!Bconst || !Cconst) return false; 3066 APInt Beta = Bconst->getValue()->getValue(); 3067 APInt Charlie = Cconst->getValue()->getValue(); 3068 APInt CdivB = Charlie.sdiv(Beta); 3069 assert(Charlie.srem(Beta) == 0 && "C should be evenly divisible by B"); 3070 const SCEV *AP_K = findCoefficient(Dst, CurLoop); 3071 // Src = SE->getAddExpr(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); 3072 Src = SE->getMinusSCEV(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); 3073 Dst = zeroCoefficient(Dst, CurLoop); 3074 if (!findCoefficient(Src, CurLoop)->isZero()) 3075 Consistent = false; 3076 } 3077 else if (B->isZero()) { 3078 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); 3079 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3080 if (!Aconst || !Cconst) return false; 3081 APInt Alpha = Aconst->getValue()->getValue(); 3082 APInt Charlie = Cconst->getValue()->getValue(); 3083 APInt CdivA = Charlie.sdiv(Alpha); 3084 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); 3085 const SCEV *A_K = findCoefficient(Src, CurLoop); 3086 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); 3087 Src = zeroCoefficient(Src, CurLoop); 3088 if (!findCoefficient(Dst, CurLoop)->isZero()) 3089 Consistent = false; 3090 } 3091 else if (isKnownPredicate(CmpInst::ICMP_EQ, A, B)) { 3092 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); 3093 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3094 if (!Aconst || !Cconst) return false; 3095 APInt Alpha = Aconst->getValue()->getValue(); 3096 APInt Charlie = Cconst->getValue()->getValue(); 3097 APInt CdivA = Charlie.sdiv(Alpha); 3098 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); 3099 const SCEV *A_K = findCoefficient(Src, CurLoop); 3100 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); 3101 Src = zeroCoefficient(Src, CurLoop); 3102 Dst = addToCoefficient(Dst, CurLoop, A_K); 3103 if (!findCoefficient(Dst, CurLoop)->isZero()) 3104 Consistent = false; 3105 } 3106 else { 3107 // paper is incorrect here, or perhaps just misleading 3108 const SCEV *A_K = findCoefficient(Src, CurLoop); 3109 Src = SE->getMulExpr(Src, A); 3110 Dst = SE->getMulExpr(Dst, A); 3111 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, C)); 3112 Src = zeroCoefficient(Src, CurLoop); 3113 Dst = addToCoefficient(Dst, CurLoop, SE->getMulExpr(A_K, B)); 3114 if (!findCoefficient(Dst, CurLoop)->isZero()) 3115 Consistent = false; 3116 } 3117 DEBUG(dbgs() << "\t\tnew Src = " << *Src << "\n"); 3118 DEBUG(dbgs() << "\t\tnew Dst = " << *Dst << "\n"); 3119 return true; 3120 } 3121 3122 3123 // Attempt to propagate a point 3124 // constraint into a subscript pair (Src and Dst). 3125 // Return true if some simplification occurs. 3126 bool DependenceAnalysis::propagatePoint(const SCEV *&Src, 3127 const SCEV *&Dst, 3128 Constraint &CurConstraint) { 3129 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3130 const SCEV *A_K = findCoefficient(Src, CurLoop); 3131 const SCEV *AP_K = findCoefficient(Dst, CurLoop); 3132 const SCEV *XA_K = SE->getMulExpr(A_K, CurConstraint.getX()); 3133 const SCEV *YAP_K = SE->getMulExpr(AP_K, CurConstraint.getY()); 3134 DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); 3135 Src = SE->getAddExpr(Src, SE->getMinusSCEV(XA_K, YAP_K)); 3136 Src = zeroCoefficient(Src, CurLoop); 3137 DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); 3138 DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); 3139 Dst = zeroCoefficient(Dst, CurLoop); 3140 DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); 3141 return true; 3142 } 3143 3144 3145 // Update direction vector entry based on the current constraint. 3146 void DependenceAnalysis::updateDirection(Dependence::DVEntry &Level, 3147 const Constraint &CurConstraint 3148 ) const { 3149 DEBUG(dbgs() << "\tUpdate direction, constraint ="); 3150 DEBUG(CurConstraint.dump(dbgs())); 3151 if (CurConstraint.isAny()) 3152 ; // use defaults 3153 else if (CurConstraint.isDistance()) { 3154 // this one is consistent, the others aren't 3155 Level.Scalar = false; 3156 Level.Distance = CurConstraint.getD(); 3157 unsigned NewDirection = Dependence::DVEntry::NONE; 3158 if (!SE->isKnownNonZero(Level.Distance)) // if may be zero 3159 NewDirection = Dependence::DVEntry::EQ; 3160 if (!SE->isKnownNonPositive(Level.Distance)) // if may be positive 3161 NewDirection |= Dependence::DVEntry::LT; 3162 if (!SE->isKnownNonNegative(Level.Distance)) // if may be negative 3163 NewDirection |= Dependence::DVEntry::GT; 3164 Level.Direction &= NewDirection; 3165 } 3166 else if (CurConstraint.isLine()) { 3167 Level.Scalar = false; 3168 Level.Distance = nullptr; 3169 // direction should be accurate 3170 } 3171 else if (CurConstraint.isPoint()) { 3172 Level.Scalar = false; 3173 Level.Distance = nullptr; 3174 unsigned NewDirection = Dependence::DVEntry::NONE; 3175 if (!isKnownPredicate(CmpInst::ICMP_NE, 3176 CurConstraint.getY(), 3177 CurConstraint.getX())) 3178 // if X may be = Y 3179 NewDirection |= Dependence::DVEntry::EQ; 3180 if (!isKnownPredicate(CmpInst::ICMP_SLE, 3181 CurConstraint.getY(), 3182 CurConstraint.getX())) 3183 // if Y may be > X 3184 NewDirection |= Dependence::DVEntry::LT; 3185 if (!isKnownPredicate(CmpInst::ICMP_SGE, 3186 CurConstraint.getY(), 3187 CurConstraint.getX())) 3188 // if Y may be < X 3189 NewDirection |= Dependence::DVEntry::GT; 3190 Level.Direction &= NewDirection; 3191 } 3192 else 3193 llvm_unreachable("constraint has unexpected kind"); 3194 } 3195 3196 /// Check if we can delinearize the subscripts. If the SCEVs representing the 3197 /// source and destination array references are recurrences on a nested loop, 3198 /// this function flattens the nested recurrences into separate recurrences 3199 /// for each loop level. 3200 bool DependenceAnalysis::tryDelinearize(const SCEV *SrcSCEV, 3201 const SCEV *DstSCEV, 3202 SmallVectorImpl<Subscript> &Pair, 3203 const SCEV *ElementSize) { 3204 const SCEVUnknown *SrcBase = 3205 dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcSCEV)); 3206 const SCEVUnknown *DstBase = 3207 dyn_cast<SCEVUnknown>(SE->getPointerBase(DstSCEV)); 3208 3209 if (!SrcBase || !DstBase || SrcBase != DstBase) 3210 return false; 3211 3212 SrcSCEV = SE->getMinusSCEV(SrcSCEV, SrcBase); 3213 DstSCEV = SE->getMinusSCEV(DstSCEV, DstBase); 3214 3215 const SCEVAddRecExpr *SrcAR = dyn_cast<SCEVAddRecExpr>(SrcSCEV); 3216 const SCEVAddRecExpr *DstAR = dyn_cast<SCEVAddRecExpr>(DstSCEV); 3217 if (!SrcAR || !DstAR || !SrcAR->isAffine() || !DstAR->isAffine()) 3218 return false; 3219 3220 // First step: collect parametric terms in both array references. 3221 SmallVector<const SCEV *, 4> Terms; 3222 SrcAR->collectParametricTerms(*SE, Terms); 3223 DstAR->collectParametricTerms(*SE, Terms); 3224 3225 // Second step: find subscript sizes. 3226 SmallVector<const SCEV *, 4> Sizes; 3227 SE->findArrayDimensions(Terms, Sizes, ElementSize); 3228 3229 // Third step: compute the access functions for each subscript. 3230 SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts; 3231 SrcAR->computeAccessFunctions(*SE, SrcSubscripts, Sizes); 3232 DstAR->computeAccessFunctions(*SE, DstSubscripts, Sizes); 3233 3234 // Fail when there is only a subscript: that's a linearized access function. 3235 if (SrcSubscripts.size() < 2 || DstSubscripts.size() < 2 || 3236 SrcSubscripts.size() != DstSubscripts.size()) 3237 return false; 3238 3239 int size = SrcSubscripts.size(); 3240 3241 DEBUG({ 3242 dbgs() << "\nSrcSubscripts: "; 3243 for (int i = 0; i < size; i++) 3244 dbgs() << *SrcSubscripts[i]; 3245 dbgs() << "\nDstSubscripts: "; 3246 for (int i = 0; i < size; i++) 3247 dbgs() << *DstSubscripts[i]; 3248 }); 3249 3250 // The delinearization transforms a single-subscript MIV dependence test into 3251 // a multi-subscript SIV dependence test that is easier to compute. So we 3252 // resize Pair to contain as many pairs of subscripts as the delinearization 3253 // has found, and then initialize the pairs following the delinearization. 3254 Pair.resize(size); 3255 for (int i = 0; i < size; ++i) { 3256 Pair[i].Src = SrcSubscripts[i]; 3257 Pair[i].Dst = DstSubscripts[i]; 3258 unifySubscriptType(&Pair[i]); 3259 3260 // FIXME: we should record the bounds SrcSizes[i] and DstSizes[i] that the 3261 // delinearization has found, and add these constraints to the dependence 3262 // check to avoid memory accesses overflow from one dimension into another. 3263 // This is related to the problem of determining the existence of data 3264 // dependences in array accesses using a different number of subscripts: in 3265 // C one can access an array A[100][100]; as A[0][9999], *A[9999], etc. 3266 } 3267 3268 return true; 3269 } 3270 3271 //===----------------------------------------------------------------------===// 3272 3273 #ifndef NDEBUG 3274 // For debugging purposes, dump a small bit vector to dbgs(). 3275 static void dumpSmallBitVector(SmallBitVector &BV) { 3276 dbgs() << "{"; 3277 for (int VI = BV.find_first(); VI >= 0; VI = BV.find_next(VI)) { 3278 dbgs() << VI; 3279 if (BV.find_next(VI) >= 0) 3280 dbgs() << ' '; 3281 } 3282 dbgs() << "}\n"; 3283 } 3284 #endif 3285 3286 3287 // depends - 3288 // Returns NULL if there is no dependence. 3289 // Otherwise, return a Dependence with as many details as possible. 3290 // Corresponds to Section 3.1 in the paper 3291 // 3292 // Practical Dependence Testing 3293 // Goff, Kennedy, Tseng 3294 // PLDI 1991 3295 // 3296 // Care is required to keep the routine below, getSplitIteration(), 3297 // up to date with respect to this routine. 3298 std::unique_ptr<Dependence> 3299 DependenceAnalysis::depends(Instruction *Src, Instruction *Dst, 3300 bool PossiblyLoopIndependent) { 3301 if (Src == Dst) 3302 PossiblyLoopIndependent = false; 3303 3304 if ((!Src->mayReadFromMemory() && !Src->mayWriteToMemory()) || 3305 (!Dst->mayReadFromMemory() && !Dst->mayWriteToMemory())) 3306 // if both instructions don't reference memory, there's no dependence 3307 return nullptr; 3308 3309 if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) { 3310 // can only analyze simple loads and stores, i.e., no calls, invokes, etc. 3311 DEBUG(dbgs() << "can only handle simple loads and stores\n"); 3312 return make_unique<Dependence>(Src, Dst); 3313 } 3314 3315 Value *SrcPtr = getPointerOperand(Src); 3316 Value *DstPtr = getPointerOperand(Dst); 3317 3318 switch (underlyingObjectsAlias(AA, DstPtr, SrcPtr)) { 3319 case AliasAnalysis::MayAlias: 3320 case AliasAnalysis::PartialAlias: 3321 // cannot analyse objects if we don't understand their aliasing. 3322 DEBUG(dbgs() << "can't analyze may or partial alias\n"); 3323 return make_unique<Dependence>(Src, Dst); 3324 case AliasAnalysis::NoAlias: 3325 // If the objects noalias, they are distinct, accesses are independent. 3326 DEBUG(dbgs() << "no alias\n"); 3327 return nullptr; 3328 case AliasAnalysis::MustAlias: 3329 break; // The underlying objects alias; test accesses for dependence. 3330 } 3331 3332 // establish loop nesting levels 3333 establishNestingLevels(Src, Dst); 3334 DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n"); 3335 DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n"); 3336 3337 FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels); 3338 ++TotalArrayPairs; 3339 3340 // See if there are GEPs we can use. 3341 bool UsefulGEP = false; 3342 GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr); 3343 GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr); 3344 if (SrcGEP && DstGEP && 3345 SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()) { 3346 const SCEV *SrcPtrSCEV = SE->getSCEV(SrcGEP->getPointerOperand()); 3347 const SCEV *DstPtrSCEV = SE->getSCEV(DstGEP->getPointerOperand()); 3348 DEBUG(dbgs() << " SrcPtrSCEV = " << *SrcPtrSCEV << "\n"); 3349 DEBUG(dbgs() << " DstPtrSCEV = " << *DstPtrSCEV << "\n"); 3350 3351 UsefulGEP = 3352 isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) && 3353 isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent())); 3354 } 3355 unsigned Pairs = UsefulGEP ? SrcGEP->idx_end() - SrcGEP->idx_begin() : 1; 3356 SmallVector<Subscript, 4> Pair(Pairs); 3357 if (UsefulGEP) { 3358 DEBUG(dbgs() << " using GEPs\n"); 3359 unsigned P = 0; 3360 for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(), 3361 SrcEnd = SrcGEP->idx_end(), 3362 DstIdx = DstGEP->idx_begin(); 3363 SrcIdx != SrcEnd; 3364 ++SrcIdx, ++DstIdx, ++P) { 3365 Pair[P].Src = SE->getSCEV(*SrcIdx); 3366 Pair[P].Dst = SE->getSCEV(*DstIdx); 3367 unifySubscriptType(&Pair[P]); 3368 } 3369 } 3370 else { 3371 DEBUG(dbgs() << " ignoring GEPs\n"); 3372 const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); 3373 const SCEV *DstSCEV = SE->getSCEV(DstPtr); 3374 DEBUG(dbgs() << " SrcSCEV = " << *SrcSCEV << "\n"); 3375 DEBUG(dbgs() << " DstSCEV = " << *DstSCEV << "\n"); 3376 Pair[0].Src = SrcSCEV; 3377 Pair[0].Dst = DstSCEV; 3378 } 3379 3380 if (Delinearize && Pairs == 1 && CommonLevels > 1 && 3381 tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair, SE->getElementSize(Src))) { 3382 DEBUG(dbgs() << " delinerized GEP\n"); 3383 Pairs = Pair.size(); 3384 } 3385 3386 for (unsigned P = 0; P < Pairs; ++P) { 3387 Pair[P].Loops.resize(MaxLevels + 1); 3388 Pair[P].GroupLoops.resize(MaxLevels + 1); 3389 Pair[P].Group.resize(Pairs); 3390 removeMatchingExtensions(&Pair[P]); 3391 Pair[P].Classification = 3392 classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), 3393 Pair[P].Dst, LI->getLoopFor(Dst->getParent()), 3394 Pair[P].Loops); 3395 Pair[P].GroupLoops = Pair[P].Loops; 3396 Pair[P].Group.set(P); 3397 DEBUG(dbgs() << " subscript " << P << "\n"); 3398 DEBUG(dbgs() << "\tsrc = " << *Pair[P].Src << "\n"); 3399 DEBUG(dbgs() << "\tdst = " << *Pair[P].Dst << "\n"); 3400 DEBUG(dbgs() << "\tclass = " << Pair[P].Classification << "\n"); 3401 DEBUG(dbgs() << "\tloops = "); 3402 DEBUG(dumpSmallBitVector(Pair[P].Loops)); 3403 } 3404 3405 SmallBitVector Separable(Pairs); 3406 SmallBitVector Coupled(Pairs); 3407 3408 // Partition subscripts into separable and minimally-coupled groups 3409 // Algorithm in paper is algorithmically better; 3410 // this may be faster in practice. Check someday. 3411 // 3412 // Here's an example of how it works. Consider this code: 3413 // 3414 // for (i = ...) { 3415 // for (j = ...) { 3416 // for (k = ...) { 3417 // for (l = ...) { 3418 // for (m = ...) { 3419 // A[i][j][k][m] = ...; 3420 // ... = A[0][j][l][i + j]; 3421 // } 3422 // } 3423 // } 3424 // } 3425 // } 3426 // 3427 // There are 4 subscripts here: 3428 // 0 [i] and [0] 3429 // 1 [j] and [j] 3430 // 2 [k] and [l] 3431 // 3 [m] and [i + j] 3432 // 3433 // We've already classified each subscript pair as ZIV, SIV, etc., 3434 // and collected all the loops mentioned by pair P in Pair[P].Loops. 3435 // In addition, we've initialized Pair[P].GroupLoops to Pair[P].Loops 3436 // and set Pair[P].Group = {P}. 3437 // 3438 // Src Dst Classification Loops GroupLoops Group 3439 // 0 [i] [0] SIV {1} {1} {0} 3440 // 1 [j] [j] SIV {2} {2} {1} 3441 // 2 [k] [l] RDIV {3,4} {3,4} {2} 3442 // 3 [m] [i + j] MIV {1,2,5} {1,2,5} {3} 3443 // 3444 // For each subscript SI 0 .. 3, we consider each remaining subscript, SJ. 3445 // So, 0 is compared against 1, 2, and 3; 1 is compared against 2 and 3, etc. 3446 // 3447 // We begin by comparing 0 and 1. The intersection of the GroupLoops is empty. 3448 // Next, 0 and 2. Again, the intersection of their GroupLoops is empty. 3449 // Next 0 and 3. The intersection of their GroupLoop = {1}, not empty, 3450 // so Pair[3].Group = {0,3} and Done = false (that is, 0 will not be added 3451 // to either Separable or Coupled). 3452 // 3453 // Next, we consider 1 and 2. The intersection of the GroupLoops is empty. 3454 // Next, 1 and 3. The intersectionof their GroupLoops = {2}, not empty, 3455 // so Pair[3].Group = {0, 1, 3} and Done = false. 3456 // 3457 // Next, we compare 2 against 3. The intersection of the GroupLoops is empty. 3458 // Since Done remains true, we add 2 to the set of Separable pairs. 3459 // 3460 // Finally, we consider 3. There's nothing to compare it with, 3461 // so Done remains true and we add it to the Coupled set. 3462 // Pair[3].Group = {0, 1, 3} and GroupLoops = {1, 2, 5}. 3463 // 3464 // In the end, we've got 1 separable subscript and 1 coupled group. 3465 for (unsigned SI = 0; SI < Pairs; ++SI) { 3466 if (Pair[SI].Classification == Subscript::NonLinear) { 3467 // ignore these, but collect loops for later 3468 ++NonlinearSubscriptPairs; 3469 collectCommonLoops(Pair[SI].Src, 3470 LI->getLoopFor(Src->getParent()), 3471 Pair[SI].Loops); 3472 collectCommonLoops(Pair[SI].Dst, 3473 LI->getLoopFor(Dst->getParent()), 3474 Pair[SI].Loops); 3475 Result.Consistent = false; 3476 } 3477 else if (Pair[SI].Classification == Subscript::ZIV) { 3478 // always separable 3479 Separable.set(SI); 3480 } 3481 else { 3482 // SIV, RDIV, or MIV, so check for coupled group 3483 bool Done = true; 3484 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { 3485 SmallBitVector Intersection = Pair[SI].GroupLoops; 3486 Intersection &= Pair[SJ].GroupLoops; 3487 if (Intersection.any()) { 3488 // accumulate set of all the loops in group 3489 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; 3490 // accumulate set of all subscripts in group 3491 Pair[SJ].Group |= Pair[SI].Group; 3492 Done = false; 3493 } 3494 } 3495 if (Done) { 3496 if (Pair[SI].Group.count() == 1) { 3497 Separable.set(SI); 3498 ++SeparableSubscriptPairs; 3499 } 3500 else { 3501 Coupled.set(SI); 3502 ++CoupledSubscriptPairs; 3503 } 3504 } 3505 } 3506 } 3507 3508 DEBUG(dbgs() << " Separable = "); 3509 DEBUG(dumpSmallBitVector(Separable)); 3510 DEBUG(dbgs() << " Coupled = "); 3511 DEBUG(dumpSmallBitVector(Coupled)); 3512 3513 Constraint NewConstraint; 3514 NewConstraint.setAny(SE); 3515 3516 // test separable subscripts 3517 for (int SI = Separable.find_first(); SI >= 0; SI = Separable.find_next(SI)) { 3518 DEBUG(dbgs() << "testing subscript " << SI); 3519 switch (Pair[SI].Classification) { 3520 case Subscript::ZIV: 3521 DEBUG(dbgs() << ", ZIV\n"); 3522 if (testZIV(Pair[SI].Src, Pair[SI].Dst, Result)) 3523 return nullptr; 3524 break; 3525 case Subscript::SIV: { 3526 DEBUG(dbgs() << ", SIV\n"); 3527 unsigned Level; 3528 const SCEV *SplitIter = nullptr; 3529 if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level, 3530 Result, NewConstraint, SplitIter)) 3531 return nullptr; 3532 break; 3533 } 3534 case Subscript::RDIV: 3535 DEBUG(dbgs() << ", RDIV\n"); 3536 if (testRDIV(Pair[SI].Src, Pair[SI].Dst, Result)) 3537 return nullptr; 3538 break; 3539 case Subscript::MIV: 3540 DEBUG(dbgs() << ", MIV\n"); 3541 if (testMIV(Pair[SI].Src, Pair[SI].Dst, Pair[SI].Loops, Result)) 3542 return nullptr; 3543 break; 3544 default: 3545 llvm_unreachable("subscript has unexpected classification"); 3546 } 3547 } 3548 3549 if (Coupled.count()) { 3550 // test coupled subscript groups 3551 DEBUG(dbgs() << "starting on coupled subscripts\n"); 3552 DEBUG(dbgs() << "MaxLevels + 1 = " << MaxLevels + 1 << "\n"); 3553 SmallVector<Constraint, 4> Constraints(MaxLevels + 1); 3554 for (unsigned II = 0; II <= MaxLevels; ++II) 3555 Constraints[II].setAny(SE); 3556 for (int SI = Coupled.find_first(); SI >= 0; SI = Coupled.find_next(SI)) { 3557 DEBUG(dbgs() << "testing subscript group " << SI << " { "); 3558 SmallBitVector Group(Pair[SI].Group); 3559 SmallBitVector Sivs(Pairs); 3560 SmallBitVector Mivs(Pairs); 3561 SmallBitVector ConstrainedLevels(MaxLevels + 1); 3562 for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) { 3563 DEBUG(dbgs() << SJ << " "); 3564 if (Pair[SJ].Classification == Subscript::SIV) 3565 Sivs.set(SJ); 3566 else 3567 Mivs.set(SJ); 3568 } 3569 DEBUG(dbgs() << "}\n"); 3570 while (Sivs.any()) { 3571 bool Changed = false; 3572 for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) { 3573 DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n"); 3574 // SJ is an SIV subscript that's part of the current coupled group 3575 unsigned Level; 3576 const SCEV *SplitIter = nullptr; 3577 DEBUG(dbgs() << "SIV\n"); 3578 if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, 3579 Result, NewConstraint, SplitIter)) 3580 return nullptr; 3581 ConstrainedLevels.set(Level); 3582 if (intersectConstraints(&Constraints[Level], &NewConstraint)) { 3583 if (Constraints[Level].isEmpty()) { 3584 ++DeltaIndependence; 3585 return nullptr; 3586 } 3587 Changed = true; 3588 } 3589 Sivs.reset(SJ); 3590 } 3591 if (Changed) { 3592 // propagate, possibly creating new SIVs and ZIVs 3593 DEBUG(dbgs() << " propagating\n"); 3594 DEBUG(dbgs() << "\tMivs = "); 3595 DEBUG(dumpSmallBitVector(Mivs)); 3596 for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { 3597 // SJ is an MIV subscript that's part of the current coupled group 3598 DEBUG(dbgs() << "\tSJ = " << SJ << "\n"); 3599 if (propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, 3600 Constraints, Result.Consistent)) { 3601 DEBUG(dbgs() << "\t Changed\n"); 3602 ++DeltaPropagations; 3603 Pair[SJ].Classification = 3604 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), 3605 Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), 3606 Pair[SJ].Loops); 3607 switch (Pair[SJ].Classification) { 3608 case Subscript::ZIV: 3609 DEBUG(dbgs() << "ZIV\n"); 3610 if (testZIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) 3611 return nullptr; 3612 Mivs.reset(SJ); 3613 break; 3614 case Subscript::SIV: 3615 Sivs.set(SJ); 3616 Mivs.reset(SJ); 3617 break; 3618 case Subscript::RDIV: 3619 case Subscript::MIV: 3620 break; 3621 default: 3622 llvm_unreachable("bad subscript classification"); 3623 } 3624 } 3625 } 3626 } 3627 } 3628 3629 // test & propagate remaining RDIVs 3630 for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { 3631 if (Pair[SJ].Classification == Subscript::RDIV) { 3632 DEBUG(dbgs() << "RDIV test\n"); 3633 if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) 3634 return nullptr; 3635 // I don't yet understand how to propagate RDIV results 3636 Mivs.reset(SJ); 3637 } 3638 } 3639 3640 // test remaining MIVs 3641 // This code is temporary. 3642 // Better to somehow test all remaining subscripts simultaneously. 3643 for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { 3644 if (Pair[SJ].Classification == Subscript::MIV) { 3645 DEBUG(dbgs() << "MIV test\n"); 3646 if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Result)) 3647 return nullptr; 3648 } 3649 else 3650 llvm_unreachable("expected only MIV subscripts at this point"); 3651 } 3652 3653 // update Result.DV from constraint vector 3654 DEBUG(dbgs() << " updating\n"); 3655 for (int SJ = ConstrainedLevels.find_first(); 3656 SJ >= 0; SJ = ConstrainedLevels.find_next(SJ)) { 3657 updateDirection(Result.DV[SJ - 1], Constraints[SJ]); 3658 if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE) 3659 return nullptr; 3660 } 3661 } 3662 } 3663 3664 // Make sure the Scalar flags are set correctly. 3665 SmallBitVector CompleteLoops(MaxLevels + 1); 3666 for (unsigned SI = 0; SI < Pairs; ++SI) 3667 CompleteLoops |= Pair[SI].Loops; 3668 for (unsigned II = 1; II <= CommonLevels; ++II) 3669 if (CompleteLoops[II]) 3670 Result.DV[II - 1].Scalar = false; 3671 3672 if (PossiblyLoopIndependent) { 3673 // Make sure the LoopIndependent flag is set correctly. 3674 // All directions must include equal, otherwise no 3675 // loop-independent dependence is possible. 3676 for (unsigned II = 1; II <= CommonLevels; ++II) { 3677 if (!(Result.getDirection(II) & Dependence::DVEntry::EQ)) { 3678 Result.LoopIndependent = false; 3679 break; 3680 } 3681 } 3682 } 3683 else { 3684 // On the other hand, if all directions are equal and there's no 3685 // loop-independent dependence possible, then no dependence exists. 3686 bool AllEqual = true; 3687 for (unsigned II = 1; II <= CommonLevels; ++II) { 3688 if (Result.getDirection(II) != Dependence::DVEntry::EQ) { 3689 AllEqual = false; 3690 break; 3691 } 3692 } 3693 if (AllEqual) 3694 return nullptr; 3695 } 3696 3697 auto Final = make_unique<FullDependence>(Result); 3698 Result.DV = nullptr; 3699 return std::move(Final); 3700 } 3701 3702 3703 3704 //===----------------------------------------------------------------------===// 3705 // getSplitIteration - 3706 // Rather than spend rarely-used space recording the splitting iteration 3707 // during the Weak-Crossing SIV test, we re-compute it on demand. 3708 // The re-computation is basically a repeat of the entire dependence test, 3709 // though simplified since we know that the dependence exists. 3710 // It's tedious, since we must go through all propagations, etc. 3711 // 3712 // Care is required to keep this code up to date with respect to the routine 3713 // above, depends(). 3714 // 3715 // Generally, the dependence analyzer will be used to build 3716 // a dependence graph for a function (basically a map from instructions 3717 // to dependences). Looking for cycles in the graph shows us loops 3718 // that cannot be trivially vectorized/parallelized. 3719 // 3720 // We can try to improve the situation by examining all the dependences 3721 // that make up the cycle, looking for ones we can break. 3722 // Sometimes, peeling the first or last iteration of a loop will break 3723 // dependences, and we've got flags for those possibilities. 3724 // Sometimes, splitting a loop at some other iteration will do the trick, 3725 // and we've got a flag for that case. Rather than waste the space to 3726 // record the exact iteration (since we rarely know), we provide 3727 // a method that calculates the iteration. It's a drag that it must work 3728 // from scratch, but wonderful in that it's possible. 3729 // 3730 // Here's an example: 3731 // 3732 // for (i = 0; i < 10; i++) 3733 // A[i] = ... 3734 // ... = A[11 - i] 3735 // 3736 // There's a loop-carried flow dependence from the store to the load, 3737 // found by the weak-crossing SIV test. The dependence will have a flag, 3738 // indicating that the dependence can be broken by splitting the loop. 3739 // Calling getSplitIteration will return 5. 3740 // Splitting the loop breaks the dependence, like so: 3741 // 3742 // for (i = 0; i <= 5; i++) 3743 // A[i] = ... 3744 // ... = A[11 - i] 3745 // for (i = 6; i < 10; i++) 3746 // A[i] = ... 3747 // ... = A[11 - i] 3748 // 3749 // breaks the dependence and allows us to vectorize/parallelize 3750 // both loops. 3751 const SCEV *DependenceAnalysis::getSplitIteration(const Dependence &Dep, 3752 unsigned SplitLevel) { 3753 assert(Dep.isSplitable(SplitLevel) && 3754 "Dep should be splitable at SplitLevel"); 3755 Instruction *Src = Dep.getSrc(); 3756 Instruction *Dst = Dep.getDst(); 3757 assert(Src->mayReadFromMemory() || Src->mayWriteToMemory()); 3758 assert(Dst->mayReadFromMemory() || Dst->mayWriteToMemory()); 3759 assert(isLoadOrStore(Src)); 3760 assert(isLoadOrStore(Dst)); 3761 Value *SrcPtr = getPointerOperand(Src); 3762 Value *DstPtr = getPointerOperand(Dst); 3763 assert(underlyingObjectsAlias(AA, DstPtr, SrcPtr) == 3764 AliasAnalysis::MustAlias); 3765 3766 // establish loop nesting levels 3767 establishNestingLevels(Src, Dst); 3768 3769 FullDependence Result(Src, Dst, false, CommonLevels); 3770 3771 // See if there are GEPs we can use. 3772 bool UsefulGEP = false; 3773 GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr); 3774 GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr); 3775 if (SrcGEP && DstGEP && 3776 SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()) { 3777 const SCEV *SrcPtrSCEV = SE->getSCEV(SrcGEP->getPointerOperand()); 3778 const SCEV *DstPtrSCEV = SE->getSCEV(DstGEP->getPointerOperand()); 3779 UsefulGEP = 3780 isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) && 3781 isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent())); 3782 } 3783 unsigned Pairs = UsefulGEP ? SrcGEP->idx_end() - SrcGEP->idx_begin() : 1; 3784 SmallVector<Subscript, 4> Pair(Pairs); 3785 if (UsefulGEP) { 3786 unsigned P = 0; 3787 for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(), 3788 SrcEnd = SrcGEP->idx_end(), 3789 DstIdx = DstGEP->idx_begin(); 3790 SrcIdx != SrcEnd; 3791 ++SrcIdx, ++DstIdx, ++P) { 3792 Pair[P].Src = SE->getSCEV(*SrcIdx); 3793 Pair[P].Dst = SE->getSCEV(*DstIdx); 3794 } 3795 } 3796 else { 3797 const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); 3798 const SCEV *DstSCEV = SE->getSCEV(DstPtr); 3799 Pair[0].Src = SrcSCEV; 3800 Pair[0].Dst = DstSCEV; 3801 } 3802 3803 if (Delinearize && Pairs == 1 && CommonLevels > 1 && 3804 tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair, SE->getElementSize(Src))) { 3805 DEBUG(dbgs() << " delinerized GEP\n"); 3806 Pairs = Pair.size(); 3807 } 3808 3809 for (unsigned P = 0; P < Pairs; ++P) { 3810 Pair[P].Loops.resize(MaxLevels + 1); 3811 Pair[P].GroupLoops.resize(MaxLevels + 1); 3812 Pair[P].Group.resize(Pairs); 3813 removeMatchingExtensions(&Pair[P]); 3814 Pair[P].Classification = 3815 classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), 3816 Pair[P].Dst, LI->getLoopFor(Dst->getParent()), 3817 Pair[P].Loops); 3818 Pair[P].GroupLoops = Pair[P].Loops; 3819 Pair[P].Group.set(P); 3820 } 3821 3822 SmallBitVector Separable(Pairs); 3823 SmallBitVector Coupled(Pairs); 3824 3825 // partition subscripts into separable and minimally-coupled groups 3826 for (unsigned SI = 0; SI < Pairs; ++SI) { 3827 if (Pair[SI].Classification == Subscript::NonLinear) { 3828 // ignore these, but collect loops for later 3829 collectCommonLoops(Pair[SI].Src, 3830 LI->getLoopFor(Src->getParent()), 3831 Pair[SI].Loops); 3832 collectCommonLoops(Pair[SI].Dst, 3833 LI->getLoopFor(Dst->getParent()), 3834 Pair[SI].Loops); 3835 Result.Consistent = false; 3836 } 3837 else if (Pair[SI].Classification == Subscript::ZIV) 3838 Separable.set(SI); 3839 else { 3840 // SIV, RDIV, or MIV, so check for coupled group 3841 bool Done = true; 3842 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { 3843 SmallBitVector Intersection = Pair[SI].GroupLoops; 3844 Intersection &= Pair[SJ].GroupLoops; 3845 if (Intersection.any()) { 3846 // accumulate set of all the loops in group 3847 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; 3848 // accumulate set of all subscripts in group 3849 Pair[SJ].Group |= Pair[SI].Group; 3850 Done = false; 3851 } 3852 } 3853 if (Done) { 3854 if (Pair[SI].Group.count() == 1) 3855 Separable.set(SI); 3856 else 3857 Coupled.set(SI); 3858 } 3859 } 3860 } 3861 3862 Constraint NewConstraint; 3863 NewConstraint.setAny(SE); 3864 3865 // test separable subscripts 3866 for (int SI = Separable.find_first(); SI >= 0; SI = Separable.find_next(SI)) { 3867 switch (Pair[SI].Classification) { 3868 case Subscript::SIV: { 3869 unsigned Level; 3870 const SCEV *SplitIter = nullptr; 3871 (void) testSIV(Pair[SI].Src, Pair[SI].Dst, Level, 3872 Result, NewConstraint, SplitIter); 3873 if (Level == SplitLevel) { 3874 assert(SplitIter != nullptr); 3875 return SplitIter; 3876 } 3877 break; 3878 } 3879 case Subscript::ZIV: 3880 case Subscript::RDIV: 3881 case Subscript::MIV: 3882 break; 3883 default: 3884 llvm_unreachable("subscript has unexpected classification"); 3885 } 3886 } 3887 3888 if (Coupled.count()) { 3889 // test coupled subscript groups 3890 SmallVector<Constraint, 4> Constraints(MaxLevels + 1); 3891 for (unsigned II = 0; II <= MaxLevels; ++II) 3892 Constraints[II].setAny(SE); 3893 for (int SI = Coupled.find_first(); SI >= 0; SI = Coupled.find_next(SI)) { 3894 SmallBitVector Group(Pair[SI].Group); 3895 SmallBitVector Sivs(Pairs); 3896 SmallBitVector Mivs(Pairs); 3897 SmallBitVector ConstrainedLevels(MaxLevels + 1); 3898 for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) { 3899 if (Pair[SJ].Classification == Subscript::SIV) 3900 Sivs.set(SJ); 3901 else 3902 Mivs.set(SJ); 3903 } 3904 while (Sivs.any()) { 3905 bool Changed = false; 3906 for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) { 3907 // SJ is an SIV subscript that's part of the current coupled group 3908 unsigned Level; 3909 const SCEV *SplitIter = nullptr; 3910 (void) testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, 3911 Result, NewConstraint, SplitIter); 3912 if (Level == SplitLevel && SplitIter) 3913 return SplitIter; 3914 ConstrainedLevels.set(Level); 3915 if (intersectConstraints(&Constraints[Level], &NewConstraint)) 3916 Changed = true; 3917 Sivs.reset(SJ); 3918 } 3919 if (Changed) { 3920 // propagate, possibly creating new SIVs and ZIVs 3921 for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { 3922 // SJ is an MIV subscript that's part of the current coupled group 3923 if (propagate(Pair[SJ].Src, Pair[SJ].Dst, 3924 Pair[SJ].Loops, Constraints, Result.Consistent)) { 3925 Pair[SJ].Classification = 3926 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), 3927 Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), 3928 Pair[SJ].Loops); 3929 switch (Pair[SJ].Classification) { 3930 case Subscript::ZIV: 3931 Mivs.reset(SJ); 3932 break; 3933 case Subscript::SIV: 3934 Sivs.set(SJ); 3935 Mivs.reset(SJ); 3936 break; 3937 case Subscript::RDIV: 3938 case Subscript::MIV: 3939 break; 3940 default: 3941 llvm_unreachable("bad subscript classification"); 3942 } 3943 } 3944 } 3945 } 3946 } 3947 } 3948 } 3949 llvm_unreachable("somehow reached end of routine"); 3950 return nullptr; 3951 } 3952