1 //===-- DependenceAnalysis.cpp - DA Implementation --------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // DependenceAnalysis is an LLVM pass that analyses dependences between memory 11 // accesses. Currently, it is an (incomplete) implementation of the approach 12 // described in 13 // 14 // Practical Dependence Testing 15 // Goff, Kennedy, Tseng 16 // PLDI 1991 17 // 18 // There's a single entry point that analyzes the dependence between a pair 19 // of memory references in a function, returning either NULL, for no dependence, 20 // or a more-or-less detailed description of the dependence between them. 21 // 22 // Currently, the implementation cannot propagate constraints between 23 // coupled RDIV subscripts and lacks a multi-subscript MIV test. 24 // Both of these are conservative weaknesses; 25 // that is, not a source of correctness problems. 26 // 27 // The implementation depends on the GEP instruction to differentiate 28 // subscripts. Since Clang linearizes some array subscripts, the dependence 29 // analysis is using SCEV->delinearize to recover the representation of multiple 30 // subscripts, and thus avoid the more expensive and less precise MIV tests. The 31 // delinearization is controlled by the flag -da-delinearize. 32 // 33 // We should pay some careful attention to the possibility of integer overflow 34 // in the implementation of the various tests. This could happen with Add, 35 // Subtract, or Multiply, with both APInt's and SCEV's. 36 // 37 // Some non-linear subscript pairs can be handled by the GCD test 38 // (and perhaps other tests). 39 // Should explore how often these things occur. 40 // 41 // Finally, it seems like certain test cases expose weaknesses in the SCEV 42 // simplification, especially in the handling of sign and zero extensions. 43 // It could be useful to spend time exploring these. 44 // 45 // Please note that this is work in progress and the interface is subject to 46 // change. 47 // 48 //===----------------------------------------------------------------------===// 49 // // 50 // In memory of Ken Kennedy, 1945 - 2007 // 51 // // 52 //===----------------------------------------------------------------------===// 53 54 #include "llvm/Analysis/DependenceAnalysis.h" 55 #include "llvm/ADT/STLExtras.h" 56 #include "llvm/ADT/Statistic.h" 57 #include "llvm/Analysis/AliasAnalysis.h" 58 #include "llvm/Analysis/LoopInfo.h" 59 #include "llvm/Analysis/ScalarEvolution.h" 60 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 61 #include "llvm/Analysis/ValueTracking.h" 62 #include "llvm/IR/InstIterator.h" 63 #include "llvm/IR/Module.h" 64 #include "llvm/IR/Operator.h" 65 #include "llvm/Support/CommandLine.h" 66 #include "llvm/Support/Debug.h" 67 #include "llvm/Support/ErrorHandling.h" 68 #include "llvm/Support/raw_ostream.h" 69 70 using namespace llvm; 71 72 #define DEBUG_TYPE "da" 73 74 //===----------------------------------------------------------------------===// 75 // statistics 76 77 STATISTIC(TotalArrayPairs, "Array pairs tested"); 78 STATISTIC(SeparableSubscriptPairs, "Separable subscript pairs"); 79 STATISTIC(CoupledSubscriptPairs, "Coupled subscript pairs"); 80 STATISTIC(NonlinearSubscriptPairs, "Nonlinear subscript pairs"); 81 STATISTIC(ZIVapplications, "ZIV applications"); 82 STATISTIC(ZIVindependence, "ZIV independence"); 83 STATISTIC(StrongSIVapplications, "Strong SIV applications"); 84 STATISTIC(StrongSIVsuccesses, "Strong SIV successes"); 85 STATISTIC(StrongSIVindependence, "Strong SIV independence"); 86 STATISTIC(WeakCrossingSIVapplications, "Weak-Crossing SIV applications"); 87 STATISTIC(WeakCrossingSIVsuccesses, "Weak-Crossing SIV successes"); 88 STATISTIC(WeakCrossingSIVindependence, "Weak-Crossing SIV independence"); 89 STATISTIC(ExactSIVapplications, "Exact SIV applications"); 90 STATISTIC(ExactSIVsuccesses, "Exact SIV successes"); 91 STATISTIC(ExactSIVindependence, "Exact SIV independence"); 92 STATISTIC(WeakZeroSIVapplications, "Weak-Zero SIV applications"); 93 STATISTIC(WeakZeroSIVsuccesses, "Weak-Zero SIV successes"); 94 STATISTIC(WeakZeroSIVindependence, "Weak-Zero SIV independence"); 95 STATISTIC(ExactRDIVapplications, "Exact RDIV applications"); 96 STATISTIC(ExactRDIVindependence, "Exact RDIV independence"); 97 STATISTIC(SymbolicRDIVapplications, "Symbolic RDIV applications"); 98 STATISTIC(SymbolicRDIVindependence, "Symbolic RDIV independence"); 99 STATISTIC(DeltaApplications, "Delta applications"); 100 STATISTIC(DeltaSuccesses, "Delta successes"); 101 STATISTIC(DeltaIndependence, "Delta independence"); 102 STATISTIC(DeltaPropagations, "Delta propagations"); 103 STATISTIC(GCDapplications, "GCD applications"); 104 STATISTIC(GCDsuccesses, "GCD successes"); 105 STATISTIC(GCDindependence, "GCD independence"); 106 STATISTIC(BanerjeeApplications, "Banerjee applications"); 107 STATISTIC(BanerjeeIndependence, "Banerjee independence"); 108 STATISTIC(BanerjeeSuccesses, "Banerjee successes"); 109 110 static cl::opt<bool> 111 Delinearize("da-delinearize", cl::init(false), cl::Hidden, cl::ZeroOrMore, 112 cl::desc("Try to delinearize array references.")); 113 114 //===----------------------------------------------------------------------===// 115 // basics 116 117 INITIALIZE_PASS_BEGIN(DependenceAnalysis, "da", 118 "Dependence Analysis", true, true) 119 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 120 INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) 121 INITIALIZE_AG_DEPENDENCY(AliasAnalysis) 122 INITIALIZE_PASS_END(DependenceAnalysis, "da", 123 "Dependence Analysis", true, true) 124 125 char DependenceAnalysis::ID = 0; 126 127 128 FunctionPass *llvm::createDependenceAnalysisPass() { 129 return new DependenceAnalysis(); 130 } 131 132 133 bool DependenceAnalysis::runOnFunction(Function &F) { 134 this->F = &F; 135 AA = &getAnalysis<AliasAnalysis>(); 136 SE = &getAnalysis<ScalarEvolution>(); 137 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 138 return false; 139 } 140 141 142 void DependenceAnalysis::releaseMemory() { 143 } 144 145 146 void DependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { 147 AU.setPreservesAll(); 148 AU.addRequiredTransitive<AliasAnalysis>(); 149 AU.addRequiredTransitive<ScalarEvolution>(); 150 AU.addRequiredTransitive<LoopInfoWrapperPass>(); 151 } 152 153 154 // Used to test the dependence analyzer. 155 // Looks through the function, noting loads and stores. 156 // Calls depends() on every possible pair and prints out the result. 157 // Ignores all other instructions. 158 static 159 void dumpExampleDependence(raw_ostream &OS, Function *F, 160 DependenceAnalysis *DA) { 161 for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F); 162 SrcI != SrcE; ++SrcI) { 163 if (isa<StoreInst>(*SrcI) || isa<LoadInst>(*SrcI)) { 164 for (inst_iterator DstI = SrcI, DstE = inst_end(F); 165 DstI != DstE; ++DstI) { 166 if (isa<StoreInst>(*DstI) || isa<LoadInst>(*DstI)) { 167 OS << "da analyze - "; 168 if (auto D = DA->depends(&*SrcI, &*DstI, true)) { 169 D->dump(OS); 170 for (unsigned Level = 1; Level <= D->getLevels(); Level++) { 171 if (D->isSplitable(Level)) { 172 OS << "da analyze - split level = " << Level; 173 OS << ", iteration = " << *DA->getSplitIteration(*D, Level); 174 OS << "!\n"; 175 } 176 } 177 } 178 else 179 OS << "none!\n"; 180 } 181 } 182 } 183 } 184 } 185 186 187 void DependenceAnalysis::print(raw_ostream &OS, const Module*) const { 188 dumpExampleDependence(OS, F, const_cast<DependenceAnalysis *>(this)); 189 } 190 191 //===----------------------------------------------------------------------===// 192 // Dependence methods 193 194 // Returns true if this is an input dependence. 195 bool Dependence::isInput() const { 196 return Src->mayReadFromMemory() && Dst->mayReadFromMemory(); 197 } 198 199 200 // Returns true if this is an output dependence. 201 bool Dependence::isOutput() const { 202 return Src->mayWriteToMemory() && Dst->mayWriteToMemory(); 203 } 204 205 206 // Returns true if this is an flow (aka true) dependence. 207 bool Dependence::isFlow() const { 208 return Src->mayWriteToMemory() && Dst->mayReadFromMemory(); 209 } 210 211 212 // Returns true if this is an anti dependence. 213 bool Dependence::isAnti() const { 214 return Src->mayReadFromMemory() && Dst->mayWriteToMemory(); 215 } 216 217 218 // Returns true if a particular level is scalar; that is, 219 // if no subscript in the source or destination mention the induction 220 // variable associated with the loop at this level. 221 // Leave this out of line, so it will serve as a virtual method anchor 222 bool Dependence::isScalar(unsigned level) const { 223 return false; 224 } 225 226 227 //===----------------------------------------------------------------------===// 228 // FullDependence methods 229 230 FullDependence::FullDependence(Instruction *Source, Instruction *Destination, 231 bool PossiblyLoopIndependent, 232 unsigned CommonLevels) 233 : Dependence(Source, Destination), Levels(CommonLevels), 234 LoopIndependent(PossiblyLoopIndependent) { 235 Consistent = true; 236 DV = CommonLevels ? new DVEntry[CommonLevels] : nullptr; 237 } 238 239 // The rest are simple getters that hide the implementation. 240 241 // getDirection - Returns the direction associated with a particular level. 242 unsigned FullDependence::getDirection(unsigned Level) const { 243 assert(0 < Level && Level <= Levels && "Level out of range"); 244 return DV[Level - 1].Direction; 245 } 246 247 248 // Returns the distance (or NULL) associated with a particular level. 249 const SCEV *FullDependence::getDistance(unsigned Level) const { 250 assert(0 < Level && Level <= Levels && "Level out of range"); 251 return DV[Level - 1].Distance; 252 } 253 254 255 // Returns true if a particular level is scalar; that is, 256 // if no subscript in the source or destination mention the induction 257 // variable associated with the loop at this level. 258 bool FullDependence::isScalar(unsigned Level) const { 259 assert(0 < Level && Level <= Levels && "Level out of range"); 260 return DV[Level - 1].Scalar; 261 } 262 263 264 // Returns true if peeling the first iteration from this loop 265 // will break this dependence. 266 bool FullDependence::isPeelFirst(unsigned Level) const { 267 assert(0 < Level && Level <= Levels && "Level out of range"); 268 return DV[Level - 1].PeelFirst; 269 } 270 271 272 // Returns true if peeling the last iteration from this loop 273 // will break this dependence. 274 bool FullDependence::isPeelLast(unsigned Level) const { 275 assert(0 < Level && Level <= Levels && "Level out of range"); 276 return DV[Level - 1].PeelLast; 277 } 278 279 280 // Returns true if splitting this loop will break the dependence. 281 bool FullDependence::isSplitable(unsigned Level) const { 282 assert(0 < Level && Level <= Levels && "Level out of range"); 283 return DV[Level - 1].Splitable; 284 } 285 286 287 //===----------------------------------------------------------------------===// 288 // DependenceAnalysis::Constraint methods 289 290 // If constraint is a point <X, Y>, returns X. 291 // Otherwise assert. 292 const SCEV *DependenceAnalysis::Constraint::getX() const { 293 assert(Kind == Point && "Kind should be Point"); 294 return A; 295 } 296 297 298 // If constraint is a point <X, Y>, returns Y. 299 // Otherwise assert. 300 const SCEV *DependenceAnalysis::Constraint::getY() const { 301 assert(Kind == Point && "Kind should be Point"); 302 return B; 303 } 304 305 306 // If constraint is a line AX + BY = C, returns A. 307 // Otherwise assert. 308 const SCEV *DependenceAnalysis::Constraint::getA() const { 309 assert((Kind == Line || Kind == Distance) && 310 "Kind should be Line (or Distance)"); 311 return A; 312 } 313 314 315 // If constraint is a line AX + BY = C, returns B. 316 // Otherwise assert. 317 const SCEV *DependenceAnalysis::Constraint::getB() const { 318 assert((Kind == Line || Kind == Distance) && 319 "Kind should be Line (or Distance)"); 320 return B; 321 } 322 323 324 // If constraint is a line AX + BY = C, returns C. 325 // Otherwise assert. 326 const SCEV *DependenceAnalysis::Constraint::getC() const { 327 assert((Kind == Line || Kind == Distance) && 328 "Kind should be Line (or Distance)"); 329 return C; 330 } 331 332 333 // If constraint is a distance, returns D. 334 // Otherwise assert. 335 const SCEV *DependenceAnalysis::Constraint::getD() const { 336 assert(Kind == Distance && "Kind should be Distance"); 337 return SE->getNegativeSCEV(C); 338 } 339 340 341 // Returns the loop associated with this constraint. 342 const Loop *DependenceAnalysis::Constraint::getAssociatedLoop() const { 343 assert((Kind == Distance || Kind == Line || Kind == Point) && 344 "Kind should be Distance, Line, or Point"); 345 return AssociatedLoop; 346 } 347 348 349 void DependenceAnalysis::Constraint::setPoint(const SCEV *X, 350 const SCEV *Y, 351 const Loop *CurLoop) { 352 Kind = Point; 353 A = X; 354 B = Y; 355 AssociatedLoop = CurLoop; 356 } 357 358 359 void DependenceAnalysis::Constraint::setLine(const SCEV *AA, 360 const SCEV *BB, 361 const SCEV *CC, 362 const Loop *CurLoop) { 363 Kind = Line; 364 A = AA; 365 B = BB; 366 C = CC; 367 AssociatedLoop = CurLoop; 368 } 369 370 371 void DependenceAnalysis::Constraint::setDistance(const SCEV *D, 372 const Loop *CurLoop) { 373 Kind = Distance; 374 A = SE->getConstant(D->getType(), 1); 375 B = SE->getNegativeSCEV(A); 376 C = SE->getNegativeSCEV(D); 377 AssociatedLoop = CurLoop; 378 } 379 380 381 void DependenceAnalysis::Constraint::setEmpty() { 382 Kind = Empty; 383 } 384 385 386 void DependenceAnalysis::Constraint::setAny(ScalarEvolution *NewSE) { 387 SE = NewSE; 388 Kind = Any; 389 } 390 391 392 // For debugging purposes. Dumps the constraint out to OS. 393 void DependenceAnalysis::Constraint::dump(raw_ostream &OS) const { 394 if (isEmpty()) 395 OS << " Empty\n"; 396 else if (isAny()) 397 OS << " Any\n"; 398 else if (isPoint()) 399 OS << " Point is <" << *getX() << ", " << *getY() << ">\n"; 400 else if (isDistance()) 401 OS << " Distance is " << *getD() << 402 " (" << *getA() << "*X + " << *getB() << "*Y = " << *getC() << ")\n"; 403 else if (isLine()) 404 OS << " Line is " << *getA() << "*X + " << 405 *getB() << "*Y = " << *getC() << "\n"; 406 else 407 llvm_unreachable("unknown constraint type in Constraint::dump"); 408 } 409 410 411 // Updates X with the intersection 412 // of the Constraints X and Y. Returns true if X has changed. 413 // Corresponds to Figure 4 from the paper 414 // 415 // Practical Dependence Testing 416 // Goff, Kennedy, Tseng 417 // PLDI 1991 418 bool DependenceAnalysis::intersectConstraints(Constraint *X, 419 const Constraint *Y) { 420 ++DeltaApplications; 421 DEBUG(dbgs() << "\tintersect constraints\n"); 422 DEBUG(dbgs() << "\t X ="; X->dump(dbgs())); 423 DEBUG(dbgs() << "\t Y ="; Y->dump(dbgs())); 424 assert(!Y->isPoint() && "Y must not be a Point"); 425 if (X->isAny()) { 426 if (Y->isAny()) 427 return false; 428 *X = *Y; 429 return true; 430 } 431 if (X->isEmpty()) 432 return false; 433 if (Y->isEmpty()) { 434 X->setEmpty(); 435 return true; 436 } 437 438 if (X->isDistance() && Y->isDistance()) { 439 DEBUG(dbgs() << "\t intersect 2 distances\n"); 440 if (isKnownPredicate(CmpInst::ICMP_EQ, X->getD(), Y->getD())) 441 return false; 442 if (isKnownPredicate(CmpInst::ICMP_NE, X->getD(), Y->getD())) { 443 X->setEmpty(); 444 ++DeltaSuccesses; 445 return true; 446 } 447 // Hmmm, interesting situation. 448 // I guess if either is constant, keep it and ignore the other. 449 if (isa<SCEVConstant>(Y->getD())) { 450 *X = *Y; 451 return true; 452 } 453 return false; 454 } 455 456 // At this point, the pseudo-code in Figure 4 of the paper 457 // checks if (X->isPoint() && Y->isPoint()). 458 // This case can't occur in our implementation, 459 // since a Point can only arise as the result of intersecting 460 // two Line constraints, and the right-hand value, Y, is never 461 // the result of an intersection. 462 assert(!(X->isPoint() && Y->isPoint()) && 463 "We shouldn't ever see X->isPoint() && Y->isPoint()"); 464 465 if (X->isLine() && Y->isLine()) { 466 DEBUG(dbgs() << "\t intersect 2 lines\n"); 467 const SCEV *Prod1 = SE->getMulExpr(X->getA(), Y->getB()); 468 const SCEV *Prod2 = SE->getMulExpr(X->getB(), Y->getA()); 469 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) { 470 // slopes are equal, so lines are parallel 471 DEBUG(dbgs() << "\t\tsame slope\n"); 472 Prod1 = SE->getMulExpr(X->getC(), Y->getB()); 473 Prod2 = SE->getMulExpr(X->getB(), Y->getC()); 474 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) 475 return false; 476 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { 477 X->setEmpty(); 478 ++DeltaSuccesses; 479 return true; 480 } 481 return false; 482 } 483 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { 484 // slopes differ, so lines intersect 485 DEBUG(dbgs() << "\t\tdifferent slopes\n"); 486 const SCEV *C1B2 = SE->getMulExpr(X->getC(), Y->getB()); 487 const SCEV *C1A2 = SE->getMulExpr(X->getC(), Y->getA()); 488 const SCEV *C2B1 = SE->getMulExpr(Y->getC(), X->getB()); 489 const SCEV *C2A1 = SE->getMulExpr(Y->getC(), X->getA()); 490 const SCEV *A1B2 = SE->getMulExpr(X->getA(), Y->getB()); 491 const SCEV *A2B1 = SE->getMulExpr(Y->getA(), X->getB()); 492 const SCEVConstant *C1A2_C2A1 = 493 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1A2, C2A1)); 494 const SCEVConstant *C1B2_C2B1 = 495 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1B2, C2B1)); 496 const SCEVConstant *A1B2_A2B1 = 497 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A1B2, A2B1)); 498 const SCEVConstant *A2B1_A1B2 = 499 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A2B1, A1B2)); 500 if (!C1B2_C2B1 || !C1A2_C2A1 || 501 !A1B2_A2B1 || !A2B1_A1B2) 502 return false; 503 APInt Xtop = C1B2_C2B1->getValue()->getValue(); 504 APInt Xbot = A1B2_A2B1->getValue()->getValue(); 505 APInt Ytop = C1A2_C2A1->getValue()->getValue(); 506 APInt Ybot = A2B1_A1B2->getValue()->getValue(); 507 DEBUG(dbgs() << "\t\tXtop = " << Xtop << "\n"); 508 DEBUG(dbgs() << "\t\tXbot = " << Xbot << "\n"); 509 DEBUG(dbgs() << "\t\tYtop = " << Ytop << "\n"); 510 DEBUG(dbgs() << "\t\tYbot = " << Ybot << "\n"); 511 APInt Xq = Xtop; // these need to be initialized, even 512 APInt Xr = Xtop; // though they're just going to be overwritten 513 APInt::sdivrem(Xtop, Xbot, Xq, Xr); 514 APInt Yq = Ytop; 515 APInt Yr = Ytop; 516 APInt::sdivrem(Ytop, Ybot, Yq, Yr); 517 if (Xr != 0 || Yr != 0) { 518 X->setEmpty(); 519 ++DeltaSuccesses; 520 return true; 521 } 522 DEBUG(dbgs() << "\t\tX = " << Xq << ", Y = " << Yq << "\n"); 523 if (Xq.slt(0) || Yq.slt(0)) { 524 X->setEmpty(); 525 ++DeltaSuccesses; 526 return true; 527 } 528 if (const SCEVConstant *CUB = 529 collectConstantUpperBound(X->getAssociatedLoop(), Prod1->getType())) { 530 APInt UpperBound = CUB->getValue()->getValue(); 531 DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n"); 532 if (Xq.sgt(UpperBound) || Yq.sgt(UpperBound)) { 533 X->setEmpty(); 534 ++DeltaSuccesses; 535 return true; 536 } 537 } 538 X->setPoint(SE->getConstant(Xq), 539 SE->getConstant(Yq), 540 X->getAssociatedLoop()); 541 ++DeltaSuccesses; 542 return true; 543 } 544 return false; 545 } 546 547 // if (X->isLine() && Y->isPoint()) This case can't occur. 548 assert(!(X->isLine() && Y->isPoint()) && "This case should never occur"); 549 550 if (X->isPoint() && Y->isLine()) { 551 DEBUG(dbgs() << "\t intersect Point and Line\n"); 552 const SCEV *A1X1 = SE->getMulExpr(Y->getA(), X->getX()); 553 const SCEV *B1Y1 = SE->getMulExpr(Y->getB(), X->getY()); 554 const SCEV *Sum = SE->getAddExpr(A1X1, B1Y1); 555 if (isKnownPredicate(CmpInst::ICMP_EQ, Sum, Y->getC())) 556 return false; 557 if (isKnownPredicate(CmpInst::ICMP_NE, Sum, Y->getC())) { 558 X->setEmpty(); 559 ++DeltaSuccesses; 560 return true; 561 } 562 return false; 563 } 564 565 llvm_unreachable("shouldn't reach the end of Constraint intersection"); 566 return false; 567 } 568 569 570 //===----------------------------------------------------------------------===// 571 // DependenceAnalysis methods 572 573 // For debugging purposes. Dumps a dependence to OS. 574 void Dependence::dump(raw_ostream &OS) const { 575 bool Splitable = false; 576 if (isConfused()) 577 OS << "confused"; 578 else { 579 if (isConsistent()) 580 OS << "consistent "; 581 if (isFlow()) 582 OS << "flow"; 583 else if (isOutput()) 584 OS << "output"; 585 else if (isAnti()) 586 OS << "anti"; 587 else if (isInput()) 588 OS << "input"; 589 unsigned Levels = getLevels(); 590 OS << " ["; 591 for (unsigned II = 1; II <= Levels; ++II) { 592 if (isSplitable(II)) 593 Splitable = true; 594 if (isPeelFirst(II)) 595 OS << 'p'; 596 const SCEV *Distance = getDistance(II); 597 if (Distance) 598 OS << *Distance; 599 else if (isScalar(II)) 600 OS << "S"; 601 else { 602 unsigned Direction = getDirection(II); 603 if (Direction == DVEntry::ALL) 604 OS << "*"; 605 else { 606 if (Direction & DVEntry::LT) 607 OS << "<"; 608 if (Direction & DVEntry::EQ) 609 OS << "="; 610 if (Direction & DVEntry::GT) 611 OS << ">"; 612 } 613 } 614 if (isPeelLast(II)) 615 OS << 'p'; 616 if (II < Levels) 617 OS << " "; 618 } 619 if (isLoopIndependent()) 620 OS << "|<"; 621 OS << "]"; 622 if (Splitable) 623 OS << " splitable"; 624 } 625 OS << "!\n"; 626 } 627 628 static AliasResult underlyingObjectsAlias(AliasAnalysis *AA, 629 const DataLayout &DL, const Value *A, 630 const Value *B) { 631 const Value *AObj = GetUnderlyingObject(A, DL); 632 const Value *BObj = GetUnderlyingObject(B, DL); 633 return AA->alias(AObj, AA->getTypeStoreSize(AObj->getType()), 634 BObj, AA->getTypeStoreSize(BObj->getType())); 635 } 636 637 638 // Returns true if the load or store can be analyzed. Atomic and volatile 639 // operations have properties which this analysis does not understand. 640 static 641 bool isLoadOrStore(const Instruction *I) { 642 if (const LoadInst *LI = dyn_cast<LoadInst>(I)) 643 return LI->isUnordered(); 644 else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) 645 return SI->isUnordered(); 646 return false; 647 } 648 649 650 static 651 Value *getPointerOperand(Instruction *I) { 652 if (LoadInst *LI = dyn_cast<LoadInst>(I)) 653 return LI->getPointerOperand(); 654 if (StoreInst *SI = dyn_cast<StoreInst>(I)) 655 return SI->getPointerOperand(); 656 llvm_unreachable("Value is not load or store instruction"); 657 return nullptr; 658 } 659 660 661 // Examines the loop nesting of the Src and Dst 662 // instructions and establishes their shared loops. Sets the variables 663 // CommonLevels, SrcLevels, and MaxLevels. 664 // The source and destination instructions needn't be contained in the same 665 // loop. The routine establishNestingLevels finds the level of most deeply 666 // nested loop that contains them both, CommonLevels. An instruction that's 667 // not contained in a loop is at level = 0. MaxLevels is equal to the level 668 // of the source plus the level of the destination, minus CommonLevels. 669 // This lets us allocate vectors MaxLevels in length, with room for every 670 // distinct loop referenced in both the source and destination subscripts. 671 // The variable SrcLevels is the nesting depth of the source instruction. 672 // It's used to help calculate distinct loops referenced by the destination. 673 // Here's the map from loops to levels: 674 // 0 - unused 675 // 1 - outermost common loop 676 // ... - other common loops 677 // CommonLevels - innermost common loop 678 // ... - loops containing Src but not Dst 679 // SrcLevels - innermost loop containing Src but not Dst 680 // ... - loops containing Dst but not Src 681 // MaxLevels - innermost loops containing Dst but not Src 682 // Consider the follow code fragment: 683 // for (a = ...) { 684 // for (b = ...) { 685 // for (c = ...) { 686 // for (d = ...) { 687 // A[] = ...; 688 // } 689 // } 690 // for (e = ...) { 691 // for (f = ...) { 692 // for (g = ...) { 693 // ... = A[]; 694 // } 695 // } 696 // } 697 // } 698 // } 699 // If we're looking at the possibility of a dependence between the store 700 // to A (the Src) and the load from A (the Dst), we'll note that they 701 // have 2 loops in common, so CommonLevels will equal 2 and the direction 702 // vector for Result will have 2 entries. SrcLevels = 4 and MaxLevels = 7. 703 // A map from loop names to loop numbers would look like 704 // a - 1 705 // b - 2 = CommonLevels 706 // c - 3 707 // d - 4 = SrcLevels 708 // e - 5 709 // f - 6 710 // g - 7 = MaxLevels 711 void DependenceAnalysis::establishNestingLevels(const Instruction *Src, 712 const Instruction *Dst) { 713 const BasicBlock *SrcBlock = Src->getParent(); 714 const BasicBlock *DstBlock = Dst->getParent(); 715 unsigned SrcLevel = LI->getLoopDepth(SrcBlock); 716 unsigned DstLevel = LI->getLoopDepth(DstBlock); 717 const Loop *SrcLoop = LI->getLoopFor(SrcBlock); 718 const Loop *DstLoop = LI->getLoopFor(DstBlock); 719 SrcLevels = SrcLevel; 720 MaxLevels = SrcLevel + DstLevel; 721 while (SrcLevel > DstLevel) { 722 SrcLoop = SrcLoop->getParentLoop(); 723 SrcLevel--; 724 } 725 while (DstLevel > SrcLevel) { 726 DstLoop = DstLoop->getParentLoop(); 727 DstLevel--; 728 } 729 while (SrcLoop != DstLoop) { 730 SrcLoop = SrcLoop->getParentLoop(); 731 DstLoop = DstLoop->getParentLoop(); 732 SrcLevel--; 733 } 734 CommonLevels = SrcLevel; 735 MaxLevels -= CommonLevels; 736 } 737 738 739 // Given one of the loops containing the source, return 740 // its level index in our numbering scheme. 741 unsigned DependenceAnalysis::mapSrcLoop(const Loop *SrcLoop) const { 742 return SrcLoop->getLoopDepth(); 743 } 744 745 746 // Given one of the loops containing the destination, 747 // return its level index in our numbering scheme. 748 unsigned DependenceAnalysis::mapDstLoop(const Loop *DstLoop) const { 749 unsigned D = DstLoop->getLoopDepth(); 750 if (D > CommonLevels) 751 return D - CommonLevels + SrcLevels; 752 else 753 return D; 754 } 755 756 757 // Returns true if Expression is loop invariant in LoopNest. 758 bool DependenceAnalysis::isLoopInvariant(const SCEV *Expression, 759 const Loop *LoopNest) const { 760 if (!LoopNest) 761 return true; 762 return SE->isLoopInvariant(Expression, LoopNest) && 763 isLoopInvariant(Expression, LoopNest->getParentLoop()); 764 } 765 766 767 768 // Finds the set of loops from the LoopNest that 769 // have a level <= CommonLevels and are referred to by the SCEV Expression. 770 void DependenceAnalysis::collectCommonLoops(const SCEV *Expression, 771 const Loop *LoopNest, 772 SmallBitVector &Loops) const { 773 while (LoopNest) { 774 unsigned Level = LoopNest->getLoopDepth(); 775 if (Level <= CommonLevels && !SE->isLoopInvariant(Expression, LoopNest)) 776 Loops.set(Level); 777 LoopNest = LoopNest->getParentLoop(); 778 } 779 } 780 781 void DependenceAnalysis::unifySubscriptType(ArrayRef<Subscript *> Pairs) { 782 783 unsigned widestWidthSeen = 0; 784 Type *widestType; 785 786 // Go through each pair and find the widest bit to which we need 787 // to extend all of them. 788 for (unsigned i = 0; i < Pairs.size(); i++) { 789 const SCEV *Src = Pairs[i]->Src; 790 const SCEV *Dst = Pairs[i]->Dst; 791 IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType()); 792 IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType()); 793 if (SrcTy == nullptr || DstTy == nullptr) { 794 assert(SrcTy == DstTy && "This function only unify integer types and " 795 "expect Src and Dst share the same type " 796 "otherwise."); 797 continue; 798 } 799 if (SrcTy->getBitWidth() > widestWidthSeen) { 800 widestWidthSeen = SrcTy->getBitWidth(); 801 widestType = SrcTy; 802 } 803 if (DstTy->getBitWidth() > widestWidthSeen) { 804 widestWidthSeen = DstTy->getBitWidth(); 805 widestType = DstTy; 806 } 807 } 808 809 810 assert(widestWidthSeen > 0); 811 812 // Now extend each pair to the widest seen. 813 for (unsigned i = 0; i < Pairs.size(); i++) { 814 const SCEV *Src = Pairs[i]->Src; 815 const SCEV *Dst = Pairs[i]->Dst; 816 IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType()); 817 IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType()); 818 if (SrcTy == nullptr || DstTy == nullptr) { 819 assert(SrcTy == DstTy && "This function only unify integer types and " 820 "expect Src and Dst share the same type " 821 "otherwise."); 822 continue; 823 } 824 if (SrcTy->getBitWidth() < widestWidthSeen) 825 // Sign-extend Src to widestType 826 Pairs[i]->Src = SE->getSignExtendExpr(Src, widestType); 827 if (DstTy->getBitWidth() < widestWidthSeen) { 828 // Sign-extend Dst to widestType 829 Pairs[i]->Dst = SE->getSignExtendExpr(Dst, widestType); 830 } 831 } 832 } 833 834 // removeMatchingExtensions - Examines a subscript pair. 835 // If the source and destination are identically sign (or zero) 836 // extended, it strips off the extension in an effect to simplify 837 // the actual analysis. 838 void DependenceAnalysis::removeMatchingExtensions(Subscript *Pair) { 839 const SCEV *Src = Pair->Src; 840 const SCEV *Dst = Pair->Dst; 841 if ((isa<SCEVZeroExtendExpr>(Src) && isa<SCEVZeroExtendExpr>(Dst)) || 842 (isa<SCEVSignExtendExpr>(Src) && isa<SCEVSignExtendExpr>(Dst))) { 843 const SCEVCastExpr *SrcCast = cast<SCEVCastExpr>(Src); 844 const SCEVCastExpr *DstCast = cast<SCEVCastExpr>(Dst); 845 const SCEV *SrcCastOp = SrcCast->getOperand(); 846 const SCEV *DstCastOp = DstCast->getOperand(); 847 if (SrcCastOp->getType() == DstCastOp->getType()) { 848 Pair->Src = SrcCastOp; 849 Pair->Dst = DstCastOp; 850 } 851 } 852 } 853 854 855 // Examine the scev and return true iff it's linear. 856 // Collect any loops mentioned in the set of "Loops". 857 bool DependenceAnalysis::checkSrcSubscript(const SCEV *Src, 858 const Loop *LoopNest, 859 SmallBitVector &Loops) { 860 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Src); 861 if (!AddRec) 862 return isLoopInvariant(Src, LoopNest); 863 const SCEV *Start = AddRec->getStart(); 864 const SCEV *Step = AddRec->getStepRecurrence(*SE); 865 const SCEV *UB = SE->getBackedgeTakenCount(AddRec->getLoop()); 866 if (!isa<SCEVCouldNotCompute>(UB)) { 867 if (SE->getTypeSizeInBits(Start->getType()) < 868 SE->getTypeSizeInBits(UB->getType())) { 869 if (!AddRec->getNoWrapFlags()) 870 return false; 871 } 872 } 873 if (!isLoopInvariant(Step, LoopNest)) 874 return false; 875 Loops.set(mapSrcLoop(AddRec->getLoop())); 876 return checkSrcSubscript(Start, LoopNest, Loops); 877 } 878 879 880 881 // Examine the scev and return true iff it's linear. 882 // Collect any loops mentioned in the set of "Loops". 883 bool DependenceAnalysis::checkDstSubscript(const SCEV *Dst, 884 const Loop *LoopNest, 885 SmallBitVector &Loops) { 886 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Dst); 887 if (!AddRec) 888 return isLoopInvariant(Dst, LoopNest); 889 const SCEV *Start = AddRec->getStart(); 890 const SCEV *Step = AddRec->getStepRecurrence(*SE); 891 const SCEV *UB = SE->getBackedgeTakenCount(AddRec->getLoop()); 892 if (!isa<SCEVCouldNotCompute>(UB)) { 893 if (SE->getTypeSizeInBits(Start->getType()) < 894 SE->getTypeSizeInBits(UB->getType())) { 895 if (!AddRec->getNoWrapFlags()) 896 return false; 897 } 898 } 899 if (!isLoopInvariant(Step, LoopNest)) 900 return false; 901 Loops.set(mapDstLoop(AddRec->getLoop())); 902 return checkDstSubscript(Start, LoopNest, Loops); 903 } 904 905 906 // Examines the subscript pair (the Src and Dst SCEVs) 907 // and classifies it as either ZIV, SIV, RDIV, MIV, or Nonlinear. 908 // Collects the associated loops in a set. 909 DependenceAnalysis::Subscript::ClassificationKind 910 DependenceAnalysis::classifyPair(const SCEV *Src, const Loop *SrcLoopNest, 911 const SCEV *Dst, const Loop *DstLoopNest, 912 SmallBitVector &Loops) { 913 SmallBitVector SrcLoops(MaxLevels + 1); 914 SmallBitVector DstLoops(MaxLevels + 1); 915 if (!checkSrcSubscript(Src, SrcLoopNest, SrcLoops)) 916 return Subscript::NonLinear; 917 if (!checkDstSubscript(Dst, DstLoopNest, DstLoops)) 918 return Subscript::NonLinear; 919 Loops = SrcLoops; 920 Loops |= DstLoops; 921 unsigned N = Loops.count(); 922 if (N == 0) 923 return Subscript::ZIV; 924 if (N == 1) 925 return Subscript::SIV; 926 if (N == 2 && (SrcLoops.count() == 0 || 927 DstLoops.count() == 0 || 928 (SrcLoops.count() == 1 && DstLoops.count() == 1))) 929 return Subscript::RDIV; 930 return Subscript::MIV; 931 } 932 933 934 // A wrapper around SCEV::isKnownPredicate. 935 // Looks for cases where we're interested in comparing for equality. 936 // If both X and Y have been identically sign or zero extended, 937 // it strips off the (confusing) extensions before invoking 938 // SCEV::isKnownPredicate. Perhaps, someday, the ScalarEvolution package 939 // will be similarly updated. 940 // 941 // If SCEV::isKnownPredicate can't prove the predicate, 942 // we try simple subtraction, which seems to help in some cases 943 // involving symbolics. 944 bool DependenceAnalysis::isKnownPredicate(ICmpInst::Predicate Pred, 945 const SCEV *X, 946 const SCEV *Y) const { 947 if (Pred == CmpInst::ICMP_EQ || 948 Pred == CmpInst::ICMP_NE) { 949 if ((isa<SCEVSignExtendExpr>(X) && 950 isa<SCEVSignExtendExpr>(Y)) || 951 (isa<SCEVZeroExtendExpr>(X) && 952 isa<SCEVZeroExtendExpr>(Y))) { 953 const SCEVCastExpr *CX = cast<SCEVCastExpr>(X); 954 const SCEVCastExpr *CY = cast<SCEVCastExpr>(Y); 955 const SCEV *Xop = CX->getOperand(); 956 const SCEV *Yop = CY->getOperand(); 957 if (Xop->getType() == Yop->getType()) { 958 X = Xop; 959 Y = Yop; 960 } 961 } 962 } 963 if (SE->isKnownPredicate(Pred, X, Y)) 964 return true; 965 // If SE->isKnownPredicate can't prove the condition, 966 // we try the brute-force approach of subtracting 967 // and testing the difference. 968 // By testing with SE->isKnownPredicate first, we avoid 969 // the possibility of overflow when the arguments are constants. 970 const SCEV *Delta = SE->getMinusSCEV(X, Y); 971 switch (Pred) { 972 case CmpInst::ICMP_EQ: 973 return Delta->isZero(); 974 case CmpInst::ICMP_NE: 975 return SE->isKnownNonZero(Delta); 976 case CmpInst::ICMP_SGE: 977 return SE->isKnownNonNegative(Delta); 978 case CmpInst::ICMP_SLE: 979 return SE->isKnownNonPositive(Delta); 980 case CmpInst::ICMP_SGT: 981 return SE->isKnownPositive(Delta); 982 case CmpInst::ICMP_SLT: 983 return SE->isKnownNegative(Delta); 984 default: 985 llvm_unreachable("unexpected predicate in isKnownPredicate"); 986 } 987 } 988 989 990 // All subscripts are all the same type. 991 // Loop bound may be smaller (e.g., a char). 992 // Should zero extend loop bound, since it's always >= 0. 993 // This routine collects upper bound and extends or truncates if needed. 994 // Truncating is safe when subscripts are known not to wrap. Cases without 995 // nowrap flags should have been rejected earlier. 996 // Return null if no bound available. 997 const SCEV *DependenceAnalysis::collectUpperBound(const Loop *L, 998 Type *T) const { 999 if (SE->hasLoopInvariantBackedgeTakenCount(L)) { 1000 const SCEV *UB = SE->getBackedgeTakenCount(L); 1001 return SE->getTruncateOrZeroExtend(UB, T); 1002 } 1003 return nullptr; 1004 } 1005 1006 1007 // Calls collectUpperBound(), then attempts to cast it to SCEVConstant. 1008 // If the cast fails, returns NULL. 1009 const SCEVConstant *DependenceAnalysis::collectConstantUpperBound(const Loop *L, 1010 Type *T 1011 ) const { 1012 if (const SCEV *UB = collectUpperBound(L, T)) 1013 return dyn_cast<SCEVConstant>(UB); 1014 return nullptr; 1015 } 1016 1017 1018 // testZIV - 1019 // When we have a pair of subscripts of the form [c1] and [c2], 1020 // where c1 and c2 are both loop invariant, we attack it using 1021 // the ZIV test. Basically, we test by comparing the two values, 1022 // but there are actually three possible results: 1023 // 1) the values are equal, so there's a dependence 1024 // 2) the values are different, so there's no dependence 1025 // 3) the values might be equal, so we have to assume a dependence. 1026 // 1027 // Return true if dependence disproved. 1028 bool DependenceAnalysis::testZIV(const SCEV *Src, 1029 const SCEV *Dst, 1030 FullDependence &Result) const { 1031 DEBUG(dbgs() << " src = " << *Src << "\n"); 1032 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 1033 ++ZIVapplications; 1034 if (isKnownPredicate(CmpInst::ICMP_EQ, Src, Dst)) { 1035 DEBUG(dbgs() << " provably dependent\n"); 1036 return false; // provably dependent 1037 } 1038 if (isKnownPredicate(CmpInst::ICMP_NE, Src, Dst)) { 1039 DEBUG(dbgs() << " provably independent\n"); 1040 ++ZIVindependence; 1041 return true; // provably independent 1042 } 1043 DEBUG(dbgs() << " possibly dependent\n"); 1044 Result.Consistent = false; 1045 return false; // possibly dependent 1046 } 1047 1048 1049 // strongSIVtest - 1050 // From the paper, Practical Dependence Testing, Section 4.2.1 1051 // 1052 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 + a*i], 1053 // where i is an induction variable, c1 and c2 are loop invariant, 1054 // and a is a constant, we can solve it exactly using the Strong SIV test. 1055 // 1056 // Can prove independence. Failing that, can compute distance (and direction). 1057 // In the presence of symbolic terms, we can sometimes make progress. 1058 // 1059 // If there's a dependence, 1060 // 1061 // c1 + a*i = c2 + a*i' 1062 // 1063 // The dependence distance is 1064 // 1065 // d = i' - i = (c1 - c2)/a 1066 // 1067 // A dependence only exists if d is an integer and abs(d) <= U, where U is the 1068 // loop's upper bound. If a dependence exists, the dependence direction is 1069 // defined as 1070 // 1071 // { < if d > 0 1072 // direction = { = if d = 0 1073 // { > if d < 0 1074 // 1075 // Return true if dependence disproved. 1076 bool DependenceAnalysis::strongSIVtest(const SCEV *Coeff, 1077 const SCEV *SrcConst, 1078 const SCEV *DstConst, 1079 const Loop *CurLoop, 1080 unsigned Level, 1081 FullDependence &Result, 1082 Constraint &NewConstraint) const { 1083 DEBUG(dbgs() << "\tStrong SIV test\n"); 1084 DEBUG(dbgs() << "\t Coeff = " << *Coeff); 1085 DEBUG(dbgs() << ", " << *Coeff->getType() << "\n"); 1086 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst); 1087 DEBUG(dbgs() << ", " << *SrcConst->getType() << "\n"); 1088 DEBUG(dbgs() << "\t DstConst = " << *DstConst); 1089 DEBUG(dbgs() << ", " << *DstConst->getType() << "\n"); 1090 ++StrongSIVapplications; 1091 assert(0 < Level && Level <= CommonLevels && "level out of range"); 1092 Level--; 1093 1094 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); 1095 DEBUG(dbgs() << "\t Delta = " << *Delta); 1096 DEBUG(dbgs() << ", " << *Delta->getType() << "\n"); 1097 1098 // check that |Delta| < iteration count 1099 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1100 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound); 1101 DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n"); 1102 const SCEV *AbsDelta = 1103 SE->isKnownNonNegative(Delta) ? Delta : SE->getNegativeSCEV(Delta); 1104 const SCEV *AbsCoeff = 1105 SE->isKnownNonNegative(Coeff) ? Coeff : SE->getNegativeSCEV(Coeff); 1106 const SCEV *Product = SE->getMulExpr(UpperBound, AbsCoeff); 1107 if (isKnownPredicate(CmpInst::ICMP_SGT, AbsDelta, Product)) { 1108 // Distance greater than trip count - no dependence 1109 ++StrongSIVindependence; 1110 ++StrongSIVsuccesses; 1111 return true; 1112 } 1113 } 1114 1115 // Can we compute distance? 1116 if (isa<SCEVConstant>(Delta) && isa<SCEVConstant>(Coeff)) { 1117 APInt ConstDelta = cast<SCEVConstant>(Delta)->getValue()->getValue(); 1118 APInt ConstCoeff = cast<SCEVConstant>(Coeff)->getValue()->getValue(); 1119 APInt Distance = ConstDelta; // these need to be initialized 1120 APInt Remainder = ConstDelta; 1121 APInt::sdivrem(ConstDelta, ConstCoeff, Distance, Remainder); 1122 DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); 1123 DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1124 // Make sure Coeff divides Delta exactly 1125 if (Remainder != 0) { 1126 // Coeff doesn't divide Distance, no dependence 1127 ++StrongSIVindependence; 1128 ++StrongSIVsuccesses; 1129 return true; 1130 } 1131 Result.DV[Level].Distance = SE->getConstant(Distance); 1132 NewConstraint.setDistance(SE->getConstant(Distance), CurLoop); 1133 if (Distance.sgt(0)) 1134 Result.DV[Level].Direction &= Dependence::DVEntry::LT; 1135 else if (Distance.slt(0)) 1136 Result.DV[Level].Direction &= Dependence::DVEntry::GT; 1137 else 1138 Result.DV[Level].Direction &= Dependence::DVEntry::EQ; 1139 ++StrongSIVsuccesses; 1140 } 1141 else if (Delta->isZero()) { 1142 // since 0/X == 0 1143 Result.DV[Level].Distance = Delta; 1144 NewConstraint.setDistance(Delta, CurLoop); 1145 Result.DV[Level].Direction &= Dependence::DVEntry::EQ; 1146 ++StrongSIVsuccesses; 1147 } 1148 else { 1149 if (Coeff->isOne()) { 1150 DEBUG(dbgs() << "\t Distance = " << *Delta << "\n"); 1151 Result.DV[Level].Distance = Delta; // since X/1 == X 1152 NewConstraint.setDistance(Delta, CurLoop); 1153 } 1154 else { 1155 Result.Consistent = false; 1156 NewConstraint.setLine(Coeff, 1157 SE->getNegativeSCEV(Coeff), 1158 SE->getNegativeSCEV(Delta), CurLoop); 1159 } 1160 1161 // maybe we can get a useful direction 1162 bool DeltaMaybeZero = !SE->isKnownNonZero(Delta); 1163 bool DeltaMaybePositive = !SE->isKnownNonPositive(Delta); 1164 bool DeltaMaybeNegative = !SE->isKnownNonNegative(Delta); 1165 bool CoeffMaybePositive = !SE->isKnownNonPositive(Coeff); 1166 bool CoeffMaybeNegative = !SE->isKnownNonNegative(Coeff); 1167 // The double negatives above are confusing. 1168 // It helps to read !SE->isKnownNonZero(Delta) 1169 // as "Delta might be Zero" 1170 unsigned NewDirection = Dependence::DVEntry::NONE; 1171 if ((DeltaMaybePositive && CoeffMaybePositive) || 1172 (DeltaMaybeNegative && CoeffMaybeNegative)) 1173 NewDirection = Dependence::DVEntry::LT; 1174 if (DeltaMaybeZero) 1175 NewDirection |= Dependence::DVEntry::EQ; 1176 if ((DeltaMaybeNegative && CoeffMaybePositive) || 1177 (DeltaMaybePositive && CoeffMaybeNegative)) 1178 NewDirection |= Dependence::DVEntry::GT; 1179 if (NewDirection < Result.DV[Level].Direction) 1180 ++StrongSIVsuccesses; 1181 Result.DV[Level].Direction &= NewDirection; 1182 } 1183 return false; 1184 } 1185 1186 1187 // weakCrossingSIVtest - 1188 // From the paper, Practical Dependence Testing, Section 4.2.2 1189 // 1190 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 - a*i], 1191 // where i is an induction variable, c1 and c2 are loop invariant, 1192 // and a is a constant, we can solve it exactly using the 1193 // Weak-Crossing SIV test. 1194 // 1195 // Given c1 + a*i = c2 - a*i', we can look for the intersection of 1196 // the two lines, where i = i', yielding 1197 // 1198 // c1 + a*i = c2 - a*i 1199 // 2a*i = c2 - c1 1200 // i = (c2 - c1)/2a 1201 // 1202 // If i < 0, there is no dependence. 1203 // If i > upperbound, there is no dependence. 1204 // If i = 0 (i.e., if c1 = c2), there's a dependence with distance = 0. 1205 // If i = upperbound, there's a dependence with distance = 0. 1206 // If i is integral, there's a dependence (all directions). 1207 // If the non-integer part = 1/2, there's a dependence (<> directions). 1208 // Otherwise, there's no dependence. 1209 // 1210 // Can prove independence. Failing that, 1211 // can sometimes refine the directions. 1212 // Can determine iteration for splitting. 1213 // 1214 // Return true if dependence disproved. 1215 bool DependenceAnalysis::weakCrossingSIVtest(const SCEV *Coeff, 1216 const SCEV *SrcConst, 1217 const SCEV *DstConst, 1218 const Loop *CurLoop, 1219 unsigned Level, 1220 FullDependence &Result, 1221 Constraint &NewConstraint, 1222 const SCEV *&SplitIter) const { 1223 DEBUG(dbgs() << "\tWeak-Crossing SIV test\n"); 1224 DEBUG(dbgs() << "\t Coeff = " << *Coeff << "\n"); 1225 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1226 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1227 ++WeakCrossingSIVapplications; 1228 assert(0 < Level && Level <= CommonLevels && "Level out of range"); 1229 Level--; 1230 Result.Consistent = false; 1231 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1232 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1233 NewConstraint.setLine(Coeff, Coeff, Delta, CurLoop); 1234 if (Delta->isZero()) { 1235 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT); 1236 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT); 1237 ++WeakCrossingSIVsuccesses; 1238 if (!Result.DV[Level].Direction) { 1239 ++WeakCrossingSIVindependence; 1240 return true; 1241 } 1242 Result.DV[Level].Distance = Delta; // = 0 1243 return false; 1244 } 1245 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Coeff); 1246 if (!ConstCoeff) 1247 return false; 1248 1249 Result.DV[Level].Splitable = true; 1250 if (SE->isKnownNegative(ConstCoeff)) { 1251 ConstCoeff = dyn_cast<SCEVConstant>(SE->getNegativeSCEV(ConstCoeff)); 1252 assert(ConstCoeff && 1253 "dynamic cast of negative of ConstCoeff should yield constant"); 1254 Delta = SE->getNegativeSCEV(Delta); 1255 } 1256 assert(SE->isKnownPositive(ConstCoeff) && "ConstCoeff should be positive"); 1257 1258 // compute SplitIter for use by DependenceAnalysis::getSplitIteration() 1259 SplitIter = 1260 SE->getUDivExpr(SE->getSMaxExpr(SE->getConstant(Delta->getType(), 0), 1261 Delta), 1262 SE->getMulExpr(SE->getConstant(Delta->getType(), 2), 1263 ConstCoeff)); 1264 DEBUG(dbgs() << "\t Split iter = " << *SplitIter << "\n"); 1265 1266 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1267 if (!ConstDelta) 1268 return false; 1269 1270 // We're certain that ConstCoeff > 0; therefore, 1271 // if Delta < 0, then no dependence. 1272 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1273 DEBUG(dbgs() << "\t ConstCoeff = " << *ConstCoeff << "\n"); 1274 if (SE->isKnownNegative(Delta)) { 1275 // No dependence, Delta < 0 1276 ++WeakCrossingSIVindependence; 1277 ++WeakCrossingSIVsuccesses; 1278 return true; 1279 } 1280 1281 // We're certain that Delta > 0 and ConstCoeff > 0. 1282 // Check Delta/(2*ConstCoeff) against upper loop bound 1283 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1284 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1285 const SCEV *ConstantTwo = SE->getConstant(UpperBound->getType(), 2); 1286 const SCEV *ML = SE->getMulExpr(SE->getMulExpr(ConstCoeff, UpperBound), 1287 ConstantTwo); 1288 DEBUG(dbgs() << "\t ML = " << *ML << "\n"); 1289 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, ML)) { 1290 // Delta too big, no dependence 1291 ++WeakCrossingSIVindependence; 1292 ++WeakCrossingSIVsuccesses; 1293 return true; 1294 } 1295 if (isKnownPredicate(CmpInst::ICMP_EQ, Delta, ML)) { 1296 // i = i' = UB 1297 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT); 1298 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT); 1299 ++WeakCrossingSIVsuccesses; 1300 if (!Result.DV[Level].Direction) { 1301 ++WeakCrossingSIVindependence; 1302 return true; 1303 } 1304 Result.DV[Level].Splitable = false; 1305 Result.DV[Level].Distance = SE->getConstant(Delta->getType(), 0); 1306 return false; 1307 } 1308 } 1309 1310 // check that Coeff divides Delta 1311 APInt APDelta = ConstDelta->getValue()->getValue(); 1312 APInt APCoeff = ConstCoeff->getValue()->getValue(); 1313 APInt Distance = APDelta; // these need to be initialzed 1314 APInt Remainder = APDelta; 1315 APInt::sdivrem(APDelta, APCoeff, Distance, Remainder); 1316 DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1317 if (Remainder != 0) { 1318 // Coeff doesn't divide Delta, no dependence 1319 ++WeakCrossingSIVindependence; 1320 ++WeakCrossingSIVsuccesses; 1321 return true; 1322 } 1323 DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); 1324 1325 // if 2*Coeff doesn't divide Delta, then the equal direction isn't possible 1326 APInt Two = APInt(Distance.getBitWidth(), 2, true); 1327 Remainder = Distance.srem(Two); 1328 DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1329 if (Remainder != 0) { 1330 // Equal direction isn't possible 1331 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::EQ); 1332 ++WeakCrossingSIVsuccesses; 1333 } 1334 return false; 1335 } 1336 1337 1338 // Kirch's algorithm, from 1339 // 1340 // Optimizing Supercompilers for Supercomputers 1341 // Michael Wolfe 1342 // MIT Press, 1989 1343 // 1344 // Program 2.1, page 29. 1345 // Computes the GCD of AM and BM. 1346 // Also finds a solution to the equation ax - by = gcd(a, b). 1347 // Returns true if dependence disproved; i.e., gcd does not divide Delta. 1348 static 1349 bool findGCD(unsigned Bits, APInt AM, APInt BM, APInt Delta, 1350 APInt &G, APInt &X, APInt &Y) { 1351 APInt A0(Bits, 1, true), A1(Bits, 0, true); 1352 APInt B0(Bits, 0, true), B1(Bits, 1, true); 1353 APInt G0 = AM.abs(); 1354 APInt G1 = BM.abs(); 1355 APInt Q = G0; // these need to be initialized 1356 APInt R = G0; 1357 APInt::sdivrem(G0, G1, Q, R); 1358 while (R != 0) { 1359 APInt A2 = A0 - Q*A1; A0 = A1; A1 = A2; 1360 APInt B2 = B0 - Q*B1; B0 = B1; B1 = B2; 1361 G0 = G1; G1 = R; 1362 APInt::sdivrem(G0, G1, Q, R); 1363 } 1364 G = G1; 1365 DEBUG(dbgs() << "\t GCD = " << G << "\n"); 1366 X = AM.slt(0) ? -A1 : A1; 1367 Y = BM.slt(0) ? B1 : -B1; 1368 1369 // make sure gcd divides Delta 1370 R = Delta.srem(G); 1371 if (R != 0) 1372 return true; // gcd doesn't divide Delta, no dependence 1373 Q = Delta.sdiv(G); 1374 X *= Q; 1375 Y *= Q; 1376 return false; 1377 } 1378 1379 1380 static 1381 APInt floorOfQuotient(APInt A, APInt B) { 1382 APInt Q = A; // these need to be initialized 1383 APInt R = A; 1384 APInt::sdivrem(A, B, Q, R); 1385 if (R == 0) 1386 return Q; 1387 if ((A.sgt(0) && B.sgt(0)) || 1388 (A.slt(0) && B.slt(0))) 1389 return Q; 1390 else 1391 return Q - 1; 1392 } 1393 1394 1395 static 1396 APInt ceilingOfQuotient(APInt A, APInt B) { 1397 APInt Q = A; // these need to be initialized 1398 APInt R = A; 1399 APInt::sdivrem(A, B, Q, R); 1400 if (R == 0) 1401 return Q; 1402 if ((A.sgt(0) && B.sgt(0)) || 1403 (A.slt(0) && B.slt(0))) 1404 return Q + 1; 1405 else 1406 return Q; 1407 } 1408 1409 1410 static 1411 APInt maxAPInt(APInt A, APInt B) { 1412 return A.sgt(B) ? A : B; 1413 } 1414 1415 1416 static 1417 APInt minAPInt(APInt A, APInt B) { 1418 return A.slt(B) ? A : B; 1419 } 1420 1421 1422 // exactSIVtest - 1423 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i], 1424 // where i is an induction variable, c1 and c2 are loop invariant, and a1 1425 // and a2 are constant, we can solve it exactly using an algorithm developed 1426 // by Banerjee and Wolfe. See Section 2.5.3 in 1427 // 1428 // Optimizing Supercompilers for Supercomputers 1429 // Michael Wolfe 1430 // MIT Press, 1989 1431 // 1432 // It's slower than the specialized tests (strong SIV, weak-zero SIV, etc), 1433 // so use them if possible. They're also a bit better with symbolics and, 1434 // in the case of the strong SIV test, can compute Distances. 1435 // 1436 // Return true if dependence disproved. 1437 bool DependenceAnalysis::exactSIVtest(const SCEV *SrcCoeff, 1438 const SCEV *DstCoeff, 1439 const SCEV *SrcConst, 1440 const SCEV *DstConst, 1441 const Loop *CurLoop, 1442 unsigned Level, 1443 FullDependence &Result, 1444 Constraint &NewConstraint) const { 1445 DEBUG(dbgs() << "\tExact SIV test\n"); 1446 DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); 1447 DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); 1448 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1449 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1450 ++ExactSIVapplications; 1451 assert(0 < Level && Level <= CommonLevels && "Level out of range"); 1452 Level--; 1453 Result.Consistent = false; 1454 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1455 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1456 NewConstraint.setLine(SrcCoeff, SE->getNegativeSCEV(DstCoeff), 1457 Delta, CurLoop); 1458 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1459 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1460 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1461 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) 1462 return false; 1463 1464 // find gcd 1465 APInt G, X, Y; 1466 APInt AM = ConstSrcCoeff->getValue()->getValue(); 1467 APInt BM = ConstDstCoeff->getValue()->getValue(); 1468 unsigned Bits = AM.getBitWidth(); 1469 if (findGCD(Bits, AM, BM, ConstDelta->getValue()->getValue(), G, X, Y)) { 1470 // gcd doesn't divide Delta, no dependence 1471 ++ExactSIVindependence; 1472 ++ExactSIVsuccesses; 1473 return true; 1474 } 1475 1476 DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); 1477 1478 // since SCEV construction normalizes, LM = 0 1479 APInt UM(Bits, 1, true); 1480 bool UMvalid = false; 1481 // UM is perhaps unavailable, let's check 1482 if (const SCEVConstant *CUB = 1483 collectConstantUpperBound(CurLoop, Delta->getType())) { 1484 UM = CUB->getValue()->getValue(); 1485 DEBUG(dbgs() << "\t UM = " << UM << "\n"); 1486 UMvalid = true; 1487 } 1488 1489 APInt TU(APInt::getSignedMaxValue(Bits)); 1490 APInt TL(APInt::getSignedMinValue(Bits)); 1491 1492 // test(BM/G, LM-X) and test(-BM/G, X-UM) 1493 APInt TMUL = BM.sdiv(G); 1494 if (TMUL.sgt(0)) { 1495 TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL)); 1496 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1497 if (UMvalid) { 1498 TU = minAPInt(TU, floorOfQuotient(UM - X, TMUL)); 1499 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1500 } 1501 } 1502 else { 1503 TU = minAPInt(TU, floorOfQuotient(-X, TMUL)); 1504 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1505 if (UMvalid) { 1506 TL = maxAPInt(TL, ceilingOfQuotient(UM - X, TMUL)); 1507 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1508 } 1509 } 1510 1511 // test(AM/G, LM-Y) and test(-AM/G, Y-UM) 1512 TMUL = AM.sdiv(G); 1513 if (TMUL.sgt(0)) { 1514 TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL)); 1515 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1516 if (UMvalid) { 1517 TU = minAPInt(TU, floorOfQuotient(UM - Y, TMUL)); 1518 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1519 } 1520 } 1521 else { 1522 TU = minAPInt(TU, floorOfQuotient(-Y, TMUL)); 1523 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1524 if (UMvalid) { 1525 TL = maxAPInt(TL, ceilingOfQuotient(UM - Y, TMUL)); 1526 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1527 } 1528 } 1529 if (TL.sgt(TU)) { 1530 ++ExactSIVindependence; 1531 ++ExactSIVsuccesses; 1532 return true; 1533 } 1534 1535 // explore directions 1536 unsigned NewDirection = Dependence::DVEntry::NONE; 1537 1538 // less than 1539 APInt SaveTU(TU); // save these 1540 APInt SaveTL(TL); 1541 DEBUG(dbgs() << "\t exploring LT direction\n"); 1542 TMUL = AM - BM; 1543 if (TMUL.sgt(0)) { 1544 TL = maxAPInt(TL, ceilingOfQuotient(X - Y + 1, TMUL)); 1545 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1546 } 1547 else { 1548 TU = minAPInt(TU, floorOfQuotient(X - Y + 1, TMUL)); 1549 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1550 } 1551 if (TL.sle(TU)) { 1552 NewDirection |= Dependence::DVEntry::LT; 1553 ++ExactSIVsuccesses; 1554 } 1555 1556 // equal 1557 TU = SaveTU; // restore 1558 TL = SaveTL; 1559 DEBUG(dbgs() << "\t exploring EQ direction\n"); 1560 if (TMUL.sgt(0)) { 1561 TL = maxAPInt(TL, ceilingOfQuotient(X - Y, TMUL)); 1562 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1563 } 1564 else { 1565 TU = minAPInt(TU, floorOfQuotient(X - Y, TMUL)); 1566 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1567 } 1568 TMUL = BM - AM; 1569 if (TMUL.sgt(0)) { 1570 TL = maxAPInt(TL, ceilingOfQuotient(Y - X, TMUL)); 1571 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1572 } 1573 else { 1574 TU = minAPInt(TU, floorOfQuotient(Y - X, TMUL)); 1575 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1576 } 1577 if (TL.sle(TU)) { 1578 NewDirection |= Dependence::DVEntry::EQ; 1579 ++ExactSIVsuccesses; 1580 } 1581 1582 // greater than 1583 TU = SaveTU; // restore 1584 TL = SaveTL; 1585 DEBUG(dbgs() << "\t exploring GT direction\n"); 1586 if (TMUL.sgt(0)) { 1587 TL = maxAPInt(TL, ceilingOfQuotient(Y - X + 1, TMUL)); 1588 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1589 } 1590 else { 1591 TU = minAPInt(TU, floorOfQuotient(Y - X + 1, TMUL)); 1592 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1593 } 1594 if (TL.sle(TU)) { 1595 NewDirection |= Dependence::DVEntry::GT; 1596 ++ExactSIVsuccesses; 1597 } 1598 1599 // finished 1600 Result.DV[Level].Direction &= NewDirection; 1601 if (Result.DV[Level].Direction == Dependence::DVEntry::NONE) 1602 ++ExactSIVindependence; 1603 return Result.DV[Level].Direction == Dependence::DVEntry::NONE; 1604 } 1605 1606 1607 1608 // Return true if the divisor evenly divides the dividend. 1609 static 1610 bool isRemainderZero(const SCEVConstant *Dividend, 1611 const SCEVConstant *Divisor) { 1612 APInt ConstDividend = Dividend->getValue()->getValue(); 1613 APInt ConstDivisor = Divisor->getValue()->getValue(); 1614 return ConstDividend.srem(ConstDivisor) == 0; 1615 } 1616 1617 1618 // weakZeroSrcSIVtest - 1619 // From the paper, Practical Dependence Testing, Section 4.2.2 1620 // 1621 // When we have a pair of subscripts of the form [c1] and [c2 + a*i], 1622 // where i is an induction variable, c1 and c2 are loop invariant, 1623 // and a is a constant, we can solve it exactly using the 1624 // Weak-Zero SIV test. 1625 // 1626 // Given 1627 // 1628 // c1 = c2 + a*i 1629 // 1630 // we get 1631 // 1632 // (c1 - c2)/a = i 1633 // 1634 // If i is not an integer, there's no dependence. 1635 // If i < 0 or > UB, there's no dependence. 1636 // If i = 0, the direction is <= and peeling the 1637 // 1st iteration will break the dependence. 1638 // If i = UB, the direction is >= and peeling the 1639 // last iteration will break the dependence. 1640 // Otherwise, the direction is *. 1641 // 1642 // Can prove independence. Failing that, we can sometimes refine 1643 // the directions. Can sometimes show that first or last 1644 // iteration carries all the dependences (so worth peeling). 1645 // 1646 // (see also weakZeroDstSIVtest) 1647 // 1648 // Return true if dependence disproved. 1649 bool DependenceAnalysis::weakZeroSrcSIVtest(const SCEV *DstCoeff, 1650 const SCEV *SrcConst, 1651 const SCEV *DstConst, 1652 const Loop *CurLoop, 1653 unsigned Level, 1654 FullDependence &Result, 1655 Constraint &NewConstraint) const { 1656 // For the WeakSIV test, it's possible the loop isn't common to 1657 // the Src and Dst loops. If it isn't, then there's no need to 1658 // record a direction. 1659 DEBUG(dbgs() << "\tWeak-Zero (src) SIV test\n"); 1660 DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << "\n"); 1661 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1662 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1663 ++WeakZeroSIVapplications; 1664 assert(0 < Level && Level <= MaxLevels && "Level out of range"); 1665 Level--; 1666 Result.Consistent = false; 1667 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); 1668 NewConstraint.setLine(SE->getConstant(Delta->getType(), 0), 1669 DstCoeff, Delta, CurLoop); 1670 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1671 if (isKnownPredicate(CmpInst::ICMP_EQ, SrcConst, DstConst)) { 1672 if (Level < CommonLevels) { 1673 Result.DV[Level].Direction &= Dependence::DVEntry::LE; 1674 Result.DV[Level].PeelFirst = true; 1675 ++WeakZeroSIVsuccesses; 1676 } 1677 return false; // dependences caused by first iteration 1678 } 1679 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1680 if (!ConstCoeff) 1681 return false; 1682 const SCEV *AbsCoeff = 1683 SE->isKnownNegative(ConstCoeff) ? 1684 SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; 1685 const SCEV *NewDelta = 1686 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta; 1687 1688 // check that Delta/SrcCoeff < iteration count 1689 // really check NewDelta < count*AbsCoeff 1690 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1691 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1692 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); 1693 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { 1694 ++WeakZeroSIVindependence; 1695 ++WeakZeroSIVsuccesses; 1696 return true; 1697 } 1698 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { 1699 // dependences caused by last iteration 1700 if (Level < CommonLevels) { 1701 Result.DV[Level].Direction &= Dependence::DVEntry::GE; 1702 Result.DV[Level].PeelLast = true; 1703 ++WeakZeroSIVsuccesses; 1704 } 1705 return false; 1706 } 1707 } 1708 1709 // check that Delta/SrcCoeff >= 0 1710 // really check that NewDelta >= 0 1711 if (SE->isKnownNegative(NewDelta)) { 1712 // No dependence, newDelta < 0 1713 ++WeakZeroSIVindependence; 1714 ++WeakZeroSIVsuccesses; 1715 return true; 1716 } 1717 1718 // if SrcCoeff doesn't divide Delta, then no dependence 1719 if (isa<SCEVConstant>(Delta) && 1720 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { 1721 ++WeakZeroSIVindependence; 1722 ++WeakZeroSIVsuccesses; 1723 return true; 1724 } 1725 return false; 1726 } 1727 1728 1729 // weakZeroDstSIVtest - 1730 // From the paper, Practical Dependence Testing, Section 4.2.2 1731 // 1732 // When we have a pair of subscripts of the form [c1 + a*i] and [c2], 1733 // where i is an induction variable, c1 and c2 are loop invariant, 1734 // and a is a constant, we can solve it exactly using the 1735 // Weak-Zero SIV test. 1736 // 1737 // Given 1738 // 1739 // c1 + a*i = c2 1740 // 1741 // we get 1742 // 1743 // i = (c2 - c1)/a 1744 // 1745 // If i is not an integer, there's no dependence. 1746 // If i < 0 or > UB, there's no dependence. 1747 // If i = 0, the direction is <= and peeling the 1748 // 1st iteration will break the dependence. 1749 // If i = UB, the direction is >= and peeling the 1750 // last iteration will break the dependence. 1751 // Otherwise, the direction is *. 1752 // 1753 // Can prove independence. Failing that, we can sometimes refine 1754 // the directions. Can sometimes show that first or last 1755 // iteration carries all the dependences (so worth peeling). 1756 // 1757 // (see also weakZeroSrcSIVtest) 1758 // 1759 // Return true if dependence disproved. 1760 bool DependenceAnalysis::weakZeroDstSIVtest(const SCEV *SrcCoeff, 1761 const SCEV *SrcConst, 1762 const SCEV *DstConst, 1763 const Loop *CurLoop, 1764 unsigned Level, 1765 FullDependence &Result, 1766 Constraint &NewConstraint) const { 1767 // For the WeakSIV test, it's possible the loop isn't common to the 1768 // Src and Dst loops. If it isn't, then there's no need to record a direction. 1769 DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n"); 1770 DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << "\n"); 1771 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1772 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1773 ++WeakZeroSIVapplications; 1774 assert(0 < Level && Level <= SrcLevels && "Level out of range"); 1775 Level--; 1776 Result.Consistent = false; 1777 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1778 NewConstraint.setLine(SrcCoeff, SE->getConstant(Delta->getType(), 0), 1779 Delta, CurLoop); 1780 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1781 if (isKnownPredicate(CmpInst::ICMP_EQ, DstConst, SrcConst)) { 1782 if (Level < CommonLevels) { 1783 Result.DV[Level].Direction &= Dependence::DVEntry::LE; 1784 Result.DV[Level].PeelFirst = true; 1785 ++WeakZeroSIVsuccesses; 1786 } 1787 return false; // dependences caused by first iteration 1788 } 1789 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1790 if (!ConstCoeff) 1791 return false; 1792 const SCEV *AbsCoeff = 1793 SE->isKnownNegative(ConstCoeff) ? 1794 SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; 1795 const SCEV *NewDelta = 1796 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta; 1797 1798 // check that Delta/SrcCoeff < iteration count 1799 // really check NewDelta < count*AbsCoeff 1800 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1801 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1802 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); 1803 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { 1804 ++WeakZeroSIVindependence; 1805 ++WeakZeroSIVsuccesses; 1806 return true; 1807 } 1808 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { 1809 // dependences caused by last iteration 1810 if (Level < CommonLevels) { 1811 Result.DV[Level].Direction &= Dependence::DVEntry::GE; 1812 Result.DV[Level].PeelLast = true; 1813 ++WeakZeroSIVsuccesses; 1814 } 1815 return false; 1816 } 1817 } 1818 1819 // check that Delta/SrcCoeff >= 0 1820 // really check that NewDelta >= 0 1821 if (SE->isKnownNegative(NewDelta)) { 1822 // No dependence, newDelta < 0 1823 ++WeakZeroSIVindependence; 1824 ++WeakZeroSIVsuccesses; 1825 return true; 1826 } 1827 1828 // if SrcCoeff doesn't divide Delta, then no dependence 1829 if (isa<SCEVConstant>(Delta) && 1830 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { 1831 ++WeakZeroSIVindependence; 1832 ++WeakZeroSIVsuccesses; 1833 return true; 1834 } 1835 return false; 1836 } 1837 1838 1839 // exactRDIVtest - Tests the RDIV subscript pair for dependence. 1840 // Things of the form [c1 + a*i] and [c2 + b*j], 1841 // where i and j are induction variable, c1 and c2 are loop invariant, 1842 // and a and b are constants. 1843 // Returns true if any possible dependence is disproved. 1844 // Marks the result as inconsistent. 1845 // Works in some cases that symbolicRDIVtest doesn't, and vice versa. 1846 bool DependenceAnalysis::exactRDIVtest(const SCEV *SrcCoeff, 1847 const SCEV *DstCoeff, 1848 const SCEV *SrcConst, 1849 const SCEV *DstConst, 1850 const Loop *SrcLoop, 1851 const Loop *DstLoop, 1852 FullDependence &Result) const { 1853 DEBUG(dbgs() << "\tExact RDIV test\n"); 1854 DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); 1855 DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); 1856 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1857 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1858 ++ExactRDIVapplications; 1859 Result.Consistent = false; 1860 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1861 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1862 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1863 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1864 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1865 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) 1866 return false; 1867 1868 // find gcd 1869 APInt G, X, Y; 1870 APInt AM = ConstSrcCoeff->getValue()->getValue(); 1871 APInt BM = ConstDstCoeff->getValue()->getValue(); 1872 unsigned Bits = AM.getBitWidth(); 1873 if (findGCD(Bits, AM, BM, ConstDelta->getValue()->getValue(), G, X, Y)) { 1874 // gcd doesn't divide Delta, no dependence 1875 ++ExactRDIVindependence; 1876 return true; 1877 } 1878 1879 DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); 1880 1881 // since SCEV construction seems to normalize, LM = 0 1882 APInt SrcUM(Bits, 1, true); 1883 bool SrcUMvalid = false; 1884 // SrcUM is perhaps unavailable, let's check 1885 if (const SCEVConstant *UpperBound = 1886 collectConstantUpperBound(SrcLoop, Delta->getType())) { 1887 SrcUM = UpperBound->getValue()->getValue(); 1888 DEBUG(dbgs() << "\t SrcUM = " << SrcUM << "\n"); 1889 SrcUMvalid = true; 1890 } 1891 1892 APInt DstUM(Bits, 1, true); 1893 bool DstUMvalid = false; 1894 // UM is perhaps unavailable, let's check 1895 if (const SCEVConstant *UpperBound = 1896 collectConstantUpperBound(DstLoop, Delta->getType())) { 1897 DstUM = UpperBound->getValue()->getValue(); 1898 DEBUG(dbgs() << "\t DstUM = " << DstUM << "\n"); 1899 DstUMvalid = true; 1900 } 1901 1902 APInt TU(APInt::getSignedMaxValue(Bits)); 1903 APInt TL(APInt::getSignedMinValue(Bits)); 1904 1905 // test(BM/G, LM-X) and test(-BM/G, X-UM) 1906 APInt TMUL = BM.sdiv(G); 1907 if (TMUL.sgt(0)) { 1908 TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL)); 1909 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1910 if (SrcUMvalid) { 1911 TU = minAPInt(TU, floorOfQuotient(SrcUM - X, TMUL)); 1912 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1913 } 1914 } 1915 else { 1916 TU = minAPInt(TU, floorOfQuotient(-X, TMUL)); 1917 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1918 if (SrcUMvalid) { 1919 TL = maxAPInt(TL, ceilingOfQuotient(SrcUM - X, TMUL)); 1920 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1921 } 1922 } 1923 1924 // test(AM/G, LM-Y) and test(-AM/G, Y-UM) 1925 TMUL = AM.sdiv(G); 1926 if (TMUL.sgt(0)) { 1927 TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL)); 1928 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1929 if (DstUMvalid) { 1930 TU = minAPInt(TU, floorOfQuotient(DstUM - Y, TMUL)); 1931 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1932 } 1933 } 1934 else { 1935 TU = minAPInt(TU, floorOfQuotient(-Y, TMUL)); 1936 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1937 if (DstUMvalid) { 1938 TL = maxAPInt(TL, ceilingOfQuotient(DstUM - Y, TMUL)); 1939 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1940 } 1941 } 1942 if (TL.sgt(TU)) 1943 ++ExactRDIVindependence; 1944 return TL.sgt(TU); 1945 } 1946 1947 1948 // symbolicRDIVtest - 1949 // In Section 4.5 of the Practical Dependence Testing paper,the authors 1950 // introduce a special case of Banerjee's Inequalities (also called the 1951 // Extreme-Value Test) that can handle some of the SIV and RDIV cases, 1952 // particularly cases with symbolics. Since it's only able to disprove 1953 // dependence (not compute distances or directions), we'll use it as a 1954 // fall back for the other tests. 1955 // 1956 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] 1957 // where i and j are induction variables and c1 and c2 are loop invariants, 1958 // we can use the symbolic tests to disprove some dependences, serving as a 1959 // backup for the RDIV test. Note that i and j can be the same variable, 1960 // letting this test serve as a backup for the various SIV tests. 1961 // 1962 // For a dependence to exist, c1 + a1*i must equal c2 + a2*j for some 1963 // 0 <= i <= N1 and some 0 <= j <= N2, where N1 and N2 are the (normalized) 1964 // loop bounds for the i and j loops, respectively. So, ... 1965 // 1966 // c1 + a1*i = c2 + a2*j 1967 // a1*i - a2*j = c2 - c1 1968 // 1969 // To test for a dependence, we compute c2 - c1 and make sure it's in the 1970 // range of the maximum and minimum possible values of a1*i - a2*j. 1971 // Considering the signs of a1 and a2, we have 4 possible cases: 1972 // 1973 // 1) If a1 >= 0 and a2 >= 0, then 1974 // a1*0 - a2*N2 <= c2 - c1 <= a1*N1 - a2*0 1975 // -a2*N2 <= c2 - c1 <= a1*N1 1976 // 1977 // 2) If a1 >= 0 and a2 <= 0, then 1978 // a1*0 - a2*0 <= c2 - c1 <= a1*N1 - a2*N2 1979 // 0 <= c2 - c1 <= a1*N1 - a2*N2 1980 // 1981 // 3) If a1 <= 0 and a2 >= 0, then 1982 // a1*N1 - a2*N2 <= c2 - c1 <= a1*0 - a2*0 1983 // a1*N1 - a2*N2 <= c2 - c1 <= 0 1984 // 1985 // 4) If a1 <= 0 and a2 <= 0, then 1986 // a1*N1 - a2*0 <= c2 - c1 <= a1*0 - a2*N2 1987 // a1*N1 <= c2 - c1 <= -a2*N2 1988 // 1989 // return true if dependence disproved 1990 bool DependenceAnalysis::symbolicRDIVtest(const SCEV *A1, 1991 const SCEV *A2, 1992 const SCEV *C1, 1993 const SCEV *C2, 1994 const Loop *Loop1, 1995 const Loop *Loop2) const { 1996 ++SymbolicRDIVapplications; 1997 DEBUG(dbgs() << "\ttry symbolic RDIV test\n"); 1998 DEBUG(dbgs() << "\t A1 = " << *A1); 1999 DEBUG(dbgs() << ", type = " << *A1->getType() << "\n"); 2000 DEBUG(dbgs() << "\t A2 = " << *A2 << "\n"); 2001 DEBUG(dbgs() << "\t C1 = " << *C1 << "\n"); 2002 DEBUG(dbgs() << "\t C2 = " << *C2 << "\n"); 2003 const SCEV *N1 = collectUpperBound(Loop1, A1->getType()); 2004 const SCEV *N2 = collectUpperBound(Loop2, A1->getType()); 2005 DEBUG(if (N1) dbgs() << "\t N1 = " << *N1 << "\n"); 2006 DEBUG(if (N2) dbgs() << "\t N2 = " << *N2 << "\n"); 2007 const SCEV *C2_C1 = SE->getMinusSCEV(C2, C1); 2008 const SCEV *C1_C2 = SE->getMinusSCEV(C1, C2); 2009 DEBUG(dbgs() << "\t C2 - C1 = " << *C2_C1 << "\n"); 2010 DEBUG(dbgs() << "\t C1 - C2 = " << *C1_C2 << "\n"); 2011 if (SE->isKnownNonNegative(A1)) { 2012 if (SE->isKnownNonNegative(A2)) { 2013 // A1 >= 0 && A2 >= 0 2014 if (N1) { 2015 // make sure that c2 - c1 <= a1*N1 2016 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2017 DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); 2018 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1)) { 2019 ++SymbolicRDIVindependence; 2020 return true; 2021 } 2022 } 2023 if (N2) { 2024 // make sure that -a2*N2 <= c2 - c1, or a2*N2 >= c1 - c2 2025 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2026 DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); 2027 if (isKnownPredicate(CmpInst::ICMP_SLT, A2N2, C1_C2)) { 2028 ++SymbolicRDIVindependence; 2029 return true; 2030 } 2031 } 2032 } 2033 else if (SE->isKnownNonPositive(A2)) { 2034 // a1 >= 0 && a2 <= 0 2035 if (N1 && N2) { 2036 // make sure that c2 - c1 <= a1*N1 - a2*N2 2037 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2038 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2039 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); 2040 DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); 2041 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1_A2N2)) { 2042 ++SymbolicRDIVindependence; 2043 return true; 2044 } 2045 } 2046 // make sure that 0 <= c2 - c1 2047 if (SE->isKnownNegative(C2_C1)) { 2048 ++SymbolicRDIVindependence; 2049 return true; 2050 } 2051 } 2052 } 2053 else if (SE->isKnownNonPositive(A1)) { 2054 if (SE->isKnownNonNegative(A2)) { 2055 // a1 <= 0 && a2 >= 0 2056 if (N1 && N2) { 2057 // make sure that a1*N1 - a2*N2 <= c2 - c1 2058 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2059 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2060 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); 2061 DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); 2062 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1_A2N2, C2_C1)) { 2063 ++SymbolicRDIVindependence; 2064 return true; 2065 } 2066 } 2067 // make sure that c2 - c1 <= 0 2068 if (SE->isKnownPositive(C2_C1)) { 2069 ++SymbolicRDIVindependence; 2070 return true; 2071 } 2072 } 2073 else if (SE->isKnownNonPositive(A2)) { 2074 // a1 <= 0 && a2 <= 0 2075 if (N1) { 2076 // make sure that a1*N1 <= c2 - c1 2077 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2078 DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); 2079 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1, C2_C1)) { 2080 ++SymbolicRDIVindependence; 2081 return true; 2082 } 2083 } 2084 if (N2) { 2085 // make sure that c2 - c1 <= -a2*N2, or c1 - c2 >= a2*N2 2086 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2087 DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); 2088 if (isKnownPredicate(CmpInst::ICMP_SLT, C1_C2, A2N2)) { 2089 ++SymbolicRDIVindependence; 2090 return true; 2091 } 2092 } 2093 } 2094 } 2095 return false; 2096 } 2097 2098 2099 // testSIV - 2100 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 - a2*i] 2101 // where i is an induction variable, c1 and c2 are loop invariant, and a1 and 2102 // a2 are constant, we attack it with an SIV test. While they can all be 2103 // solved with the Exact SIV test, it's worthwhile to use simpler tests when 2104 // they apply; they're cheaper and sometimes more precise. 2105 // 2106 // Return true if dependence disproved. 2107 bool DependenceAnalysis::testSIV(const SCEV *Src, 2108 const SCEV *Dst, 2109 unsigned &Level, 2110 FullDependence &Result, 2111 Constraint &NewConstraint, 2112 const SCEV *&SplitIter) const { 2113 DEBUG(dbgs() << " src = " << *Src << "\n"); 2114 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2115 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); 2116 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); 2117 if (SrcAddRec && DstAddRec) { 2118 const SCEV *SrcConst = SrcAddRec->getStart(); 2119 const SCEV *DstConst = DstAddRec->getStart(); 2120 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2121 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); 2122 const Loop *CurLoop = SrcAddRec->getLoop(); 2123 assert(CurLoop == DstAddRec->getLoop() && 2124 "both loops in SIV should be same"); 2125 Level = mapSrcLoop(CurLoop); 2126 bool disproven; 2127 if (SrcCoeff == DstCoeff) 2128 disproven = strongSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2129 Level, Result, NewConstraint); 2130 else if (SrcCoeff == SE->getNegativeSCEV(DstCoeff)) 2131 disproven = weakCrossingSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2132 Level, Result, NewConstraint, SplitIter); 2133 else 2134 disproven = exactSIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, 2135 Level, Result, NewConstraint); 2136 return disproven || 2137 gcdMIVtest(Src, Dst, Result) || 2138 symbolicRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, CurLoop); 2139 } 2140 if (SrcAddRec) { 2141 const SCEV *SrcConst = SrcAddRec->getStart(); 2142 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2143 const SCEV *DstConst = Dst; 2144 const Loop *CurLoop = SrcAddRec->getLoop(); 2145 Level = mapSrcLoop(CurLoop); 2146 return weakZeroDstSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2147 Level, Result, NewConstraint) || 2148 gcdMIVtest(Src, Dst, Result); 2149 } 2150 if (DstAddRec) { 2151 const SCEV *DstConst = DstAddRec->getStart(); 2152 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); 2153 const SCEV *SrcConst = Src; 2154 const Loop *CurLoop = DstAddRec->getLoop(); 2155 Level = mapDstLoop(CurLoop); 2156 return weakZeroSrcSIVtest(DstCoeff, SrcConst, DstConst, 2157 CurLoop, Level, Result, NewConstraint) || 2158 gcdMIVtest(Src, Dst, Result); 2159 } 2160 llvm_unreachable("SIV test expected at least one AddRec"); 2161 return false; 2162 } 2163 2164 2165 // testRDIV - 2166 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] 2167 // where i and j are induction variables, c1 and c2 are loop invariant, 2168 // and a1 and a2 are constant, we can solve it exactly with an easy adaptation 2169 // of the Exact SIV test, the Restricted Double Index Variable (RDIV) test. 2170 // It doesn't make sense to talk about distance or direction in this case, 2171 // so there's no point in making special versions of the Strong SIV test or 2172 // the Weak-crossing SIV test. 2173 // 2174 // With minor algebra, this test can also be used for things like 2175 // [c1 + a1*i + a2*j][c2]. 2176 // 2177 // Return true if dependence disproved. 2178 bool DependenceAnalysis::testRDIV(const SCEV *Src, 2179 const SCEV *Dst, 2180 FullDependence &Result) const { 2181 // we have 3 possible situations here: 2182 // 1) [a*i + b] and [c*j + d] 2183 // 2) [a*i + c*j + b] and [d] 2184 // 3) [b] and [a*i + c*j + d] 2185 // We need to find what we've got and get organized 2186 2187 const SCEV *SrcConst, *DstConst; 2188 const SCEV *SrcCoeff, *DstCoeff; 2189 const Loop *SrcLoop, *DstLoop; 2190 2191 DEBUG(dbgs() << " src = " << *Src << "\n"); 2192 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2193 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); 2194 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); 2195 if (SrcAddRec && DstAddRec) { 2196 SrcConst = SrcAddRec->getStart(); 2197 SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2198 SrcLoop = SrcAddRec->getLoop(); 2199 DstConst = DstAddRec->getStart(); 2200 DstCoeff = DstAddRec->getStepRecurrence(*SE); 2201 DstLoop = DstAddRec->getLoop(); 2202 } 2203 else if (SrcAddRec) { 2204 if (const SCEVAddRecExpr *tmpAddRec = 2205 dyn_cast<SCEVAddRecExpr>(SrcAddRec->getStart())) { 2206 SrcConst = tmpAddRec->getStart(); 2207 SrcCoeff = tmpAddRec->getStepRecurrence(*SE); 2208 SrcLoop = tmpAddRec->getLoop(); 2209 DstConst = Dst; 2210 DstCoeff = SE->getNegativeSCEV(SrcAddRec->getStepRecurrence(*SE)); 2211 DstLoop = SrcAddRec->getLoop(); 2212 } 2213 else 2214 llvm_unreachable("RDIV reached by surprising SCEVs"); 2215 } 2216 else if (DstAddRec) { 2217 if (const SCEVAddRecExpr *tmpAddRec = 2218 dyn_cast<SCEVAddRecExpr>(DstAddRec->getStart())) { 2219 DstConst = tmpAddRec->getStart(); 2220 DstCoeff = tmpAddRec->getStepRecurrence(*SE); 2221 DstLoop = tmpAddRec->getLoop(); 2222 SrcConst = Src; 2223 SrcCoeff = SE->getNegativeSCEV(DstAddRec->getStepRecurrence(*SE)); 2224 SrcLoop = DstAddRec->getLoop(); 2225 } 2226 else 2227 llvm_unreachable("RDIV reached by surprising SCEVs"); 2228 } 2229 else 2230 llvm_unreachable("RDIV expected at least one AddRec"); 2231 return exactRDIVtest(SrcCoeff, DstCoeff, 2232 SrcConst, DstConst, 2233 SrcLoop, DstLoop, 2234 Result) || 2235 gcdMIVtest(Src, Dst, Result) || 2236 symbolicRDIVtest(SrcCoeff, DstCoeff, 2237 SrcConst, DstConst, 2238 SrcLoop, DstLoop); 2239 } 2240 2241 2242 // Tests the single-subscript MIV pair (Src and Dst) for dependence. 2243 // Return true if dependence disproved. 2244 // Can sometimes refine direction vectors. 2245 bool DependenceAnalysis::testMIV(const SCEV *Src, 2246 const SCEV *Dst, 2247 const SmallBitVector &Loops, 2248 FullDependence &Result) const { 2249 DEBUG(dbgs() << " src = " << *Src << "\n"); 2250 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2251 Result.Consistent = false; 2252 return gcdMIVtest(Src, Dst, Result) || 2253 banerjeeMIVtest(Src, Dst, Loops, Result); 2254 } 2255 2256 2257 // Given a product, e.g., 10*X*Y, returns the first constant operand, 2258 // in this case 10. If there is no constant part, returns NULL. 2259 static 2260 const SCEVConstant *getConstantPart(const SCEVMulExpr *Product) { 2261 for (unsigned Op = 0, Ops = Product->getNumOperands(); Op < Ops; Op++) { 2262 if (const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Product->getOperand(Op))) 2263 return Constant; 2264 } 2265 return nullptr; 2266 } 2267 2268 2269 //===----------------------------------------------------------------------===// 2270 // gcdMIVtest - 2271 // Tests an MIV subscript pair for dependence. 2272 // Returns true if any possible dependence is disproved. 2273 // Marks the result as inconsistent. 2274 // Can sometimes disprove the equal direction for 1 or more loops, 2275 // as discussed in Michael Wolfe's book, 2276 // High Performance Compilers for Parallel Computing, page 235. 2277 // 2278 // We spend some effort (code!) to handle cases like 2279 // [10*i + 5*N*j + 15*M + 6], where i and j are induction variables, 2280 // but M and N are just loop-invariant variables. 2281 // This should help us handle linearized subscripts; 2282 // also makes this test a useful backup to the various SIV tests. 2283 // 2284 // It occurs to me that the presence of loop-invariant variables 2285 // changes the nature of the test from "greatest common divisor" 2286 // to "a common divisor". 2287 bool DependenceAnalysis::gcdMIVtest(const SCEV *Src, 2288 const SCEV *Dst, 2289 FullDependence &Result) const { 2290 DEBUG(dbgs() << "starting gcd\n"); 2291 ++GCDapplications; 2292 unsigned BitWidth = SE->getTypeSizeInBits(Src->getType()); 2293 APInt RunningGCD = APInt::getNullValue(BitWidth); 2294 2295 // Examine Src coefficients. 2296 // Compute running GCD and record source constant. 2297 // Because we're looking for the constant at the end of the chain, 2298 // we can't quit the loop just because the GCD == 1. 2299 const SCEV *Coefficients = Src; 2300 while (const SCEVAddRecExpr *AddRec = 2301 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2302 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2303 const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Coeff); 2304 if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff)) 2305 // If the coefficient is the product of a constant and other stuff, 2306 // we can use the constant in the GCD computation. 2307 Constant = getConstantPart(Product); 2308 if (!Constant) 2309 return false; 2310 APInt ConstCoeff = Constant->getValue()->getValue(); 2311 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2312 Coefficients = AddRec->getStart(); 2313 } 2314 const SCEV *SrcConst = Coefficients; 2315 2316 // Examine Dst coefficients. 2317 // Compute running GCD and record destination constant. 2318 // Because we're looking for the constant at the end of the chain, 2319 // we can't quit the loop just because the GCD == 1. 2320 Coefficients = Dst; 2321 while (const SCEVAddRecExpr *AddRec = 2322 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2323 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2324 const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Coeff); 2325 if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff)) 2326 // If the coefficient is the product of a constant and other stuff, 2327 // we can use the constant in the GCD computation. 2328 Constant = getConstantPart(Product); 2329 if (!Constant) 2330 return false; 2331 APInt ConstCoeff = Constant->getValue()->getValue(); 2332 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2333 Coefficients = AddRec->getStart(); 2334 } 2335 const SCEV *DstConst = Coefficients; 2336 2337 APInt ExtraGCD = APInt::getNullValue(BitWidth); 2338 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 2339 DEBUG(dbgs() << " Delta = " << *Delta << "\n"); 2340 const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Delta); 2341 if (const SCEVAddExpr *Sum = dyn_cast<SCEVAddExpr>(Delta)) { 2342 // If Delta is a sum of products, we may be able to make further progress. 2343 for (unsigned Op = 0, Ops = Sum->getNumOperands(); Op < Ops; Op++) { 2344 const SCEV *Operand = Sum->getOperand(Op); 2345 if (isa<SCEVConstant>(Operand)) { 2346 assert(!Constant && "Surprised to find multiple constants"); 2347 Constant = cast<SCEVConstant>(Operand); 2348 } 2349 else if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Operand)) { 2350 // Search for constant operand to participate in GCD; 2351 // If none found; return false. 2352 const SCEVConstant *ConstOp = getConstantPart(Product); 2353 if (!ConstOp) 2354 return false; 2355 APInt ConstOpValue = ConstOp->getValue()->getValue(); 2356 ExtraGCD = APIntOps::GreatestCommonDivisor(ExtraGCD, 2357 ConstOpValue.abs()); 2358 } 2359 else 2360 return false; 2361 } 2362 } 2363 if (!Constant) 2364 return false; 2365 APInt ConstDelta = cast<SCEVConstant>(Constant)->getValue()->getValue(); 2366 DEBUG(dbgs() << " ConstDelta = " << ConstDelta << "\n"); 2367 if (ConstDelta == 0) 2368 return false; 2369 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ExtraGCD); 2370 DEBUG(dbgs() << " RunningGCD = " << RunningGCD << "\n"); 2371 APInt Remainder = ConstDelta.srem(RunningGCD); 2372 if (Remainder != 0) { 2373 ++GCDindependence; 2374 return true; 2375 } 2376 2377 // Try to disprove equal directions. 2378 // For example, given a subscript pair [3*i + 2*j] and [i' + 2*j' - 1], 2379 // the code above can't disprove the dependence because the GCD = 1. 2380 // So we consider what happen if i = i' and what happens if j = j'. 2381 // If i = i', we can simplify the subscript to [2*i + 2*j] and [2*j' - 1], 2382 // which is infeasible, so we can disallow the = direction for the i level. 2383 // Setting j = j' doesn't help matters, so we end up with a direction vector 2384 // of [<>, *] 2385 // 2386 // Given A[5*i + 10*j*M + 9*M*N] and A[15*i + 20*j*M - 21*N*M + 5], 2387 // we need to remember that the constant part is 5 and the RunningGCD should 2388 // be initialized to ExtraGCD = 30. 2389 DEBUG(dbgs() << " ExtraGCD = " << ExtraGCD << '\n'); 2390 2391 bool Improved = false; 2392 Coefficients = Src; 2393 while (const SCEVAddRecExpr *AddRec = 2394 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2395 Coefficients = AddRec->getStart(); 2396 const Loop *CurLoop = AddRec->getLoop(); 2397 RunningGCD = ExtraGCD; 2398 const SCEV *SrcCoeff = AddRec->getStepRecurrence(*SE); 2399 const SCEV *DstCoeff = SE->getMinusSCEV(SrcCoeff, SrcCoeff); 2400 const SCEV *Inner = Src; 2401 while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { 2402 AddRec = cast<SCEVAddRecExpr>(Inner); 2403 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2404 if (CurLoop == AddRec->getLoop()) 2405 ; // SrcCoeff == Coeff 2406 else { 2407 if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff)) 2408 // If the coefficient is the product of a constant and other stuff, 2409 // we can use the constant in the GCD computation. 2410 Constant = getConstantPart(Product); 2411 else 2412 Constant = cast<SCEVConstant>(Coeff); 2413 APInt ConstCoeff = Constant->getValue()->getValue(); 2414 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2415 } 2416 Inner = AddRec->getStart(); 2417 } 2418 Inner = Dst; 2419 while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { 2420 AddRec = cast<SCEVAddRecExpr>(Inner); 2421 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2422 if (CurLoop == AddRec->getLoop()) 2423 DstCoeff = Coeff; 2424 else { 2425 if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff)) 2426 // If the coefficient is the product of a constant and other stuff, 2427 // we can use the constant in the GCD computation. 2428 Constant = getConstantPart(Product); 2429 else 2430 Constant = cast<SCEVConstant>(Coeff); 2431 APInt ConstCoeff = Constant->getValue()->getValue(); 2432 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2433 } 2434 Inner = AddRec->getStart(); 2435 } 2436 Delta = SE->getMinusSCEV(SrcCoeff, DstCoeff); 2437 if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Delta)) 2438 // If the coefficient is the product of a constant and other stuff, 2439 // we can use the constant in the GCD computation. 2440 Constant = getConstantPart(Product); 2441 else if (isa<SCEVConstant>(Delta)) 2442 Constant = cast<SCEVConstant>(Delta); 2443 else { 2444 // The difference of the two coefficients might not be a product 2445 // or constant, in which case we give up on this direction. 2446 continue; 2447 } 2448 APInt ConstCoeff = Constant->getValue()->getValue(); 2449 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2450 DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n"); 2451 if (RunningGCD != 0) { 2452 Remainder = ConstDelta.srem(RunningGCD); 2453 DEBUG(dbgs() << "\tRemainder = " << Remainder << "\n"); 2454 if (Remainder != 0) { 2455 unsigned Level = mapSrcLoop(CurLoop); 2456 Result.DV[Level - 1].Direction &= unsigned(~Dependence::DVEntry::EQ); 2457 Improved = true; 2458 } 2459 } 2460 } 2461 if (Improved) 2462 ++GCDsuccesses; 2463 DEBUG(dbgs() << "all done\n"); 2464 return false; 2465 } 2466 2467 2468 //===----------------------------------------------------------------------===// 2469 // banerjeeMIVtest - 2470 // Use Banerjee's Inequalities to test an MIV subscript pair. 2471 // (Wolfe, in the race-car book, calls this the Extreme Value Test.) 2472 // Generally follows the discussion in Section 2.5.2 of 2473 // 2474 // Optimizing Supercompilers for Supercomputers 2475 // Michael Wolfe 2476 // 2477 // The inequalities given on page 25 are simplified in that loops are 2478 // normalized so that the lower bound is always 0 and the stride is always 1. 2479 // For example, Wolfe gives 2480 // 2481 // LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2482 // 2483 // where A_k is the coefficient of the kth index in the source subscript, 2484 // B_k is the coefficient of the kth index in the destination subscript, 2485 // U_k is the upper bound of the kth index, L_k is the lower bound of the Kth 2486 // index, and N_k is the stride of the kth index. Since all loops are normalized 2487 // by the SCEV package, N_k = 1 and L_k = 0, allowing us to simplify the 2488 // equation to 2489 // 2490 // LB^<_k = (A^-_k - B_k)^- (U_k - 0 - 1) + (A_k - B_k)0 - B_k 1 2491 // = (A^-_k - B_k)^- (U_k - 1) - B_k 2492 // 2493 // Similar simplifications are possible for the other equations. 2494 // 2495 // When we can't determine the number of iterations for a loop, 2496 // we use NULL as an indicator for the worst case, infinity. 2497 // When computing the upper bound, NULL denotes +inf; 2498 // for the lower bound, NULL denotes -inf. 2499 // 2500 // Return true if dependence disproved. 2501 bool DependenceAnalysis::banerjeeMIVtest(const SCEV *Src, 2502 const SCEV *Dst, 2503 const SmallBitVector &Loops, 2504 FullDependence &Result) const { 2505 DEBUG(dbgs() << "starting Banerjee\n"); 2506 ++BanerjeeApplications; 2507 DEBUG(dbgs() << " Src = " << *Src << '\n'); 2508 const SCEV *A0; 2509 CoefficientInfo *A = collectCoeffInfo(Src, true, A0); 2510 DEBUG(dbgs() << " Dst = " << *Dst << '\n'); 2511 const SCEV *B0; 2512 CoefficientInfo *B = collectCoeffInfo(Dst, false, B0); 2513 BoundInfo *Bound = new BoundInfo[MaxLevels + 1]; 2514 const SCEV *Delta = SE->getMinusSCEV(B0, A0); 2515 DEBUG(dbgs() << "\tDelta = " << *Delta << '\n'); 2516 2517 // Compute bounds for all the * directions. 2518 DEBUG(dbgs() << "\tBounds[*]\n"); 2519 for (unsigned K = 1; K <= MaxLevels; ++K) { 2520 Bound[K].Iterations = A[K].Iterations ? A[K].Iterations : B[K].Iterations; 2521 Bound[K].Direction = Dependence::DVEntry::ALL; 2522 Bound[K].DirSet = Dependence::DVEntry::NONE; 2523 findBoundsALL(A, B, Bound, K); 2524 #ifndef NDEBUG 2525 DEBUG(dbgs() << "\t " << K << '\t'); 2526 if (Bound[K].Lower[Dependence::DVEntry::ALL]) 2527 DEBUG(dbgs() << *Bound[K].Lower[Dependence::DVEntry::ALL] << '\t'); 2528 else 2529 DEBUG(dbgs() << "-inf\t"); 2530 if (Bound[K].Upper[Dependence::DVEntry::ALL]) 2531 DEBUG(dbgs() << *Bound[K].Upper[Dependence::DVEntry::ALL] << '\n'); 2532 else 2533 DEBUG(dbgs() << "+inf\n"); 2534 #endif 2535 } 2536 2537 // Test the *, *, *, ... case. 2538 bool Disproved = false; 2539 if (testBounds(Dependence::DVEntry::ALL, 0, Bound, Delta)) { 2540 // Explore the direction vector hierarchy. 2541 unsigned DepthExpanded = 0; 2542 unsigned NewDeps = exploreDirections(1, A, B, Bound, 2543 Loops, DepthExpanded, Delta); 2544 if (NewDeps > 0) { 2545 bool Improved = false; 2546 for (unsigned K = 1; K <= CommonLevels; ++K) { 2547 if (Loops[K]) { 2548 unsigned Old = Result.DV[K - 1].Direction; 2549 Result.DV[K - 1].Direction = Old & Bound[K].DirSet; 2550 Improved |= Old != Result.DV[K - 1].Direction; 2551 if (!Result.DV[K - 1].Direction) { 2552 Improved = false; 2553 Disproved = true; 2554 break; 2555 } 2556 } 2557 } 2558 if (Improved) 2559 ++BanerjeeSuccesses; 2560 } 2561 else { 2562 ++BanerjeeIndependence; 2563 Disproved = true; 2564 } 2565 } 2566 else { 2567 ++BanerjeeIndependence; 2568 Disproved = true; 2569 } 2570 delete [] Bound; 2571 delete [] A; 2572 delete [] B; 2573 return Disproved; 2574 } 2575 2576 2577 // Hierarchically expands the direction vector 2578 // search space, combining the directions of discovered dependences 2579 // in the DirSet field of Bound. Returns the number of distinct 2580 // dependences discovered. If the dependence is disproved, 2581 // it will return 0. 2582 unsigned DependenceAnalysis::exploreDirections(unsigned Level, 2583 CoefficientInfo *A, 2584 CoefficientInfo *B, 2585 BoundInfo *Bound, 2586 const SmallBitVector &Loops, 2587 unsigned &DepthExpanded, 2588 const SCEV *Delta) const { 2589 if (Level > CommonLevels) { 2590 // record result 2591 DEBUG(dbgs() << "\t["); 2592 for (unsigned K = 1; K <= CommonLevels; ++K) { 2593 if (Loops[K]) { 2594 Bound[K].DirSet |= Bound[K].Direction; 2595 #ifndef NDEBUG 2596 switch (Bound[K].Direction) { 2597 case Dependence::DVEntry::LT: 2598 DEBUG(dbgs() << " <"); 2599 break; 2600 case Dependence::DVEntry::EQ: 2601 DEBUG(dbgs() << " ="); 2602 break; 2603 case Dependence::DVEntry::GT: 2604 DEBUG(dbgs() << " >"); 2605 break; 2606 case Dependence::DVEntry::ALL: 2607 DEBUG(dbgs() << " *"); 2608 break; 2609 default: 2610 llvm_unreachable("unexpected Bound[K].Direction"); 2611 } 2612 #endif 2613 } 2614 } 2615 DEBUG(dbgs() << " ]\n"); 2616 return 1; 2617 } 2618 if (Loops[Level]) { 2619 if (Level > DepthExpanded) { 2620 DepthExpanded = Level; 2621 // compute bounds for <, =, > at current level 2622 findBoundsLT(A, B, Bound, Level); 2623 findBoundsGT(A, B, Bound, Level); 2624 findBoundsEQ(A, B, Bound, Level); 2625 #ifndef NDEBUG 2626 DEBUG(dbgs() << "\tBound for level = " << Level << '\n'); 2627 DEBUG(dbgs() << "\t <\t"); 2628 if (Bound[Level].Lower[Dependence::DVEntry::LT]) 2629 DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::LT] << '\t'); 2630 else 2631 DEBUG(dbgs() << "-inf\t"); 2632 if (Bound[Level].Upper[Dependence::DVEntry::LT]) 2633 DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::LT] << '\n'); 2634 else 2635 DEBUG(dbgs() << "+inf\n"); 2636 DEBUG(dbgs() << "\t =\t"); 2637 if (Bound[Level].Lower[Dependence::DVEntry::EQ]) 2638 DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::EQ] << '\t'); 2639 else 2640 DEBUG(dbgs() << "-inf\t"); 2641 if (Bound[Level].Upper[Dependence::DVEntry::EQ]) 2642 DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::EQ] << '\n'); 2643 else 2644 DEBUG(dbgs() << "+inf\n"); 2645 DEBUG(dbgs() << "\t >\t"); 2646 if (Bound[Level].Lower[Dependence::DVEntry::GT]) 2647 DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::GT] << '\t'); 2648 else 2649 DEBUG(dbgs() << "-inf\t"); 2650 if (Bound[Level].Upper[Dependence::DVEntry::GT]) 2651 DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::GT] << '\n'); 2652 else 2653 DEBUG(dbgs() << "+inf\n"); 2654 #endif 2655 } 2656 2657 unsigned NewDeps = 0; 2658 2659 // test bounds for <, *, *, ... 2660 if (testBounds(Dependence::DVEntry::LT, Level, Bound, Delta)) 2661 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2662 Loops, DepthExpanded, Delta); 2663 2664 // Test bounds for =, *, *, ... 2665 if (testBounds(Dependence::DVEntry::EQ, Level, Bound, Delta)) 2666 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2667 Loops, DepthExpanded, Delta); 2668 2669 // test bounds for >, *, *, ... 2670 if (testBounds(Dependence::DVEntry::GT, Level, Bound, Delta)) 2671 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2672 Loops, DepthExpanded, Delta); 2673 2674 Bound[Level].Direction = Dependence::DVEntry::ALL; 2675 return NewDeps; 2676 } 2677 else 2678 return exploreDirections(Level + 1, A, B, Bound, Loops, DepthExpanded, Delta); 2679 } 2680 2681 2682 // Returns true iff the current bounds are plausible. 2683 bool DependenceAnalysis::testBounds(unsigned char DirKind, 2684 unsigned Level, 2685 BoundInfo *Bound, 2686 const SCEV *Delta) const { 2687 Bound[Level].Direction = DirKind; 2688 if (const SCEV *LowerBound = getLowerBound(Bound)) 2689 if (isKnownPredicate(CmpInst::ICMP_SGT, LowerBound, Delta)) 2690 return false; 2691 if (const SCEV *UpperBound = getUpperBound(Bound)) 2692 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, UpperBound)) 2693 return false; 2694 return true; 2695 } 2696 2697 2698 // Computes the upper and lower bounds for level K 2699 // using the * direction. Records them in Bound. 2700 // Wolfe gives the equations 2701 // 2702 // LB^*_k = (A^-_k - B^+_k)(U_k - L_k) + (A_k - B_k)L_k 2703 // UB^*_k = (A^+_k - B^-_k)(U_k - L_k) + (A_k - B_k)L_k 2704 // 2705 // Since we normalize loops, we can simplify these equations to 2706 // 2707 // LB^*_k = (A^-_k - B^+_k)U_k 2708 // UB^*_k = (A^+_k - B^-_k)U_k 2709 // 2710 // We must be careful to handle the case where the upper bound is unknown. 2711 // Note that the lower bound is always <= 0 2712 // and the upper bound is always >= 0. 2713 void DependenceAnalysis::findBoundsALL(CoefficientInfo *A, 2714 CoefficientInfo *B, 2715 BoundInfo *Bound, 2716 unsigned K) const { 2717 Bound[K].Lower[Dependence::DVEntry::ALL] = nullptr; // Default value = -infinity. 2718 Bound[K].Upper[Dependence::DVEntry::ALL] = nullptr; // Default value = +infinity. 2719 if (Bound[K].Iterations) { 2720 Bound[K].Lower[Dependence::DVEntry::ALL] = 2721 SE->getMulExpr(SE->getMinusSCEV(A[K].NegPart, B[K].PosPart), 2722 Bound[K].Iterations); 2723 Bound[K].Upper[Dependence::DVEntry::ALL] = 2724 SE->getMulExpr(SE->getMinusSCEV(A[K].PosPart, B[K].NegPart), 2725 Bound[K].Iterations); 2726 } 2727 else { 2728 // If the difference is 0, we won't need to know the number of iterations. 2729 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].NegPart, B[K].PosPart)) 2730 Bound[K].Lower[Dependence::DVEntry::ALL] = 2731 SE->getConstant(A[K].Coeff->getType(), 0); 2732 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].PosPart, B[K].NegPart)) 2733 Bound[K].Upper[Dependence::DVEntry::ALL] = 2734 SE->getConstant(A[K].Coeff->getType(), 0); 2735 } 2736 } 2737 2738 2739 // Computes the upper and lower bounds for level K 2740 // using the = direction. Records them in Bound. 2741 // Wolfe gives the equations 2742 // 2743 // LB^=_k = (A_k - B_k)^- (U_k - L_k) + (A_k - B_k)L_k 2744 // UB^=_k = (A_k - B_k)^+ (U_k - L_k) + (A_k - B_k)L_k 2745 // 2746 // Since we normalize loops, we can simplify these equations to 2747 // 2748 // LB^=_k = (A_k - B_k)^- U_k 2749 // UB^=_k = (A_k - B_k)^+ U_k 2750 // 2751 // We must be careful to handle the case where the upper bound is unknown. 2752 // Note that the lower bound is always <= 0 2753 // and the upper bound is always >= 0. 2754 void DependenceAnalysis::findBoundsEQ(CoefficientInfo *A, 2755 CoefficientInfo *B, 2756 BoundInfo *Bound, 2757 unsigned K) const { 2758 Bound[K].Lower[Dependence::DVEntry::EQ] = nullptr; // Default value = -infinity. 2759 Bound[K].Upper[Dependence::DVEntry::EQ] = nullptr; // Default value = +infinity. 2760 if (Bound[K].Iterations) { 2761 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); 2762 const SCEV *NegativePart = getNegativePart(Delta); 2763 Bound[K].Lower[Dependence::DVEntry::EQ] = 2764 SE->getMulExpr(NegativePart, Bound[K].Iterations); 2765 const SCEV *PositivePart = getPositivePart(Delta); 2766 Bound[K].Upper[Dependence::DVEntry::EQ] = 2767 SE->getMulExpr(PositivePart, Bound[K].Iterations); 2768 } 2769 else { 2770 // If the positive/negative part of the difference is 0, 2771 // we won't need to know the number of iterations. 2772 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); 2773 const SCEV *NegativePart = getNegativePart(Delta); 2774 if (NegativePart->isZero()) 2775 Bound[K].Lower[Dependence::DVEntry::EQ] = NegativePart; // Zero 2776 const SCEV *PositivePart = getPositivePart(Delta); 2777 if (PositivePart->isZero()) 2778 Bound[K].Upper[Dependence::DVEntry::EQ] = PositivePart; // Zero 2779 } 2780 } 2781 2782 2783 // Computes the upper and lower bounds for level K 2784 // using the < direction. Records them in Bound. 2785 // Wolfe gives the equations 2786 // 2787 // LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2788 // UB^<_k = (A^+_k - B_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2789 // 2790 // Since we normalize loops, we can simplify these equations to 2791 // 2792 // LB^<_k = (A^-_k - B_k)^- (U_k - 1) - B_k 2793 // UB^<_k = (A^+_k - B_k)^+ (U_k - 1) - B_k 2794 // 2795 // We must be careful to handle the case where the upper bound is unknown. 2796 void DependenceAnalysis::findBoundsLT(CoefficientInfo *A, 2797 CoefficientInfo *B, 2798 BoundInfo *Bound, 2799 unsigned K) const { 2800 Bound[K].Lower[Dependence::DVEntry::LT] = nullptr; // Default value = -infinity. 2801 Bound[K].Upper[Dependence::DVEntry::LT] = nullptr; // Default value = +infinity. 2802 if (Bound[K].Iterations) { 2803 const SCEV *Iter_1 = 2804 SE->getMinusSCEV(Bound[K].Iterations, 2805 SE->getConstant(Bound[K].Iterations->getType(), 1)); 2806 const SCEV *NegPart = 2807 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); 2808 Bound[K].Lower[Dependence::DVEntry::LT] = 2809 SE->getMinusSCEV(SE->getMulExpr(NegPart, Iter_1), B[K].Coeff); 2810 const SCEV *PosPart = 2811 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); 2812 Bound[K].Upper[Dependence::DVEntry::LT] = 2813 SE->getMinusSCEV(SE->getMulExpr(PosPart, Iter_1), B[K].Coeff); 2814 } 2815 else { 2816 // If the positive/negative part of the difference is 0, 2817 // we won't need to know the number of iterations. 2818 const SCEV *NegPart = 2819 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); 2820 if (NegPart->isZero()) 2821 Bound[K].Lower[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); 2822 const SCEV *PosPart = 2823 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); 2824 if (PosPart->isZero()) 2825 Bound[K].Upper[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); 2826 } 2827 } 2828 2829 2830 // Computes the upper and lower bounds for level K 2831 // using the > direction. Records them in Bound. 2832 // Wolfe gives the equations 2833 // 2834 // LB^>_k = (A_k - B^+_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k 2835 // UB^>_k = (A_k - B^-_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k 2836 // 2837 // Since we normalize loops, we can simplify these equations to 2838 // 2839 // LB^>_k = (A_k - B^+_k)^- (U_k - 1) + A_k 2840 // UB^>_k = (A_k - B^-_k)^+ (U_k - 1) + A_k 2841 // 2842 // We must be careful to handle the case where the upper bound is unknown. 2843 void DependenceAnalysis::findBoundsGT(CoefficientInfo *A, 2844 CoefficientInfo *B, 2845 BoundInfo *Bound, 2846 unsigned K) const { 2847 Bound[K].Lower[Dependence::DVEntry::GT] = nullptr; // Default value = -infinity. 2848 Bound[K].Upper[Dependence::DVEntry::GT] = nullptr; // Default value = +infinity. 2849 if (Bound[K].Iterations) { 2850 const SCEV *Iter_1 = 2851 SE->getMinusSCEV(Bound[K].Iterations, 2852 SE->getConstant(Bound[K].Iterations->getType(), 1)); 2853 const SCEV *NegPart = 2854 getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); 2855 Bound[K].Lower[Dependence::DVEntry::GT] = 2856 SE->getAddExpr(SE->getMulExpr(NegPart, Iter_1), A[K].Coeff); 2857 const SCEV *PosPart = 2858 getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); 2859 Bound[K].Upper[Dependence::DVEntry::GT] = 2860 SE->getAddExpr(SE->getMulExpr(PosPart, Iter_1), A[K].Coeff); 2861 } 2862 else { 2863 // If the positive/negative part of the difference is 0, 2864 // we won't need to know the number of iterations. 2865 const SCEV *NegPart = getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); 2866 if (NegPart->isZero()) 2867 Bound[K].Lower[Dependence::DVEntry::GT] = A[K].Coeff; 2868 const SCEV *PosPart = getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); 2869 if (PosPart->isZero()) 2870 Bound[K].Upper[Dependence::DVEntry::GT] = A[K].Coeff; 2871 } 2872 } 2873 2874 2875 // X^+ = max(X, 0) 2876 const SCEV *DependenceAnalysis::getPositivePart(const SCEV *X) const { 2877 return SE->getSMaxExpr(X, SE->getConstant(X->getType(), 0)); 2878 } 2879 2880 2881 // X^- = min(X, 0) 2882 const SCEV *DependenceAnalysis::getNegativePart(const SCEV *X) const { 2883 return SE->getSMinExpr(X, SE->getConstant(X->getType(), 0)); 2884 } 2885 2886 2887 // Walks through the subscript, 2888 // collecting each coefficient, the associated loop bounds, 2889 // and recording its positive and negative parts for later use. 2890 DependenceAnalysis::CoefficientInfo * 2891 DependenceAnalysis::collectCoeffInfo(const SCEV *Subscript, 2892 bool SrcFlag, 2893 const SCEV *&Constant) const { 2894 const SCEV *Zero = SE->getConstant(Subscript->getType(), 0); 2895 CoefficientInfo *CI = new CoefficientInfo[MaxLevels + 1]; 2896 for (unsigned K = 1; K <= MaxLevels; ++K) { 2897 CI[K].Coeff = Zero; 2898 CI[K].PosPart = Zero; 2899 CI[K].NegPart = Zero; 2900 CI[K].Iterations = nullptr; 2901 } 2902 while (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Subscript)) { 2903 const Loop *L = AddRec->getLoop(); 2904 unsigned K = SrcFlag ? mapSrcLoop(L) : mapDstLoop(L); 2905 CI[K].Coeff = AddRec->getStepRecurrence(*SE); 2906 CI[K].PosPart = getPositivePart(CI[K].Coeff); 2907 CI[K].NegPart = getNegativePart(CI[K].Coeff); 2908 CI[K].Iterations = collectUpperBound(L, Subscript->getType()); 2909 Subscript = AddRec->getStart(); 2910 } 2911 Constant = Subscript; 2912 #ifndef NDEBUG 2913 DEBUG(dbgs() << "\tCoefficient Info\n"); 2914 for (unsigned K = 1; K <= MaxLevels; ++K) { 2915 DEBUG(dbgs() << "\t " << K << "\t" << *CI[K].Coeff); 2916 DEBUG(dbgs() << "\tPos Part = "); 2917 DEBUG(dbgs() << *CI[K].PosPart); 2918 DEBUG(dbgs() << "\tNeg Part = "); 2919 DEBUG(dbgs() << *CI[K].NegPart); 2920 DEBUG(dbgs() << "\tUpper Bound = "); 2921 if (CI[K].Iterations) 2922 DEBUG(dbgs() << *CI[K].Iterations); 2923 else 2924 DEBUG(dbgs() << "+inf"); 2925 DEBUG(dbgs() << '\n'); 2926 } 2927 DEBUG(dbgs() << "\t Constant = " << *Subscript << '\n'); 2928 #endif 2929 return CI; 2930 } 2931 2932 2933 // Looks through all the bounds info and 2934 // computes the lower bound given the current direction settings 2935 // at each level. If the lower bound for any level is -inf, 2936 // the result is -inf. 2937 const SCEV *DependenceAnalysis::getLowerBound(BoundInfo *Bound) const { 2938 const SCEV *Sum = Bound[1].Lower[Bound[1].Direction]; 2939 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { 2940 if (Bound[K].Lower[Bound[K].Direction]) 2941 Sum = SE->getAddExpr(Sum, Bound[K].Lower[Bound[K].Direction]); 2942 else 2943 Sum = nullptr; 2944 } 2945 return Sum; 2946 } 2947 2948 2949 // Looks through all the bounds info and 2950 // computes the upper bound given the current direction settings 2951 // at each level. If the upper bound at any level is +inf, 2952 // the result is +inf. 2953 const SCEV *DependenceAnalysis::getUpperBound(BoundInfo *Bound) const { 2954 const SCEV *Sum = Bound[1].Upper[Bound[1].Direction]; 2955 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { 2956 if (Bound[K].Upper[Bound[K].Direction]) 2957 Sum = SE->getAddExpr(Sum, Bound[K].Upper[Bound[K].Direction]); 2958 else 2959 Sum = nullptr; 2960 } 2961 return Sum; 2962 } 2963 2964 2965 //===----------------------------------------------------------------------===// 2966 // Constraint manipulation for Delta test. 2967 2968 // Given a linear SCEV, 2969 // return the coefficient (the step) 2970 // corresponding to the specified loop. 2971 // If there isn't one, return 0. 2972 // For example, given a*i + b*j + c*k, finding the coefficient 2973 // corresponding to the j loop would yield b. 2974 const SCEV *DependenceAnalysis::findCoefficient(const SCEV *Expr, 2975 const Loop *TargetLoop) const { 2976 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 2977 if (!AddRec) 2978 return SE->getConstant(Expr->getType(), 0); 2979 if (AddRec->getLoop() == TargetLoop) 2980 return AddRec->getStepRecurrence(*SE); 2981 return findCoefficient(AddRec->getStart(), TargetLoop); 2982 } 2983 2984 2985 // Given a linear SCEV, 2986 // return the SCEV given by zeroing out the coefficient 2987 // corresponding to the specified loop. 2988 // For example, given a*i + b*j + c*k, zeroing the coefficient 2989 // corresponding to the j loop would yield a*i + c*k. 2990 const SCEV *DependenceAnalysis::zeroCoefficient(const SCEV *Expr, 2991 const Loop *TargetLoop) const { 2992 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 2993 if (!AddRec) 2994 return Expr; // ignore 2995 if (AddRec->getLoop() == TargetLoop) 2996 return AddRec->getStart(); 2997 return SE->getAddRecExpr(zeroCoefficient(AddRec->getStart(), TargetLoop), 2998 AddRec->getStepRecurrence(*SE), 2999 AddRec->getLoop(), 3000 AddRec->getNoWrapFlags()); 3001 } 3002 3003 3004 // Given a linear SCEV Expr, 3005 // return the SCEV given by adding some Value to the 3006 // coefficient corresponding to the specified TargetLoop. 3007 // For example, given a*i + b*j + c*k, adding 1 to the coefficient 3008 // corresponding to the j loop would yield a*i + (b+1)*j + c*k. 3009 const SCEV *DependenceAnalysis::addToCoefficient(const SCEV *Expr, 3010 const Loop *TargetLoop, 3011 const SCEV *Value) const { 3012 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 3013 if (!AddRec) // create a new addRec 3014 return SE->getAddRecExpr(Expr, 3015 Value, 3016 TargetLoop, 3017 SCEV::FlagAnyWrap); // Worst case, with no info. 3018 if (AddRec->getLoop() == TargetLoop) { 3019 const SCEV *Sum = SE->getAddExpr(AddRec->getStepRecurrence(*SE), Value); 3020 if (Sum->isZero()) 3021 return AddRec->getStart(); 3022 return SE->getAddRecExpr(AddRec->getStart(), 3023 Sum, 3024 AddRec->getLoop(), 3025 AddRec->getNoWrapFlags()); 3026 } 3027 if (SE->isLoopInvariant(AddRec, TargetLoop)) 3028 return SE->getAddRecExpr(AddRec, Value, TargetLoop, SCEV::FlagAnyWrap); 3029 return SE->getAddRecExpr( 3030 addToCoefficient(AddRec->getStart(), TargetLoop, Value), 3031 AddRec->getStepRecurrence(*SE), AddRec->getLoop(), 3032 AddRec->getNoWrapFlags()); 3033 } 3034 3035 3036 // Review the constraints, looking for opportunities 3037 // to simplify a subscript pair (Src and Dst). 3038 // Return true if some simplification occurs. 3039 // If the simplification isn't exact (that is, if it is conservative 3040 // in terms of dependence), set consistent to false. 3041 // Corresponds to Figure 5 from the paper 3042 // 3043 // Practical Dependence Testing 3044 // Goff, Kennedy, Tseng 3045 // PLDI 1991 3046 bool DependenceAnalysis::propagate(const SCEV *&Src, 3047 const SCEV *&Dst, 3048 SmallBitVector &Loops, 3049 SmallVectorImpl<Constraint> &Constraints, 3050 bool &Consistent) { 3051 bool Result = false; 3052 for (int LI = Loops.find_first(); LI >= 0; LI = Loops.find_next(LI)) { 3053 DEBUG(dbgs() << "\t Constraint[" << LI << "] is"); 3054 DEBUG(Constraints[LI].dump(dbgs())); 3055 if (Constraints[LI].isDistance()) 3056 Result |= propagateDistance(Src, Dst, Constraints[LI], Consistent); 3057 else if (Constraints[LI].isLine()) 3058 Result |= propagateLine(Src, Dst, Constraints[LI], Consistent); 3059 else if (Constraints[LI].isPoint()) 3060 Result |= propagatePoint(Src, Dst, Constraints[LI]); 3061 } 3062 return Result; 3063 } 3064 3065 3066 // Attempt to propagate a distance 3067 // constraint into a subscript pair (Src and Dst). 3068 // Return true if some simplification occurs. 3069 // If the simplification isn't exact (that is, if it is conservative 3070 // in terms of dependence), set consistent to false. 3071 bool DependenceAnalysis::propagateDistance(const SCEV *&Src, 3072 const SCEV *&Dst, 3073 Constraint &CurConstraint, 3074 bool &Consistent) { 3075 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3076 DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); 3077 const SCEV *A_K = findCoefficient(Src, CurLoop); 3078 if (A_K->isZero()) 3079 return false; 3080 const SCEV *DA_K = SE->getMulExpr(A_K, CurConstraint.getD()); 3081 Src = SE->getMinusSCEV(Src, DA_K); 3082 Src = zeroCoefficient(Src, CurLoop); 3083 DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); 3084 DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); 3085 Dst = addToCoefficient(Dst, CurLoop, SE->getNegativeSCEV(A_K)); 3086 DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); 3087 if (!findCoefficient(Dst, CurLoop)->isZero()) 3088 Consistent = false; 3089 return true; 3090 } 3091 3092 3093 // Attempt to propagate a line 3094 // constraint into a subscript pair (Src and Dst). 3095 // Return true if some simplification occurs. 3096 // If the simplification isn't exact (that is, if it is conservative 3097 // in terms of dependence), set consistent to false. 3098 bool DependenceAnalysis::propagateLine(const SCEV *&Src, 3099 const SCEV *&Dst, 3100 Constraint &CurConstraint, 3101 bool &Consistent) { 3102 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3103 const SCEV *A = CurConstraint.getA(); 3104 const SCEV *B = CurConstraint.getB(); 3105 const SCEV *C = CurConstraint.getC(); 3106 DEBUG(dbgs() << "\t\tA = " << *A << ", B = " << *B << ", C = " << *C << "\n"); 3107 DEBUG(dbgs() << "\t\tSrc = " << *Src << "\n"); 3108 DEBUG(dbgs() << "\t\tDst = " << *Dst << "\n"); 3109 if (A->isZero()) { 3110 const SCEVConstant *Bconst = dyn_cast<SCEVConstant>(B); 3111 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3112 if (!Bconst || !Cconst) return false; 3113 APInt Beta = Bconst->getValue()->getValue(); 3114 APInt Charlie = Cconst->getValue()->getValue(); 3115 APInt CdivB = Charlie.sdiv(Beta); 3116 assert(Charlie.srem(Beta) == 0 && "C should be evenly divisible by B"); 3117 const SCEV *AP_K = findCoefficient(Dst, CurLoop); 3118 // Src = SE->getAddExpr(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); 3119 Src = SE->getMinusSCEV(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); 3120 Dst = zeroCoefficient(Dst, CurLoop); 3121 if (!findCoefficient(Src, CurLoop)->isZero()) 3122 Consistent = false; 3123 } 3124 else if (B->isZero()) { 3125 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); 3126 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3127 if (!Aconst || !Cconst) return false; 3128 APInt Alpha = Aconst->getValue()->getValue(); 3129 APInt Charlie = Cconst->getValue()->getValue(); 3130 APInt CdivA = Charlie.sdiv(Alpha); 3131 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); 3132 const SCEV *A_K = findCoefficient(Src, CurLoop); 3133 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); 3134 Src = zeroCoefficient(Src, CurLoop); 3135 if (!findCoefficient(Dst, CurLoop)->isZero()) 3136 Consistent = false; 3137 } 3138 else if (isKnownPredicate(CmpInst::ICMP_EQ, A, B)) { 3139 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); 3140 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3141 if (!Aconst || !Cconst) return false; 3142 APInt Alpha = Aconst->getValue()->getValue(); 3143 APInt Charlie = Cconst->getValue()->getValue(); 3144 APInt CdivA = Charlie.sdiv(Alpha); 3145 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); 3146 const SCEV *A_K = findCoefficient(Src, CurLoop); 3147 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); 3148 Src = zeroCoefficient(Src, CurLoop); 3149 Dst = addToCoefficient(Dst, CurLoop, A_K); 3150 if (!findCoefficient(Dst, CurLoop)->isZero()) 3151 Consistent = false; 3152 } 3153 else { 3154 // paper is incorrect here, or perhaps just misleading 3155 const SCEV *A_K = findCoefficient(Src, CurLoop); 3156 Src = SE->getMulExpr(Src, A); 3157 Dst = SE->getMulExpr(Dst, A); 3158 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, C)); 3159 Src = zeroCoefficient(Src, CurLoop); 3160 Dst = addToCoefficient(Dst, CurLoop, SE->getMulExpr(A_K, B)); 3161 if (!findCoefficient(Dst, CurLoop)->isZero()) 3162 Consistent = false; 3163 } 3164 DEBUG(dbgs() << "\t\tnew Src = " << *Src << "\n"); 3165 DEBUG(dbgs() << "\t\tnew Dst = " << *Dst << "\n"); 3166 return true; 3167 } 3168 3169 3170 // Attempt to propagate a point 3171 // constraint into a subscript pair (Src and Dst). 3172 // Return true if some simplification occurs. 3173 bool DependenceAnalysis::propagatePoint(const SCEV *&Src, 3174 const SCEV *&Dst, 3175 Constraint &CurConstraint) { 3176 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3177 const SCEV *A_K = findCoefficient(Src, CurLoop); 3178 const SCEV *AP_K = findCoefficient(Dst, CurLoop); 3179 const SCEV *XA_K = SE->getMulExpr(A_K, CurConstraint.getX()); 3180 const SCEV *YAP_K = SE->getMulExpr(AP_K, CurConstraint.getY()); 3181 DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); 3182 Src = SE->getAddExpr(Src, SE->getMinusSCEV(XA_K, YAP_K)); 3183 Src = zeroCoefficient(Src, CurLoop); 3184 DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); 3185 DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); 3186 Dst = zeroCoefficient(Dst, CurLoop); 3187 DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); 3188 return true; 3189 } 3190 3191 3192 // Update direction vector entry based on the current constraint. 3193 void DependenceAnalysis::updateDirection(Dependence::DVEntry &Level, 3194 const Constraint &CurConstraint 3195 ) const { 3196 DEBUG(dbgs() << "\tUpdate direction, constraint ="); 3197 DEBUG(CurConstraint.dump(dbgs())); 3198 if (CurConstraint.isAny()) 3199 ; // use defaults 3200 else if (CurConstraint.isDistance()) { 3201 // this one is consistent, the others aren't 3202 Level.Scalar = false; 3203 Level.Distance = CurConstraint.getD(); 3204 unsigned NewDirection = Dependence::DVEntry::NONE; 3205 if (!SE->isKnownNonZero(Level.Distance)) // if may be zero 3206 NewDirection = Dependence::DVEntry::EQ; 3207 if (!SE->isKnownNonPositive(Level.Distance)) // if may be positive 3208 NewDirection |= Dependence::DVEntry::LT; 3209 if (!SE->isKnownNonNegative(Level.Distance)) // if may be negative 3210 NewDirection |= Dependence::DVEntry::GT; 3211 Level.Direction &= NewDirection; 3212 } 3213 else if (CurConstraint.isLine()) { 3214 Level.Scalar = false; 3215 Level.Distance = nullptr; 3216 // direction should be accurate 3217 } 3218 else if (CurConstraint.isPoint()) { 3219 Level.Scalar = false; 3220 Level.Distance = nullptr; 3221 unsigned NewDirection = Dependence::DVEntry::NONE; 3222 if (!isKnownPredicate(CmpInst::ICMP_NE, 3223 CurConstraint.getY(), 3224 CurConstraint.getX())) 3225 // if X may be = Y 3226 NewDirection |= Dependence::DVEntry::EQ; 3227 if (!isKnownPredicate(CmpInst::ICMP_SLE, 3228 CurConstraint.getY(), 3229 CurConstraint.getX())) 3230 // if Y may be > X 3231 NewDirection |= Dependence::DVEntry::LT; 3232 if (!isKnownPredicate(CmpInst::ICMP_SGE, 3233 CurConstraint.getY(), 3234 CurConstraint.getX())) 3235 // if Y may be < X 3236 NewDirection |= Dependence::DVEntry::GT; 3237 Level.Direction &= NewDirection; 3238 } 3239 else 3240 llvm_unreachable("constraint has unexpected kind"); 3241 } 3242 3243 /// Check if we can delinearize the subscripts. If the SCEVs representing the 3244 /// source and destination array references are recurrences on a nested loop, 3245 /// this function flattens the nested recurrences into separate recurrences 3246 /// for each loop level. 3247 bool DependenceAnalysis::tryDelinearize(const SCEV *SrcSCEV, 3248 const SCEV *DstSCEV, 3249 SmallVectorImpl<Subscript> &Pair, 3250 const SCEV *ElementSize) { 3251 const SCEVUnknown *SrcBase = 3252 dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcSCEV)); 3253 const SCEVUnknown *DstBase = 3254 dyn_cast<SCEVUnknown>(SE->getPointerBase(DstSCEV)); 3255 3256 if (!SrcBase || !DstBase || SrcBase != DstBase) 3257 return false; 3258 3259 SrcSCEV = SE->getMinusSCEV(SrcSCEV, SrcBase); 3260 DstSCEV = SE->getMinusSCEV(DstSCEV, DstBase); 3261 3262 const SCEVAddRecExpr *SrcAR = dyn_cast<SCEVAddRecExpr>(SrcSCEV); 3263 const SCEVAddRecExpr *DstAR = dyn_cast<SCEVAddRecExpr>(DstSCEV); 3264 if (!SrcAR || !DstAR || !SrcAR->isAffine() || !DstAR->isAffine()) 3265 return false; 3266 3267 // First step: collect parametric terms in both array references. 3268 SmallVector<const SCEV *, 4> Terms; 3269 SE->collectParametricTerms(SrcAR, Terms); 3270 SE->collectParametricTerms(DstAR, Terms); 3271 3272 // Second step: find subscript sizes. 3273 SmallVector<const SCEV *, 4> Sizes; 3274 SE->findArrayDimensions(Terms, Sizes, ElementSize); 3275 3276 // Third step: compute the access functions for each subscript. 3277 SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts; 3278 SE->computeAccessFunctions(SrcAR, SrcSubscripts, Sizes); 3279 SE->computeAccessFunctions(DstAR, DstSubscripts, Sizes); 3280 3281 // Fail when there is only a subscript: that's a linearized access function. 3282 if (SrcSubscripts.size() < 2 || DstSubscripts.size() < 2 || 3283 SrcSubscripts.size() != DstSubscripts.size()) 3284 return false; 3285 3286 int size = SrcSubscripts.size(); 3287 3288 DEBUG({ 3289 dbgs() << "\nSrcSubscripts: "; 3290 for (int i = 0; i < size; i++) 3291 dbgs() << *SrcSubscripts[i]; 3292 dbgs() << "\nDstSubscripts: "; 3293 for (int i = 0; i < size; i++) 3294 dbgs() << *DstSubscripts[i]; 3295 }); 3296 3297 // The delinearization transforms a single-subscript MIV dependence test into 3298 // a multi-subscript SIV dependence test that is easier to compute. So we 3299 // resize Pair to contain as many pairs of subscripts as the delinearization 3300 // has found, and then initialize the pairs following the delinearization. 3301 Pair.resize(size); 3302 for (int i = 0; i < size; ++i) { 3303 Pair[i].Src = SrcSubscripts[i]; 3304 Pair[i].Dst = DstSubscripts[i]; 3305 unifySubscriptType(&Pair[i]); 3306 3307 // FIXME: we should record the bounds SrcSizes[i] and DstSizes[i] that the 3308 // delinearization has found, and add these constraints to the dependence 3309 // check to avoid memory accesses overflow from one dimension into another. 3310 // This is related to the problem of determining the existence of data 3311 // dependences in array accesses using a different number of subscripts: in 3312 // C one can access an array A[100][100]; as A[0][9999], *A[9999], etc. 3313 } 3314 3315 return true; 3316 } 3317 3318 //===----------------------------------------------------------------------===// 3319 3320 #ifndef NDEBUG 3321 // For debugging purposes, dump a small bit vector to dbgs(). 3322 static void dumpSmallBitVector(SmallBitVector &BV) { 3323 dbgs() << "{"; 3324 for (int VI = BV.find_first(); VI >= 0; VI = BV.find_next(VI)) { 3325 dbgs() << VI; 3326 if (BV.find_next(VI) >= 0) 3327 dbgs() << ' '; 3328 } 3329 dbgs() << "}\n"; 3330 } 3331 #endif 3332 3333 3334 // depends - 3335 // Returns NULL if there is no dependence. 3336 // Otherwise, return a Dependence with as many details as possible. 3337 // Corresponds to Section 3.1 in the paper 3338 // 3339 // Practical Dependence Testing 3340 // Goff, Kennedy, Tseng 3341 // PLDI 1991 3342 // 3343 // Care is required to keep the routine below, getSplitIteration(), 3344 // up to date with respect to this routine. 3345 std::unique_ptr<Dependence> 3346 DependenceAnalysis::depends(Instruction *Src, Instruction *Dst, 3347 bool PossiblyLoopIndependent) { 3348 if (Src == Dst) 3349 PossiblyLoopIndependent = false; 3350 3351 if ((!Src->mayReadFromMemory() && !Src->mayWriteToMemory()) || 3352 (!Dst->mayReadFromMemory() && !Dst->mayWriteToMemory())) 3353 // if both instructions don't reference memory, there's no dependence 3354 return nullptr; 3355 3356 if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) { 3357 // can only analyze simple loads and stores, i.e., no calls, invokes, etc. 3358 DEBUG(dbgs() << "can only handle simple loads and stores\n"); 3359 return make_unique<Dependence>(Src, Dst); 3360 } 3361 3362 Value *SrcPtr = getPointerOperand(Src); 3363 Value *DstPtr = getPointerOperand(Dst); 3364 3365 switch (underlyingObjectsAlias(AA, F->getParent()->getDataLayout(), DstPtr, 3366 SrcPtr)) { 3367 case MayAlias: 3368 case PartialAlias: 3369 // cannot analyse objects if we don't understand their aliasing. 3370 DEBUG(dbgs() << "can't analyze may or partial alias\n"); 3371 return make_unique<Dependence>(Src, Dst); 3372 case NoAlias: 3373 // If the objects noalias, they are distinct, accesses are independent. 3374 DEBUG(dbgs() << "no alias\n"); 3375 return nullptr; 3376 case MustAlias: 3377 break; // The underlying objects alias; test accesses for dependence. 3378 } 3379 3380 // establish loop nesting levels 3381 establishNestingLevels(Src, Dst); 3382 DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n"); 3383 DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n"); 3384 3385 FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels); 3386 ++TotalArrayPairs; 3387 3388 // See if there are GEPs we can use. 3389 bool UsefulGEP = false; 3390 GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr); 3391 GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr); 3392 if (SrcGEP && DstGEP && 3393 SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()) { 3394 const SCEV *SrcPtrSCEV = SE->getSCEV(SrcGEP->getPointerOperand()); 3395 const SCEV *DstPtrSCEV = SE->getSCEV(DstGEP->getPointerOperand()); 3396 DEBUG(dbgs() << " SrcPtrSCEV = " << *SrcPtrSCEV << "\n"); 3397 DEBUG(dbgs() << " DstPtrSCEV = " << *DstPtrSCEV << "\n"); 3398 3399 UsefulGEP = isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) && 3400 isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent())) && 3401 (SrcGEP->getNumOperands() == DstGEP->getNumOperands()); 3402 } 3403 unsigned Pairs = UsefulGEP ? SrcGEP->idx_end() - SrcGEP->idx_begin() : 1; 3404 SmallVector<Subscript, 4> Pair(Pairs); 3405 if (UsefulGEP) { 3406 DEBUG(dbgs() << " using GEPs\n"); 3407 unsigned P = 0; 3408 for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(), 3409 SrcEnd = SrcGEP->idx_end(), 3410 DstIdx = DstGEP->idx_begin(); 3411 SrcIdx != SrcEnd; 3412 ++SrcIdx, ++DstIdx, ++P) { 3413 Pair[P].Src = SE->getSCEV(*SrcIdx); 3414 Pair[P].Dst = SE->getSCEV(*DstIdx); 3415 unifySubscriptType(&Pair[P]); 3416 } 3417 } 3418 else { 3419 DEBUG(dbgs() << " ignoring GEPs\n"); 3420 const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); 3421 const SCEV *DstSCEV = SE->getSCEV(DstPtr); 3422 DEBUG(dbgs() << " SrcSCEV = " << *SrcSCEV << "\n"); 3423 DEBUG(dbgs() << " DstSCEV = " << *DstSCEV << "\n"); 3424 Pair[0].Src = SrcSCEV; 3425 Pair[0].Dst = DstSCEV; 3426 } 3427 3428 if (Delinearize && Pairs == 1 && CommonLevels > 1 && 3429 tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair, SE->getElementSize(Src))) { 3430 DEBUG(dbgs() << " delinerized GEP\n"); 3431 Pairs = Pair.size(); 3432 } 3433 3434 for (unsigned P = 0; P < Pairs; ++P) { 3435 Pair[P].Loops.resize(MaxLevels + 1); 3436 Pair[P].GroupLoops.resize(MaxLevels + 1); 3437 Pair[P].Group.resize(Pairs); 3438 removeMatchingExtensions(&Pair[P]); 3439 Pair[P].Classification = 3440 classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), 3441 Pair[P].Dst, LI->getLoopFor(Dst->getParent()), 3442 Pair[P].Loops); 3443 Pair[P].GroupLoops = Pair[P].Loops; 3444 Pair[P].Group.set(P); 3445 DEBUG(dbgs() << " subscript " << P << "\n"); 3446 DEBUG(dbgs() << "\tsrc = " << *Pair[P].Src << "\n"); 3447 DEBUG(dbgs() << "\tdst = " << *Pair[P].Dst << "\n"); 3448 DEBUG(dbgs() << "\tclass = " << Pair[P].Classification << "\n"); 3449 DEBUG(dbgs() << "\tloops = "); 3450 DEBUG(dumpSmallBitVector(Pair[P].Loops)); 3451 } 3452 3453 SmallBitVector Separable(Pairs); 3454 SmallBitVector Coupled(Pairs); 3455 3456 // Partition subscripts into separable and minimally-coupled groups 3457 // Algorithm in paper is algorithmically better; 3458 // this may be faster in practice. Check someday. 3459 // 3460 // Here's an example of how it works. Consider this code: 3461 // 3462 // for (i = ...) { 3463 // for (j = ...) { 3464 // for (k = ...) { 3465 // for (l = ...) { 3466 // for (m = ...) { 3467 // A[i][j][k][m] = ...; 3468 // ... = A[0][j][l][i + j]; 3469 // } 3470 // } 3471 // } 3472 // } 3473 // } 3474 // 3475 // There are 4 subscripts here: 3476 // 0 [i] and [0] 3477 // 1 [j] and [j] 3478 // 2 [k] and [l] 3479 // 3 [m] and [i + j] 3480 // 3481 // We've already classified each subscript pair as ZIV, SIV, etc., 3482 // and collected all the loops mentioned by pair P in Pair[P].Loops. 3483 // In addition, we've initialized Pair[P].GroupLoops to Pair[P].Loops 3484 // and set Pair[P].Group = {P}. 3485 // 3486 // Src Dst Classification Loops GroupLoops Group 3487 // 0 [i] [0] SIV {1} {1} {0} 3488 // 1 [j] [j] SIV {2} {2} {1} 3489 // 2 [k] [l] RDIV {3,4} {3,4} {2} 3490 // 3 [m] [i + j] MIV {1,2,5} {1,2,5} {3} 3491 // 3492 // For each subscript SI 0 .. 3, we consider each remaining subscript, SJ. 3493 // So, 0 is compared against 1, 2, and 3; 1 is compared against 2 and 3, etc. 3494 // 3495 // We begin by comparing 0 and 1. The intersection of the GroupLoops is empty. 3496 // Next, 0 and 2. Again, the intersection of their GroupLoops is empty. 3497 // Next 0 and 3. The intersection of their GroupLoop = {1}, not empty, 3498 // so Pair[3].Group = {0,3} and Done = false (that is, 0 will not be added 3499 // to either Separable or Coupled). 3500 // 3501 // Next, we consider 1 and 2. The intersection of the GroupLoops is empty. 3502 // Next, 1 and 3. The intersectionof their GroupLoops = {2}, not empty, 3503 // so Pair[3].Group = {0, 1, 3} and Done = false. 3504 // 3505 // Next, we compare 2 against 3. The intersection of the GroupLoops is empty. 3506 // Since Done remains true, we add 2 to the set of Separable pairs. 3507 // 3508 // Finally, we consider 3. There's nothing to compare it with, 3509 // so Done remains true and we add it to the Coupled set. 3510 // Pair[3].Group = {0, 1, 3} and GroupLoops = {1, 2, 5}. 3511 // 3512 // In the end, we've got 1 separable subscript and 1 coupled group. 3513 for (unsigned SI = 0; SI < Pairs; ++SI) { 3514 if (Pair[SI].Classification == Subscript::NonLinear) { 3515 // ignore these, but collect loops for later 3516 ++NonlinearSubscriptPairs; 3517 collectCommonLoops(Pair[SI].Src, 3518 LI->getLoopFor(Src->getParent()), 3519 Pair[SI].Loops); 3520 collectCommonLoops(Pair[SI].Dst, 3521 LI->getLoopFor(Dst->getParent()), 3522 Pair[SI].Loops); 3523 Result.Consistent = false; 3524 } else if (Pair[SI].Classification == Subscript::ZIV) { 3525 // always separable 3526 Separable.set(SI); 3527 } 3528 else { 3529 // SIV, RDIV, or MIV, so check for coupled group 3530 bool Done = true; 3531 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { 3532 SmallBitVector Intersection = Pair[SI].GroupLoops; 3533 Intersection &= Pair[SJ].GroupLoops; 3534 if (Intersection.any()) { 3535 // accumulate set of all the loops in group 3536 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; 3537 // accumulate set of all subscripts in group 3538 Pair[SJ].Group |= Pair[SI].Group; 3539 Done = false; 3540 } 3541 } 3542 if (Done) { 3543 if (Pair[SI].Group.count() == 1) { 3544 Separable.set(SI); 3545 ++SeparableSubscriptPairs; 3546 } 3547 else { 3548 Coupled.set(SI); 3549 ++CoupledSubscriptPairs; 3550 } 3551 } 3552 } 3553 } 3554 3555 DEBUG(dbgs() << " Separable = "); 3556 DEBUG(dumpSmallBitVector(Separable)); 3557 DEBUG(dbgs() << " Coupled = "); 3558 DEBUG(dumpSmallBitVector(Coupled)); 3559 3560 Constraint NewConstraint; 3561 NewConstraint.setAny(SE); 3562 3563 // test separable subscripts 3564 for (int SI = Separable.find_first(); SI >= 0; SI = Separable.find_next(SI)) { 3565 DEBUG(dbgs() << "testing subscript " << SI); 3566 switch (Pair[SI].Classification) { 3567 case Subscript::ZIV: 3568 DEBUG(dbgs() << ", ZIV\n"); 3569 if (testZIV(Pair[SI].Src, Pair[SI].Dst, Result)) 3570 return nullptr; 3571 break; 3572 case Subscript::SIV: { 3573 DEBUG(dbgs() << ", SIV\n"); 3574 unsigned Level; 3575 const SCEV *SplitIter = nullptr; 3576 if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level, Result, NewConstraint, 3577 SplitIter)) 3578 return nullptr; 3579 break; 3580 } 3581 case Subscript::RDIV: 3582 DEBUG(dbgs() << ", RDIV\n"); 3583 if (testRDIV(Pair[SI].Src, Pair[SI].Dst, Result)) 3584 return nullptr; 3585 break; 3586 case Subscript::MIV: 3587 DEBUG(dbgs() << ", MIV\n"); 3588 if (testMIV(Pair[SI].Src, Pair[SI].Dst, Pair[SI].Loops, Result)) 3589 return nullptr; 3590 break; 3591 default: 3592 llvm_unreachable("subscript has unexpected classification"); 3593 } 3594 } 3595 3596 if (Coupled.count()) { 3597 // test coupled subscript groups 3598 DEBUG(dbgs() << "starting on coupled subscripts\n"); 3599 DEBUG(dbgs() << "MaxLevels + 1 = " << MaxLevels + 1 << "\n"); 3600 SmallVector<Constraint, 4> Constraints(MaxLevels + 1); 3601 for (unsigned II = 0; II <= MaxLevels; ++II) 3602 Constraints[II].setAny(SE); 3603 for (int SI = Coupled.find_first(); SI >= 0; SI = Coupled.find_next(SI)) { 3604 DEBUG(dbgs() << "testing subscript group " << SI << " { "); 3605 SmallBitVector Group(Pair[SI].Group); 3606 SmallBitVector Sivs(Pairs); 3607 SmallBitVector Mivs(Pairs); 3608 SmallBitVector ConstrainedLevels(MaxLevels + 1); 3609 SmallVector<Subscript *, 4> PairsInGroup; 3610 for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) { 3611 DEBUG(dbgs() << SJ << " "); 3612 if (Pair[SJ].Classification == Subscript::SIV) 3613 Sivs.set(SJ); 3614 else 3615 Mivs.set(SJ); 3616 PairsInGroup.push_back(&Pair[SJ]); 3617 } 3618 unifySubscriptType(PairsInGroup); 3619 DEBUG(dbgs() << "}\n"); 3620 while (Sivs.any()) { 3621 bool Changed = false; 3622 for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) { 3623 DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n"); 3624 // SJ is an SIV subscript that's part of the current coupled group 3625 unsigned Level; 3626 const SCEV *SplitIter = nullptr; 3627 DEBUG(dbgs() << "SIV\n"); 3628 if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, Result, NewConstraint, 3629 SplitIter)) 3630 return nullptr; 3631 ConstrainedLevels.set(Level); 3632 if (intersectConstraints(&Constraints[Level], &NewConstraint)) { 3633 if (Constraints[Level].isEmpty()) { 3634 ++DeltaIndependence; 3635 return nullptr; 3636 } 3637 Changed = true; 3638 } 3639 Sivs.reset(SJ); 3640 } 3641 if (Changed) { 3642 // propagate, possibly creating new SIVs and ZIVs 3643 DEBUG(dbgs() << " propagating\n"); 3644 DEBUG(dbgs() << "\tMivs = "); 3645 DEBUG(dumpSmallBitVector(Mivs)); 3646 for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { 3647 // SJ is an MIV subscript that's part of the current coupled group 3648 DEBUG(dbgs() << "\tSJ = " << SJ << "\n"); 3649 if (propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, 3650 Constraints, Result.Consistent)) { 3651 DEBUG(dbgs() << "\t Changed\n"); 3652 ++DeltaPropagations; 3653 Pair[SJ].Classification = 3654 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), 3655 Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), 3656 Pair[SJ].Loops); 3657 switch (Pair[SJ].Classification) { 3658 case Subscript::ZIV: 3659 DEBUG(dbgs() << "ZIV\n"); 3660 if (testZIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) 3661 return nullptr; 3662 Mivs.reset(SJ); 3663 break; 3664 case Subscript::SIV: 3665 Sivs.set(SJ); 3666 Mivs.reset(SJ); 3667 break; 3668 case Subscript::RDIV: 3669 case Subscript::MIV: 3670 break; 3671 default: 3672 llvm_unreachable("bad subscript classification"); 3673 } 3674 } 3675 } 3676 } 3677 } 3678 3679 // test & propagate remaining RDIVs 3680 for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { 3681 if (Pair[SJ].Classification == Subscript::RDIV) { 3682 DEBUG(dbgs() << "RDIV test\n"); 3683 if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) 3684 return nullptr; 3685 // I don't yet understand how to propagate RDIV results 3686 Mivs.reset(SJ); 3687 } 3688 } 3689 3690 // test remaining MIVs 3691 // This code is temporary. 3692 // Better to somehow test all remaining subscripts simultaneously. 3693 for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { 3694 if (Pair[SJ].Classification == Subscript::MIV) { 3695 DEBUG(dbgs() << "MIV test\n"); 3696 if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Result)) 3697 return nullptr; 3698 } 3699 else 3700 llvm_unreachable("expected only MIV subscripts at this point"); 3701 } 3702 3703 // update Result.DV from constraint vector 3704 DEBUG(dbgs() << " updating\n"); 3705 for (int SJ = ConstrainedLevels.find_first(); SJ >= 0; 3706 SJ = ConstrainedLevels.find_next(SJ)) { 3707 if (SJ > (int)CommonLevels) 3708 break; 3709 updateDirection(Result.DV[SJ - 1], Constraints[SJ]); 3710 if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE) 3711 return nullptr; 3712 } 3713 } 3714 } 3715 3716 // Make sure the Scalar flags are set correctly. 3717 SmallBitVector CompleteLoops(MaxLevels + 1); 3718 for (unsigned SI = 0; SI < Pairs; ++SI) 3719 CompleteLoops |= Pair[SI].Loops; 3720 for (unsigned II = 1; II <= CommonLevels; ++II) 3721 if (CompleteLoops[II]) 3722 Result.DV[II - 1].Scalar = false; 3723 3724 if (PossiblyLoopIndependent) { 3725 // Make sure the LoopIndependent flag is set correctly. 3726 // All directions must include equal, otherwise no 3727 // loop-independent dependence is possible. 3728 for (unsigned II = 1; II <= CommonLevels; ++II) { 3729 if (!(Result.getDirection(II) & Dependence::DVEntry::EQ)) { 3730 Result.LoopIndependent = false; 3731 break; 3732 } 3733 } 3734 } 3735 else { 3736 // On the other hand, if all directions are equal and there's no 3737 // loop-independent dependence possible, then no dependence exists. 3738 bool AllEqual = true; 3739 for (unsigned II = 1; II <= CommonLevels; ++II) { 3740 if (Result.getDirection(II) != Dependence::DVEntry::EQ) { 3741 AllEqual = false; 3742 break; 3743 } 3744 } 3745 if (AllEqual) 3746 return nullptr; 3747 } 3748 3749 auto Final = make_unique<FullDependence>(Result); 3750 Result.DV = nullptr; 3751 return std::move(Final); 3752 } 3753 3754 3755 3756 //===----------------------------------------------------------------------===// 3757 // getSplitIteration - 3758 // Rather than spend rarely-used space recording the splitting iteration 3759 // during the Weak-Crossing SIV test, we re-compute it on demand. 3760 // The re-computation is basically a repeat of the entire dependence test, 3761 // though simplified since we know that the dependence exists. 3762 // It's tedious, since we must go through all propagations, etc. 3763 // 3764 // Care is required to keep this code up to date with respect to the routine 3765 // above, depends(). 3766 // 3767 // Generally, the dependence analyzer will be used to build 3768 // a dependence graph for a function (basically a map from instructions 3769 // to dependences). Looking for cycles in the graph shows us loops 3770 // that cannot be trivially vectorized/parallelized. 3771 // 3772 // We can try to improve the situation by examining all the dependences 3773 // that make up the cycle, looking for ones we can break. 3774 // Sometimes, peeling the first or last iteration of a loop will break 3775 // dependences, and we've got flags for those possibilities. 3776 // Sometimes, splitting a loop at some other iteration will do the trick, 3777 // and we've got a flag for that case. Rather than waste the space to 3778 // record the exact iteration (since we rarely know), we provide 3779 // a method that calculates the iteration. It's a drag that it must work 3780 // from scratch, but wonderful in that it's possible. 3781 // 3782 // Here's an example: 3783 // 3784 // for (i = 0; i < 10; i++) 3785 // A[i] = ... 3786 // ... = A[11 - i] 3787 // 3788 // There's a loop-carried flow dependence from the store to the load, 3789 // found by the weak-crossing SIV test. The dependence will have a flag, 3790 // indicating that the dependence can be broken by splitting the loop. 3791 // Calling getSplitIteration will return 5. 3792 // Splitting the loop breaks the dependence, like so: 3793 // 3794 // for (i = 0; i <= 5; i++) 3795 // A[i] = ... 3796 // ... = A[11 - i] 3797 // for (i = 6; i < 10; i++) 3798 // A[i] = ... 3799 // ... = A[11 - i] 3800 // 3801 // breaks the dependence and allows us to vectorize/parallelize 3802 // both loops. 3803 const SCEV *DependenceAnalysis::getSplitIteration(const Dependence &Dep, 3804 unsigned SplitLevel) { 3805 assert(Dep.isSplitable(SplitLevel) && 3806 "Dep should be splitable at SplitLevel"); 3807 Instruction *Src = Dep.getSrc(); 3808 Instruction *Dst = Dep.getDst(); 3809 assert(Src->mayReadFromMemory() || Src->mayWriteToMemory()); 3810 assert(Dst->mayReadFromMemory() || Dst->mayWriteToMemory()); 3811 assert(isLoadOrStore(Src)); 3812 assert(isLoadOrStore(Dst)); 3813 Value *SrcPtr = getPointerOperand(Src); 3814 Value *DstPtr = getPointerOperand(Dst); 3815 assert(underlyingObjectsAlias(AA, F->getParent()->getDataLayout(), DstPtr, 3816 SrcPtr) == MustAlias); 3817 3818 // establish loop nesting levels 3819 establishNestingLevels(Src, Dst); 3820 3821 FullDependence Result(Src, Dst, false, CommonLevels); 3822 3823 // See if there are GEPs we can use. 3824 bool UsefulGEP = false; 3825 GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr); 3826 GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr); 3827 if (SrcGEP && DstGEP && 3828 SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()) { 3829 const SCEV *SrcPtrSCEV = SE->getSCEV(SrcGEP->getPointerOperand()); 3830 const SCEV *DstPtrSCEV = SE->getSCEV(DstGEP->getPointerOperand()); 3831 UsefulGEP = isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) && 3832 isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent())) && 3833 (SrcGEP->getNumOperands() == DstGEP->getNumOperands()); 3834 } 3835 unsigned Pairs = UsefulGEP ? SrcGEP->idx_end() - SrcGEP->idx_begin() : 1; 3836 SmallVector<Subscript, 4> Pair(Pairs); 3837 if (UsefulGEP) { 3838 unsigned P = 0; 3839 for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(), 3840 SrcEnd = SrcGEP->idx_end(), 3841 DstIdx = DstGEP->idx_begin(); 3842 SrcIdx != SrcEnd; 3843 ++SrcIdx, ++DstIdx, ++P) { 3844 Pair[P].Src = SE->getSCEV(*SrcIdx); 3845 Pair[P].Dst = SE->getSCEV(*DstIdx); 3846 } 3847 } 3848 else { 3849 const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); 3850 const SCEV *DstSCEV = SE->getSCEV(DstPtr); 3851 Pair[0].Src = SrcSCEV; 3852 Pair[0].Dst = DstSCEV; 3853 } 3854 3855 if (Delinearize && Pairs == 1 && CommonLevels > 1 && 3856 tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair, SE->getElementSize(Src))) { 3857 DEBUG(dbgs() << " delinerized GEP\n"); 3858 Pairs = Pair.size(); 3859 } 3860 3861 for (unsigned P = 0; P < Pairs; ++P) { 3862 Pair[P].Loops.resize(MaxLevels + 1); 3863 Pair[P].GroupLoops.resize(MaxLevels + 1); 3864 Pair[P].Group.resize(Pairs); 3865 removeMatchingExtensions(&Pair[P]); 3866 Pair[P].Classification = 3867 classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), 3868 Pair[P].Dst, LI->getLoopFor(Dst->getParent()), 3869 Pair[P].Loops); 3870 Pair[P].GroupLoops = Pair[P].Loops; 3871 Pair[P].Group.set(P); 3872 } 3873 3874 SmallBitVector Separable(Pairs); 3875 SmallBitVector Coupled(Pairs); 3876 3877 // partition subscripts into separable and minimally-coupled groups 3878 for (unsigned SI = 0; SI < Pairs; ++SI) { 3879 if (Pair[SI].Classification == Subscript::NonLinear) { 3880 // ignore these, but collect loops for later 3881 collectCommonLoops(Pair[SI].Src, 3882 LI->getLoopFor(Src->getParent()), 3883 Pair[SI].Loops); 3884 collectCommonLoops(Pair[SI].Dst, 3885 LI->getLoopFor(Dst->getParent()), 3886 Pair[SI].Loops); 3887 Result.Consistent = false; 3888 } 3889 else if (Pair[SI].Classification == Subscript::ZIV) 3890 Separable.set(SI); 3891 else { 3892 // SIV, RDIV, or MIV, so check for coupled group 3893 bool Done = true; 3894 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { 3895 SmallBitVector Intersection = Pair[SI].GroupLoops; 3896 Intersection &= Pair[SJ].GroupLoops; 3897 if (Intersection.any()) { 3898 // accumulate set of all the loops in group 3899 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; 3900 // accumulate set of all subscripts in group 3901 Pair[SJ].Group |= Pair[SI].Group; 3902 Done = false; 3903 } 3904 } 3905 if (Done) { 3906 if (Pair[SI].Group.count() == 1) 3907 Separable.set(SI); 3908 else 3909 Coupled.set(SI); 3910 } 3911 } 3912 } 3913 3914 Constraint NewConstraint; 3915 NewConstraint.setAny(SE); 3916 3917 // test separable subscripts 3918 for (int SI = Separable.find_first(); SI >= 0; SI = Separable.find_next(SI)) { 3919 switch (Pair[SI].Classification) { 3920 case Subscript::SIV: { 3921 unsigned Level; 3922 const SCEV *SplitIter = nullptr; 3923 (void) testSIV(Pair[SI].Src, Pair[SI].Dst, Level, 3924 Result, NewConstraint, SplitIter); 3925 if (Level == SplitLevel) { 3926 assert(SplitIter != nullptr); 3927 return SplitIter; 3928 } 3929 break; 3930 } 3931 case Subscript::ZIV: 3932 case Subscript::RDIV: 3933 case Subscript::MIV: 3934 break; 3935 default: 3936 llvm_unreachable("subscript has unexpected classification"); 3937 } 3938 } 3939 3940 if (Coupled.count()) { 3941 // test coupled subscript groups 3942 SmallVector<Constraint, 4> Constraints(MaxLevels + 1); 3943 for (unsigned II = 0; II <= MaxLevels; ++II) 3944 Constraints[II].setAny(SE); 3945 for (int SI = Coupled.find_first(); SI >= 0; SI = Coupled.find_next(SI)) { 3946 SmallBitVector Group(Pair[SI].Group); 3947 SmallBitVector Sivs(Pairs); 3948 SmallBitVector Mivs(Pairs); 3949 SmallBitVector ConstrainedLevels(MaxLevels + 1); 3950 for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) { 3951 if (Pair[SJ].Classification == Subscript::SIV) 3952 Sivs.set(SJ); 3953 else 3954 Mivs.set(SJ); 3955 } 3956 while (Sivs.any()) { 3957 bool Changed = false; 3958 for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) { 3959 // SJ is an SIV subscript that's part of the current coupled group 3960 unsigned Level; 3961 const SCEV *SplitIter = nullptr; 3962 (void) testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, 3963 Result, NewConstraint, SplitIter); 3964 if (Level == SplitLevel && SplitIter) 3965 return SplitIter; 3966 ConstrainedLevels.set(Level); 3967 if (intersectConstraints(&Constraints[Level], &NewConstraint)) 3968 Changed = true; 3969 Sivs.reset(SJ); 3970 } 3971 if (Changed) { 3972 // propagate, possibly creating new SIVs and ZIVs 3973 for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { 3974 // SJ is an MIV subscript that's part of the current coupled group 3975 if (propagate(Pair[SJ].Src, Pair[SJ].Dst, 3976 Pair[SJ].Loops, Constraints, Result.Consistent)) { 3977 Pair[SJ].Classification = 3978 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), 3979 Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), 3980 Pair[SJ].Loops); 3981 switch (Pair[SJ].Classification) { 3982 case Subscript::ZIV: 3983 Mivs.reset(SJ); 3984 break; 3985 case Subscript::SIV: 3986 Sivs.set(SJ); 3987 Mivs.reset(SJ); 3988 break; 3989 case Subscript::RDIV: 3990 case Subscript::MIV: 3991 break; 3992 default: 3993 llvm_unreachable("bad subscript classification"); 3994 } 3995 } 3996 } 3997 } 3998 } 3999 } 4000 } 4001 llvm_unreachable("somehow reached end of routine"); 4002 return nullptr; 4003 } 4004