1 //===-- DependenceAnalysis.cpp - DA Implementation --------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // DependenceAnalysis is an LLVM pass that analyses dependences between memory 11 // accesses. Currently, it is an (incomplete) implementation of the approach 12 // described in 13 // 14 // Practical Dependence Testing 15 // Goff, Kennedy, Tseng 16 // PLDI 1991 17 // 18 // There's a single entry point that analyzes the dependence between a pair 19 // of memory references in a function, returning either NULL, for no dependence, 20 // or a more-or-less detailed description of the dependence between them. 21 // 22 // Currently, the implementation cannot propagate constraints between 23 // coupled RDIV subscripts and lacks a multi-subscript MIV test. 24 // Both of these are conservative weaknesses; 25 // that is, not a source of correctness problems. 26 // 27 // The implementation depends on the GEP instruction to differentiate 28 // subscripts. Since Clang linearizes some array subscripts, the dependence 29 // analysis is using SCEV->delinearize to recover the representation of multiple 30 // subscripts, and thus avoid the more expensive and less precise MIV tests. The 31 // delinearization is controlled by the flag -da-delinearize. 32 // 33 // We should pay some careful attention to the possibility of integer overflow 34 // in the implementation of the various tests. This could happen with Add, 35 // Subtract, or Multiply, with both APInt's and SCEV's. 36 // 37 // Some non-linear subscript pairs can be handled by the GCD test 38 // (and perhaps other tests). 39 // Should explore how often these things occur. 40 // 41 // Finally, it seems like certain test cases expose weaknesses in the SCEV 42 // simplification, especially in the handling of sign and zero extensions. 43 // It could be useful to spend time exploring these. 44 // 45 // Please note that this is work in progress and the interface is subject to 46 // change. 47 // 48 //===----------------------------------------------------------------------===// 49 // // 50 // In memory of Ken Kennedy, 1945 - 2007 // 51 // // 52 //===----------------------------------------------------------------------===// 53 54 #include "llvm/Analysis/DependenceAnalysis.h" 55 #include "llvm/ADT/STLExtras.h" 56 #include "llvm/ADT/Statistic.h" 57 #include "llvm/Analysis/AliasAnalysis.h" 58 #include "llvm/Analysis/LoopInfo.h" 59 #include "llvm/Analysis/ScalarEvolution.h" 60 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 61 #include "llvm/Analysis/ValueTracking.h" 62 #include "llvm/IR/InstIterator.h" 63 #include "llvm/IR/Operator.h" 64 #include "llvm/Support/CommandLine.h" 65 #include "llvm/Support/Debug.h" 66 #include "llvm/Support/ErrorHandling.h" 67 #include "llvm/Support/raw_ostream.h" 68 69 using namespace llvm; 70 71 #define DEBUG_TYPE "da" 72 73 //===----------------------------------------------------------------------===// 74 // statistics 75 76 STATISTIC(TotalArrayPairs, "Array pairs tested"); 77 STATISTIC(SeparableSubscriptPairs, "Separable subscript pairs"); 78 STATISTIC(CoupledSubscriptPairs, "Coupled subscript pairs"); 79 STATISTIC(NonlinearSubscriptPairs, "Nonlinear subscript pairs"); 80 STATISTIC(ZIVapplications, "ZIV applications"); 81 STATISTIC(ZIVindependence, "ZIV independence"); 82 STATISTIC(StrongSIVapplications, "Strong SIV applications"); 83 STATISTIC(StrongSIVsuccesses, "Strong SIV successes"); 84 STATISTIC(StrongSIVindependence, "Strong SIV independence"); 85 STATISTIC(WeakCrossingSIVapplications, "Weak-Crossing SIV applications"); 86 STATISTIC(WeakCrossingSIVsuccesses, "Weak-Crossing SIV successes"); 87 STATISTIC(WeakCrossingSIVindependence, "Weak-Crossing SIV independence"); 88 STATISTIC(ExactSIVapplications, "Exact SIV applications"); 89 STATISTIC(ExactSIVsuccesses, "Exact SIV successes"); 90 STATISTIC(ExactSIVindependence, "Exact SIV independence"); 91 STATISTIC(WeakZeroSIVapplications, "Weak-Zero SIV applications"); 92 STATISTIC(WeakZeroSIVsuccesses, "Weak-Zero SIV successes"); 93 STATISTIC(WeakZeroSIVindependence, "Weak-Zero SIV independence"); 94 STATISTIC(ExactRDIVapplications, "Exact RDIV applications"); 95 STATISTIC(ExactRDIVindependence, "Exact RDIV independence"); 96 STATISTIC(SymbolicRDIVapplications, "Symbolic RDIV applications"); 97 STATISTIC(SymbolicRDIVindependence, "Symbolic RDIV independence"); 98 STATISTIC(DeltaApplications, "Delta applications"); 99 STATISTIC(DeltaSuccesses, "Delta successes"); 100 STATISTIC(DeltaIndependence, "Delta independence"); 101 STATISTIC(DeltaPropagations, "Delta propagations"); 102 STATISTIC(GCDapplications, "GCD applications"); 103 STATISTIC(GCDsuccesses, "GCD successes"); 104 STATISTIC(GCDindependence, "GCD independence"); 105 STATISTIC(BanerjeeApplications, "Banerjee applications"); 106 STATISTIC(BanerjeeIndependence, "Banerjee independence"); 107 STATISTIC(BanerjeeSuccesses, "Banerjee successes"); 108 109 static cl::opt<bool> 110 Delinearize("da-delinearize", cl::init(false), cl::Hidden, cl::ZeroOrMore, 111 cl::desc("Try to delinearize array references.")); 112 113 //===----------------------------------------------------------------------===// 114 // basics 115 116 INITIALIZE_PASS_BEGIN(DependenceAnalysis, "da", 117 "Dependence Analysis", true, true) 118 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 119 INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) 120 INITIALIZE_AG_DEPENDENCY(AliasAnalysis) 121 INITIALIZE_PASS_END(DependenceAnalysis, "da", 122 "Dependence Analysis", true, true) 123 124 char DependenceAnalysis::ID = 0; 125 126 127 FunctionPass *llvm::createDependenceAnalysisPass() { 128 return new DependenceAnalysis(); 129 } 130 131 132 bool DependenceAnalysis::runOnFunction(Function &F) { 133 this->F = &F; 134 AA = &getAnalysis<AliasAnalysis>(); 135 SE = &getAnalysis<ScalarEvolution>(); 136 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 137 return false; 138 } 139 140 141 void DependenceAnalysis::releaseMemory() { 142 } 143 144 145 void DependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { 146 AU.setPreservesAll(); 147 AU.addRequiredTransitive<AliasAnalysis>(); 148 AU.addRequiredTransitive<ScalarEvolution>(); 149 AU.addRequiredTransitive<LoopInfoWrapperPass>(); 150 } 151 152 153 // Used to test the dependence analyzer. 154 // Looks through the function, noting loads and stores. 155 // Calls depends() on every possible pair and prints out the result. 156 // Ignores all other instructions. 157 static 158 void dumpExampleDependence(raw_ostream &OS, Function *F, 159 DependenceAnalysis *DA) { 160 for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F); 161 SrcI != SrcE; ++SrcI) { 162 if (isa<StoreInst>(*SrcI) || isa<LoadInst>(*SrcI)) { 163 for (inst_iterator DstI = SrcI, DstE = inst_end(F); 164 DstI != DstE; ++DstI) { 165 if (isa<StoreInst>(*DstI) || isa<LoadInst>(*DstI)) { 166 OS << "da analyze - "; 167 if (auto D = DA->depends(&*SrcI, &*DstI, true)) { 168 D->dump(OS); 169 for (unsigned Level = 1; Level <= D->getLevels(); Level++) { 170 if (D->isSplitable(Level)) { 171 OS << "da analyze - split level = " << Level; 172 OS << ", iteration = " << *DA->getSplitIteration(*D, Level); 173 OS << "!\n"; 174 } 175 } 176 } 177 else 178 OS << "none!\n"; 179 } 180 } 181 } 182 } 183 } 184 185 186 void DependenceAnalysis::print(raw_ostream &OS, const Module*) const { 187 dumpExampleDependence(OS, F, const_cast<DependenceAnalysis *>(this)); 188 } 189 190 //===----------------------------------------------------------------------===// 191 // Dependence methods 192 193 // Returns true if this is an input dependence. 194 bool Dependence::isInput() const { 195 return Src->mayReadFromMemory() && Dst->mayReadFromMemory(); 196 } 197 198 199 // Returns true if this is an output dependence. 200 bool Dependence::isOutput() const { 201 return Src->mayWriteToMemory() && Dst->mayWriteToMemory(); 202 } 203 204 205 // Returns true if this is an flow (aka true) dependence. 206 bool Dependence::isFlow() const { 207 return Src->mayWriteToMemory() && Dst->mayReadFromMemory(); 208 } 209 210 211 // Returns true if this is an anti dependence. 212 bool Dependence::isAnti() const { 213 return Src->mayReadFromMemory() && Dst->mayWriteToMemory(); 214 } 215 216 217 // Returns true if a particular level is scalar; that is, 218 // if no subscript in the source or destination mention the induction 219 // variable associated with the loop at this level. 220 // Leave this out of line, so it will serve as a virtual method anchor 221 bool Dependence::isScalar(unsigned level) const { 222 return false; 223 } 224 225 226 //===----------------------------------------------------------------------===// 227 // FullDependence methods 228 229 FullDependence::FullDependence(Instruction *Source, Instruction *Destination, 230 bool PossiblyLoopIndependent, 231 unsigned CommonLevels) 232 : Dependence(Source, Destination), Levels(CommonLevels), 233 LoopIndependent(PossiblyLoopIndependent) { 234 Consistent = true; 235 DV = CommonLevels ? new DVEntry[CommonLevels] : nullptr; 236 } 237 238 // The rest are simple getters that hide the implementation. 239 240 // getDirection - Returns the direction associated with a particular level. 241 unsigned FullDependence::getDirection(unsigned Level) const { 242 assert(0 < Level && Level <= Levels && "Level out of range"); 243 return DV[Level - 1].Direction; 244 } 245 246 247 // Returns the distance (or NULL) associated with a particular level. 248 const SCEV *FullDependence::getDistance(unsigned Level) const { 249 assert(0 < Level && Level <= Levels && "Level out of range"); 250 return DV[Level - 1].Distance; 251 } 252 253 254 // Returns true if a particular level is scalar; that is, 255 // if no subscript in the source or destination mention the induction 256 // variable associated with the loop at this level. 257 bool FullDependence::isScalar(unsigned Level) const { 258 assert(0 < Level && Level <= Levels && "Level out of range"); 259 return DV[Level - 1].Scalar; 260 } 261 262 263 // Returns true if peeling the first iteration from this loop 264 // will break this dependence. 265 bool FullDependence::isPeelFirst(unsigned Level) const { 266 assert(0 < Level && Level <= Levels && "Level out of range"); 267 return DV[Level - 1].PeelFirst; 268 } 269 270 271 // Returns true if peeling the last iteration from this loop 272 // will break this dependence. 273 bool FullDependence::isPeelLast(unsigned Level) const { 274 assert(0 < Level && Level <= Levels && "Level out of range"); 275 return DV[Level - 1].PeelLast; 276 } 277 278 279 // Returns true if splitting this loop will break the dependence. 280 bool FullDependence::isSplitable(unsigned Level) const { 281 assert(0 < Level && Level <= Levels && "Level out of range"); 282 return DV[Level - 1].Splitable; 283 } 284 285 286 //===----------------------------------------------------------------------===// 287 // DependenceAnalysis::Constraint methods 288 289 // If constraint is a point <X, Y>, returns X. 290 // Otherwise assert. 291 const SCEV *DependenceAnalysis::Constraint::getX() const { 292 assert(Kind == Point && "Kind should be Point"); 293 return A; 294 } 295 296 297 // If constraint is a point <X, Y>, returns Y. 298 // Otherwise assert. 299 const SCEV *DependenceAnalysis::Constraint::getY() const { 300 assert(Kind == Point && "Kind should be Point"); 301 return B; 302 } 303 304 305 // If constraint is a line AX + BY = C, returns A. 306 // Otherwise assert. 307 const SCEV *DependenceAnalysis::Constraint::getA() const { 308 assert((Kind == Line || Kind == Distance) && 309 "Kind should be Line (or Distance)"); 310 return A; 311 } 312 313 314 // If constraint is a line AX + BY = C, returns B. 315 // Otherwise assert. 316 const SCEV *DependenceAnalysis::Constraint::getB() const { 317 assert((Kind == Line || Kind == Distance) && 318 "Kind should be Line (or Distance)"); 319 return B; 320 } 321 322 323 // If constraint is a line AX + BY = C, returns C. 324 // Otherwise assert. 325 const SCEV *DependenceAnalysis::Constraint::getC() const { 326 assert((Kind == Line || Kind == Distance) && 327 "Kind should be Line (or Distance)"); 328 return C; 329 } 330 331 332 // If constraint is a distance, returns D. 333 // Otherwise assert. 334 const SCEV *DependenceAnalysis::Constraint::getD() const { 335 assert(Kind == Distance && "Kind should be Distance"); 336 return SE->getNegativeSCEV(C); 337 } 338 339 340 // Returns the loop associated with this constraint. 341 const Loop *DependenceAnalysis::Constraint::getAssociatedLoop() const { 342 assert((Kind == Distance || Kind == Line || Kind == Point) && 343 "Kind should be Distance, Line, or Point"); 344 return AssociatedLoop; 345 } 346 347 348 void DependenceAnalysis::Constraint::setPoint(const SCEV *X, 349 const SCEV *Y, 350 const Loop *CurLoop) { 351 Kind = Point; 352 A = X; 353 B = Y; 354 AssociatedLoop = CurLoop; 355 } 356 357 358 void DependenceAnalysis::Constraint::setLine(const SCEV *AA, 359 const SCEV *BB, 360 const SCEV *CC, 361 const Loop *CurLoop) { 362 Kind = Line; 363 A = AA; 364 B = BB; 365 C = CC; 366 AssociatedLoop = CurLoop; 367 } 368 369 370 void DependenceAnalysis::Constraint::setDistance(const SCEV *D, 371 const Loop *CurLoop) { 372 Kind = Distance; 373 A = SE->getConstant(D->getType(), 1); 374 B = SE->getNegativeSCEV(A); 375 C = SE->getNegativeSCEV(D); 376 AssociatedLoop = CurLoop; 377 } 378 379 380 void DependenceAnalysis::Constraint::setEmpty() { 381 Kind = Empty; 382 } 383 384 385 void DependenceAnalysis::Constraint::setAny(ScalarEvolution *NewSE) { 386 SE = NewSE; 387 Kind = Any; 388 } 389 390 391 // For debugging purposes. Dumps the constraint out to OS. 392 void DependenceAnalysis::Constraint::dump(raw_ostream &OS) const { 393 if (isEmpty()) 394 OS << " Empty\n"; 395 else if (isAny()) 396 OS << " Any\n"; 397 else if (isPoint()) 398 OS << " Point is <" << *getX() << ", " << *getY() << ">\n"; 399 else if (isDistance()) 400 OS << " Distance is " << *getD() << 401 " (" << *getA() << "*X + " << *getB() << "*Y = " << *getC() << ")\n"; 402 else if (isLine()) 403 OS << " Line is " << *getA() << "*X + " << 404 *getB() << "*Y = " << *getC() << "\n"; 405 else 406 llvm_unreachable("unknown constraint type in Constraint::dump"); 407 } 408 409 410 // Updates X with the intersection 411 // of the Constraints X and Y. Returns true if X has changed. 412 // Corresponds to Figure 4 from the paper 413 // 414 // Practical Dependence Testing 415 // Goff, Kennedy, Tseng 416 // PLDI 1991 417 bool DependenceAnalysis::intersectConstraints(Constraint *X, 418 const Constraint *Y) { 419 ++DeltaApplications; 420 DEBUG(dbgs() << "\tintersect constraints\n"); 421 DEBUG(dbgs() << "\t X ="; X->dump(dbgs())); 422 DEBUG(dbgs() << "\t Y ="; Y->dump(dbgs())); 423 assert(!Y->isPoint() && "Y must not be a Point"); 424 if (X->isAny()) { 425 if (Y->isAny()) 426 return false; 427 *X = *Y; 428 return true; 429 } 430 if (X->isEmpty()) 431 return false; 432 if (Y->isEmpty()) { 433 X->setEmpty(); 434 return true; 435 } 436 437 if (X->isDistance() && Y->isDistance()) { 438 DEBUG(dbgs() << "\t intersect 2 distances\n"); 439 if (isKnownPredicate(CmpInst::ICMP_EQ, X->getD(), Y->getD())) 440 return false; 441 if (isKnownPredicate(CmpInst::ICMP_NE, X->getD(), Y->getD())) { 442 X->setEmpty(); 443 ++DeltaSuccesses; 444 return true; 445 } 446 // Hmmm, interesting situation. 447 // I guess if either is constant, keep it and ignore the other. 448 if (isa<SCEVConstant>(Y->getD())) { 449 *X = *Y; 450 return true; 451 } 452 return false; 453 } 454 455 // At this point, the pseudo-code in Figure 4 of the paper 456 // checks if (X->isPoint() && Y->isPoint()). 457 // This case can't occur in our implementation, 458 // since a Point can only arise as the result of intersecting 459 // two Line constraints, and the right-hand value, Y, is never 460 // the result of an intersection. 461 assert(!(X->isPoint() && Y->isPoint()) && 462 "We shouldn't ever see X->isPoint() && Y->isPoint()"); 463 464 if (X->isLine() && Y->isLine()) { 465 DEBUG(dbgs() << "\t intersect 2 lines\n"); 466 const SCEV *Prod1 = SE->getMulExpr(X->getA(), Y->getB()); 467 const SCEV *Prod2 = SE->getMulExpr(X->getB(), Y->getA()); 468 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) { 469 // slopes are equal, so lines are parallel 470 DEBUG(dbgs() << "\t\tsame slope\n"); 471 Prod1 = SE->getMulExpr(X->getC(), Y->getB()); 472 Prod2 = SE->getMulExpr(X->getB(), Y->getC()); 473 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) 474 return false; 475 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { 476 X->setEmpty(); 477 ++DeltaSuccesses; 478 return true; 479 } 480 return false; 481 } 482 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { 483 // slopes differ, so lines intersect 484 DEBUG(dbgs() << "\t\tdifferent slopes\n"); 485 const SCEV *C1B2 = SE->getMulExpr(X->getC(), Y->getB()); 486 const SCEV *C1A2 = SE->getMulExpr(X->getC(), Y->getA()); 487 const SCEV *C2B1 = SE->getMulExpr(Y->getC(), X->getB()); 488 const SCEV *C2A1 = SE->getMulExpr(Y->getC(), X->getA()); 489 const SCEV *A1B2 = SE->getMulExpr(X->getA(), Y->getB()); 490 const SCEV *A2B1 = SE->getMulExpr(Y->getA(), X->getB()); 491 const SCEVConstant *C1A2_C2A1 = 492 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1A2, C2A1)); 493 const SCEVConstant *C1B2_C2B1 = 494 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1B2, C2B1)); 495 const SCEVConstant *A1B2_A2B1 = 496 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A1B2, A2B1)); 497 const SCEVConstant *A2B1_A1B2 = 498 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A2B1, A1B2)); 499 if (!C1B2_C2B1 || !C1A2_C2A1 || 500 !A1B2_A2B1 || !A2B1_A1B2) 501 return false; 502 APInt Xtop = C1B2_C2B1->getValue()->getValue(); 503 APInt Xbot = A1B2_A2B1->getValue()->getValue(); 504 APInt Ytop = C1A2_C2A1->getValue()->getValue(); 505 APInt Ybot = A2B1_A1B2->getValue()->getValue(); 506 DEBUG(dbgs() << "\t\tXtop = " << Xtop << "\n"); 507 DEBUG(dbgs() << "\t\tXbot = " << Xbot << "\n"); 508 DEBUG(dbgs() << "\t\tYtop = " << Ytop << "\n"); 509 DEBUG(dbgs() << "\t\tYbot = " << Ybot << "\n"); 510 APInt Xq = Xtop; // these need to be initialized, even 511 APInt Xr = Xtop; // though they're just going to be overwritten 512 APInt::sdivrem(Xtop, Xbot, Xq, Xr); 513 APInt Yq = Ytop; 514 APInt Yr = Ytop; 515 APInt::sdivrem(Ytop, Ybot, Yq, Yr); 516 if (Xr != 0 || Yr != 0) { 517 X->setEmpty(); 518 ++DeltaSuccesses; 519 return true; 520 } 521 DEBUG(dbgs() << "\t\tX = " << Xq << ", Y = " << Yq << "\n"); 522 if (Xq.slt(0) || Yq.slt(0)) { 523 X->setEmpty(); 524 ++DeltaSuccesses; 525 return true; 526 } 527 if (const SCEVConstant *CUB = 528 collectConstantUpperBound(X->getAssociatedLoop(), Prod1->getType())) { 529 APInt UpperBound = CUB->getValue()->getValue(); 530 DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n"); 531 if (Xq.sgt(UpperBound) || Yq.sgt(UpperBound)) { 532 X->setEmpty(); 533 ++DeltaSuccesses; 534 return true; 535 } 536 } 537 X->setPoint(SE->getConstant(Xq), 538 SE->getConstant(Yq), 539 X->getAssociatedLoop()); 540 ++DeltaSuccesses; 541 return true; 542 } 543 return false; 544 } 545 546 // if (X->isLine() && Y->isPoint()) This case can't occur. 547 assert(!(X->isLine() && Y->isPoint()) && "This case should never occur"); 548 549 if (X->isPoint() && Y->isLine()) { 550 DEBUG(dbgs() << "\t intersect Point and Line\n"); 551 const SCEV *A1X1 = SE->getMulExpr(Y->getA(), X->getX()); 552 const SCEV *B1Y1 = SE->getMulExpr(Y->getB(), X->getY()); 553 const SCEV *Sum = SE->getAddExpr(A1X1, B1Y1); 554 if (isKnownPredicate(CmpInst::ICMP_EQ, Sum, Y->getC())) 555 return false; 556 if (isKnownPredicate(CmpInst::ICMP_NE, Sum, Y->getC())) { 557 X->setEmpty(); 558 ++DeltaSuccesses; 559 return true; 560 } 561 return false; 562 } 563 564 llvm_unreachable("shouldn't reach the end of Constraint intersection"); 565 return false; 566 } 567 568 569 //===----------------------------------------------------------------------===// 570 // DependenceAnalysis methods 571 572 // For debugging purposes. Dumps a dependence to OS. 573 void Dependence::dump(raw_ostream &OS) const { 574 bool Splitable = false; 575 if (isConfused()) 576 OS << "confused"; 577 else { 578 if (isConsistent()) 579 OS << "consistent "; 580 if (isFlow()) 581 OS << "flow"; 582 else if (isOutput()) 583 OS << "output"; 584 else if (isAnti()) 585 OS << "anti"; 586 else if (isInput()) 587 OS << "input"; 588 unsigned Levels = getLevels(); 589 OS << " ["; 590 for (unsigned II = 1; II <= Levels; ++II) { 591 if (isSplitable(II)) 592 Splitable = true; 593 if (isPeelFirst(II)) 594 OS << 'p'; 595 const SCEV *Distance = getDistance(II); 596 if (Distance) 597 OS << *Distance; 598 else if (isScalar(II)) 599 OS << "S"; 600 else { 601 unsigned Direction = getDirection(II); 602 if (Direction == DVEntry::ALL) 603 OS << "*"; 604 else { 605 if (Direction & DVEntry::LT) 606 OS << "<"; 607 if (Direction & DVEntry::EQ) 608 OS << "="; 609 if (Direction & DVEntry::GT) 610 OS << ">"; 611 } 612 } 613 if (isPeelLast(II)) 614 OS << 'p'; 615 if (II < Levels) 616 OS << " "; 617 } 618 if (isLoopIndependent()) 619 OS << "|<"; 620 OS << "]"; 621 if (Splitable) 622 OS << " splitable"; 623 } 624 OS << "!\n"; 625 } 626 627 628 629 static 630 AliasAnalysis::AliasResult underlyingObjectsAlias(AliasAnalysis *AA, 631 const Value *A, 632 const Value *B) { 633 const Value *AObj = GetUnderlyingObject(A); 634 const Value *BObj = GetUnderlyingObject(B); 635 return AA->alias(AObj, AA->getTypeStoreSize(AObj->getType()), 636 BObj, AA->getTypeStoreSize(BObj->getType())); 637 } 638 639 640 // Returns true if the load or store can be analyzed. Atomic and volatile 641 // operations have properties which this analysis does not understand. 642 static 643 bool isLoadOrStore(const Instruction *I) { 644 if (const LoadInst *LI = dyn_cast<LoadInst>(I)) 645 return LI->isUnordered(); 646 else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) 647 return SI->isUnordered(); 648 return false; 649 } 650 651 652 static 653 Value *getPointerOperand(Instruction *I) { 654 if (LoadInst *LI = dyn_cast<LoadInst>(I)) 655 return LI->getPointerOperand(); 656 if (StoreInst *SI = dyn_cast<StoreInst>(I)) 657 return SI->getPointerOperand(); 658 llvm_unreachable("Value is not load or store instruction"); 659 return nullptr; 660 } 661 662 663 // Examines the loop nesting of the Src and Dst 664 // instructions and establishes their shared loops. Sets the variables 665 // CommonLevels, SrcLevels, and MaxLevels. 666 // The source and destination instructions needn't be contained in the same 667 // loop. The routine establishNestingLevels finds the level of most deeply 668 // nested loop that contains them both, CommonLevels. An instruction that's 669 // not contained in a loop is at level = 0. MaxLevels is equal to the level 670 // of the source plus the level of the destination, minus CommonLevels. 671 // This lets us allocate vectors MaxLevels in length, with room for every 672 // distinct loop referenced in both the source and destination subscripts. 673 // The variable SrcLevels is the nesting depth of the source instruction. 674 // It's used to help calculate distinct loops referenced by the destination. 675 // Here's the map from loops to levels: 676 // 0 - unused 677 // 1 - outermost common loop 678 // ... - other common loops 679 // CommonLevels - innermost common loop 680 // ... - loops containing Src but not Dst 681 // SrcLevels - innermost loop containing Src but not Dst 682 // ... - loops containing Dst but not Src 683 // MaxLevels - innermost loops containing Dst but not Src 684 // Consider the follow code fragment: 685 // for (a = ...) { 686 // for (b = ...) { 687 // for (c = ...) { 688 // for (d = ...) { 689 // A[] = ...; 690 // } 691 // } 692 // for (e = ...) { 693 // for (f = ...) { 694 // for (g = ...) { 695 // ... = A[]; 696 // } 697 // } 698 // } 699 // } 700 // } 701 // If we're looking at the possibility of a dependence between the store 702 // to A (the Src) and the load from A (the Dst), we'll note that they 703 // have 2 loops in common, so CommonLevels will equal 2 and the direction 704 // vector for Result will have 2 entries. SrcLevels = 4 and MaxLevels = 7. 705 // A map from loop names to loop numbers would look like 706 // a - 1 707 // b - 2 = CommonLevels 708 // c - 3 709 // d - 4 = SrcLevels 710 // e - 5 711 // f - 6 712 // g - 7 = MaxLevels 713 void DependenceAnalysis::establishNestingLevels(const Instruction *Src, 714 const Instruction *Dst) { 715 const BasicBlock *SrcBlock = Src->getParent(); 716 const BasicBlock *DstBlock = Dst->getParent(); 717 unsigned SrcLevel = LI->getLoopDepth(SrcBlock); 718 unsigned DstLevel = LI->getLoopDepth(DstBlock); 719 const Loop *SrcLoop = LI->getLoopFor(SrcBlock); 720 const Loop *DstLoop = LI->getLoopFor(DstBlock); 721 SrcLevels = SrcLevel; 722 MaxLevels = SrcLevel + DstLevel; 723 while (SrcLevel > DstLevel) { 724 SrcLoop = SrcLoop->getParentLoop(); 725 SrcLevel--; 726 } 727 while (DstLevel > SrcLevel) { 728 DstLoop = DstLoop->getParentLoop(); 729 DstLevel--; 730 } 731 while (SrcLoop != DstLoop) { 732 SrcLoop = SrcLoop->getParentLoop(); 733 DstLoop = DstLoop->getParentLoop(); 734 SrcLevel--; 735 } 736 CommonLevels = SrcLevel; 737 MaxLevels -= CommonLevels; 738 } 739 740 741 // Given one of the loops containing the source, return 742 // its level index in our numbering scheme. 743 unsigned DependenceAnalysis::mapSrcLoop(const Loop *SrcLoop) const { 744 return SrcLoop->getLoopDepth(); 745 } 746 747 748 // Given one of the loops containing the destination, 749 // return its level index in our numbering scheme. 750 unsigned DependenceAnalysis::mapDstLoop(const Loop *DstLoop) const { 751 unsigned D = DstLoop->getLoopDepth(); 752 if (D > CommonLevels) 753 return D - CommonLevels + SrcLevels; 754 else 755 return D; 756 } 757 758 759 // Returns true if Expression is loop invariant in LoopNest. 760 bool DependenceAnalysis::isLoopInvariant(const SCEV *Expression, 761 const Loop *LoopNest) const { 762 if (!LoopNest) 763 return true; 764 return SE->isLoopInvariant(Expression, LoopNest) && 765 isLoopInvariant(Expression, LoopNest->getParentLoop()); 766 } 767 768 769 770 // Finds the set of loops from the LoopNest that 771 // have a level <= CommonLevels and are referred to by the SCEV Expression. 772 void DependenceAnalysis::collectCommonLoops(const SCEV *Expression, 773 const Loop *LoopNest, 774 SmallBitVector &Loops) const { 775 while (LoopNest) { 776 unsigned Level = LoopNest->getLoopDepth(); 777 if (Level <= CommonLevels && !SE->isLoopInvariant(Expression, LoopNest)) 778 Loops.set(Level); 779 LoopNest = LoopNest->getParentLoop(); 780 } 781 } 782 783 void DependenceAnalysis::unifySubscriptType(Subscript *Pair) { 784 const SCEV *Src = Pair->Src; 785 const SCEV *Dst = Pair->Dst; 786 IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType()); 787 IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType()); 788 if (SrcTy == nullptr || DstTy == nullptr) { 789 assert(SrcTy == DstTy && "This function only unify integer types and " 790 "expect Src and Dst share the same type " 791 "otherwise."); 792 return; 793 } 794 if (SrcTy->getBitWidth() > DstTy->getBitWidth()) { 795 // Sign-extend Dst to typeof(Src) if typeof(Src) is wider than typeof(Dst). 796 Pair->Dst = SE->getSignExtendExpr(Dst, SrcTy); 797 } else if (SrcTy->getBitWidth() < DstTy->getBitWidth()) { 798 // Sign-extend Src to typeof(Dst) if typeof(Dst) is wider than typeof(Src). 799 Pair->Src = SE->getSignExtendExpr(Src, DstTy); 800 } 801 } 802 803 // removeMatchingExtensions - Examines a subscript pair. 804 // If the source and destination are identically sign (or zero) 805 // extended, it strips off the extension in an effect to simplify 806 // the actual analysis. 807 void DependenceAnalysis::removeMatchingExtensions(Subscript *Pair) { 808 const SCEV *Src = Pair->Src; 809 const SCEV *Dst = Pair->Dst; 810 if ((isa<SCEVZeroExtendExpr>(Src) && isa<SCEVZeroExtendExpr>(Dst)) || 811 (isa<SCEVSignExtendExpr>(Src) && isa<SCEVSignExtendExpr>(Dst))) { 812 const SCEVCastExpr *SrcCast = cast<SCEVCastExpr>(Src); 813 const SCEVCastExpr *DstCast = cast<SCEVCastExpr>(Dst); 814 const SCEV *SrcCastOp = SrcCast->getOperand(); 815 const SCEV *DstCastOp = DstCast->getOperand(); 816 if (SrcCastOp->getType() == DstCastOp->getType()) { 817 Pair->Src = SrcCastOp; 818 Pair->Dst = DstCastOp; 819 } 820 } 821 } 822 823 824 // Examine the scev and return true iff it's linear. 825 // Collect any loops mentioned in the set of "Loops". 826 bool DependenceAnalysis::checkSrcSubscript(const SCEV *Src, 827 const Loop *LoopNest, 828 SmallBitVector &Loops) { 829 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Src); 830 if (!AddRec) 831 return isLoopInvariant(Src, LoopNest); 832 const SCEV *Start = AddRec->getStart(); 833 const SCEV *Step = AddRec->getStepRecurrence(*SE); 834 if (!isLoopInvariant(Step, LoopNest)) 835 return false; 836 Loops.set(mapSrcLoop(AddRec->getLoop())); 837 return checkSrcSubscript(Start, LoopNest, Loops); 838 } 839 840 841 842 // Examine the scev and return true iff it's linear. 843 // Collect any loops mentioned in the set of "Loops". 844 bool DependenceAnalysis::checkDstSubscript(const SCEV *Dst, 845 const Loop *LoopNest, 846 SmallBitVector &Loops) { 847 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Dst); 848 if (!AddRec) 849 return isLoopInvariant(Dst, LoopNest); 850 const SCEV *Start = AddRec->getStart(); 851 const SCEV *Step = AddRec->getStepRecurrence(*SE); 852 if (!isLoopInvariant(Step, LoopNest)) 853 return false; 854 Loops.set(mapDstLoop(AddRec->getLoop())); 855 return checkDstSubscript(Start, LoopNest, Loops); 856 } 857 858 859 // Examines the subscript pair (the Src and Dst SCEVs) 860 // and classifies it as either ZIV, SIV, RDIV, MIV, or Nonlinear. 861 // Collects the associated loops in a set. 862 DependenceAnalysis::Subscript::ClassificationKind 863 DependenceAnalysis::classifyPair(const SCEV *Src, const Loop *SrcLoopNest, 864 const SCEV *Dst, const Loop *DstLoopNest, 865 SmallBitVector &Loops) { 866 SmallBitVector SrcLoops(MaxLevels + 1); 867 SmallBitVector DstLoops(MaxLevels + 1); 868 if (!checkSrcSubscript(Src, SrcLoopNest, SrcLoops)) 869 return Subscript::NonLinear; 870 if (!checkDstSubscript(Dst, DstLoopNest, DstLoops)) 871 return Subscript::NonLinear; 872 Loops = SrcLoops; 873 Loops |= DstLoops; 874 unsigned N = Loops.count(); 875 if (N == 0) 876 return Subscript::ZIV; 877 if (N == 1) 878 return Subscript::SIV; 879 if (N == 2 && (SrcLoops.count() == 0 || 880 DstLoops.count() == 0 || 881 (SrcLoops.count() == 1 && DstLoops.count() == 1))) 882 return Subscript::RDIV; 883 return Subscript::MIV; 884 } 885 886 887 // A wrapper around SCEV::isKnownPredicate. 888 // Looks for cases where we're interested in comparing for equality. 889 // If both X and Y have been identically sign or zero extended, 890 // it strips off the (confusing) extensions before invoking 891 // SCEV::isKnownPredicate. Perhaps, someday, the ScalarEvolution package 892 // will be similarly updated. 893 // 894 // If SCEV::isKnownPredicate can't prove the predicate, 895 // we try simple subtraction, which seems to help in some cases 896 // involving symbolics. 897 bool DependenceAnalysis::isKnownPredicate(ICmpInst::Predicate Pred, 898 const SCEV *X, 899 const SCEV *Y) const { 900 if (Pred == CmpInst::ICMP_EQ || 901 Pred == CmpInst::ICMP_NE) { 902 if ((isa<SCEVSignExtendExpr>(X) && 903 isa<SCEVSignExtendExpr>(Y)) || 904 (isa<SCEVZeroExtendExpr>(X) && 905 isa<SCEVZeroExtendExpr>(Y))) { 906 const SCEVCastExpr *CX = cast<SCEVCastExpr>(X); 907 const SCEVCastExpr *CY = cast<SCEVCastExpr>(Y); 908 const SCEV *Xop = CX->getOperand(); 909 const SCEV *Yop = CY->getOperand(); 910 if (Xop->getType() == Yop->getType()) { 911 X = Xop; 912 Y = Yop; 913 } 914 } 915 } 916 if (SE->isKnownPredicate(Pred, X, Y)) 917 return true; 918 // If SE->isKnownPredicate can't prove the condition, 919 // we try the brute-force approach of subtracting 920 // and testing the difference. 921 // By testing with SE->isKnownPredicate first, we avoid 922 // the possibility of overflow when the arguments are constants. 923 const SCEV *Delta = SE->getMinusSCEV(X, Y); 924 switch (Pred) { 925 case CmpInst::ICMP_EQ: 926 return Delta->isZero(); 927 case CmpInst::ICMP_NE: 928 return SE->isKnownNonZero(Delta); 929 case CmpInst::ICMP_SGE: 930 return SE->isKnownNonNegative(Delta); 931 case CmpInst::ICMP_SLE: 932 return SE->isKnownNonPositive(Delta); 933 case CmpInst::ICMP_SGT: 934 return SE->isKnownPositive(Delta); 935 case CmpInst::ICMP_SLT: 936 return SE->isKnownNegative(Delta); 937 default: 938 llvm_unreachable("unexpected predicate in isKnownPredicate"); 939 } 940 } 941 942 943 // All subscripts are all the same type. 944 // Loop bound may be smaller (e.g., a char). 945 // Should zero extend loop bound, since it's always >= 0. 946 // This routine collects upper bound and extends if needed. 947 // Return null if no bound available. 948 const SCEV *DependenceAnalysis::collectUpperBound(const Loop *L, 949 Type *T) const { 950 if (SE->hasLoopInvariantBackedgeTakenCount(L)) { 951 const SCEV *UB = SE->getBackedgeTakenCount(L); 952 return SE->getNoopOrZeroExtend(UB, T); 953 } 954 return nullptr; 955 } 956 957 958 // Calls collectUpperBound(), then attempts to cast it to SCEVConstant. 959 // If the cast fails, returns NULL. 960 const SCEVConstant *DependenceAnalysis::collectConstantUpperBound(const Loop *L, 961 Type *T 962 ) const { 963 if (const SCEV *UB = collectUpperBound(L, T)) 964 return dyn_cast<SCEVConstant>(UB); 965 return nullptr; 966 } 967 968 969 // testZIV - 970 // When we have a pair of subscripts of the form [c1] and [c2], 971 // where c1 and c2 are both loop invariant, we attack it using 972 // the ZIV test. Basically, we test by comparing the two values, 973 // but there are actually three possible results: 974 // 1) the values are equal, so there's a dependence 975 // 2) the values are different, so there's no dependence 976 // 3) the values might be equal, so we have to assume a dependence. 977 // 978 // Return true if dependence disproved. 979 bool DependenceAnalysis::testZIV(const SCEV *Src, 980 const SCEV *Dst, 981 FullDependence &Result) const { 982 DEBUG(dbgs() << " src = " << *Src << "\n"); 983 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 984 ++ZIVapplications; 985 if (isKnownPredicate(CmpInst::ICMP_EQ, Src, Dst)) { 986 DEBUG(dbgs() << " provably dependent\n"); 987 return false; // provably dependent 988 } 989 if (isKnownPredicate(CmpInst::ICMP_NE, Src, Dst)) { 990 DEBUG(dbgs() << " provably independent\n"); 991 ++ZIVindependence; 992 return true; // provably independent 993 } 994 DEBUG(dbgs() << " possibly dependent\n"); 995 Result.Consistent = false; 996 return false; // possibly dependent 997 } 998 999 1000 // strongSIVtest - 1001 // From the paper, Practical Dependence Testing, Section 4.2.1 1002 // 1003 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 + a*i], 1004 // where i is an induction variable, c1 and c2 are loop invariant, 1005 // and a is a constant, we can solve it exactly using the Strong SIV test. 1006 // 1007 // Can prove independence. Failing that, can compute distance (and direction). 1008 // In the presence of symbolic terms, we can sometimes make progress. 1009 // 1010 // If there's a dependence, 1011 // 1012 // c1 + a*i = c2 + a*i' 1013 // 1014 // The dependence distance is 1015 // 1016 // d = i' - i = (c1 - c2)/a 1017 // 1018 // A dependence only exists if d is an integer and abs(d) <= U, where U is the 1019 // loop's upper bound. If a dependence exists, the dependence direction is 1020 // defined as 1021 // 1022 // { < if d > 0 1023 // direction = { = if d = 0 1024 // { > if d < 0 1025 // 1026 // Return true if dependence disproved. 1027 bool DependenceAnalysis::strongSIVtest(const SCEV *Coeff, 1028 const SCEV *SrcConst, 1029 const SCEV *DstConst, 1030 const Loop *CurLoop, 1031 unsigned Level, 1032 FullDependence &Result, 1033 Constraint &NewConstraint) const { 1034 DEBUG(dbgs() << "\tStrong SIV test\n"); 1035 DEBUG(dbgs() << "\t Coeff = " << *Coeff); 1036 DEBUG(dbgs() << ", " << *Coeff->getType() << "\n"); 1037 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst); 1038 DEBUG(dbgs() << ", " << *SrcConst->getType() << "\n"); 1039 DEBUG(dbgs() << "\t DstConst = " << *DstConst); 1040 DEBUG(dbgs() << ", " << *DstConst->getType() << "\n"); 1041 ++StrongSIVapplications; 1042 assert(0 < Level && Level <= CommonLevels && "level out of range"); 1043 Level--; 1044 1045 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); 1046 DEBUG(dbgs() << "\t Delta = " << *Delta); 1047 DEBUG(dbgs() << ", " << *Delta->getType() << "\n"); 1048 1049 // check that |Delta| < iteration count 1050 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1051 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound); 1052 DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n"); 1053 const SCEV *AbsDelta = 1054 SE->isKnownNonNegative(Delta) ? Delta : SE->getNegativeSCEV(Delta); 1055 const SCEV *AbsCoeff = 1056 SE->isKnownNonNegative(Coeff) ? Coeff : SE->getNegativeSCEV(Coeff); 1057 const SCEV *Product = SE->getMulExpr(UpperBound, AbsCoeff); 1058 if (isKnownPredicate(CmpInst::ICMP_SGT, AbsDelta, Product)) { 1059 // Distance greater than trip count - no dependence 1060 ++StrongSIVindependence; 1061 ++StrongSIVsuccesses; 1062 return true; 1063 } 1064 } 1065 1066 // Can we compute distance? 1067 if (isa<SCEVConstant>(Delta) && isa<SCEVConstant>(Coeff)) { 1068 APInt ConstDelta = cast<SCEVConstant>(Delta)->getValue()->getValue(); 1069 APInt ConstCoeff = cast<SCEVConstant>(Coeff)->getValue()->getValue(); 1070 APInt Distance = ConstDelta; // these need to be initialized 1071 APInt Remainder = ConstDelta; 1072 APInt::sdivrem(ConstDelta, ConstCoeff, Distance, Remainder); 1073 DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); 1074 DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1075 // Make sure Coeff divides Delta exactly 1076 if (Remainder != 0) { 1077 // Coeff doesn't divide Distance, no dependence 1078 ++StrongSIVindependence; 1079 ++StrongSIVsuccesses; 1080 return true; 1081 } 1082 Result.DV[Level].Distance = SE->getConstant(Distance); 1083 NewConstraint.setDistance(SE->getConstant(Distance), CurLoop); 1084 if (Distance.sgt(0)) 1085 Result.DV[Level].Direction &= Dependence::DVEntry::LT; 1086 else if (Distance.slt(0)) 1087 Result.DV[Level].Direction &= Dependence::DVEntry::GT; 1088 else 1089 Result.DV[Level].Direction &= Dependence::DVEntry::EQ; 1090 ++StrongSIVsuccesses; 1091 } 1092 else if (Delta->isZero()) { 1093 // since 0/X == 0 1094 Result.DV[Level].Distance = Delta; 1095 NewConstraint.setDistance(Delta, CurLoop); 1096 Result.DV[Level].Direction &= Dependence::DVEntry::EQ; 1097 ++StrongSIVsuccesses; 1098 } 1099 else { 1100 if (Coeff->isOne()) { 1101 DEBUG(dbgs() << "\t Distance = " << *Delta << "\n"); 1102 Result.DV[Level].Distance = Delta; // since X/1 == X 1103 NewConstraint.setDistance(Delta, CurLoop); 1104 } 1105 else { 1106 Result.Consistent = false; 1107 NewConstraint.setLine(Coeff, 1108 SE->getNegativeSCEV(Coeff), 1109 SE->getNegativeSCEV(Delta), CurLoop); 1110 } 1111 1112 // maybe we can get a useful direction 1113 bool DeltaMaybeZero = !SE->isKnownNonZero(Delta); 1114 bool DeltaMaybePositive = !SE->isKnownNonPositive(Delta); 1115 bool DeltaMaybeNegative = !SE->isKnownNonNegative(Delta); 1116 bool CoeffMaybePositive = !SE->isKnownNonPositive(Coeff); 1117 bool CoeffMaybeNegative = !SE->isKnownNonNegative(Coeff); 1118 // The double negatives above are confusing. 1119 // It helps to read !SE->isKnownNonZero(Delta) 1120 // as "Delta might be Zero" 1121 unsigned NewDirection = Dependence::DVEntry::NONE; 1122 if ((DeltaMaybePositive && CoeffMaybePositive) || 1123 (DeltaMaybeNegative && CoeffMaybeNegative)) 1124 NewDirection = Dependence::DVEntry::LT; 1125 if (DeltaMaybeZero) 1126 NewDirection |= Dependence::DVEntry::EQ; 1127 if ((DeltaMaybeNegative && CoeffMaybePositive) || 1128 (DeltaMaybePositive && CoeffMaybeNegative)) 1129 NewDirection |= Dependence::DVEntry::GT; 1130 if (NewDirection < Result.DV[Level].Direction) 1131 ++StrongSIVsuccesses; 1132 Result.DV[Level].Direction &= NewDirection; 1133 } 1134 return false; 1135 } 1136 1137 1138 // weakCrossingSIVtest - 1139 // From the paper, Practical Dependence Testing, Section 4.2.2 1140 // 1141 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 - a*i], 1142 // where i is an induction variable, c1 and c2 are loop invariant, 1143 // and a is a constant, we can solve it exactly using the 1144 // Weak-Crossing SIV test. 1145 // 1146 // Given c1 + a*i = c2 - a*i', we can look for the intersection of 1147 // the two lines, where i = i', yielding 1148 // 1149 // c1 + a*i = c2 - a*i 1150 // 2a*i = c2 - c1 1151 // i = (c2 - c1)/2a 1152 // 1153 // If i < 0, there is no dependence. 1154 // If i > upperbound, there is no dependence. 1155 // If i = 0 (i.e., if c1 = c2), there's a dependence with distance = 0. 1156 // If i = upperbound, there's a dependence with distance = 0. 1157 // If i is integral, there's a dependence (all directions). 1158 // If the non-integer part = 1/2, there's a dependence (<> directions). 1159 // Otherwise, there's no dependence. 1160 // 1161 // Can prove independence. Failing that, 1162 // can sometimes refine the directions. 1163 // Can determine iteration for splitting. 1164 // 1165 // Return true if dependence disproved. 1166 bool DependenceAnalysis::weakCrossingSIVtest(const SCEV *Coeff, 1167 const SCEV *SrcConst, 1168 const SCEV *DstConst, 1169 const Loop *CurLoop, 1170 unsigned Level, 1171 FullDependence &Result, 1172 Constraint &NewConstraint, 1173 const SCEV *&SplitIter) const { 1174 DEBUG(dbgs() << "\tWeak-Crossing SIV test\n"); 1175 DEBUG(dbgs() << "\t Coeff = " << *Coeff << "\n"); 1176 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1177 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1178 ++WeakCrossingSIVapplications; 1179 assert(0 < Level && Level <= CommonLevels && "Level out of range"); 1180 Level--; 1181 Result.Consistent = false; 1182 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1183 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1184 NewConstraint.setLine(Coeff, Coeff, Delta, CurLoop); 1185 if (Delta->isZero()) { 1186 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT); 1187 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT); 1188 ++WeakCrossingSIVsuccesses; 1189 if (!Result.DV[Level].Direction) { 1190 ++WeakCrossingSIVindependence; 1191 return true; 1192 } 1193 Result.DV[Level].Distance = Delta; // = 0 1194 return false; 1195 } 1196 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Coeff); 1197 if (!ConstCoeff) 1198 return false; 1199 1200 Result.DV[Level].Splitable = true; 1201 if (SE->isKnownNegative(ConstCoeff)) { 1202 ConstCoeff = dyn_cast<SCEVConstant>(SE->getNegativeSCEV(ConstCoeff)); 1203 assert(ConstCoeff && 1204 "dynamic cast of negative of ConstCoeff should yield constant"); 1205 Delta = SE->getNegativeSCEV(Delta); 1206 } 1207 assert(SE->isKnownPositive(ConstCoeff) && "ConstCoeff should be positive"); 1208 1209 // compute SplitIter for use by DependenceAnalysis::getSplitIteration() 1210 SplitIter = 1211 SE->getUDivExpr(SE->getSMaxExpr(SE->getConstant(Delta->getType(), 0), 1212 Delta), 1213 SE->getMulExpr(SE->getConstant(Delta->getType(), 2), 1214 ConstCoeff)); 1215 DEBUG(dbgs() << "\t Split iter = " << *SplitIter << "\n"); 1216 1217 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1218 if (!ConstDelta) 1219 return false; 1220 1221 // We're certain that ConstCoeff > 0; therefore, 1222 // if Delta < 0, then no dependence. 1223 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1224 DEBUG(dbgs() << "\t ConstCoeff = " << *ConstCoeff << "\n"); 1225 if (SE->isKnownNegative(Delta)) { 1226 // No dependence, Delta < 0 1227 ++WeakCrossingSIVindependence; 1228 ++WeakCrossingSIVsuccesses; 1229 return true; 1230 } 1231 1232 // We're certain that Delta > 0 and ConstCoeff > 0. 1233 // Check Delta/(2*ConstCoeff) against upper loop bound 1234 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1235 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1236 const SCEV *ConstantTwo = SE->getConstant(UpperBound->getType(), 2); 1237 const SCEV *ML = SE->getMulExpr(SE->getMulExpr(ConstCoeff, UpperBound), 1238 ConstantTwo); 1239 DEBUG(dbgs() << "\t ML = " << *ML << "\n"); 1240 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, ML)) { 1241 // Delta too big, no dependence 1242 ++WeakCrossingSIVindependence; 1243 ++WeakCrossingSIVsuccesses; 1244 return true; 1245 } 1246 if (isKnownPredicate(CmpInst::ICMP_EQ, Delta, ML)) { 1247 // i = i' = UB 1248 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT); 1249 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT); 1250 ++WeakCrossingSIVsuccesses; 1251 if (!Result.DV[Level].Direction) { 1252 ++WeakCrossingSIVindependence; 1253 return true; 1254 } 1255 Result.DV[Level].Splitable = false; 1256 Result.DV[Level].Distance = SE->getConstant(Delta->getType(), 0); 1257 return false; 1258 } 1259 } 1260 1261 // check that Coeff divides Delta 1262 APInt APDelta = ConstDelta->getValue()->getValue(); 1263 APInt APCoeff = ConstCoeff->getValue()->getValue(); 1264 APInt Distance = APDelta; // these need to be initialzed 1265 APInt Remainder = APDelta; 1266 APInt::sdivrem(APDelta, APCoeff, Distance, Remainder); 1267 DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1268 if (Remainder != 0) { 1269 // Coeff doesn't divide Delta, no dependence 1270 ++WeakCrossingSIVindependence; 1271 ++WeakCrossingSIVsuccesses; 1272 return true; 1273 } 1274 DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); 1275 1276 // if 2*Coeff doesn't divide Delta, then the equal direction isn't possible 1277 APInt Two = APInt(Distance.getBitWidth(), 2, true); 1278 Remainder = Distance.srem(Two); 1279 DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1280 if (Remainder != 0) { 1281 // Equal direction isn't possible 1282 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::EQ); 1283 ++WeakCrossingSIVsuccesses; 1284 } 1285 return false; 1286 } 1287 1288 1289 // Kirch's algorithm, from 1290 // 1291 // Optimizing Supercompilers for Supercomputers 1292 // Michael Wolfe 1293 // MIT Press, 1989 1294 // 1295 // Program 2.1, page 29. 1296 // Computes the GCD of AM and BM. 1297 // Also finds a solution to the equation ax - by = gcd(a, b). 1298 // Returns true if dependence disproved; i.e., gcd does not divide Delta. 1299 static 1300 bool findGCD(unsigned Bits, APInt AM, APInt BM, APInt Delta, 1301 APInt &G, APInt &X, APInt &Y) { 1302 APInt A0(Bits, 1, true), A1(Bits, 0, true); 1303 APInt B0(Bits, 0, true), B1(Bits, 1, true); 1304 APInt G0 = AM.abs(); 1305 APInt G1 = BM.abs(); 1306 APInt Q = G0; // these need to be initialized 1307 APInt R = G0; 1308 APInt::sdivrem(G0, G1, Q, R); 1309 while (R != 0) { 1310 APInt A2 = A0 - Q*A1; A0 = A1; A1 = A2; 1311 APInt B2 = B0 - Q*B1; B0 = B1; B1 = B2; 1312 G0 = G1; G1 = R; 1313 APInt::sdivrem(G0, G1, Q, R); 1314 } 1315 G = G1; 1316 DEBUG(dbgs() << "\t GCD = " << G << "\n"); 1317 X = AM.slt(0) ? -A1 : A1; 1318 Y = BM.slt(0) ? B1 : -B1; 1319 1320 // make sure gcd divides Delta 1321 R = Delta.srem(G); 1322 if (R != 0) 1323 return true; // gcd doesn't divide Delta, no dependence 1324 Q = Delta.sdiv(G); 1325 X *= Q; 1326 Y *= Q; 1327 return false; 1328 } 1329 1330 1331 static 1332 APInt floorOfQuotient(APInt A, APInt B) { 1333 APInt Q = A; // these need to be initialized 1334 APInt R = A; 1335 APInt::sdivrem(A, B, Q, R); 1336 if (R == 0) 1337 return Q; 1338 if ((A.sgt(0) && B.sgt(0)) || 1339 (A.slt(0) && B.slt(0))) 1340 return Q; 1341 else 1342 return Q - 1; 1343 } 1344 1345 1346 static 1347 APInt ceilingOfQuotient(APInt A, APInt B) { 1348 APInt Q = A; // these need to be initialized 1349 APInt R = A; 1350 APInt::sdivrem(A, B, Q, R); 1351 if (R == 0) 1352 return Q; 1353 if ((A.sgt(0) && B.sgt(0)) || 1354 (A.slt(0) && B.slt(0))) 1355 return Q + 1; 1356 else 1357 return Q; 1358 } 1359 1360 1361 static 1362 APInt maxAPInt(APInt A, APInt B) { 1363 return A.sgt(B) ? A : B; 1364 } 1365 1366 1367 static 1368 APInt minAPInt(APInt A, APInt B) { 1369 return A.slt(B) ? A : B; 1370 } 1371 1372 1373 // exactSIVtest - 1374 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i], 1375 // where i is an induction variable, c1 and c2 are loop invariant, and a1 1376 // and a2 are constant, we can solve it exactly using an algorithm developed 1377 // by Banerjee and Wolfe. See Section 2.5.3 in 1378 // 1379 // Optimizing Supercompilers for Supercomputers 1380 // Michael Wolfe 1381 // MIT Press, 1989 1382 // 1383 // It's slower than the specialized tests (strong SIV, weak-zero SIV, etc), 1384 // so use them if possible. They're also a bit better with symbolics and, 1385 // in the case of the strong SIV test, can compute Distances. 1386 // 1387 // Return true if dependence disproved. 1388 bool DependenceAnalysis::exactSIVtest(const SCEV *SrcCoeff, 1389 const SCEV *DstCoeff, 1390 const SCEV *SrcConst, 1391 const SCEV *DstConst, 1392 const Loop *CurLoop, 1393 unsigned Level, 1394 FullDependence &Result, 1395 Constraint &NewConstraint) const { 1396 DEBUG(dbgs() << "\tExact SIV test\n"); 1397 DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); 1398 DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); 1399 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1400 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1401 ++ExactSIVapplications; 1402 assert(0 < Level && Level <= CommonLevels && "Level out of range"); 1403 Level--; 1404 Result.Consistent = false; 1405 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1406 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1407 NewConstraint.setLine(SrcCoeff, SE->getNegativeSCEV(DstCoeff), 1408 Delta, CurLoop); 1409 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1410 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1411 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1412 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) 1413 return false; 1414 1415 // find gcd 1416 APInt G, X, Y; 1417 APInt AM = ConstSrcCoeff->getValue()->getValue(); 1418 APInt BM = ConstDstCoeff->getValue()->getValue(); 1419 unsigned Bits = AM.getBitWidth(); 1420 if (findGCD(Bits, AM, BM, ConstDelta->getValue()->getValue(), G, X, Y)) { 1421 // gcd doesn't divide Delta, no dependence 1422 ++ExactSIVindependence; 1423 ++ExactSIVsuccesses; 1424 return true; 1425 } 1426 1427 DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); 1428 1429 // since SCEV construction normalizes, LM = 0 1430 APInt UM(Bits, 1, true); 1431 bool UMvalid = false; 1432 // UM is perhaps unavailable, let's check 1433 if (const SCEVConstant *CUB = 1434 collectConstantUpperBound(CurLoop, Delta->getType())) { 1435 UM = CUB->getValue()->getValue(); 1436 DEBUG(dbgs() << "\t UM = " << UM << "\n"); 1437 UMvalid = true; 1438 } 1439 1440 APInt TU(APInt::getSignedMaxValue(Bits)); 1441 APInt TL(APInt::getSignedMinValue(Bits)); 1442 1443 // test(BM/G, LM-X) and test(-BM/G, X-UM) 1444 APInt TMUL = BM.sdiv(G); 1445 if (TMUL.sgt(0)) { 1446 TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL)); 1447 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1448 if (UMvalid) { 1449 TU = minAPInt(TU, floorOfQuotient(UM - X, TMUL)); 1450 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1451 } 1452 } 1453 else { 1454 TU = minAPInt(TU, floorOfQuotient(-X, TMUL)); 1455 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1456 if (UMvalid) { 1457 TL = maxAPInt(TL, ceilingOfQuotient(UM - X, TMUL)); 1458 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1459 } 1460 } 1461 1462 // test(AM/G, LM-Y) and test(-AM/G, Y-UM) 1463 TMUL = AM.sdiv(G); 1464 if (TMUL.sgt(0)) { 1465 TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL)); 1466 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1467 if (UMvalid) { 1468 TU = minAPInt(TU, floorOfQuotient(UM - Y, TMUL)); 1469 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1470 } 1471 } 1472 else { 1473 TU = minAPInt(TU, floorOfQuotient(-Y, TMUL)); 1474 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1475 if (UMvalid) { 1476 TL = maxAPInt(TL, ceilingOfQuotient(UM - Y, TMUL)); 1477 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1478 } 1479 } 1480 if (TL.sgt(TU)) { 1481 ++ExactSIVindependence; 1482 ++ExactSIVsuccesses; 1483 return true; 1484 } 1485 1486 // explore directions 1487 unsigned NewDirection = Dependence::DVEntry::NONE; 1488 1489 // less than 1490 APInt SaveTU(TU); // save these 1491 APInt SaveTL(TL); 1492 DEBUG(dbgs() << "\t exploring LT direction\n"); 1493 TMUL = AM - BM; 1494 if (TMUL.sgt(0)) { 1495 TL = maxAPInt(TL, ceilingOfQuotient(X - Y + 1, TMUL)); 1496 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1497 } 1498 else { 1499 TU = minAPInt(TU, floorOfQuotient(X - Y + 1, TMUL)); 1500 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1501 } 1502 if (TL.sle(TU)) { 1503 NewDirection |= Dependence::DVEntry::LT; 1504 ++ExactSIVsuccesses; 1505 } 1506 1507 // equal 1508 TU = SaveTU; // restore 1509 TL = SaveTL; 1510 DEBUG(dbgs() << "\t exploring EQ direction\n"); 1511 if (TMUL.sgt(0)) { 1512 TL = maxAPInt(TL, ceilingOfQuotient(X - Y, TMUL)); 1513 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1514 } 1515 else { 1516 TU = minAPInt(TU, floorOfQuotient(X - Y, TMUL)); 1517 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1518 } 1519 TMUL = BM - AM; 1520 if (TMUL.sgt(0)) { 1521 TL = maxAPInt(TL, ceilingOfQuotient(Y - X, TMUL)); 1522 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1523 } 1524 else { 1525 TU = minAPInt(TU, floorOfQuotient(Y - X, TMUL)); 1526 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1527 } 1528 if (TL.sle(TU)) { 1529 NewDirection |= Dependence::DVEntry::EQ; 1530 ++ExactSIVsuccesses; 1531 } 1532 1533 // greater than 1534 TU = SaveTU; // restore 1535 TL = SaveTL; 1536 DEBUG(dbgs() << "\t exploring GT direction\n"); 1537 if (TMUL.sgt(0)) { 1538 TL = maxAPInt(TL, ceilingOfQuotient(Y - X + 1, TMUL)); 1539 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1540 } 1541 else { 1542 TU = minAPInt(TU, floorOfQuotient(Y - X + 1, TMUL)); 1543 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1544 } 1545 if (TL.sle(TU)) { 1546 NewDirection |= Dependence::DVEntry::GT; 1547 ++ExactSIVsuccesses; 1548 } 1549 1550 // finished 1551 Result.DV[Level].Direction &= NewDirection; 1552 if (Result.DV[Level].Direction == Dependence::DVEntry::NONE) 1553 ++ExactSIVindependence; 1554 return Result.DV[Level].Direction == Dependence::DVEntry::NONE; 1555 } 1556 1557 1558 1559 // Return true if the divisor evenly divides the dividend. 1560 static 1561 bool isRemainderZero(const SCEVConstant *Dividend, 1562 const SCEVConstant *Divisor) { 1563 APInt ConstDividend = Dividend->getValue()->getValue(); 1564 APInt ConstDivisor = Divisor->getValue()->getValue(); 1565 return ConstDividend.srem(ConstDivisor) == 0; 1566 } 1567 1568 1569 // weakZeroSrcSIVtest - 1570 // From the paper, Practical Dependence Testing, Section 4.2.2 1571 // 1572 // When we have a pair of subscripts of the form [c1] and [c2 + a*i], 1573 // where i is an induction variable, c1 and c2 are loop invariant, 1574 // and a is a constant, we can solve it exactly using the 1575 // Weak-Zero SIV test. 1576 // 1577 // Given 1578 // 1579 // c1 = c2 + a*i 1580 // 1581 // we get 1582 // 1583 // (c1 - c2)/a = i 1584 // 1585 // If i is not an integer, there's no dependence. 1586 // If i < 0 or > UB, there's no dependence. 1587 // If i = 0, the direction is <= and peeling the 1588 // 1st iteration will break the dependence. 1589 // If i = UB, the direction is >= and peeling the 1590 // last iteration will break the dependence. 1591 // Otherwise, the direction is *. 1592 // 1593 // Can prove independence. Failing that, we can sometimes refine 1594 // the directions. Can sometimes show that first or last 1595 // iteration carries all the dependences (so worth peeling). 1596 // 1597 // (see also weakZeroDstSIVtest) 1598 // 1599 // Return true if dependence disproved. 1600 bool DependenceAnalysis::weakZeroSrcSIVtest(const SCEV *DstCoeff, 1601 const SCEV *SrcConst, 1602 const SCEV *DstConst, 1603 const Loop *CurLoop, 1604 unsigned Level, 1605 FullDependence &Result, 1606 Constraint &NewConstraint) const { 1607 // For the WeakSIV test, it's possible the loop isn't common to 1608 // the Src and Dst loops. If it isn't, then there's no need to 1609 // record a direction. 1610 DEBUG(dbgs() << "\tWeak-Zero (src) SIV test\n"); 1611 DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << "\n"); 1612 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1613 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1614 ++WeakZeroSIVapplications; 1615 assert(0 < Level && Level <= MaxLevels && "Level out of range"); 1616 Level--; 1617 Result.Consistent = false; 1618 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); 1619 NewConstraint.setLine(SE->getConstant(Delta->getType(), 0), 1620 DstCoeff, Delta, CurLoop); 1621 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1622 if (isKnownPredicate(CmpInst::ICMP_EQ, SrcConst, DstConst)) { 1623 if (Level < CommonLevels) { 1624 Result.DV[Level].Direction &= Dependence::DVEntry::LE; 1625 Result.DV[Level].PeelFirst = true; 1626 ++WeakZeroSIVsuccesses; 1627 } 1628 return false; // dependences caused by first iteration 1629 } 1630 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1631 if (!ConstCoeff) 1632 return false; 1633 const SCEV *AbsCoeff = 1634 SE->isKnownNegative(ConstCoeff) ? 1635 SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; 1636 const SCEV *NewDelta = 1637 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta; 1638 1639 // check that Delta/SrcCoeff < iteration count 1640 // really check NewDelta < count*AbsCoeff 1641 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1642 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1643 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); 1644 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { 1645 ++WeakZeroSIVindependence; 1646 ++WeakZeroSIVsuccesses; 1647 return true; 1648 } 1649 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { 1650 // dependences caused by last iteration 1651 if (Level < CommonLevels) { 1652 Result.DV[Level].Direction &= Dependence::DVEntry::GE; 1653 Result.DV[Level].PeelLast = true; 1654 ++WeakZeroSIVsuccesses; 1655 } 1656 return false; 1657 } 1658 } 1659 1660 // check that Delta/SrcCoeff >= 0 1661 // really check that NewDelta >= 0 1662 if (SE->isKnownNegative(NewDelta)) { 1663 // No dependence, newDelta < 0 1664 ++WeakZeroSIVindependence; 1665 ++WeakZeroSIVsuccesses; 1666 return true; 1667 } 1668 1669 // if SrcCoeff doesn't divide Delta, then no dependence 1670 if (isa<SCEVConstant>(Delta) && 1671 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { 1672 ++WeakZeroSIVindependence; 1673 ++WeakZeroSIVsuccesses; 1674 return true; 1675 } 1676 return false; 1677 } 1678 1679 1680 // weakZeroDstSIVtest - 1681 // From the paper, Practical Dependence Testing, Section 4.2.2 1682 // 1683 // When we have a pair of subscripts of the form [c1 + a*i] and [c2], 1684 // where i is an induction variable, c1 and c2 are loop invariant, 1685 // and a is a constant, we can solve it exactly using the 1686 // Weak-Zero SIV test. 1687 // 1688 // Given 1689 // 1690 // c1 + a*i = c2 1691 // 1692 // we get 1693 // 1694 // i = (c2 - c1)/a 1695 // 1696 // If i is not an integer, there's no dependence. 1697 // If i < 0 or > UB, there's no dependence. 1698 // If i = 0, the direction is <= and peeling the 1699 // 1st iteration will break the dependence. 1700 // If i = UB, the direction is >= and peeling the 1701 // last iteration will break the dependence. 1702 // Otherwise, the direction is *. 1703 // 1704 // Can prove independence. Failing that, we can sometimes refine 1705 // the directions. Can sometimes show that first or last 1706 // iteration carries all the dependences (so worth peeling). 1707 // 1708 // (see also weakZeroSrcSIVtest) 1709 // 1710 // Return true if dependence disproved. 1711 bool DependenceAnalysis::weakZeroDstSIVtest(const SCEV *SrcCoeff, 1712 const SCEV *SrcConst, 1713 const SCEV *DstConst, 1714 const Loop *CurLoop, 1715 unsigned Level, 1716 FullDependence &Result, 1717 Constraint &NewConstraint) const { 1718 // For the WeakSIV test, it's possible the loop isn't common to the 1719 // Src and Dst loops. If it isn't, then there's no need to record a direction. 1720 DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n"); 1721 DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << "\n"); 1722 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1723 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1724 ++WeakZeroSIVapplications; 1725 assert(0 < Level && Level <= SrcLevels && "Level out of range"); 1726 Level--; 1727 Result.Consistent = false; 1728 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1729 NewConstraint.setLine(SrcCoeff, SE->getConstant(Delta->getType(), 0), 1730 Delta, CurLoop); 1731 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1732 if (isKnownPredicate(CmpInst::ICMP_EQ, DstConst, SrcConst)) { 1733 if (Level < CommonLevels) { 1734 Result.DV[Level].Direction &= Dependence::DVEntry::LE; 1735 Result.DV[Level].PeelFirst = true; 1736 ++WeakZeroSIVsuccesses; 1737 } 1738 return false; // dependences caused by first iteration 1739 } 1740 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1741 if (!ConstCoeff) 1742 return false; 1743 const SCEV *AbsCoeff = 1744 SE->isKnownNegative(ConstCoeff) ? 1745 SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; 1746 const SCEV *NewDelta = 1747 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta; 1748 1749 // check that Delta/SrcCoeff < iteration count 1750 // really check NewDelta < count*AbsCoeff 1751 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1752 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1753 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); 1754 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { 1755 ++WeakZeroSIVindependence; 1756 ++WeakZeroSIVsuccesses; 1757 return true; 1758 } 1759 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { 1760 // dependences caused by last iteration 1761 if (Level < CommonLevels) { 1762 Result.DV[Level].Direction &= Dependence::DVEntry::GE; 1763 Result.DV[Level].PeelLast = true; 1764 ++WeakZeroSIVsuccesses; 1765 } 1766 return false; 1767 } 1768 } 1769 1770 // check that Delta/SrcCoeff >= 0 1771 // really check that NewDelta >= 0 1772 if (SE->isKnownNegative(NewDelta)) { 1773 // No dependence, newDelta < 0 1774 ++WeakZeroSIVindependence; 1775 ++WeakZeroSIVsuccesses; 1776 return true; 1777 } 1778 1779 // if SrcCoeff doesn't divide Delta, then no dependence 1780 if (isa<SCEVConstant>(Delta) && 1781 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { 1782 ++WeakZeroSIVindependence; 1783 ++WeakZeroSIVsuccesses; 1784 return true; 1785 } 1786 return false; 1787 } 1788 1789 1790 // exactRDIVtest - Tests the RDIV subscript pair for dependence. 1791 // Things of the form [c1 + a*i] and [c2 + b*j], 1792 // where i and j are induction variable, c1 and c2 are loop invariant, 1793 // and a and b are constants. 1794 // Returns true if any possible dependence is disproved. 1795 // Marks the result as inconsistent. 1796 // Works in some cases that symbolicRDIVtest doesn't, and vice versa. 1797 bool DependenceAnalysis::exactRDIVtest(const SCEV *SrcCoeff, 1798 const SCEV *DstCoeff, 1799 const SCEV *SrcConst, 1800 const SCEV *DstConst, 1801 const Loop *SrcLoop, 1802 const Loop *DstLoop, 1803 FullDependence &Result) const { 1804 DEBUG(dbgs() << "\tExact RDIV test\n"); 1805 DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); 1806 DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); 1807 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1808 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1809 ++ExactRDIVapplications; 1810 Result.Consistent = false; 1811 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1812 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1813 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1814 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1815 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1816 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) 1817 return false; 1818 1819 // find gcd 1820 APInt G, X, Y; 1821 APInt AM = ConstSrcCoeff->getValue()->getValue(); 1822 APInt BM = ConstDstCoeff->getValue()->getValue(); 1823 unsigned Bits = AM.getBitWidth(); 1824 if (findGCD(Bits, AM, BM, ConstDelta->getValue()->getValue(), G, X, Y)) { 1825 // gcd doesn't divide Delta, no dependence 1826 ++ExactRDIVindependence; 1827 return true; 1828 } 1829 1830 DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); 1831 1832 // since SCEV construction seems to normalize, LM = 0 1833 APInt SrcUM(Bits, 1, true); 1834 bool SrcUMvalid = false; 1835 // SrcUM is perhaps unavailable, let's check 1836 if (const SCEVConstant *UpperBound = 1837 collectConstantUpperBound(SrcLoop, Delta->getType())) { 1838 SrcUM = UpperBound->getValue()->getValue(); 1839 DEBUG(dbgs() << "\t SrcUM = " << SrcUM << "\n"); 1840 SrcUMvalid = true; 1841 } 1842 1843 APInt DstUM(Bits, 1, true); 1844 bool DstUMvalid = false; 1845 // UM is perhaps unavailable, let's check 1846 if (const SCEVConstant *UpperBound = 1847 collectConstantUpperBound(DstLoop, Delta->getType())) { 1848 DstUM = UpperBound->getValue()->getValue(); 1849 DEBUG(dbgs() << "\t DstUM = " << DstUM << "\n"); 1850 DstUMvalid = true; 1851 } 1852 1853 APInt TU(APInt::getSignedMaxValue(Bits)); 1854 APInt TL(APInt::getSignedMinValue(Bits)); 1855 1856 // test(BM/G, LM-X) and test(-BM/G, X-UM) 1857 APInt TMUL = BM.sdiv(G); 1858 if (TMUL.sgt(0)) { 1859 TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL)); 1860 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1861 if (SrcUMvalid) { 1862 TU = minAPInt(TU, floorOfQuotient(SrcUM - X, TMUL)); 1863 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1864 } 1865 } 1866 else { 1867 TU = minAPInt(TU, floorOfQuotient(-X, TMUL)); 1868 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1869 if (SrcUMvalid) { 1870 TL = maxAPInt(TL, ceilingOfQuotient(SrcUM - X, TMUL)); 1871 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1872 } 1873 } 1874 1875 // test(AM/G, LM-Y) and test(-AM/G, Y-UM) 1876 TMUL = AM.sdiv(G); 1877 if (TMUL.sgt(0)) { 1878 TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL)); 1879 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1880 if (DstUMvalid) { 1881 TU = minAPInt(TU, floorOfQuotient(DstUM - Y, TMUL)); 1882 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1883 } 1884 } 1885 else { 1886 TU = minAPInt(TU, floorOfQuotient(-Y, TMUL)); 1887 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1888 if (DstUMvalid) { 1889 TL = maxAPInt(TL, ceilingOfQuotient(DstUM - Y, TMUL)); 1890 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1891 } 1892 } 1893 if (TL.sgt(TU)) 1894 ++ExactRDIVindependence; 1895 return TL.sgt(TU); 1896 } 1897 1898 1899 // symbolicRDIVtest - 1900 // In Section 4.5 of the Practical Dependence Testing paper,the authors 1901 // introduce a special case of Banerjee's Inequalities (also called the 1902 // Extreme-Value Test) that can handle some of the SIV and RDIV cases, 1903 // particularly cases with symbolics. Since it's only able to disprove 1904 // dependence (not compute distances or directions), we'll use it as a 1905 // fall back for the other tests. 1906 // 1907 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] 1908 // where i and j are induction variables and c1 and c2 are loop invariants, 1909 // we can use the symbolic tests to disprove some dependences, serving as a 1910 // backup for the RDIV test. Note that i and j can be the same variable, 1911 // letting this test serve as a backup for the various SIV tests. 1912 // 1913 // For a dependence to exist, c1 + a1*i must equal c2 + a2*j for some 1914 // 0 <= i <= N1 and some 0 <= j <= N2, where N1 and N2 are the (normalized) 1915 // loop bounds for the i and j loops, respectively. So, ... 1916 // 1917 // c1 + a1*i = c2 + a2*j 1918 // a1*i - a2*j = c2 - c1 1919 // 1920 // To test for a dependence, we compute c2 - c1 and make sure it's in the 1921 // range of the maximum and minimum possible values of a1*i - a2*j. 1922 // Considering the signs of a1 and a2, we have 4 possible cases: 1923 // 1924 // 1) If a1 >= 0 and a2 >= 0, then 1925 // a1*0 - a2*N2 <= c2 - c1 <= a1*N1 - a2*0 1926 // -a2*N2 <= c2 - c1 <= a1*N1 1927 // 1928 // 2) If a1 >= 0 and a2 <= 0, then 1929 // a1*0 - a2*0 <= c2 - c1 <= a1*N1 - a2*N2 1930 // 0 <= c2 - c1 <= a1*N1 - a2*N2 1931 // 1932 // 3) If a1 <= 0 and a2 >= 0, then 1933 // a1*N1 - a2*N2 <= c2 - c1 <= a1*0 - a2*0 1934 // a1*N1 - a2*N2 <= c2 - c1 <= 0 1935 // 1936 // 4) If a1 <= 0 and a2 <= 0, then 1937 // a1*N1 - a2*0 <= c2 - c1 <= a1*0 - a2*N2 1938 // a1*N1 <= c2 - c1 <= -a2*N2 1939 // 1940 // return true if dependence disproved 1941 bool DependenceAnalysis::symbolicRDIVtest(const SCEV *A1, 1942 const SCEV *A2, 1943 const SCEV *C1, 1944 const SCEV *C2, 1945 const Loop *Loop1, 1946 const Loop *Loop2) const { 1947 ++SymbolicRDIVapplications; 1948 DEBUG(dbgs() << "\ttry symbolic RDIV test\n"); 1949 DEBUG(dbgs() << "\t A1 = " << *A1); 1950 DEBUG(dbgs() << ", type = " << *A1->getType() << "\n"); 1951 DEBUG(dbgs() << "\t A2 = " << *A2 << "\n"); 1952 DEBUG(dbgs() << "\t C1 = " << *C1 << "\n"); 1953 DEBUG(dbgs() << "\t C2 = " << *C2 << "\n"); 1954 const SCEV *N1 = collectUpperBound(Loop1, A1->getType()); 1955 const SCEV *N2 = collectUpperBound(Loop2, A1->getType()); 1956 DEBUG(if (N1) dbgs() << "\t N1 = " << *N1 << "\n"); 1957 DEBUG(if (N2) dbgs() << "\t N2 = " << *N2 << "\n"); 1958 const SCEV *C2_C1 = SE->getMinusSCEV(C2, C1); 1959 const SCEV *C1_C2 = SE->getMinusSCEV(C1, C2); 1960 DEBUG(dbgs() << "\t C2 - C1 = " << *C2_C1 << "\n"); 1961 DEBUG(dbgs() << "\t C1 - C2 = " << *C1_C2 << "\n"); 1962 if (SE->isKnownNonNegative(A1)) { 1963 if (SE->isKnownNonNegative(A2)) { 1964 // A1 >= 0 && A2 >= 0 1965 if (N1) { 1966 // make sure that c2 - c1 <= a1*N1 1967 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 1968 DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); 1969 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1)) { 1970 ++SymbolicRDIVindependence; 1971 return true; 1972 } 1973 } 1974 if (N2) { 1975 // make sure that -a2*N2 <= c2 - c1, or a2*N2 >= c1 - c2 1976 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 1977 DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); 1978 if (isKnownPredicate(CmpInst::ICMP_SLT, A2N2, C1_C2)) { 1979 ++SymbolicRDIVindependence; 1980 return true; 1981 } 1982 } 1983 } 1984 else if (SE->isKnownNonPositive(A2)) { 1985 // a1 >= 0 && a2 <= 0 1986 if (N1 && N2) { 1987 // make sure that c2 - c1 <= a1*N1 - a2*N2 1988 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 1989 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 1990 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); 1991 DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); 1992 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1_A2N2)) { 1993 ++SymbolicRDIVindependence; 1994 return true; 1995 } 1996 } 1997 // make sure that 0 <= c2 - c1 1998 if (SE->isKnownNegative(C2_C1)) { 1999 ++SymbolicRDIVindependence; 2000 return true; 2001 } 2002 } 2003 } 2004 else if (SE->isKnownNonPositive(A1)) { 2005 if (SE->isKnownNonNegative(A2)) { 2006 // a1 <= 0 && a2 >= 0 2007 if (N1 && N2) { 2008 // make sure that a1*N1 - a2*N2 <= c2 - c1 2009 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2010 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2011 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); 2012 DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); 2013 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1_A2N2, C2_C1)) { 2014 ++SymbolicRDIVindependence; 2015 return true; 2016 } 2017 } 2018 // make sure that c2 - c1 <= 0 2019 if (SE->isKnownPositive(C2_C1)) { 2020 ++SymbolicRDIVindependence; 2021 return true; 2022 } 2023 } 2024 else if (SE->isKnownNonPositive(A2)) { 2025 // a1 <= 0 && a2 <= 0 2026 if (N1) { 2027 // make sure that a1*N1 <= c2 - c1 2028 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2029 DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); 2030 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1, C2_C1)) { 2031 ++SymbolicRDIVindependence; 2032 return true; 2033 } 2034 } 2035 if (N2) { 2036 // make sure that c2 - c1 <= -a2*N2, or c1 - c2 >= a2*N2 2037 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2038 DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); 2039 if (isKnownPredicate(CmpInst::ICMP_SLT, C1_C2, A2N2)) { 2040 ++SymbolicRDIVindependence; 2041 return true; 2042 } 2043 } 2044 } 2045 } 2046 return false; 2047 } 2048 2049 2050 // testSIV - 2051 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 - a2*i] 2052 // where i is an induction variable, c1 and c2 are loop invariant, and a1 and 2053 // a2 are constant, we attack it with an SIV test. While they can all be 2054 // solved with the Exact SIV test, it's worthwhile to use simpler tests when 2055 // they apply; they're cheaper and sometimes more precise. 2056 // 2057 // Return true if dependence disproved. 2058 bool DependenceAnalysis::testSIV(const SCEV *Src, 2059 const SCEV *Dst, 2060 unsigned &Level, 2061 FullDependence &Result, 2062 Constraint &NewConstraint, 2063 const SCEV *&SplitIter) const { 2064 DEBUG(dbgs() << " src = " << *Src << "\n"); 2065 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2066 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); 2067 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); 2068 if (SrcAddRec && DstAddRec) { 2069 const SCEV *SrcConst = SrcAddRec->getStart(); 2070 const SCEV *DstConst = DstAddRec->getStart(); 2071 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2072 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); 2073 const Loop *CurLoop = SrcAddRec->getLoop(); 2074 assert(CurLoop == DstAddRec->getLoop() && 2075 "both loops in SIV should be same"); 2076 Level = mapSrcLoop(CurLoop); 2077 bool disproven; 2078 if (SrcCoeff == DstCoeff) 2079 disproven = strongSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2080 Level, Result, NewConstraint); 2081 else if (SrcCoeff == SE->getNegativeSCEV(DstCoeff)) 2082 disproven = weakCrossingSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2083 Level, Result, NewConstraint, SplitIter); 2084 else 2085 disproven = exactSIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, 2086 Level, Result, NewConstraint); 2087 return disproven || 2088 gcdMIVtest(Src, Dst, Result) || 2089 symbolicRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, CurLoop); 2090 } 2091 if (SrcAddRec) { 2092 const SCEV *SrcConst = SrcAddRec->getStart(); 2093 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2094 const SCEV *DstConst = Dst; 2095 const Loop *CurLoop = SrcAddRec->getLoop(); 2096 Level = mapSrcLoop(CurLoop); 2097 return weakZeroDstSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2098 Level, Result, NewConstraint) || 2099 gcdMIVtest(Src, Dst, Result); 2100 } 2101 if (DstAddRec) { 2102 const SCEV *DstConst = DstAddRec->getStart(); 2103 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); 2104 const SCEV *SrcConst = Src; 2105 const Loop *CurLoop = DstAddRec->getLoop(); 2106 Level = mapDstLoop(CurLoop); 2107 return weakZeroSrcSIVtest(DstCoeff, SrcConst, DstConst, 2108 CurLoop, Level, Result, NewConstraint) || 2109 gcdMIVtest(Src, Dst, Result); 2110 } 2111 llvm_unreachable("SIV test expected at least one AddRec"); 2112 return false; 2113 } 2114 2115 2116 // testRDIV - 2117 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] 2118 // where i and j are induction variables, c1 and c2 are loop invariant, 2119 // and a1 and a2 are constant, we can solve it exactly with an easy adaptation 2120 // of the Exact SIV test, the Restricted Double Index Variable (RDIV) test. 2121 // It doesn't make sense to talk about distance or direction in this case, 2122 // so there's no point in making special versions of the Strong SIV test or 2123 // the Weak-crossing SIV test. 2124 // 2125 // With minor algebra, this test can also be used for things like 2126 // [c1 + a1*i + a2*j][c2]. 2127 // 2128 // Return true if dependence disproved. 2129 bool DependenceAnalysis::testRDIV(const SCEV *Src, 2130 const SCEV *Dst, 2131 FullDependence &Result) const { 2132 // we have 3 possible situations here: 2133 // 1) [a*i + b] and [c*j + d] 2134 // 2) [a*i + c*j + b] and [d] 2135 // 3) [b] and [a*i + c*j + d] 2136 // We need to find what we've got and get organized 2137 2138 const SCEV *SrcConst, *DstConst; 2139 const SCEV *SrcCoeff, *DstCoeff; 2140 const Loop *SrcLoop, *DstLoop; 2141 2142 DEBUG(dbgs() << " src = " << *Src << "\n"); 2143 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2144 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); 2145 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); 2146 if (SrcAddRec && DstAddRec) { 2147 SrcConst = SrcAddRec->getStart(); 2148 SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2149 SrcLoop = SrcAddRec->getLoop(); 2150 DstConst = DstAddRec->getStart(); 2151 DstCoeff = DstAddRec->getStepRecurrence(*SE); 2152 DstLoop = DstAddRec->getLoop(); 2153 } 2154 else if (SrcAddRec) { 2155 if (const SCEVAddRecExpr *tmpAddRec = 2156 dyn_cast<SCEVAddRecExpr>(SrcAddRec->getStart())) { 2157 SrcConst = tmpAddRec->getStart(); 2158 SrcCoeff = tmpAddRec->getStepRecurrence(*SE); 2159 SrcLoop = tmpAddRec->getLoop(); 2160 DstConst = Dst; 2161 DstCoeff = SE->getNegativeSCEV(SrcAddRec->getStepRecurrence(*SE)); 2162 DstLoop = SrcAddRec->getLoop(); 2163 } 2164 else 2165 llvm_unreachable("RDIV reached by surprising SCEVs"); 2166 } 2167 else if (DstAddRec) { 2168 if (const SCEVAddRecExpr *tmpAddRec = 2169 dyn_cast<SCEVAddRecExpr>(DstAddRec->getStart())) { 2170 DstConst = tmpAddRec->getStart(); 2171 DstCoeff = tmpAddRec->getStepRecurrence(*SE); 2172 DstLoop = tmpAddRec->getLoop(); 2173 SrcConst = Src; 2174 SrcCoeff = SE->getNegativeSCEV(DstAddRec->getStepRecurrence(*SE)); 2175 SrcLoop = DstAddRec->getLoop(); 2176 } 2177 else 2178 llvm_unreachable("RDIV reached by surprising SCEVs"); 2179 } 2180 else 2181 llvm_unreachable("RDIV expected at least one AddRec"); 2182 return exactRDIVtest(SrcCoeff, DstCoeff, 2183 SrcConst, DstConst, 2184 SrcLoop, DstLoop, 2185 Result) || 2186 gcdMIVtest(Src, Dst, Result) || 2187 symbolicRDIVtest(SrcCoeff, DstCoeff, 2188 SrcConst, DstConst, 2189 SrcLoop, DstLoop); 2190 } 2191 2192 2193 // Tests the single-subscript MIV pair (Src and Dst) for dependence. 2194 // Return true if dependence disproved. 2195 // Can sometimes refine direction vectors. 2196 bool DependenceAnalysis::testMIV(const SCEV *Src, 2197 const SCEV *Dst, 2198 const SmallBitVector &Loops, 2199 FullDependence &Result) const { 2200 DEBUG(dbgs() << " src = " << *Src << "\n"); 2201 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2202 Result.Consistent = false; 2203 return gcdMIVtest(Src, Dst, Result) || 2204 banerjeeMIVtest(Src, Dst, Loops, Result); 2205 } 2206 2207 2208 // Given a product, e.g., 10*X*Y, returns the first constant operand, 2209 // in this case 10. If there is no constant part, returns NULL. 2210 static 2211 const SCEVConstant *getConstantPart(const SCEVMulExpr *Product) { 2212 for (unsigned Op = 0, Ops = Product->getNumOperands(); Op < Ops; Op++) { 2213 if (const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Product->getOperand(Op))) 2214 return Constant; 2215 } 2216 return nullptr; 2217 } 2218 2219 2220 //===----------------------------------------------------------------------===// 2221 // gcdMIVtest - 2222 // Tests an MIV subscript pair for dependence. 2223 // Returns true if any possible dependence is disproved. 2224 // Marks the result as inconsistent. 2225 // Can sometimes disprove the equal direction for 1 or more loops, 2226 // as discussed in Michael Wolfe's book, 2227 // High Performance Compilers for Parallel Computing, page 235. 2228 // 2229 // We spend some effort (code!) to handle cases like 2230 // [10*i + 5*N*j + 15*M + 6], where i and j are induction variables, 2231 // but M and N are just loop-invariant variables. 2232 // This should help us handle linearized subscripts; 2233 // also makes this test a useful backup to the various SIV tests. 2234 // 2235 // It occurs to me that the presence of loop-invariant variables 2236 // changes the nature of the test from "greatest common divisor" 2237 // to "a common divisor". 2238 bool DependenceAnalysis::gcdMIVtest(const SCEV *Src, 2239 const SCEV *Dst, 2240 FullDependence &Result) const { 2241 DEBUG(dbgs() << "starting gcd\n"); 2242 ++GCDapplications; 2243 unsigned BitWidth = SE->getTypeSizeInBits(Src->getType()); 2244 APInt RunningGCD = APInt::getNullValue(BitWidth); 2245 2246 // Examine Src coefficients. 2247 // Compute running GCD and record source constant. 2248 // Because we're looking for the constant at the end of the chain, 2249 // we can't quit the loop just because the GCD == 1. 2250 const SCEV *Coefficients = Src; 2251 while (const SCEVAddRecExpr *AddRec = 2252 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2253 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2254 const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Coeff); 2255 if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff)) 2256 // If the coefficient is the product of a constant and other stuff, 2257 // we can use the constant in the GCD computation. 2258 Constant = getConstantPart(Product); 2259 if (!Constant) 2260 return false; 2261 APInt ConstCoeff = Constant->getValue()->getValue(); 2262 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2263 Coefficients = AddRec->getStart(); 2264 } 2265 const SCEV *SrcConst = Coefficients; 2266 2267 // Examine Dst coefficients. 2268 // Compute running GCD and record destination constant. 2269 // Because we're looking for the constant at the end of the chain, 2270 // we can't quit the loop just because the GCD == 1. 2271 Coefficients = Dst; 2272 while (const SCEVAddRecExpr *AddRec = 2273 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2274 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2275 const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Coeff); 2276 if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff)) 2277 // If the coefficient is the product of a constant and other stuff, 2278 // we can use the constant in the GCD computation. 2279 Constant = getConstantPart(Product); 2280 if (!Constant) 2281 return false; 2282 APInt ConstCoeff = Constant->getValue()->getValue(); 2283 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2284 Coefficients = AddRec->getStart(); 2285 } 2286 const SCEV *DstConst = Coefficients; 2287 2288 APInt ExtraGCD = APInt::getNullValue(BitWidth); 2289 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 2290 DEBUG(dbgs() << " Delta = " << *Delta << "\n"); 2291 const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Delta); 2292 if (const SCEVAddExpr *Sum = dyn_cast<SCEVAddExpr>(Delta)) { 2293 // If Delta is a sum of products, we may be able to make further progress. 2294 for (unsigned Op = 0, Ops = Sum->getNumOperands(); Op < Ops; Op++) { 2295 const SCEV *Operand = Sum->getOperand(Op); 2296 if (isa<SCEVConstant>(Operand)) { 2297 assert(!Constant && "Surprised to find multiple constants"); 2298 Constant = cast<SCEVConstant>(Operand); 2299 } 2300 else if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Operand)) { 2301 // Search for constant operand to participate in GCD; 2302 // If none found; return false. 2303 const SCEVConstant *ConstOp = getConstantPart(Product); 2304 if (!ConstOp) 2305 return false; 2306 APInt ConstOpValue = ConstOp->getValue()->getValue(); 2307 ExtraGCD = APIntOps::GreatestCommonDivisor(ExtraGCD, 2308 ConstOpValue.abs()); 2309 } 2310 else 2311 return false; 2312 } 2313 } 2314 if (!Constant) 2315 return false; 2316 APInt ConstDelta = cast<SCEVConstant>(Constant)->getValue()->getValue(); 2317 DEBUG(dbgs() << " ConstDelta = " << ConstDelta << "\n"); 2318 if (ConstDelta == 0) 2319 return false; 2320 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ExtraGCD); 2321 DEBUG(dbgs() << " RunningGCD = " << RunningGCD << "\n"); 2322 APInt Remainder = ConstDelta.srem(RunningGCD); 2323 if (Remainder != 0) { 2324 ++GCDindependence; 2325 return true; 2326 } 2327 2328 // Try to disprove equal directions. 2329 // For example, given a subscript pair [3*i + 2*j] and [i' + 2*j' - 1], 2330 // the code above can't disprove the dependence because the GCD = 1. 2331 // So we consider what happen if i = i' and what happens if j = j'. 2332 // If i = i', we can simplify the subscript to [2*i + 2*j] and [2*j' - 1], 2333 // which is infeasible, so we can disallow the = direction for the i level. 2334 // Setting j = j' doesn't help matters, so we end up with a direction vector 2335 // of [<>, *] 2336 // 2337 // Given A[5*i + 10*j*M + 9*M*N] and A[15*i + 20*j*M - 21*N*M + 5], 2338 // we need to remember that the constant part is 5 and the RunningGCD should 2339 // be initialized to ExtraGCD = 30. 2340 DEBUG(dbgs() << " ExtraGCD = " << ExtraGCD << '\n'); 2341 2342 bool Improved = false; 2343 Coefficients = Src; 2344 while (const SCEVAddRecExpr *AddRec = 2345 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2346 Coefficients = AddRec->getStart(); 2347 const Loop *CurLoop = AddRec->getLoop(); 2348 RunningGCD = ExtraGCD; 2349 const SCEV *SrcCoeff = AddRec->getStepRecurrence(*SE); 2350 const SCEV *DstCoeff = SE->getMinusSCEV(SrcCoeff, SrcCoeff); 2351 const SCEV *Inner = Src; 2352 while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { 2353 AddRec = cast<SCEVAddRecExpr>(Inner); 2354 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2355 if (CurLoop == AddRec->getLoop()) 2356 ; // SrcCoeff == Coeff 2357 else { 2358 if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff)) 2359 // If the coefficient is the product of a constant and other stuff, 2360 // we can use the constant in the GCD computation. 2361 Constant = getConstantPart(Product); 2362 else 2363 Constant = cast<SCEVConstant>(Coeff); 2364 APInt ConstCoeff = Constant->getValue()->getValue(); 2365 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2366 } 2367 Inner = AddRec->getStart(); 2368 } 2369 Inner = Dst; 2370 while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { 2371 AddRec = cast<SCEVAddRecExpr>(Inner); 2372 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2373 if (CurLoop == AddRec->getLoop()) 2374 DstCoeff = Coeff; 2375 else { 2376 if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff)) 2377 // If the coefficient is the product of a constant and other stuff, 2378 // we can use the constant in the GCD computation. 2379 Constant = getConstantPart(Product); 2380 else 2381 Constant = cast<SCEVConstant>(Coeff); 2382 APInt ConstCoeff = Constant->getValue()->getValue(); 2383 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2384 } 2385 Inner = AddRec->getStart(); 2386 } 2387 Delta = SE->getMinusSCEV(SrcCoeff, DstCoeff); 2388 if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Delta)) 2389 // If the coefficient is the product of a constant and other stuff, 2390 // we can use the constant in the GCD computation. 2391 Constant = getConstantPart(Product); 2392 else if (isa<SCEVConstant>(Delta)) 2393 Constant = cast<SCEVConstant>(Delta); 2394 else { 2395 // The difference of the two coefficients might not be a product 2396 // or constant, in which case we give up on this direction. 2397 continue; 2398 } 2399 APInt ConstCoeff = Constant->getValue()->getValue(); 2400 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2401 DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n"); 2402 if (RunningGCD != 0) { 2403 Remainder = ConstDelta.srem(RunningGCD); 2404 DEBUG(dbgs() << "\tRemainder = " << Remainder << "\n"); 2405 if (Remainder != 0) { 2406 unsigned Level = mapSrcLoop(CurLoop); 2407 Result.DV[Level - 1].Direction &= unsigned(~Dependence::DVEntry::EQ); 2408 Improved = true; 2409 } 2410 } 2411 } 2412 if (Improved) 2413 ++GCDsuccesses; 2414 DEBUG(dbgs() << "all done\n"); 2415 return false; 2416 } 2417 2418 2419 //===----------------------------------------------------------------------===// 2420 // banerjeeMIVtest - 2421 // Use Banerjee's Inequalities to test an MIV subscript pair. 2422 // (Wolfe, in the race-car book, calls this the Extreme Value Test.) 2423 // Generally follows the discussion in Section 2.5.2 of 2424 // 2425 // Optimizing Supercompilers for Supercomputers 2426 // Michael Wolfe 2427 // 2428 // The inequalities given on page 25 are simplified in that loops are 2429 // normalized so that the lower bound is always 0 and the stride is always 1. 2430 // For example, Wolfe gives 2431 // 2432 // LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2433 // 2434 // where A_k is the coefficient of the kth index in the source subscript, 2435 // B_k is the coefficient of the kth index in the destination subscript, 2436 // U_k is the upper bound of the kth index, L_k is the lower bound of the Kth 2437 // index, and N_k is the stride of the kth index. Since all loops are normalized 2438 // by the SCEV package, N_k = 1 and L_k = 0, allowing us to simplify the 2439 // equation to 2440 // 2441 // LB^<_k = (A^-_k - B_k)^- (U_k - 0 - 1) + (A_k - B_k)0 - B_k 1 2442 // = (A^-_k - B_k)^- (U_k - 1) - B_k 2443 // 2444 // Similar simplifications are possible for the other equations. 2445 // 2446 // When we can't determine the number of iterations for a loop, 2447 // we use NULL as an indicator for the worst case, infinity. 2448 // When computing the upper bound, NULL denotes +inf; 2449 // for the lower bound, NULL denotes -inf. 2450 // 2451 // Return true if dependence disproved. 2452 bool DependenceAnalysis::banerjeeMIVtest(const SCEV *Src, 2453 const SCEV *Dst, 2454 const SmallBitVector &Loops, 2455 FullDependence &Result) const { 2456 DEBUG(dbgs() << "starting Banerjee\n"); 2457 ++BanerjeeApplications; 2458 DEBUG(dbgs() << " Src = " << *Src << '\n'); 2459 const SCEV *A0; 2460 CoefficientInfo *A = collectCoeffInfo(Src, true, A0); 2461 DEBUG(dbgs() << " Dst = " << *Dst << '\n'); 2462 const SCEV *B0; 2463 CoefficientInfo *B = collectCoeffInfo(Dst, false, B0); 2464 BoundInfo *Bound = new BoundInfo[MaxLevels + 1]; 2465 const SCEV *Delta = SE->getMinusSCEV(B0, A0); 2466 DEBUG(dbgs() << "\tDelta = " << *Delta << '\n'); 2467 2468 // Compute bounds for all the * directions. 2469 DEBUG(dbgs() << "\tBounds[*]\n"); 2470 for (unsigned K = 1; K <= MaxLevels; ++K) { 2471 Bound[K].Iterations = A[K].Iterations ? A[K].Iterations : B[K].Iterations; 2472 Bound[K].Direction = Dependence::DVEntry::ALL; 2473 Bound[K].DirSet = Dependence::DVEntry::NONE; 2474 findBoundsALL(A, B, Bound, K); 2475 #ifndef NDEBUG 2476 DEBUG(dbgs() << "\t " << K << '\t'); 2477 if (Bound[K].Lower[Dependence::DVEntry::ALL]) 2478 DEBUG(dbgs() << *Bound[K].Lower[Dependence::DVEntry::ALL] << '\t'); 2479 else 2480 DEBUG(dbgs() << "-inf\t"); 2481 if (Bound[K].Upper[Dependence::DVEntry::ALL]) 2482 DEBUG(dbgs() << *Bound[K].Upper[Dependence::DVEntry::ALL] << '\n'); 2483 else 2484 DEBUG(dbgs() << "+inf\n"); 2485 #endif 2486 } 2487 2488 // Test the *, *, *, ... case. 2489 bool Disproved = false; 2490 if (testBounds(Dependence::DVEntry::ALL, 0, Bound, Delta)) { 2491 // Explore the direction vector hierarchy. 2492 unsigned DepthExpanded = 0; 2493 unsigned NewDeps = exploreDirections(1, A, B, Bound, 2494 Loops, DepthExpanded, Delta); 2495 if (NewDeps > 0) { 2496 bool Improved = false; 2497 for (unsigned K = 1; K <= CommonLevels; ++K) { 2498 if (Loops[K]) { 2499 unsigned Old = Result.DV[K - 1].Direction; 2500 Result.DV[K - 1].Direction = Old & Bound[K].DirSet; 2501 Improved |= Old != Result.DV[K - 1].Direction; 2502 if (!Result.DV[K - 1].Direction) { 2503 Improved = false; 2504 Disproved = true; 2505 break; 2506 } 2507 } 2508 } 2509 if (Improved) 2510 ++BanerjeeSuccesses; 2511 } 2512 else { 2513 ++BanerjeeIndependence; 2514 Disproved = true; 2515 } 2516 } 2517 else { 2518 ++BanerjeeIndependence; 2519 Disproved = true; 2520 } 2521 delete [] Bound; 2522 delete [] A; 2523 delete [] B; 2524 return Disproved; 2525 } 2526 2527 2528 // Hierarchically expands the direction vector 2529 // search space, combining the directions of discovered dependences 2530 // in the DirSet field of Bound. Returns the number of distinct 2531 // dependences discovered. If the dependence is disproved, 2532 // it will return 0. 2533 unsigned DependenceAnalysis::exploreDirections(unsigned Level, 2534 CoefficientInfo *A, 2535 CoefficientInfo *B, 2536 BoundInfo *Bound, 2537 const SmallBitVector &Loops, 2538 unsigned &DepthExpanded, 2539 const SCEV *Delta) const { 2540 if (Level > CommonLevels) { 2541 // record result 2542 DEBUG(dbgs() << "\t["); 2543 for (unsigned K = 1; K <= CommonLevels; ++K) { 2544 if (Loops[K]) { 2545 Bound[K].DirSet |= Bound[K].Direction; 2546 #ifndef NDEBUG 2547 switch (Bound[K].Direction) { 2548 case Dependence::DVEntry::LT: 2549 DEBUG(dbgs() << " <"); 2550 break; 2551 case Dependence::DVEntry::EQ: 2552 DEBUG(dbgs() << " ="); 2553 break; 2554 case Dependence::DVEntry::GT: 2555 DEBUG(dbgs() << " >"); 2556 break; 2557 case Dependence::DVEntry::ALL: 2558 DEBUG(dbgs() << " *"); 2559 break; 2560 default: 2561 llvm_unreachable("unexpected Bound[K].Direction"); 2562 } 2563 #endif 2564 } 2565 } 2566 DEBUG(dbgs() << " ]\n"); 2567 return 1; 2568 } 2569 if (Loops[Level]) { 2570 if (Level > DepthExpanded) { 2571 DepthExpanded = Level; 2572 // compute bounds for <, =, > at current level 2573 findBoundsLT(A, B, Bound, Level); 2574 findBoundsGT(A, B, Bound, Level); 2575 findBoundsEQ(A, B, Bound, Level); 2576 #ifndef NDEBUG 2577 DEBUG(dbgs() << "\tBound for level = " << Level << '\n'); 2578 DEBUG(dbgs() << "\t <\t"); 2579 if (Bound[Level].Lower[Dependence::DVEntry::LT]) 2580 DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::LT] << '\t'); 2581 else 2582 DEBUG(dbgs() << "-inf\t"); 2583 if (Bound[Level].Upper[Dependence::DVEntry::LT]) 2584 DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::LT] << '\n'); 2585 else 2586 DEBUG(dbgs() << "+inf\n"); 2587 DEBUG(dbgs() << "\t =\t"); 2588 if (Bound[Level].Lower[Dependence::DVEntry::EQ]) 2589 DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::EQ] << '\t'); 2590 else 2591 DEBUG(dbgs() << "-inf\t"); 2592 if (Bound[Level].Upper[Dependence::DVEntry::EQ]) 2593 DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::EQ] << '\n'); 2594 else 2595 DEBUG(dbgs() << "+inf\n"); 2596 DEBUG(dbgs() << "\t >\t"); 2597 if (Bound[Level].Lower[Dependence::DVEntry::GT]) 2598 DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::GT] << '\t'); 2599 else 2600 DEBUG(dbgs() << "-inf\t"); 2601 if (Bound[Level].Upper[Dependence::DVEntry::GT]) 2602 DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::GT] << '\n'); 2603 else 2604 DEBUG(dbgs() << "+inf\n"); 2605 #endif 2606 } 2607 2608 unsigned NewDeps = 0; 2609 2610 // test bounds for <, *, *, ... 2611 if (testBounds(Dependence::DVEntry::LT, Level, Bound, Delta)) 2612 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2613 Loops, DepthExpanded, Delta); 2614 2615 // Test bounds for =, *, *, ... 2616 if (testBounds(Dependence::DVEntry::EQ, Level, Bound, Delta)) 2617 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2618 Loops, DepthExpanded, Delta); 2619 2620 // test bounds for >, *, *, ... 2621 if (testBounds(Dependence::DVEntry::GT, Level, Bound, Delta)) 2622 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2623 Loops, DepthExpanded, Delta); 2624 2625 Bound[Level].Direction = Dependence::DVEntry::ALL; 2626 return NewDeps; 2627 } 2628 else 2629 return exploreDirections(Level + 1, A, B, Bound, Loops, DepthExpanded, Delta); 2630 } 2631 2632 2633 // Returns true iff the current bounds are plausible. 2634 bool DependenceAnalysis::testBounds(unsigned char DirKind, 2635 unsigned Level, 2636 BoundInfo *Bound, 2637 const SCEV *Delta) const { 2638 Bound[Level].Direction = DirKind; 2639 if (const SCEV *LowerBound = getLowerBound(Bound)) 2640 if (isKnownPredicate(CmpInst::ICMP_SGT, LowerBound, Delta)) 2641 return false; 2642 if (const SCEV *UpperBound = getUpperBound(Bound)) 2643 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, UpperBound)) 2644 return false; 2645 return true; 2646 } 2647 2648 2649 // Computes the upper and lower bounds for level K 2650 // using the * direction. Records them in Bound. 2651 // Wolfe gives the equations 2652 // 2653 // LB^*_k = (A^-_k - B^+_k)(U_k - L_k) + (A_k - B_k)L_k 2654 // UB^*_k = (A^+_k - B^-_k)(U_k - L_k) + (A_k - B_k)L_k 2655 // 2656 // Since we normalize loops, we can simplify these equations to 2657 // 2658 // LB^*_k = (A^-_k - B^+_k)U_k 2659 // UB^*_k = (A^+_k - B^-_k)U_k 2660 // 2661 // We must be careful to handle the case where the upper bound is unknown. 2662 // Note that the lower bound is always <= 0 2663 // and the upper bound is always >= 0. 2664 void DependenceAnalysis::findBoundsALL(CoefficientInfo *A, 2665 CoefficientInfo *B, 2666 BoundInfo *Bound, 2667 unsigned K) const { 2668 Bound[K].Lower[Dependence::DVEntry::ALL] = nullptr; // Default value = -infinity. 2669 Bound[K].Upper[Dependence::DVEntry::ALL] = nullptr; // Default value = +infinity. 2670 if (Bound[K].Iterations) { 2671 Bound[K].Lower[Dependence::DVEntry::ALL] = 2672 SE->getMulExpr(SE->getMinusSCEV(A[K].NegPart, B[K].PosPart), 2673 Bound[K].Iterations); 2674 Bound[K].Upper[Dependence::DVEntry::ALL] = 2675 SE->getMulExpr(SE->getMinusSCEV(A[K].PosPart, B[K].NegPart), 2676 Bound[K].Iterations); 2677 } 2678 else { 2679 // If the difference is 0, we won't need to know the number of iterations. 2680 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].NegPart, B[K].PosPart)) 2681 Bound[K].Lower[Dependence::DVEntry::ALL] = 2682 SE->getConstant(A[K].Coeff->getType(), 0); 2683 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].PosPart, B[K].NegPart)) 2684 Bound[K].Upper[Dependence::DVEntry::ALL] = 2685 SE->getConstant(A[K].Coeff->getType(), 0); 2686 } 2687 } 2688 2689 2690 // Computes the upper and lower bounds for level K 2691 // using the = direction. Records them in Bound. 2692 // Wolfe gives the equations 2693 // 2694 // LB^=_k = (A_k - B_k)^- (U_k - L_k) + (A_k - B_k)L_k 2695 // UB^=_k = (A_k - B_k)^+ (U_k - L_k) + (A_k - B_k)L_k 2696 // 2697 // Since we normalize loops, we can simplify these equations to 2698 // 2699 // LB^=_k = (A_k - B_k)^- U_k 2700 // UB^=_k = (A_k - B_k)^+ U_k 2701 // 2702 // We must be careful to handle the case where the upper bound is unknown. 2703 // Note that the lower bound is always <= 0 2704 // and the upper bound is always >= 0. 2705 void DependenceAnalysis::findBoundsEQ(CoefficientInfo *A, 2706 CoefficientInfo *B, 2707 BoundInfo *Bound, 2708 unsigned K) const { 2709 Bound[K].Lower[Dependence::DVEntry::EQ] = nullptr; // Default value = -infinity. 2710 Bound[K].Upper[Dependence::DVEntry::EQ] = nullptr; // Default value = +infinity. 2711 if (Bound[K].Iterations) { 2712 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); 2713 const SCEV *NegativePart = getNegativePart(Delta); 2714 Bound[K].Lower[Dependence::DVEntry::EQ] = 2715 SE->getMulExpr(NegativePart, Bound[K].Iterations); 2716 const SCEV *PositivePart = getPositivePart(Delta); 2717 Bound[K].Upper[Dependence::DVEntry::EQ] = 2718 SE->getMulExpr(PositivePart, Bound[K].Iterations); 2719 } 2720 else { 2721 // If the positive/negative part of the difference is 0, 2722 // we won't need to know the number of iterations. 2723 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); 2724 const SCEV *NegativePart = getNegativePart(Delta); 2725 if (NegativePart->isZero()) 2726 Bound[K].Lower[Dependence::DVEntry::EQ] = NegativePart; // Zero 2727 const SCEV *PositivePart = getPositivePart(Delta); 2728 if (PositivePart->isZero()) 2729 Bound[K].Upper[Dependence::DVEntry::EQ] = PositivePart; // Zero 2730 } 2731 } 2732 2733 2734 // Computes the upper and lower bounds for level K 2735 // using the < direction. Records them in Bound. 2736 // Wolfe gives the equations 2737 // 2738 // LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2739 // UB^<_k = (A^+_k - B_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2740 // 2741 // Since we normalize loops, we can simplify these equations to 2742 // 2743 // LB^<_k = (A^-_k - B_k)^- (U_k - 1) - B_k 2744 // UB^<_k = (A^+_k - B_k)^+ (U_k - 1) - B_k 2745 // 2746 // We must be careful to handle the case where the upper bound is unknown. 2747 void DependenceAnalysis::findBoundsLT(CoefficientInfo *A, 2748 CoefficientInfo *B, 2749 BoundInfo *Bound, 2750 unsigned K) const { 2751 Bound[K].Lower[Dependence::DVEntry::LT] = nullptr; // Default value = -infinity. 2752 Bound[K].Upper[Dependence::DVEntry::LT] = nullptr; // Default value = +infinity. 2753 if (Bound[K].Iterations) { 2754 const SCEV *Iter_1 = 2755 SE->getMinusSCEV(Bound[K].Iterations, 2756 SE->getConstant(Bound[K].Iterations->getType(), 1)); 2757 const SCEV *NegPart = 2758 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); 2759 Bound[K].Lower[Dependence::DVEntry::LT] = 2760 SE->getMinusSCEV(SE->getMulExpr(NegPart, Iter_1), B[K].Coeff); 2761 const SCEV *PosPart = 2762 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); 2763 Bound[K].Upper[Dependence::DVEntry::LT] = 2764 SE->getMinusSCEV(SE->getMulExpr(PosPart, Iter_1), B[K].Coeff); 2765 } 2766 else { 2767 // If the positive/negative part of the difference is 0, 2768 // we won't need to know the number of iterations. 2769 const SCEV *NegPart = 2770 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); 2771 if (NegPart->isZero()) 2772 Bound[K].Lower[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); 2773 const SCEV *PosPart = 2774 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); 2775 if (PosPart->isZero()) 2776 Bound[K].Upper[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); 2777 } 2778 } 2779 2780 2781 // Computes the upper and lower bounds for level K 2782 // using the > direction. Records them in Bound. 2783 // Wolfe gives the equations 2784 // 2785 // LB^>_k = (A_k - B^+_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k 2786 // UB^>_k = (A_k - B^-_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k 2787 // 2788 // Since we normalize loops, we can simplify these equations to 2789 // 2790 // LB^>_k = (A_k - B^+_k)^- (U_k - 1) + A_k 2791 // UB^>_k = (A_k - B^-_k)^+ (U_k - 1) + A_k 2792 // 2793 // We must be careful to handle the case where the upper bound is unknown. 2794 void DependenceAnalysis::findBoundsGT(CoefficientInfo *A, 2795 CoefficientInfo *B, 2796 BoundInfo *Bound, 2797 unsigned K) const { 2798 Bound[K].Lower[Dependence::DVEntry::GT] = nullptr; // Default value = -infinity. 2799 Bound[K].Upper[Dependence::DVEntry::GT] = nullptr; // Default value = +infinity. 2800 if (Bound[K].Iterations) { 2801 const SCEV *Iter_1 = 2802 SE->getMinusSCEV(Bound[K].Iterations, 2803 SE->getConstant(Bound[K].Iterations->getType(), 1)); 2804 const SCEV *NegPart = 2805 getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); 2806 Bound[K].Lower[Dependence::DVEntry::GT] = 2807 SE->getAddExpr(SE->getMulExpr(NegPart, Iter_1), A[K].Coeff); 2808 const SCEV *PosPart = 2809 getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); 2810 Bound[K].Upper[Dependence::DVEntry::GT] = 2811 SE->getAddExpr(SE->getMulExpr(PosPart, Iter_1), A[K].Coeff); 2812 } 2813 else { 2814 // If the positive/negative part of the difference is 0, 2815 // we won't need to know the number of iterations. 2816 const SCEV *NegPart = getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); 2817 if (NegPart->isZero()) 2818 Bound[K].Lower[Dependence::DVEntry::GT] = A[K].Coeff; 2819 const SCEV *PosPart = getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); 2820 if (PosPart->isZero()) 2821 Bound[K].Upper[Dependence::DVEntry::GT] = A[K].Coeff; 2822 } 2823 } 2824 2825 2826 // X^+ = max(X, 0) 2827 const SCEV *DependenceAnalysis::getPositivePart(const SCEV *X) const { 2828 return SE->getSMaxExpr(X, SE->getConstant(X->getType(), 0)); 2829 } 2830 2831 2832 // X^- = min(X, 0) 2833 const SCEV *DependenceAnalysis::getNegativePart(const SCEV *X) const { 2834 return SE->getSMinExpr(X, SE->getConstant(X->getType(), 0)); 2835 } 2836 2837 2838 // Walks through the subscript, 2839 // collecting each coefficient, the associated loop bounds, 2840 // and recording its positive and negative parts for later use. 2841 DependenceAnalysis::CoefficientInfo * 2842 DependenceAnalysis::collectCoeffInfo(const SCEV *Subscript, 2843 bool SrcFlag, 2844 const SCEV *&Constant) const { 2845 const SCEV *Zero = SE->getConstant(Subscript->getType(), 0); 2846 CoefficientInfo *CI = new CoefficientInfo[MaxLevels + 1]; 2847 for (unsigned K = 1; K <= MaxLevels; ++K) { 2848 CI[K].Coeff = Zero; 2849 CI[K].PosPart = Zero; 2850 CI[K].NegPart = Zero; 2851 CI[K].Iterations = nullptr; 2852 } 2853 while (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Subscript)) { 2854 const Loop *L = AddRec->getLoop(); 2855 unsigned K = SrcFlag ? mapSrcLoop(L) : mapDstLoop(L); 2856 CI[K].Coeff = AddRec->getStepRecurrence(*SE); 2857 CI[K].PosPart = getPositivePart(CI[K].Coeff); 2858 CI[K].NegPart = getNegativePart(CI[K].Coeff); 2859 CI[K].Iterations = collectUpperBound(L, Subscript->getType()); 2860 Subscript = AddRec->getStart(); 2861 } 2862 Constant = Subscript; 2863 #ifndef NDEBUG 2864 DEBUG(dbgs() << "\tCoefficient Info\n"); 2865 for (unsigned K = 1; K <= MaxLevels; ++K) { 2866 DEBUG(dbgs() << "\t " << K << "\t" << *CI[K].Coeff); 2867 DEBUG(dbgs() << "\tPos Part = "); 2868 DEBUG(dbgs() << *CI[K].PosPart); 2869 DEBUG(dbgs() << "\tNeg Part = "); 2870 DEBUG(dbgs() << *CI[K].NegPart); 2871 DEBUG(dbgs() << "\tUpper Bound = "); 2872 if (CI[K].Iterations) 2873 DEBUG(dbgs() << *CI[K].Iterations); 2874 else 2875 DEBUG(dbgs() << "+inf"); 2876 DEBUG(dbgs() << '\n'); 2877 } 2878 DEBUG(dbgs() << "\t Constant = " << *Subscript << '\n'); 2879 #endif 2880 return CI; 2881 } 2882 2883 2884 // Looks through all the bounds info and 2885 // computes the lower bound given the current direction settings 2886 // at each level. If the lower bound for any level is -inf, 2887 // the result is -inf. 2888 const SCEV *DependenceAnalysis::getLowerBound(BoundInfo *Bound) const { 2889 const SCEV *Sum = Bound[1].Lower[Bound[1].Direction]; 2890 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { 2891 if (Bound[K].Lower[Bound[K].Direction]) 2892 Sum = SE->getAddExpr(Sum, Bound[K].Lower[Bound[K].Direction]); 2893 else 2894 Sum = nullptr; 2895 } 2896 return Sum; 2897 } 2898 2899 2900 // Looks through all the bounds info and 2901 // computes the upper bound given the current direction settings 2902 // at each level. If the upper bound at any level is +inf, 2903 // the result is +inf. 2904 const SCEV *DependenceAnalysis::getUpperBound(BoundInfo *Bound) const { 2905 const SCEV *Sum = Bound[1].Upper[Bound[1].Direction]; 2906 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { 2907 if (Bound[K].Upper[Bound[K].Direction]) 2908 Sum = SE->getAddExpr(Sum, Bound[K].Upper[Bound[K].Direction]); 2909 else 2910 Sum = nullptr; 2911 } 2912 return Sum; 2913 } 2914 2915 2916 //===----------------------------------------------------------------------===// 2917 // Constraint manipulation for Delta test. 2918 2919 // Given a linear SCEV, 2920 // return the coefficient (the step) 2921 // corresponding to the specified loop. 2922 // If there isn't one, return 0. 2923 // For example, given a*i + b*j + c*k, zeroing the coefficient 2924 // corresponding to the j loop would yield b. 2925 const SCEV *DependenceAnalysis::findCoefficient(const SCEV *Expr, 2926 const Loop *TargetLoop) const { 2927 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 2928 if (!AddRec) 2929 return SE->getConstant(Expr->getType(), 0); 2930 if (AddRec->getLoop() == TargetLoop) 2931 return AddRec->getStepRecurrence(*SE); 2932 return findCoefficient(AddRec->getStart(), TargetLoop); 2933 } 2934 2935 2936 // Given a linear SCEV, 2937 // return the SCEV given by zeroing out the coefficient 2938 // corresponding to the specified loop. 2939 // For example, given a*i + b*j + c*k, zeroing the coefficient 2940 // corresponding to the j loop would yield a*i + c*k. 2941 const SCEV *DependenceAnalysis::zeroCoefficient(const SCEV *Expr, 2942 const Loop *TargetLoop) const { 2943 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 2944 if (!AddRec) 2945 return Expr; // ignore 2946 if (AddRec->getLoop() == TargetLoop) 2947 return AddRec->getStart(); 2948 return SE->getAddRecExpr(zeroCoefficient(AddRec->getStart(), TargetLoop), 2949 AddRec->getStepRecurrence(*SE), 2950 AddRec->getLoop(), 2951 AddRec->getNoWrapFlags()); 2952 } 2953 2954 2955 // Given a linear SCEV Expr, 2956 // return the SCEV given by adding some Value to the 2957 // coefficient corresponding to the specified TargetLoop. 2958 // For example, given a*i + b*j + c*k, adding 1 to the coefficient 2959 // corresponding to the j loop would yield a*i + (b+1)*j + c*k. 2960 const SCEV *DependenceAnalysis::addToCoefficient(const SCEV *Expr, 2961 const Loop *TargetLoop, 2962 const SCEV *Value) const { 2963 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 2964 if (!AddRec) // create a new addRec 2965 return SE->getAddRecExpr(Expr, 2966 Value, 2967 TargetLoop, 2968 SCEV::FlagAnyWrap); // Worst case, with no info. 2969 if (AddRec->getLoop() == TargetLoop) { 2970 const SCEV *Sum = SE->getAddExpr(AddRec->getStepRecurrence(*SE), Value); 2971 if (Sum->isZero()) 2972 return AddRec->getStart(); 2973 return SE->getAddRecExpr(AddRec->getStart(), 2974 Sum, 2975 AddRec->getLoop(), 2976 AddRec->getNoWrapFlags()); 2977 } 2978 if (SE->isLoopInvariant(AddRec, TargetLoop)) 2979 return SE->getAddRecExpr(AddRec, Value, TargetLoop, SCEV::FlagAnyWrap); 2980 return SE->getAddRecExpr( 2981 addToCoefficient(AddRec->getStart(), TargetLoop, Value), 2982 AddRec->getStepRecurrence(*SE), AddRec->getLoop(), 2983 AddRec->getNoWrapFlags()); 2984 } 2985 2986 2987 // Review the constraints, looking for opportunities 2988 // to simplify a subscript pair (Src and Dst). 2989 // Return true if some simplification occurs. 2990 // If the simplification isn't exact (that is, if it is conservative 2991 // in terms of dependence), set consistent to false. 2992 // Corresponds to Figure 5 from the paper 2993 // 2994 // Practical Dependence Testing 2995 // Goff, Kennedy, Tseng 2996 // PLDI 1991 2997 bool DependenceAnalysis::propagate(const SCEV *&Src, 2998 const SCEV *&Dst, 2999 SmallBitVector &Loops, 3000 SmallVectorImpl<Constraint> &Constraints, 3001 bool &Consistent) { 3002 bool Result = false; 3003 for (int LI = Loops.find_first(); LI >= 0; LI = Loops.find_next(LI)) { 3004 DEBUG(dbgs() << "\t Constraint[" << LI << "] is"); 3005 DEBUG(Constraints[LI].dump(dbgs())); 3006 if (Constraints[LI].isDistance()) 3007 Result |= propagateDistance(Src, Dst, Constraints[LI], Consistent); 3008 else if (Constraints[LI].isLine()) 3009 Result |= propagateLine(Src, Dst, Constraints[LI], Consistent); 3010 else if (Constraints[LI].isPoint()) 3011 Result |= propagatePoint(Src, Dst, Constraints[LI]); 3012 } 3013 return Result; 3014 } 3015 3016 3017 // Attempt to propagate a distance 3018 // constraint into a subscript pair (Src and Dst). 3019 // Return true if some simplification occurs. 3020 // If the simplification isn't exact (that is, if it is conservative 3021 // in terms of dependence), set consistent to false. 3022 bool DependenceAnalysis::propagateDistance(const SCEV *&Src, 3023 const SCEV *&Dst, 3024 Constraint &CurConstraint, 3025 bool &Consistent) { 3026 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3027 DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); 3028 const SCEV *A_K = findCoefficient(Src, CurLoop); 3029 if (A_K->isZero()) 3030 return false; 3031 const SCEV *DA_K = SE->getMulExpr(A_K, CurConstraint.getD()); 3032 Src = SE->getMinusSCEV(Src, DA_K); 3033 Src = zeroCoefficient(Src, CurLoop); 3034 DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); 3035 DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); 3036 Dst = addToCoefficient(Dst, CurLoop, SE->getNegativeSCEV(A_K)); 3037 DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); 3038 if (!findCoefficient(Dst, CurLoop)->isZero()) 3039 Consistent = false; 3040 return true; 3041 } 3042 3043 3044 // Attempt to propagate a line 3045 // constraint into a subscript pair (Src and Dst). 3046 // Return true if some simplification occurs. 3047 // If the simplification isn't exact (that is, if it is conservative 3048 // in terms of dependence), set consistent to false. 3049 bool DependenceAnalysis::propagateLine(const SCEV *&Src, 3050 const SCEV *&Dst, 3051 Constraint &CurConstraint, 3052 bool &Consistent) { 3053 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3054 const SCEV *A = CurConstraint.getA(); 3055 const SCEV *B = CurConstraint.getB(); 3056 const SCEV *C = CurConstraint.getC(); 3057 DEBUG(dbgs() << "\t\tA = " << *A << ", B = " << *B << ", C = " << *C << "\n"); 3058 DEBUG(dbgs() << "\t\tSrc = " << *Src << "\n"); 3059 DEBUG(dbgs() << "\t\tDst = " << *Dst << "\n"); 3060 if (A->isZero()) { 3061 const SCEVConstant *Bconst = dyn_cast<SCEVConstant>(B); 3062 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3063 if (!Bconst || !Cconst) return false; 3064 APInt Beta = Bconst->getValue()->getValue(); 3065 APInt Charlie = Cconst->getValue()->getValue(); 3066 APInt CdivB = Charlie.sdiv(Beta); 3067 assert(Charlie.srem(Beta) == 0 && "C should be evenly divisible by B"); 3068 const SCEV *AP_K = findCoefficient(Dst, CurLoop); 3069 // Src = SE->getAddExpr(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); 3070 Src = SE->getMinusSCEV(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); 3071 Dst = zeroCoefficient(Dst, CurLoop); 3072 if (!findCoefficient(Src, CurLoop)->isZero()) 3073 Consistent = false; 3074 } 3075 else if (B->isZero()) { 3076 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); 3077 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3078 if (!Aconst || !Cconst) return false; 3079 APInt Alpha = Aconst->getValue()->getValue(); 3080 APInt Charlie = Cconst->getValue()->getValue(); 3081 APInt CdivA = Charlie.sdiv(Alpha); 3082 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); 3083 const SCEV *A_K = findCoefficient(Src, CurLoop); 3084 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); 3085 Src = zeroCoefficient(Src, CurLoop); 3086 if (!findCoefficient(Dst, CurLoop)->isZero()) 3087 Consistent = false; 3088 } 3089 else if (isKnownPredicate(CmpInst::ICMP_EQ, A, B)) { 3090 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); 3091 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3092 if (!Aconst || !Cconst) return false; 3093 APInt Alpha = Aconst->getValue()->getValue(); 3094 APInt Charlie = Cconst->getValue()->getValue(); 3095 APInt CdivA = Charlie.sdiv(Alpha); 3096 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); 3097 const SCEV *A_K = findCoefficient(Src, CurLoop); 3098 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); 3099 Src = zeroCoefficient(Src, CurLoop); 3100 Dst = addToCoefficient(Dst, CurLoop, A_K); 3101 if (!findCoefficient(Dst, CurLoop)->isZero()) 3102 Consistent = false; 3103 } 3104 else { 3105 // paper is incorrect here, or perhaps just misleading 3106 const SCEV *A_K = findCoefficient(Src, CurLoop); 3107 Src = SE->getMulExpr(Src, A); 3108 Dst = SE->getMulExpr(Dst, A); 3109 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, C)); 3110 Src = zeroCoefficient(Src, CurLoop); 3111 Dst = addToCoefficient(Dst, CurLoop, SE->getMulExpr(A_K, B)); 3112 if (!findCoefficient(Dst, CurLoop)->isZero()) 3113 Consistent = false; 3114 } 3115 DEBUG(dbgs() << "\t\tnew Src = " << *Src << "\n"); 3116 DEBUG(dbgs() << "\t\tnew Dst = " << *Dst << "\n"); 3117 return true; 3118 } 3119 3120 3121 // Attempt to propagate a point 3122 // constraint into a subscript pair (Src and Dst). 3123 // Return true if some simplification occurs. 3124 bool DependenceAnalysis::propagatePoint(const SCEV *&Src, 3125 const SCEV *&Dst, 3126 Constraint &CurConstraint) { 3127 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3128 const SCEV *A_K = findCoefficient(Src, CurLoop); 3129 const SCEV *AP_K = findCoefficient(Dst, CurLoop); 3130 const SCEV *XA_K = SE->getMulExpr(A_K, CurConstraint.getX()); 3131 const SCEV *YAP_K = SE->getMulExpr(AP_K, CurConstraint.getY()); 3132 DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); 3133 Src = SE->getAddExpr(Src, SE->getMinusSCEV(XA_K, YAP_K)); 3134 Src = zeroCoefficient(Src, CurLoop); 3135 DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); 3136 DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); 3137 Dst = zeroCoefficient(Dst, CurLoop); 3138 DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); 3139 return true; 3140 } 3141 3142 3143 // Update direction vector entry based on the current constraint. 3144 void DependenceAnalysis::updateDirection(Dependence::DVEntry &Level, 3145 const Constraint &CurConstraint 3146 ) const { 3147 DEBUG(dbgs() << "\tUpdate direction, constraint ="); 3148 DEBUG(CurConstraint.dump(dbgs())); 3149 if (CurConstraint.isAny()) 3150 ; // use defaults 3151 else if (CurConstraint.isDistance()) { 3152 // this one is consistent, the others aren't 3153 Level.Scalar = false; 3154 Level.Distance = CurConstraint.getD(); 3155 unsigned NewDirection = Dependence::DVEntry::NONE; 3156 if (!SE->isKnownNonZero(Level.Distance)) // if may be zero 3157 NewDirection = Dependence::DVEntry::EQ; 3158 if (!SE->isKnownNonPositive(Level.Distance)) // if may be positive 3159 NewDirection |= Dependence::DVEntry::LT; 3160 if (!SE->isKnownNonNegative(Level.Distance)) // if may be negative 3161 NewDirection |= Dependence::DVEntry::GT; 3162 Level.Direction &= NewDirection; 3163 } 3164 else if (CurConstraint.isLine()) { 3165 Level.Scalar = false; 3166 Level.Distance = nullptr; 3167 // direction should be accurate 3168 } 3169 else if (CurConstraint.isPoint()) { 3170 Level.Scalar = false; 3171 Level.Distance = nullptr; 3172 unsigned NewDirection = Dependence::DVEntry::NONE; 3173 if (!isKnownPredicate(CmpInst::ICMP_NE, 3174 CurConstraint.getY(), 3175 CurConstraint.getX())) 3176 // if X may be = Y 3177 NewDirection |= Dependence::DVEntry::EQ; 3178 if (!isKnownPredicate(CmpInst::ICMP_SLE, 3179 CurConstraint.getY(), 3180 CurConstraint.getX())) 3181 // if Y may be > X 3182 NewDirection |= Dependence::DVEntry::LT; 3183 if (!isKnownPredicate(CmpInst::ICMP_SGE, 3184 CurConstraint.getY(), 3185 CurConstraint.getX())) 3186 // if Y may be < X 3187 NewDirection |= Dependence::DVEntry::GT; 3188 Level.Direction &= NewDirection; 3189 } 3190 else 3191 llvm_unreachable("constraint has unexpected kind"); 3192 } 3193 3194 /// Check if we can delinearize the subscripts. If the SCEVs representing the 3195 /// source and destination array references are recurrences on a nested loop, 3196 /// this function flattens the nested recurrences into separate recurrences 3197 /// for each loop level. 3198 bool DependenceAnalysis::tryDelinearize(const SCEV *SrcSCEV, 3199 const SCEV *DstSCEV, 3200 SmallVectorImpl<Subscript> &Pair, 3201 const SCEV *ElementSize) { 3202 const SCEVUnknown *SrcBase = 3203 dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcSCEV)); 3204 const SCEVUnknown *DstBase = 3205 dyn_cast<SCEVUnknown>(SE->getPointerBase(DstSCEV)); 3206 3207 if (!SrcBase || !DstBase || SrcBase != DstBase) 3208 return false; 3209 3210 SrcSCEV = SE->getMinusSCEV(SrcSCEV, SrcBase); 3211 DstSCEV = SE->getMinusSCEV(DstSCEV, DstBase); 3212 3213 const SCEVAddRecExpr *SrcAR = dyn_cast<SCEVAddRecExpr>(SrcSCEV); 3214 const SCEVAddRecExpr *DstAR = dyn_cast<SCEVAddRecExpr>(DstSCEV); 3215 if (!SrcAR || !DstAR || !SrcAR->isAffine() || !DstAR->isAffine()) 3216 return false; 3217 3218 // First step: collect parametric terms in both array references. 3219 SmallVector<const SCEV *, 4> Terms; 3220 SrcAR->collectParametricTerms(*SE, Terms); 3221 DstAR->collectParametricTerms(*SE, Terms); 3222 3223 // Second step: find subscript sizes. 3224 SmallVector<const SCEV *, 4> Sizes; 3225 SE->findArrayDimensions(Terms, Sizes, ElementSize); 3226 3227 // Third step: compute the access functions for each subscript. 3228 SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts; 3229 SrcAR->computeAccessFunctions(*SE, SrcSubscripts, Sizes); 3230 DstAR->computeAccessFunctions(*SE, DstSubscripts, Sizes); 3231 3232 // Fail when there is only a subscript: that's a linearized access function. 3233 if (SrcSubscripts.size() < 2 || DstSubscripts.size() < 2 || 3234 SrcSubscripts.size() != DstSubscripts.size()) 3235 return false; 3236 3237 int size = SrcSubscripts.size(); 3238 3239 DEBUG({ 3240 dbgs() << "\nSrcSubscripts: "; 3241 for (int i = 0; i < size; i++) 3242 dbgs() << *SrcSubscripts[i]; 3243 dbgs() << "\nDstSubscripts: "; 3244 for (int i = 0; i < size; i++) 3245 dbgs() << *DstSubscripts[i]; 3246 }); 3247 3248 // The delinearization transforms a single-subscript MIV dependence test into 3249 // a multi-subscript SIV dependence test that is easier to compute. So we 3250 // resize Pair to contain as many pairs of subscripts as the delinearization 3251 // has found, and then initialize the pairs following the delinearization. 3252 Pair.resize(size); 3253 for (int i = 0; i < size; ++i) { 3254 Pair[i].Src = SrcSubscripts[i]; 3255 Pair[i].Dst = DstSubscripts[i]; 3256 unifySubscriptType(&Pair[i]); 3257 3258 // FIXME: we should record the bounds SrcSizes[i] and DstSizes[i] that the 3259 // delinearization has found, and add these constraints to the dependence 3260 // check to avoid memory accesses overflow from one dimension into another. 3261 // This is related to the problem of determining the existence of data 3262 // dependences in array accesses using a different number of subscripts: in 3263 // C one can access an array A[100][100]; as A[0][9999], *A[9999], etc. 3264 } 3265 3266 return true; 3267 } 3268 3269 //===----------------------------------------------------------------------===// 3270 3271 #ifndef NDEBUG 3272 // For debugging purposes, dump a small bit vector to dbgs(). 3273 static void dumpSmallBitVector(SmallBitVector &BV) { 3274 dbgs() << "{"; 3275 for (int VI = BV.find_first(); VI >= 0; VI = BV.find_next(VI)) { 3276 dbgs() << VI; 3277 if (BV.find_next(VI) >= 0) 3278 dbgs() << ' '; 3279 } 3280 dbgs() << "}\n"; 3281 } 3282 #endif 3283 3284 3285 // depends - 3286 // Returns NULL if there is no dependence. 3287 // Otherwise, return a Dependence with as many details as possible. 3288 // Corresponds to Section 3.1 in the paper 3289 // 3290 // Practical Dependence Testing 3291 // Goff, Kennedy, Tseng 3292 // PLDI 1991 3293 // 3294 // Care is required to keep the routine below, getSplitIteration(), 3295 // up to date with respect to this routine. 3296 std::unique_ptr<Dependence> 3297 DependenceAnalysis::depends(Instruction *Src, Instruction *Dst, 3298 bool PossiblyLoopIndependent) { 3299 if (Src == Dst) 3300 PossiblyLoopIndependent = false; 3301 3302 if ((!Src->mayReadFromMemory() && !Src->mayWriteToMemory()) || 3303 (!Dst->mayReadFromMemory() && !Dst->mayWriteToMemory())) 3304 // if both instructions don't reference memory, there's no dependence 3305 return nullptr; 3306 3307 if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) { 3308 // can only analyze simple loads and stores, i.e., no calls, invokes, etc. 3309 DEBUG(dbgs() << "can only handle simple loads and stores\n"); 3310 return make_unique<Dependence>(Src, Dst); 3311 } 3312 3313 Value *SrcPtr = getPointerOperand(Src); 3314 Value *DstPtr = getPointerOperand(Dst); 3315 3316 switch (underlyingObjectsAlias(AA, DstPtr, SrcPtr)) { 3317 case AliasAnalysis::MayAlias: 3318 case AliasAnalysis::PartialAlias: 3319 // cannot analyse objects if we don't understand their aliasing. 3320 DEBUG(dbgs() << "can't analyze may or partial alias\n"); 3321 return make_unique<Dependence>(Src, Dst); 3322 case AliasAnalysis::NoAlias: 3323 // If the objects noalias, they are distinct, accesses are independent. 3324 DEBUG(dbgs() << "no alias\n"); 3325 return nullptr; 3326 case AliasAnalysis::MustAlias: 3327 break; // The underlying objects alias; test accesses for dependence. 3328 } 3329 3330 // establish loop nesting levels 3331 establishNestingLevels(Src, Dst); 3332 DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n"); 3333 DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n"); 3334 3335 FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels); 3336 ++TotalArrayPairs; 3337 3338 // See if there are GEPs we can use. 3339 bool UsefulGEP = false; 3340 GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr); 3341 GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr); 3342 if (SrcGEP && DstGEP && 3343 SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()) { 3344 const SCEV *SrcPtrSCEV = SE->getSCEV(SrcGEP->getPointerOperand()); 3345 const SCEV *DstPtrSCEV = SE->getSCEV(DstGEP->getPointerOperand()); 3346 DEBUG(dbgs() << " SrcPtrSCEV = " << *SrcPtrSCEV << "\n"); 3347 DEBUG(dbgs() << " DstPtrSCEV = " << *DstPtrSCEV << "\n"); 3348 3349 UsefulGEP = 3350 isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) && 3351 isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent())); 3352 } 3353 unsigned Pairs = UsefulGEP ? SrcGEP->idx_end() - SrcGEP->idx_begin() : 1; 3354 SmallVector<Subscript, 4> Pair(Pairs); 3355 if (UsefulGEP) { 3356 DEBUG(dbgs() << " using GEPs\n"); 3357 unsigned P = 0; 3358 for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(), 3359 SrcEnd = SrcGEP->idx_end(), 3360 DstIdx = DstGEP->idx_begin(); 3361 SrcIdx != SrcEnd; 3362 ++SrcIdx, ++DstIdx, ++P) { 3363 Pair[P].Src = SE->getSCEV(*SrcIdx); 3364 Pair[P].Dst = SE->getSCEV(*DstIdx); 3365 unifySubscriptType(&Pair[P]); 3366 } 3367 } 3368 else { 3369 DEBUG(dbgs() << " ignoring GEPs\n"); 3370 const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); 3371 const SCEV *DstSCEV = SE->getSCEV(DstPtr); 3372 DEBUG(dbgs() << " SrcSCEV = " << *SrcSCEV << "\n"); 3373 DEBUG(dbgs() << " DstSCEV = " << *DstSCEV << "\n"); 3374 Pair[0].Src = SrcSCEV; 3375 Pair[0].Dst = DstSCEV; 3376 } 3377 3378 if (Delinearize && Pairs == 1 && CommonLevels > 1 && 3379 tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair, SE->getElementSize(Src))) { 3380 DEBUG(dbgs() << " delinerized GEP\n"); 3381 Pairs = Pair.size(); 3382 } 3383 3384 for (unsigned P = 0; P < Pairs; ++P) { 3385 Pair[P].Loops.resize(MaxLevels + 1); 3386 Pair[P].GroupLoops.resize(MaxLevels + 1); 3387 Pair[P].Group.resize(Pairs); 3388 removeMatchingExtensions(&Pair[P]); 3389 Pair[P].Classification = 3390 classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), 3391 Pair[P].Dst, LI->getLoopFor(Dst->getParent()), 3392 Pair[P].Loops); 3393 Pair[P].GroupLoops = Pair[P].Loops; 3394 Pair[P].Group.set(P); 3395 DEBUG(dbgs() << " subscript " << P << "\n"); 3396 DEBUG(dbgs() << "\tsrc = " << *Pair[P].Src << "\n"); 3397 DEBUG(dbgs() << "\tdst = " << *Pair[P].Dst << "\n"); 3398 DEBUG(dbgs() << "\tclass = " << Pair[P].Classification << "\n"); 3399 DEBUG(dbgs() << "\tloops = "); 3400 DEBUG(dumpSmallBitVector(Pair[P].Loops)); 3401 } 3402 3403 SmallBitVector Separable(Pairs); 3404 SmallBitVector Coupled(Pairs); 3405 3406 // Partition subscripts into separable and minimally-coupled groups 3407 // Algorithm in paper is algorithmically better; 3408 // this may be faster in practice. Check someday. 3409 // 3410 // Here's an example of how it works. Consider this code: 3411 // 3412 // for (i = ...) { 3413 // for (j = ...) { 3414 // for (k = ...) { 3415 // for (l = ...) { 3416 // for (m = ...) { 3417 // A[i][j][k][m] = ...; 3418 // ... = A[0][j][l][i + j]; 3419 // } 3420 // } 3421 // } 3422 // } 3423 // } 3424 // 3425 // There are 4 subscripts here: 3426 // 0 [i] and [0] 3427 // 1 [j] and [j] 3428 // 2 [k] and [l] 3429 // 3 [m] and [i + j] 3430 // 3431 // We've already classified each subscript pair as ZIV, SIV, etc., 3432 // and collected all the loops mentioned by pair P in Pair[P].Loops. 3433 // In addition, we've initialized Pair[P].GroupLoops to Pair[P].Loops 3434 // and set Pair[P].Group = {P}. 3435 // 3436 // Src Dst Classification Loops GroupLoops Group 3437 // 0 [i] [0] SIV {1} {1} {0} 3438 // 1 [j] [j] SIV {2} {2} {1} 3439 // 2 [k] [l] RDIV {3,4} {3,4} {2} 3440 // 3 [m] [i + j] MIV {1,2,5} {1,2,5} {3} 3441 // 3442 // For each subscript SI 0 .. 3, we consider each remaining subscript, SJ. 3443 // So, 0 is compared against 1, 2, and 3; 1 is compared against 2 and 3, etc. 3444 // 3445 // We begin by comparing 0 and 1. The intersection of the GroupLoops is empty. 3446 // Next, 0 and 2. Again, the intersection of their GroupLoops is empty. 3447 // Next 0 and 3. The intersection of their GroupLoop = {1}, not empty, 3448 // so Pair[3].Group = {0,3} and Done = false (that is, 0 will not be added 3449 // to either Separable or Coupled). 3450 // 3451 // Next, we consider 1 and 2. The intersection of the GroupLoops is empty. 3452 // Next, 1 and 3. The intersectionof their GroupLoops = {2}, not empty, 3453 // so Pair[3].Group = {0, 1, 3} and Done = false. 3454 // 3455 // Next, we compare 2 against 3. The intersection of the GroupLoops is empty. 3456 // Since Done remains true, we add 2 to the set of Separable pairs. 3457 // 3458 // Finally, we consider 3. There's nothing to compare it with, 3459 // so Done remains true and we add it to the Coupled set. 3460 // Pair[3].Group = {0, 1, 3} and GroupLoops = {1, 2, 5}. 3461 // 3462 // In the end, we've got 1 separable subscript and 1 coupled group. 3463 for (unsigned SI = 0; SI < Pairs; ++SI) { 3464 if (Pair[SI].Classification == Subscript::NonLinear) { 3465 // ignore these, but collect loops for later 3466 ++NonlinearSubscriptPairs; 3467 collectCommonLoops(Pair[SI].Src, 3468 LI->getLoopFor(Src->getParent()), 3469 Pair[SI].Loops); 3470 collectCommonLoops(Pair[SI].Dst, 3471 LI->getLoopFor(Dst->getParent()), 3472 Pair[SI].Loops); 3473 Result.Consistent = false; 3474 } else if (Pair[SI].Classification == Subscript::ZIV) { 3475 // always separable 3476 Separable.set(SI); 3477 } 3478 else { 3479 // SIV, RDIV, or MIV, so check for coupled group 3480 bool Done = true; 3481 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { 3482 SmallBitVector Intersection = Pair[SI].GroupLoops; 3483 Intersection &= Pair[SJ].GroupLoops; 3484 if (Intersection.any()) { 3485 // accumulate set of all the loops in group 3486 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; 3487 // accumulate set of all subscripts in group 3488 Pair[SJ].Group |= Pair[SI].Group; 3489 Done = false; 3490 } 3491 } 3492 if (Done) { 3493 if (Pair[SI].Group.count() == 1) { 3494 Separable.set(SI); 3495 ++SeparableSubscriptPairs; 3496 } 3497 else { 3498 Coupled.set(SI); 3499 ++CoupledSubscriptPairs; 3500 } 3501 } 3502 } 3503 } 3504 3505 DEBUG(dbgs() << " Separable = "); 3506 DEBUG(dumpSmallBitVector(Separable)); 3507 DEBUG(dbgs() << " Coupled = "); 3508 DEBUG(dumpSmallBitVector(Coupled)); 3509 3510 Constraint NewConstraint; 3511 NewConstraint.setAny(SE); 3512 3513 // test separable subscripts 3514 for (int SI = Separable.find_first(); SI >= 0; SI = Separable.find_next(SI)) { 3515 DEBUG(dbgs() << "testing subscript " << SI); 3516 switch (Pair[SI].Classification) { 3517 case Subscript::ZIV: 3518 DEBUG(dbgs() << ", ZIV\n"); 3519 if (testZIV(Pair[SI].Src, Pair[SI].Dst, Result)) 3520 return nullptr; 3521 break; 3522 case Subscript::SIV: { 3523 DEBUG(dbgs() << ", SIV\n"); 3524 unsigned Level; 3525 const SCEV *SplitIter = nullptr; 3526 if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level, Result, NewConstraint, 3527 SplitIter)) 3528 return nullptr; 3529 break; 3530 } 3531 case Subscript::RDIV: 3532 DEBUG(dbgs() << ", RDIV\n"); 3533 if (testRDIV(Pair[SI].Src, Pair[SI].Dst, Result)) 3534 return nullptr; 3535 break; 3536 case Subscript::MIV: 3537 DEBUG(dbgs() << ", MIV\n"); 3538 if (testMIV(Pair[SI].Src, Pair[SI].Dst, Pair[SI].Loops, Result)) 3539 return nullptr; 3540 break; 3541 default: 3542 llvm_unreachable("subscript has unexpected classification"); 3543 } 3544 } 3545 3546 if (Coupled.count()) { 3547 // test coupled subscript groups 3548 DEBUG(dbgs() << "starting on coupled subscripts\n"); 3549 DEBUG(dbgs() << "MaxLevels + 1 = " << MaxLevels + 1 << "\n"); 3550 SmallVector<Constraint, 4> Constraints(MaxLevels + 1); 3551 for (unsigned II = 0; II <= MaxLevels; ++II) 3552 Constraints[II].setAny(SE); 3553 for (int SI = Coupled.find_first(); SI >= 0; SI = Coupled.find_next(SI)) { 3554 DEBUG(dbgs() << "testing subscript group " << SI << " { "); 3555 SmallBitVector Group(Pair[SI].Group); 3556 SmallBitVector Sivs(Pairs); 3557 SmallBitVector Mivs(Pairs); 3558 SmallBitVector ConstrainedLevels(MaxLevels + 1); 3559 for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) { 3560 DEBUG(dbgs() << SJ << " "); 3561 if (Pair[SJ].Classification == Subscript::SIV) 3562 Sivs.set(SJ); 3563 else 3564 Mivs.set(SJ); 3565 } 3566 DEBUG(dbgs() << "}\n"); 3567 while (Sivs.any()) { 3568 bool Changed = false; 3569 for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) { 3570 DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n"); 3571 // SJ is an SIV subscript that's part of the current coupled group 3572 unsigned Level; 3573 const SCEV *SplitIter = nullptr; 3574 DEBUG(dbgs() << "SIV\n"); 3575 if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, Result, NewConstraint, 3576 SplitIter)) 3577 return nullptr; 3578 ConstrainedLevels.set(Level); 3579 if (intersectConstraints(&Constraints[Level], &NewConstraint)) { 3580 if (Constraints[Level].isEmpty()) { 3581 ++DeltaIndependence; 3582 return nullptr; 3583 } 3584 Changed = true; 3585 } 3586 Sivs.reset(SJ); 3587 } 3588 if (Changed) { 3589 // propagate, possibly creating new SIVs and ZIVs 3590 DEBUG(dbgs() << " propagating\n"); 3591 DEBUG(dbgs() << "\tMivs = "); 3592 DEBUG(dumpSmallBitVector(Mivs)); 3593 for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { 3594 // SJ is an MIV subscript that's part of the current coupled group 3595 DEBUG(dbgs() << "\tSJ = " << SJ << "\n"); 3596 if (propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, 3597 Constraints, Result.Consistent)) { 3598 DEBUG(dbgs() << "\t Changed\n"); 3599 ++DeltaPropagations; 3600 Pair[SJ].Classification = 3601 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), 3602 Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), 3603 Pair[SJ].Loops); 3604 switch (Pair[SJ].Classification) { 3605 case Subscript::ZIV: 3606 DEBUG(dbgs() << "ZIV\n"); 3607 if (testZIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) 3608 return nullptr; 3609 Mivs.reset(SJ); 3610 break; 3611 case Subscript::SIV: 3612 Sivs.set(SJ); 3613 Mivs.reset(SJ); 3614 break; 3615 case Subscript::RDIV: 3616 case Subscript::MIV: 3617 break; 3618 default: 3619 llvm_unreachable("bad subscript classification"); 3620 } 3621 } 3622 } 3623 } 3624 } 3625 3626 // test & propagate remaining RDIVs 3627 for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { 3628 if (Pair[SJ].Classification == Subscript::RDIV) { 3629 DEBUG(dbgs() << "RDIV test\n"); 3630 if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) 3631 return nullptr; 3632 // I don't yet understand how to propagate RDIV results 3633 Mivs.reset(SJ); 3634 } 3635 } 3636 3637 // test remaining MIVs 3638 // This code is temporary. 3639 // Better to somehow test all remaining subscripts simultaneously. 3640 for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { 3641 if (Pair[SJ].Classification == Subscript::MIV) { 3642 DEBUG(dbgs() << "MIV test\n"); 3643 if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Result)) 3644 return nullptr; 3645 } 3646 else 3647 llvm_unreachable("expected only MIV subscripts at this point"); 3648 } 3649 3650 // update Result.DV from constraint vector 3651 DEBUG(dbgs() << " updating\n"); 3652 for (int SJ = ConstrainedLevels.find_first(); SJ >= 0; 3653 SJ = ConstrainedLevels.find_next(SJ)) { 3654 updateDirection(Result.DV[SJ - 1], Constraints[SJ]); 3655 if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE) 3656 return nullptr; 3657 } 3658 } 3659 } 3660 3661 // Make sure the Scalar flags are set correctly. 3662 SmallBitVector CompleteLoops(MaxLevels + 1); 3663 for (unsigned SI = 0; SI < Pairs; ++SI) 3664 CompleteLoops |= Pair[SI].Loops; 3665 for (unsigned II = 1; II <= CommonLevels; ++II) 3666 if (CompleteLoops[II]) 3667 Result.DV[II - 1].Scalar = false; 3668 3669 if (PossiblyLoopIndependent) { 3670 // Make sure the LoopIndependent flag is set correctly. 3671 // All directions must include equal, otherwise no 3672 // loop-independent dependence is possible. 3673 for (unsigned II = 1; II <= CommonLevels; ++II) { 3674 if (!(Result.getDirection(II) & Dependence::DVEntry::EQ)) { 3675 Result.LoopIndependent = false; 3676 break; 3677 } 3678 } 3679 } 3680 else { 3681 // On the other hand, if all directions are equal and there's no 3682 // loop-independent dependence possible, then no dependence exists. 3683 bool AllEqual = true; 3684 for (unsigned II = 1; II <= CommonLevels; ++II) { 3685 if (Result.getDirection(II) != Dependence::DVEntry::EQ) { 3686 AllEqual = false; 3687 break; 3688 } 3689 } 3690 if (AllEqual) 3691 return nullptr; 3692 } 3693 3694 auto Final = make_unique<FullDependence>(Result); 3695 Result.DV = nullptr; 3696 return std::move(Final); 3697 } 3698 3699 3700 3701 //===----------------------------------------------------------------------===// 3702 // getSplitIteration - 3703 // Rather than spend rarely-used space recording the splitting iteration 3704 // during the Weak-Crossing SIV test, we re-compute it on demand. 3705 // The re-computation is basically a repeat of the entire dependence test, 3706 // though simplified since we know that the dependence exists. 3707 // It's tedious, since we must go through all propagations, etc. 3708 // 3709 // Care is required to keep this code up to date with respect to the routine 3710 // above, depends(). 3711 // 3712 // Generally, the dependence analyzer will be used to build 3713 // a dependence graph for a function (basically a map from instructions 3714 // to dependences). Looking for cycles in the graph shows us loops 3715 // that cannot be trivially vectorized/parallelized. 3716 // 3717 // We can try to improve the situation by examining all the dependences 3718 // that make up the cycle, looking for ones we can break. 3719 // Sometimes, peeling the first or last iteration of a loop will break 3720 // dependences, and we've got flags for those possibilities. 3721 // Sometimes, splitting a loop at some other iteration will do the trick, 3722 // and we've got a flag for that case. Rather than waste the space to 3723 // record the exact iteration (since we rarely know), we provide 3724 // a method that calculates the iteration. It's a drag that it must work 3725 // from scratch, but wonderful in that it's possible. 3726 // 3727 // Here's an example: 3728 // 3729 // for (i = 0; i < 10; i++) 3730 // A[i] = ... 3731 // ... = A[11 - i] 3732 // 3733 // There's a loop-carried flow dependence from the store to the load, 3734 // found by the weak-crossing SIV test. The dependence will have a flag, 3735 // indicating that the dependence can be broken by splitting the loop. 3736 // Calling getSplitIteration will return 5. 3737 // Splitting the loop breaks the dependence, like so: 3738 // 3739 // for (i = 0; i <= 5; i++) 3740 // A[i] = ... 3741 // ... = A[11 - i] 3742 // for (i = 6; i < 10; i++) 3743 // A[i] = ... 3744 // ... = A[11 - i] 3745 // 3746 // breaks the dependence and allows us to vectorize/parallelize 3747 // both loops. 3748 const SCEV *DependenceAnalysis::getSplitIteration(const Dependence &Dep, 3749 unsigned SplitLevel) { 3750 assert(Dep.isSplitable(SplitLevel) && 3751 "Dep should be splitable at SplitLevel"); 3752 Instruction *Src = Dep.getSrc(); 3753 Instruction *Dst = Dep.getDst(); 3754 assert(Src->mayReadFromMemory() || Src->mayWriteToMemory()); 3755 assert(Dst->mayReadFromMemory() || Dst->mayWriteToMemory()); 3756 assert(isLoadOrStore(Src)); 3757 assert(isLoadOrStore(Dst)); 3758 Value *SrcPtr = getPointerOperand(Src); 3759 Value *DstPtr = getPointerOperand(Dst); 3760 assert(underlyingObjectsAlias(AA, DstPtr, SrcPtr) == 3761 AliasAnalysis::MustAlias); 3762 3763 // establish loop nesting levels 3764 establishNestingLevels(Src, Dst); 3765 3766 FullDependence Result(Src, Dst, false, CommonLevels); 3767 3768 // See if there are GEPs we can use. 3769 bool UsefulGEP = false; 3770 GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr); 3771 GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr); 3772 if (SrcGEP && DstGEP && 3773 SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()) { 3774 const SCEV *SrcPtrSCEV = SE->getSCEV(SrcGEP->getPointerOperand()); 3775 const SCEV *DstPtrSCEV = SE->getSCEV(DstGEP->getPointerOperand()); 3776 UsefulGEP = 3777 isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) && 3778 isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent())); 3779 } 3780 unsigned Pairs = UsefulGEP ? SrcGEP->idx_end() - SrcGEP->idx_begin() : 1; 3781 SmallVector<Subscript, 4> Pair(Pairs); 3782 if (UsefulGEP) { 3783 unsigned P = 0; 3784 for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(), 3785 SrcEnd = SrcGEP->idx_end(), 3786 DstIdx = DstGEP->idx_begin(); 3787 SrcIdx != SrcEnd; 3788 ++SrcIdx, ++DstIdx, ++P) { 3789 Pair[P].Src = SE->getSCEV(*SrcIdx); 3790 Pair[P].Dst = SE->getSCEV(*DstIdx); 3791 } 3792 } 3793 else { 3794 const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); 3795 const SCEV *DstSCEV = SE->getSCEV(DstPtr); 3796 Pair[0].Src = SrcSCEV; 3797 Pair[0].Dst = DstSCEV; 3798 } 3799 3800 if (Delinearize && Pairs == 1 && CommonLevels > 1 && 3801 tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair, SE->getElementSize(Src))) { 3802 DEBUG(dbgs() << " delinerized GEP\n"); 3803 Pairs = Pair.size(); 3804 } 3805 3806 for (unsigned P = 0; P < Pairs; ++P) { 3807 Pair[P].Loops.resize(MaxLevels + 1); 3808 Pair[P].GroupLoops.resize(MaxLevels + 1); 3809 Pair[P].Group.resize(Pairs); 3810 removeMatchingExtensions(&Pair[P]); 3811 Pair[P].Classification = 3812 classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), 3813 Pair[P].Dst, LI->getLoopFor(Dst->getParent()), 3814 Pair[P].Loops); 3815 Pair[P].GroupLoops = Pair[P].Loops; 3816 Pair[P].Group.set(P); 3817 } 3818 3819 SmallBitVector Separable(Pairs); 3820 SmallBitVector Coupled(Pairs); 3821 3822 // partition subscripts into separable and minimally-coupled groups 3823 for (unsigned SI = 0; SI < Pairs; ++SI) { 3824 if (Pair[SI].Classification == Subscript::NonLinear) { 3825 // ignore these, but collect loops for later 3826 collectCommonLoops(Pair[SI].Src, 3827 LI->getLoopFor(Src->getParent()), 3828 Pair[SI].Loops); 3829 collectCommonLoops(Pair[SI].Dst, 3830 LI->getLoopFor(Dst->getParent()), 3831 Pair[SI].Loops); 3832 Result.Consistent = false; 3833 } 3834 else if (Pair[SI].Classification == Subscript::ZIV) 3835 Separable.set(SI); 3836 else { 3837 // SIV, RDIV, or MIV, so check for coupled group 3838 bool Done = true; 3839 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { 3840 SmallBitVector Intersection = Pair[SI].GroupLoops; 3841 Intersection &= Pair[SJ].GroupLoops; 3842 if (Intersection.any()) { 3843 // accumulate set of all the loops in group 3844 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; 3845 // accumulate set of all subscripts in group 3846 Pair[SJ].Group |= Pair[SI].Group; 3847 Done = false; 3848 } 3849 } 3850 if (Done) { 3851 if (Pair[SI].Group.count() == 1) 3852 Separable.set(SI); 3853 else 3854 Coupled.set(SI); 3855 } 3856 } 3857 } 3858 3859 Constraint NewConstraint; 3860 NewConstraint.setAny(SE); 3861 3862 // test separable subscripts 3863 for (int SI = Separable.find_first(); SI >= 0; SI = Separable.find_next(SI)) { 3864 switch (Pair[SI].Classification) { 3865 case Subscript::SIV: { 3866 unsigned Level; 3867 const SCEV *SplitIter = nullptr; 3868 (void) testSIV(Pair[SI].Src, Pair[SI].Dst, Level, 3869 Result, NewConstraint, SplitIter); 3870 if (Level == SplitLevel) { 3871 assert(SplitIter != nullptr); 3872 return SplitIter; 3873 } 3874 break; 3875 } 3876 case Subscript::ZIV: 3877 case Subscript::RDIV: 3878 case Subscript::MIV: 3879 break; 3880 default: 3881 llvm_unreachable("subscript has unexpected classification"); 3882 } 3883 } 3884 3885 if (Coupled.count()) { 3886 // test coupled subscript groups 3887 SmallVector<Constraint, 4> Constraints(MaxLevels + 1); 3888 for (unsigned II = 0; II <= MaxLevels; ++II) 3889 Constraints[II].setAny(SE); 3890 for (int SI = Coupled.find_first(); SI >= 0; SI = Coupled.find_next(SI)) { 3891 SmallBitVector Group(Pair[SI].Group); 3892 SmallBitVector Sivs(Pairs); 3893 SmallBitVector Mivs(Pairs); 3894 SmallBitVector ConstrainedLevels(MaxLevels + 1); 3895 for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) { 3896 if (Pair[SJ].Classification == Subscript::SIV) 3897 Sivs.set(SJ); 3898 else 3899 Mivs.set(SJ); 3900 } 3901 while (Sivs.any()) { 3902 bool Changed = false; 3903 for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) { 3904 // SJ is an SIV subscript that's part of the current coupled group 3905 unsigned Level; 3906 const SCEV *SplitIter = nullptr; 3907 (void) testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, 3908 Result, NewConstraint, SplitIter); 3909 if (Level == SplitLevel && SplitIter) 3910 return SplitIter; 3911 ConstrainedLevels.set(Level); 3912 if (intersectConstraints(&Constraints[Level], &NewConstraint)) 3913 Changed = true; 3914 Sivs.reset(SJ); 3915 } 3916 if (Changed) { 3917 // propagate, possibly creating new SIVs and ZIVs 3918 for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { 3919 // SJ is an MIV subscript that's part of the current coupled group 3920 if (propagate(Pair[SJ].Src, Pair[SJ].Dst, 3921 Pair[SJ].Loops, Constraints, Result.Consistent)) { 3922 Pair[SJ].Classification = 3923 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), 3924 Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), 3925 Pair[SJ].Loops); 3926 switch (Pair[SJ].Classification) { 3927 case Subscript::ZIV: 3928 Mivs.reset(SJ); 3929 break; 3930 case Subscript::SIV: 3931 Sivs.set(SJ); 3932 Mivs.reset(SJ); 3933 break; 3934 case Subscript::RDIV: 3935 case Subscript::MIV: 3936 break; 3937 default: 3938 llvm_unreachable("bad subscript classification"); 3939 } 3940 } 3941 } 3942 } 3943 } 3944 } 3945 } 3946 llvm_unreachable("somehow reached end of routine"); 3947 return nullptr; 3948 } 3949