1 //===-- DependenceAnalysis.cpp - DA Implementation --------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // DependenceAnalysis is an LLVM pass that analyses dependences between memory 11 // accesses. Currently, it is an (incomplete) implementation of the approach 12 // described in 13 // 14 // Practical Dependence Testing 15 // Goff, Kennedy, Tseng 16 // PLDI 1991 17 // 18 // There's a single entry point that analyzes the dependence between a pair 19 // of memory references in a function, returning either NULL, for no dependence, 20 // or a more-or-less detailed description of the dependence between them. 21 // 22 // Currently, the implementation cannot propagate constraints between 23 // coupled RDIV subscripts and lacks a multi-subscript MIV test. 24 // Both of these are conservative weaknesses; 25 // that is, not a source of correctness problems. 26 // 27 // The implementation depends on the GEP instruction to differentiate 28 // subscripts. Since Clang linearizes some array subscripts, the dependence 29 // analysis is using SCEV->delinearize to recover the representation of multiple 30 // subscripts, and thus avoid the more expensive and less precise MIV tests. The 31 // delinearization is controlled by the flag -da-delinearize. 32 // 33 // We should pay some careful attention to the possibility of integer overflow 34 // in the implementation of the various tests. This could happen with Add, 35 // Subtract, or Multiply, with both APInt's and SCEV's. 36 // 37 // Some non-linear subscript pairs can be handled by the GCD test 38 // (and perhaps other tests). 39 // Should explore how often these things occur. 40 // 41 // Finally, it seems like certain test cases expose weaknesses in the SCEV 42 // simplification, especially in the handling of sign and zero extensions. 43 // It could be useful to spend time exploring these. 44 // 45 // Please note that this is work in progress and the interface is subject to 46 // change. 47 // 48 //===----------------------------------------------------------------------===// 49 // // 50 // In memory of Ken Kennedy, 1945 - 2007 // 51 // // 52 //===----------------------------------------------------------------------===// 53 54 #include "llvm/Analysis/DependenceAnalysis.h" 55 #include "llvm/ADT/STLExtras.h" 56 #include "llvm/ADT/Statistic.h" 57 #include "llvm/Analysis/AliasAnalysis.h" 58 #include "llvm/Analysis/LoopInfo.h" 59 #include "llvm/Analysis/ScalarEvolution.h" 60 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 61 #include "llvm/Analysis/ValueTracking.h" 62 #include "llvm/IR/InstIterator.h" 63 #include "llvm/IR/Module.h" 64 #include "llvm/IR/Operator.h" 65 #include "llvm/Support/CommandLine.h" 66 #include "llvm/Support/Debug.h" 67 #include "llvm/Support/ErrorHandling.h" 68 #include "llvm/Support/raw_ostream.h" 69 70 using namespace llvm; 71 72 #define DEBUG_TYPE "da" 73 74 //===----------------------------------------------------------------------===// 75 // statistics 76 77 STATISTIC(TotalArrayPairs, "Array pairs tested"); 78 STATISTIC(SeparableSubscriptPairs, "Separable subscript pairs"); 79 STATISTIC(CoupledSubscriptPairs, "Coupled subscript pairs"); 80 STATISTIC(NonlinearSubscriptPairs, "Nonlinear subscript pairs"); 81 STATISTIC(ZIVapplications, "ZIV applications"); 82 STATISTIC(ZIVindependence, "ZIV independence"); 83 STATISTIC(StrongSIVapplications, "Strong SIV applications"); 84 STATISTIC(StrongSIVsuccesses, "Strong SIV successes"); 85 STATISTIC(StrongSIVindependence, "Strong SIV independence"); 86 STATISTIC(WeakCrossingSIVapplications, "Weak-Crossing SIV applications"); 87 STATISTIC(WeakCrossingSIVsuccesses, "Weak-Crossing SIV successes"); 88 STATISTIC(WeakCrossingSIVindependence, "Weak-Crossing SIV independence"); 89 STATISTIC(ExactSIVapplications, "Exact SIV applications"); 90 STATISTIC(ExactSIVsuccesses, "Exact SIV successes"); 91 STATISTIC(ExactSIVindependence, "Exact SIV independence"); 92 STATISTIC(WeakZeroSIVapplications, "Weak-Zero SIV applications"); 93 STATISTIC(WeakZeroSIVsuccesses, "Weak-Zero SIV successes"); 94 STATISTIC(WeakZeroSIVindependence, "Weak-Zero SIV independence"); 95 STATISTIC(ExactRDIVapplications, "Exact RDIV applications"); 96 STATISTIC(ExactRDIVindependence, "Exact RDIV independence"); 97 STATISTIC(SymbolicRDIVapplications, "Symbolic RDIV applications"); 98 STATISTIC(SymbolicRDIVindependence, "Symbolic RDIV independence"); 99 STATISTIC(DeltaApplications, "Delta applications"); 100 STATISTIC(DeltaSuccesses, "Delta successes"); 101 STATISTIC(DeltaIndependence, "Delta independence"); 102 STATISTIC(DeltaPropagations, "Delta propagations"); 103 STATISTIC(GCDapplications, "GCD applications"); 104 STATISTIC(GCDsuccesses, "GCD successes"); 105 STATISTIC(GCDindependence, "GCD independence"); 106 STATISTIC(BanerjeeApplications, "Banerjee applications"); 107 STATISTIC(BanerjeeIndependence, "Banerjee independence"); 108 STATISTIC(BanerjeeSuccesses, "Banerjee successes"); 109 110 static cl::opt<bool> 111 Delinearize("da-delinearize", cl::init(false), cl::Hidden, cl::ZeroOrMore, 112 cl::desc("Try to delinearize array references.")); 113 114 //===----------------------------------------------------------------------===// 115 // basics 116 117 INITIALIZE_PASS_BEGIN(DependenceAnalysis, "da", 118 "Dependence Analysis", true, true) 119 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 120 INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) 121 INITIALIZE_AG_DEPENDENCY(AliasAnalysis) 122 INITIALIZE_PASS_END(DependenceAnalysis, "da", 123 "Dependence Analysis", true, true) 124 125 char DependenceAnalysis::ID = 0; 126 127 128 FunctionPass *llvm::createDependenceAnalysisPass() { 129 return new DependenceAnalysis(); 130 } 131 132 133 bool DependenceAnalysis::runOnFunction(Function &F) { 134 this->F = &F; 135 AA = &getAnalysis<AliasAnalysis>(); 136 SE = &getAnalysis<ScalarEvolution>(); 137 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 138 return false; 139 } 140 141 142 void DependenceAnalysis::releaseMemory() { 143 } 144 145 146 void DependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { 147 AU.setPreservesAll(); 148 AU.addRequiredTransitive<AliasAnalysis>(); 149 AU.addRequiredTransitive<ScalarEvolution>(); 150 AU.addRequiredTransitive<LoopInfoWrapperPass>(); 151 } 152 153 154 // Used to test the dependence analyzer. 155 // Looks through the function, noting loads and stores. 156 // Calls depends() on every possible pair and prints out the result. 157 // Ignores all other instructions. 158 static 159 void dumpExampleDependence(raw_ostream &OS, Function *F, 160 DependenceAnalysis *DA) { 161 for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F); 162 SrcI != SrcE; ++SrcI) { 163 if (isa<StoreInst>(*SrcI) || isa<LoadInst>(*SrcI)) { 164 for (inst_iterator DstI = SrcI, DstE = inst_end(F); 165 DstI != DstE; ++DstI) { 166 if (isa<StoreInst>(*DstI) || isa<LoadInst>(*DstI)) { 167 OS << "da analyze - "; 168 if (auto D = DA->depends(&*SrcI, &*DstI, true)) { 169 D->dump(OS); 170 for (unsigned Level = 1; Level <= D->getLevels(); Level++) { 171 if (D->isSplitable(Level)) { 172 OS << "da analyze - split level = " << Level; 173 OS << ", iteration = " << *DA->getSplitIteration(*D, Level); 174 OS << "!\n"; 175 } 176 } 177 } 178 else 179 OS << "none!\n"; 180 } 181 } 182 } 183 } 184 } 185 186 187 void DependenceAnalysis::print(raw_ostream &OS, const Module*) const { 188 dumpExampleDependence(OS, F, const_cast<DependenceAnalysis *>(this)); 189 } 190 191 //===----------------------------------------------------------------------===// 192 // Dependence methods 193 194 // Returns true if this is an input dependence. 195 bool Dependence::isInput() const { 196 return Src->mayReadFromMemory() && Dst->mayReadFromMemory(); 197 } 198 199 200 // Returns true if this is an output dependence. 201 bool Dependence::isOutput() const { 202 return Src->mayWriteToMemory() && Dst->mayWriteToMemory(); 203 } 204 205 206 // Returns true if this is an flow (aka true) dependence. 207 bool Dependence::isFlow() const { 208 return Src->mayWriteToMemory() && Dst->mayReadFromMemory(); 209 } 210 211 212 // Returns true if this is an anti dependence. 213 bool Dependence::isAnti() const { 214 return Src->mayReadFromMemory() && Dst->mayWriteToMemory(); 215 } 216 217 218 // Returns true if a particular level is scalar; that is, 219 // if no subscript in the source or destination mention the induction 220 // variable associated with the loop at this level. 221 // Leave this out of line, so it will serve as a virtual method anchor 222 bool Dependence::isScalar(unsigned level) const { 223 return false; 224 } 225 226 227 //===----------------------------------------------------------------------===// 228 // FullDependence methods 229 230 FullDependence::FullDependence(Instruction *Source, Instruction *Destination, 231 bool PossiblyLoopIndependent, 232 unsigned CommonLevels) 233 : Dependence(Source, Destination), Levels(CommonLevels), 234 LoopIndependent(PossiblyLoopIndependent) { 235 Consistent = true; 236 DV = CommonLevels ? new DVEntry[CommonLevels] : nullptr; 237 } 238 239 // The rest are simple getters that hide the implementation. 240 241 // getDirection - Returns the direction associated with a particular level. 242 unsigned FullDependence::getDirection(unsigned Level) const { 243 assert(0 < Level && Level <= Levels && "Level out of range"); 244 return DV[Level - 1].Direction; 245 } 246 247 248 // Returns the distance (or NULL) associated with a particular level. 249 const SCEV *FullDependence::getDistance(unsigned Level) const { 250 assert(0 < Level && Level <= Levels && "Level out of range"); 251 return DV[Level - 1].Distance; 252 } 253 254 255 // Returns true if a particular level is scalar; that is, 256 // if no subscript in the source or destination mention the induction 257 // variable associated with the loop at this level. 258 bool FullDependence::isScalar(unsigned Level) const { 259 assert(0 < Level && Level <= Levels && "Level out of range"); 260 return DV[Level - 1].Scalar; 261 } 262 263 264 // Returns true if peeling the first iteration from this loop 265 // will break this dependence. 266 bool FullDependence::isPeelFirst(unsigned Level) const { 267 assert(0 < Level && Level <= Levels && "Level out of range"); 268 return DV[Level - 1].PeelFirst; 269 } 270 271 272 // Returns true if peeling the last iteration from this loop 273 // will break this dependence. 274 bool FullDependence::isPeelLast(unsigned Level) const { 275 assert(0 < Level && Level <= Levels && "Level out of range"); 276 return DV[Level - 1].PeelLast; 277 } 278 279 280 // Returns true if splitting this loop will break the dependence. 281 bool FullDependence::isSplitable(unsigned Level) const { 282 assert(0 < Level && Level <= Levels && "Level out of range"); 283 return DV[Level - 1].Splitable; 284 } 285 286 287 //===----------------------------------------------------------------------===// 288 // DependenceAnalysis::Constraint methods 289 290 // If constraint is a point <X, Y>, returns X. 291 // Otherwise assert. 292 const SCEV *DependenceAnalysis::Constraint::getX() const { 293 assert(Kind == Point && "Kind should be Point"); 294 return A; 295 } 296 297 298 // If constraint is a point <X, Y>, returns Y. 299 // Otherwise assert. 300 const SCEV *DependenceAnalysis::Constraint::getY() const { 301 assert(Kind == Point && "Kind should be Point"); 302 return B; 303 } 304 305 306 // If constraint is a line AX + BY = C, returns A. 307 // Otherwise assert. 308 const SCEV *DependenceAnalysis::Constraint::getA() const { 309 assert((Kind == Line || Kind == Distance) && 310 "Kind should be Line (or Distance)"); 311 return A; 312 } 313 314 315 // If constraint is a line AX + BY = C, returns B. 316 // Otherwise assert. 317 const SCEV *DependenceAnalysis::Constraint::getB() const { 318 assert((Kind == Line || Kind == Distance) && 319 "Kind should be Line (or Distance)"); 320 return B; 321 } 322 323 324 // If constraint is a line AX + BY = C, returns C. 325 // Otherwise assert. 326 const SCEV *DependenceAnalysis::Constraint::getC() const { 327 assert((Kind == Line || Kind == Distance) && 328 "Kind should be Line (or Distance)"); 329 return C; 330 } 331 332 333 // If constraint is a distance, returns D. 334 // Otherwise assert. 335 const SCEV *DependenceAnalysis::Constraint::getD() const { 336 assert(Kind == Distance && "Kind should be Distance"); 337 return SE->getNegativeSCEV(C); 338 } 339 340 341 // Returns the loop associated with this constraint. 342 const Loop *DependenceAnalysis::Constraint::getAssociatedLoop() const { 343 assert((Kind == Distance || Kind == Line || Kind == Point) && 344 "Kind should be Distance, Line, or Point"); 345 return AssociatedLoop; 346 } 347 348 349 void DependenceAnalysis::Constraint::setPoint(const SCEV *X, 350 const SCEV *Y, 351 const Loop *CurLoop) { 352 Kind = Point; 353 A = X; 354 B = Y; 355 AssociatedLoop = CurLoop; 356 } 357 358 359 void DependenceAnalysis::Constraint::setLine(const SCEV *AA, 360 const SCEV *BB, 361 const SCEV *CC, 362 const Loop *CurLoop) { 363 Kind = Line; 364 A = AA; 365 B = BB; 366 C = CC; 367 AssociatedLoop = CurLoop; 368 } 369 370 371 void DependenceAnalysis::Constraint::setDistance(const SCEV *D, 372 const Loop *CurLoop) { 373 Kind = Distance; 374 A = SE->getConstant(D->getType(), 1); 375 B = SE->getNegativeSCEV(A); 376 C = SE->getNegativeSCEV(D); 377 AssociatedLoop = CurLoop; 378 } 379 380 381 void DependenceAnalysis::Constraint::setEmpty() { 382 Kind = Empty; 383 } 384 385 386 void DependenceAnalysis::Constraint::setAny(ScalarEvolution *NewSE) { 387 SE = NewSE; 388 Kind = Any; 389 } 390 391 392 // For debugging purposes. Dumps the constraint out to OS. 393 void DependenceAnalysis::Constraint::dump(raw_ostream &OS) const { 394 if (isEmpty()) 395 OS << " Empty\n"; 396 else if (isAny()) 397 OS << " Any\n"; 398 else if (isPoint()) 399 OS << " Point is <" << *getX() << ", " << *getY() << ">\n"; 400 else if (isDistance()) 401 OS << " Distance is " << *getD() << 402 " (" << *getA() << "*X + " << *getB() << "*Y = " << *getC() << ")\n"; 403 else if (isLine()) 404 OS << " Line is " << *getA() << "*X + " << 405 *getB() << "*Y = " << *getC() << "\n"; 406 else 407 llvm_unreachable("unknown constraint type in Constraint::dump"); 408 } 409 410 411 // Updates X with the intersection 412 // of the Constraints X and Y. Returns true if X has changed. 413 // Corresponds to Figure 4 from the paper 414 // 415 // Practical Dependence Testing 416 // Goff, Kennedy, Tseng 417 // PLDI 1991 418 bool DependenceAnalysis::intersectConstraints(Constraint *X, 419 const Constraint *Y) { 420 ++DeltaApplications; 421 DEBUG(dbgs() << "\tintersect constraints\n"); 422 DEBUG(dbgs() << "\t X ="; X->dump(dbgs())); 423 DEBUG(dbgs() << "\t Y ="; Y->dump(dbgs())); 424 assert(!Y->isPoint() && "Y must not be a Point"); 425 if (X->isAny()) { 426 if (Y->isAny()) 427 return false; 428 *X = *Y; 429 return true; 430 } 431 if (X->isEmpty()) 432 return false; 433 if (Y->isEmpty()) { 434 X->setEmpty(); 435 return true; 436 } 437 438 if (X->isDistance() && Y->isDistance()) { 439 DEBUG(dbgs() << "\t intersect 2 distances\n"); 440 if (isKnownPredicate(CmpInst::ICMP_EQ, X->getD(), Y->getD())) 441 return false; 442 if (isKnownPredicate(CmpInst::ICMP_NE, X->getD(), Y->getD())) { 443 X->setEmpty(); 444 ++DeltaSuccesses; 445 return true; 446 } 447 // Hmmm, interesting situation. 448 // I guess if either is constant, keep it and ignore the other. 449 if (isa<SCEVConstant>(Y->getD())) { 450 *X = *Y; 451 return true; 452 } 453 return false; 454 } 455 456 // At this point, the pseudo-code in Figure 4 of the paper 457 // checks if (X->isPoint() && Y->isPoint()). 458 // This case can't occur in our implementation, 459 // since a Point can only arise as the result of intersecting 460 // two Line constraints, and the right-hand value, Y, is never 461 // the result of an intersection. 462 assert(!(X->isPoint() && Y->isPoint()) && 463 "We shouldn't ever see X->isPoint() && Y->isPoint()"); 464 465 if (X->isLine() && Y->isLine()) { 466 DEBUG(dbgs() << "\t intersect 2 lines\n"); 467 const SCEV *Prod1 = SE->getMulExpr(X->getA(), Y->getB()); 468 const SCEV *Prod2 = SE->getMulExpr(X->getB(), Y->getA()); 469 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) { 470 // slopes are equal, so lines are parallel 471 DEBUG(dbgs() << "\t\tsame slope\n"); 472 Prod1 = SE->getMulExpr(X->getC(), Y->getB()); 473 Prod2 = SE->getMulExpr(X->getB(), Y->getC()); 474 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) 475 return false; 476 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { 477 X->setEmpty(); 478 ++DeltaSuccesses; 479 return true; 480 } 481 return false; 482 } 483 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { 484 // slopes differ, so lines intersect 485 DEBUG(dbgs() << "\t\tdifferent slopes\n"); 486 const SCEV *C1B2 = SE->getMulExpr(X->getC(), Y->getB()); 487 const SCEV *C1A2 = SE->getMulExpr(X->getC(), Y->getA()); 488 const SCEV *C2B1 = SE->getMulExpr(Y->getC(), X->getB()); 489 const SCEV *C2A1 = SE->getMulExpr(Y->getC(), X->getA()); 490 const SCEV *A1B2 = SE->getMulExpr(X->getA(), Y->getB()); 491 const SCEV *A2B1 = SE->getMulExpr(Y->getA(), X->getB()); 492 const SCEVConstant *C1A2_C2A1 = 493 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1A2, C2A1)); 494 const SCEVConstant *C1B2_C2B1 = 495 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1B2, C2B1)); 496 const SCEVConstant *A1B2_A2B1 = 497 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A1B2, A2B1)); 498 const SCEVConstant *A2B1_A1B2 = 499 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A2B1, A1B2)); 500 if (!C1B2_C2B1 || !C1A2_C2A1 || 501 !A1B2_A2B1 || !A2B1_A1B2) 502 return false; 503 APInt Xtop = C1B2_C2B1->getValue()->getValue(); 504 APInt Xbot = A1B2_A2B1->getValue()->getValue(); 505 APInt Ytop = C1A2_C2A1->getValue()->getValue(); 506 APInt Ybot = A2B1_A1B2->getValue()->getValue(); 507 DEBUG(dbgs() << "\t\tXtop = " << Xtop << "\n"); 508 DEBUG(dbgs() << "\t\tXbot = " << Xbot << "\n"); 509 DEBUG(dbgs() << "\t\tYtop = " << Ytop << "\n"); 510 DEBUG(dbgs() << "\t\tYbot = " << Ybot << "\n"); 511 APInt Xq = Xtop; // these need to be initialized, even 512 APInt Xr = Xtop; // though they're just going to be overwritten 513 APInt::sdivrem(Xtop, Xbot, Xq, Xr); 514 APInt Yq = Ytop; 515 APInt Yr = Ytop; 516 APInt::sdivrem(Ytop, Ybot, Yq, Yr); 517 if (Xr != 0 || Yr != 0) { 518 X->setEmpty(); 519 ++DeltaSuccesses; 520 return true; 521 } 522 DEBUG(dbgs() << "\t\tX = " << Xq << ", Y = " << Yq << "\n"); 523 if (Xq.slt(0) || Yq.slt(0)) { 524 X->setEmpty(); 525 ++DeltaSuccesses; 526 return true; 527 } 528 if (const SCEVConstant *CUB = 529 collectConstantUpperBound(X->getAssociatedLoop(), Prod1->getType())) { 530 APInt UpperBound = CUB->getValue()->getValue(); 531 DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n"); 532 if (Xq.sgt(UpperBound) || Yq.sgt(UpperBound)) { 533 X->setEmpty(); 534 ++DeltaSuccesses; 535 return true; 536 } 537 } 538 X->setPoint(SE->getConstant(Xq), 539 SE->getConstant(Yq), 540 X->getAssociatedLoop()); 541 ++DeltaSuccesses; 542 return true; 543 } 544 return false; 545 } 546 547 // if (X->isLine() && Y->isPoint()) This case can't occur. 548 assert(!(X->isLine() && Y->isPoint()) && "This case should never occur"); 549 550 if (X->isPoint() && Y->isLine()) { 551 DEBUG(dbgs() << "\t intersect Point and Line\n"); 552 const SCEV *A1X1 = SE->getMulExpr(Y->getA(), X->getX()); 553 const SCEV *B1Y1 = SE->getMulExpr(Y->getB(), X->getY()); 554 const SCEV *Sum = SE->getAddExpr(A1X1, B1Y1); 555 if (isKnownPredicate(CmpInst::ICMP_EQ, Sum, Y->getC())) 556 return false; 557 if (isKnownPredicate(CmpInst::ICMP_NE, Sum, Y->getC())) { 558 X->setEmpty(); 559 ++DeltaSuccesses; 560 return true; 561 } 562 return false; 563 } 564 565 llvm_unreachable("shouldn't reach the end of Constraint intersection"); 566 return false; 567 } 568 569 570 //===----------------------------------------------------------------------===// 571 // DependenceAnalysis methods 572 573 // For debugging purposes. Dumps a dependence to OS. 574 void Dependence::dump(raw_ostream &OS) const { 575 bool Splitable = false; 576 if (isConfused()) 577 OS << "confused"; 578 else { 579 if (isConsistent()) 580 OS << "consistent "; 581 if (isFlow()) 582 OS << "flow"; 583 else if (isOutput()) 584 OS << "output"; 585 else if (isAnti()) 586 OS << "anti"; 587 else if (isInput()) 588 OS << "input"; 589 unsigned Levels = getLevels(); 590 OS << " ["; 591 for (unsigned II = 1; II <= Levels; ++II) { 592 if (isSplitable(II)) 593 Splitable = true; 594 if (isPeelFirst(II)) 595 OS << 'p'; 596 const SCEV *Distance = getDistance(II); 597 if (Distance) 598 OS << *Distance; 599 else if (isScalar(II)) 600 OS << "S"; 601 else { 602 unsigned Direction = getDirection(II); 603 if (Direction == DVEntry::ALL) 604 OS << "*"; 605 else { 606 if (Direction & DVEntry::LT) 607 OS << "<"; 608 if (Direction & DVEntry::EQ) 609 OS << "="; 610 if (Direction & DVEntry::GT) 611 OS << ">"; 612 } 613 } 614 if (isPeelLast(II)) 615 OS << 'p'; 616 if (II < Levels) 617 OS << " "; 618 } 619 if (isLoopIndependent()) 620 OS << "|<"; 621 OS << "]"; 622 if (Splitable) 623 OS << " splitable"; 624 } 625 OS << "!\n"; 626 } 627 628 static AliasAnalysis::AliasResult underlyingObjectsAlias(AliasAnalysis *AA, 629 const DataLayout &DL, 630 const Value *A, 631 const Value *B) { 632 const Value *AObj = GetUnderlyingObject(A, DL); 633 const Value *BObj = GetUnderlyingObject(B, DL); 634 return AA->alias(AObj, AA->getTypeStoreSize(AObj->getType()), 635 BObj, AA->getTypeStoreSize(BObj->getType())); 636 } 637 638 639 // Returns true if the load or store can be analyzed. Atomic and volatile 640 // operations have properties which this analysis does not understand. 641 static 642 bool isLoadOrStore(const Instruction *I) { 643 if (const LoadInst *LI = dyn_cast<LoadInst>(I)) 644 return LI->isUnordered(); 645 else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) 646 return SI->isUnordered(); 647 return false; 648 } 649 650 651 static 652 Value *getPointerOperand(Instruction *I) { 653 if (LoadInst *LI = dyn_cast<LoadInst>(I)) 654 return LI->getPointerOperand(); 655 if (StoreInst *SI = dyn_cast<StoreInst>(I)) 656 return SI->getPointerOperand(); 657 llvm_unreachable("Value is not load or store instruction"); 658 return nullptr; 659 } 660 661 662 // Examines the loop nesting of the Src and Dst 663 // instructions and establishes their shared loops. Sets the variables 664 // CommonLevels, SrcLevels, and MaxLevels. 665 // The source and destination instructions needn't be contained in the same 666 // loop. The routine establishNestingLevels finds the level of most deeply 667 // nested loop that contains them both, CommonLevels. An instruction that's 668 // not contained in a loop is at level = 0. MaxLevels is equal to the level 669 // of the source plus the level of the destination, minus CommonLevels. 670 // This lets us allocate vectors MaxLevels in length, with room for every 671 // distinct loop referenced in both the source and destination subscripts. 672 // The variable SrcLevels is the nesting depth of the source instruction. 673 // It's used to help calculate distinct loops referenced by the destination. 674 // Here's the map from loops to levels: 675 // 0 - unused 676 // 1 - outermost common loop 677 // ... - other common loops 678 // CommonLevels - innermost common loop 679 // ... - loops containing Src but not Dst 680 // SrcLevels - innermost loop containing Src but not Dst 681 // ... - loops containing Dst but not Src 682 // MaxLevels - innermost loops containing Dst but not Src 683 // Consider the follow code fragment: 684 // for (a = ...) { 685 // for (b = ...) { 686 // for (c = ...) { 687 // for (d = ...) { 688 // A[] = ...; 689 // } 690 // } 691 // for (e = ...) { 692 // for (f = ...) { 693 // for (g = ...) { 694 // ... = A[]; 695 // } 696 // } 697 // } 698 // } 699 // } 700 // If we're looking at the possibility of a dependence between the store 701 // to A (the Src) and the load from A (the Dst), we'll note that they 702 // have 2 loops in common, so CommonLevels will equal 2 and the direction 703 // vector for Result will have 2 entries. SrcLevels = 4 and MaxLevels = 7. 704 // A map from loop names to loop numbers would look like 705 // a - 1 706 // b - 2 = CommonLevels 707 // c - 3 708 // d - 4 = SrcLevels 709 // e - 5 710 // f - 6 711 // g - 7 = MaxLevels 712 void DependenceAnalysis::establishNestingLevels(const Instruction *Src, 713 const Instruction *Dst) { 714 const BasicBlock *SrcBlock = Src->getParent(); 715 const BasicBlock *DstBlock = Dst->getParent(); 716 unsigned SrcLevel = LI->getLoopDepth(SrcBlock); 717 unsigned DstLevel = LI->getLoopDepth(DstBlock); 718 const Loop *SrcLoop = LI->getLoopFor(SrcBlock); 719 const Loop *DstLoop = LI->getLoopFor(DstBlock); 720 SrcLevels = SrcLevel; 721 MaxLevels = SrcLevel + DstLevel; 722 while (SrcLevel > DstLevel) { 723 SrcLoop = SrcLoop->getParentLoop(); 724 SrcLevel--; 725 } 726 while (DstLevel > SrcLevel) { 727 DstLoop = DstLoop->getParentLoop(); 728 DstLevel--; 729 } 730 while (SrcLoop != DstLoop) { 731 SrcLoop = SrcLoop->getParentLoop(); 732 DstLoop = DstLoop->getParentLoop(); 733 SrcLevel--; 734 } 735 CommonLevels = SrcLevel; 736 MaxLevels -= CommonLevels; 737 } 738 739 740 // Given one of the loops containing the source, return 741 // its level index in our numbering scheme. 742 unsigned DependenceAnalysis::mapSrcLoop(const Loop *SrcLoop) const { 743 return SrcLoop->getLoopDepth(); 744 } 745 746 747 // Given one of the loops containing the destination, 748 // return its level index in our numbering scheme. 749 unsigned DependenceAnalysis::mapDstLoop(const Loop *DstLoop) const { 750 unsigned D = DstLoop->getLoopDepth(); 751 if (D > CommonLevels) 752 return D - CommonLevels + SrcLevels; 753 else 754 return D; 755 } 756 757 758 // Returns true if Expression is loop invariant in LoopNest. 759 bool DependenceAnalysis::isLoopInvariant(const SCEV *Expression, 760 const Loop *LoopNest) const { 761 if (!LoopNest) 762 return true; 763 return SE->isLoopInvariant(Expression, LoopNest) && 764 isLoopInvariant(Expression, LoopNest->getParentLoop()); 765 } 766 767 768 769 // Finds the set of loops from the LoopNest that 770 // have a level <= CommonLevels and are referred to by the SCEV Expression. 771 void DependenceAnalysis::collectCommonLoops(const SCEV *Expression, 772 const Loop *LoopNest, 773 SmallBitVector &Loops) const { 774 while (LoopNest) { 775 unsigned Level = LoopNest->getLoopDepth(); 776 if (Level <= CommonLevels && !SE->isLoopInvariant(Expression, LoopNest)) 777 Loops.set(Level); 778 LoopNest = LoopNest->getParentLoop(); 779 } 780 } 781 782 void DependenceAnalysis::unifySubscriptType(Subscript *Pair) { 783 const SCEV *Src = Pair->Src; 784 const SCEV *Dst = Pair->Dst; 785 IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType()); 786 IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType()); 787 if (SrcTy == nullptr || DstTy == nullptr) { 788 assert(SrcTy == DstTy && "This function only unify integer types and " 789 "expect Src and Dst share the same type " 790 "otherwise."); 791 return; 792 } 793 if (SrcTy->getBitWidth() > DstTy->getBitWidth()) { 794 // Sign-extend Dst to typeof(Src) if typeof(Src) is wider than typeof(Dst). 795 Pair->Dst = SE->getSignExtendExpr(Dst, SrcTy); 796 } else if (SrcTy->getBitWidth() < DstTy->getBitWidth()) { 797 // Sign-extend Src to typeof(Dst) if typeof(Dst) is wider than typeof(Src). 798 Pair->Src = SE->getSignExtendExpr(Src, DstTy); 799 } 800 } 801 802 // removeMatchingExtensions - Examines a subscript pair. 803 // If the source and destination are identically sign (or zero) 804 // extended, it strips off the extension in an effect to simplify 805 // the actual analysis. 806 void DependenceAnalysis::removeMatchingExtensions(Subscript *Pair) { 807 const SCEV *Src = Pair->Src; 808 const SCEV *Dst = Pair->Dst; 809 if ((isa<SCEVZeroExtendExpr>(Src) && isa<SCEVZeroExtendExpr>(Dst)) || 810 (isa<SCEVSignExtendExpr>(Src) && isa<SCEVSignExtendExpr>(Dst))) { 811 const SCEVCastExpr *SrcCast = cast<SCEVCastExpr>(Src); 812 const SCEVCastExpr *DstCast = cast<SCEVCastExpr>(Dst); 813 const SCEV *SrcCastOp = SrcCast->getOperand(); 814 const SCEV *DstCastOp = DstCast->getOperand(); 815 if (SrcCastOp->getType() == DstCastOp->getType()) { 816 Pair->Src = SrcCastOp; 817 Pair->Dst = DstCastOp; 818 } 819 } 820 } 821 822 823 // Examine the scev and return true iff it's linear. 824 // Collect any loops mentioned in the set of "Loops". 825 bool DependenceAnalysis::checkSrcSubscript(const SCEV *Src, 826 const Loop *LoopNest, 827 SmallBitVector &Loops) { 828 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Src); 829 if (!AddRec) 830 return isLoopInvariant(Src, LoopNest); 831 const SCEV *Start = AddRec->getStart(); 832 const SCEV *Step = AddRec->getStepRecurrence(*SE); 833 const SCEV *UB = SE->getBackedgeTakenCount(AddRec->getLoop()); 834 if (!isa<SCEVCouldNotCompute>(UB)) { 835 if (SE->getTypeSizeInBits(Start->getType()) < 836 SE->getTypeSizeInBits(UB->getType())) { 837 if (!AddRec->getNoWrapFlags()) 838 return false; 839 } 840 } 841 if (!isLoopInvariant(Step, LoopNest)) 842 return false; 843 Loops.set(mapSrcLoop(AddRec->getLoop())); 844 return checkSrcSubscript(Start, LoopNest, Loops); 845 } 846 847 848 849 // Examine the scev and return true iff it's linear. 850 // Collect any loops mentioned in the set of "Loops". 851 bool DependenceAnalysis::checkDstSubscript(const SCEV *Dst, 852 const Loop *LoopNest, 853 SmallBitVector &Loops) { 854 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Dst); 855 if (!AddRec) 856 return isLoopInvariant(Dst, LoopNest); 857 const SCEV *Start = AddRec->getStart(); 858 const SCEV *Step = AddRec->getStepRecurrence(*SE); 859 const SCEV *UB = SE->getBackedgeTakenCount(AddRec->getLoop()); 860 if (!isa<SCEVCouldNotCompute>(UB)) { 861 if (SE->getTypeSizeInBits(Start->getType()) < 862 SE->getTypeSizeInBits(UB->getType())) { 863 if (!AddRec->getNoWrapFlags()) 864 return false; 865 } 866 } 867 if (!isLoopInvariant(Step, LoopNest)) 868 return false; 869 Loops.set(mapDstLoop(AddRec->getLoop())); 870 return checkDstSubscript(Start, LoopNest, Loops); 871 } 872 873 874 // Examines the subscript pair (the Src and Dst SCEVs) 875 // and classifies it as either ZIV, SIV, RDIV, MIV, or Nonlinear. 876 // Collects the associated loops in a set. 877 DependenceAnalysis::Subscript::ClassificationKind 878 DependenceAnalysis::classifyPair(const SCEV *Src, const Loop *SrcLoopNest, 879 const SCEV *Dst, const Loop *DstLoopNest, 880 SmallBitVector &Loops) { 881 SmallBitVector SrcLoops(MaxLevels + 1); 882 SmallBitVector DstLoops(MaxLevels + 1); 883 if (!checkSrcSubscript(Src, SrcLoopNest, SrcLoops)) 884 return Subscript::NonLinear; 885 if (!checkDstSubscript(Dst, DstLoopNest, DstLoops)) 886 return Subscript::NonLinear; 887 Loops = SrcLoops; 888 Loops |= DstLoops; 889 unsigned N = Loops.count(); 890 if (N == 0) 891 return Subscript::ZIV; 892 if (N == 1) 893 return Subscript::SIV; 894 if (N == 2 && (SrcLoops.count() == 0 || 895 DstLoops.count() == 0 || 896 (SrcLoops.count() == 1 && DstLoops.count() == 1))) 897 return Subscript::RDIV; 898 return Subscript::MIV; 899 } 900 901 902 // A wrapper around SCEV::isKnownPredicate. 903 // Looks for cases where we're interested in comparing for equality. 904 // If both X and Y have been identically sign or zero extended, 905 // it strips off the (confusing) extensions before invoking 906 // SCEV::isKnownPredicate. Perhaps, someday, the ScalarEvolution package 907 // will be similarly updated. 908 // 909 // If SCEV::isKnownPredicate can't prove the predicate, 910 // we try simple subtraction, which seems to help in some cases 911 // involving symbolics. 912 bool DependenceAnalysis::isKnownPredicate(ICmpInst::Predicate Pred, 913 const SCEV *X, 914 const SCEV *Y) const { 915 if (Pred == CmpInst::ICMP_EQ || 916 Pred == CmpInst::ICMP_NE) { 917 if ((isa<SCEVSignExtendExpr>(X) && 918 isa<SCEVSignExtendExpr>(Y)) || 919 (isa<SCEVZeroExtendExpr>(X) && 920 isa<SCEVZeroExtendExpr>(Y))) { 921 const SCEVCastExpr *CX = cast<SCEVCastExpr>(X); 922 const SCEVCastExpr *CY = cast<SCEVCastExpr>(Y); 923 const SCEV *Xop = CX->getOperand(); 924 const SCEV *Yop = CY->getOperand(); 925 if (Xop->getType() == Yop->getType()) { 926 X = Xop; 927 Y = Yop; 928 } 929 } 930 } 931 if (SE->isKnownPredicate(Pred, X, Y)) 932 return true; 933 // If SE->isKnownPredicate can't prove the condition, 934 // we try the brute-force approach of subtracting 935 // and testing the difference. 936 // By testing with SE->isKnownPredicate first, we avoid 937 // the possibility of overflow when the arguments are constants. 938 const SCEV *Delta = SE->getMinusSCEV(X, Y); 939 switch (Pred) { 940 case CmpInst::ICMP_EQ: 941 return Delta->isZero(); 942 case CmpInst::ICMP_NE: 943 return SE->isKnownNonZero(Delta); 944 case CmpInst::ICMP_SGE: 945 return SE->isKnownNonNegative(Delta); 946 case CmpInst::ICMP_SLE: 947 return SE->isKnownNonPositive(Delta); 948 case CmpInst::ICMP_SGT: 949 return SE->isKnownPositive(Delta); 950 case CmpInst::ICMP_SLT: 951 return SE->isKnownNegative(Delta); 952 default: 953 llvm_unreachable("unexpected predicate in isKnownPredicate"); 954 } 955 } 956 957 958 // All subscripts are all the same type. 959 // Loop bound may be smaller (e.g., a char). 960 // Should zero extend loop bound, since it's always >= 0. 961 // This routine collects upper bound and extends or truncates if needed. 962 // Truncating is safe when subscripts are known not to wrap. Cases without 963 // nowrap flags should have been rejected earlier. 964 // Return null if no bound available. 965 const SCEV *DependenceAnalysis::collectUpperBound(const Loop *L, 966 Type *T) const { 967 if (SE->hasLoopInvariantBackedgeTakenCount(L)) { 968 const SCEV *UB = SE->getBackedgeTakenCount(L); 969 return SE->getTruncateOrZeroExtend(UB, T); 970 } 971 return nullptr; 972 } 973 974 975 // Calls collectUpperBound(), then attempts to cast it to SCEVConstant. 976 // If the cast fails, returns NULL. 977 const SCEVConstant *DependenceAnalysis::collectConstantUpperBound(const Loop *L, 978 Type *T 979 ) const { 980 if (const SCEV *UB = collectUpperBound(L, T)) 981 return dyn_cast<SCEVConstant>(UB); 982 return nullptr; 983 } 984 985 986 // testZIV - 987 // When we have a pair of subscripts of the form [c1] and [c2], 988 // where c1 and c2 are both loop invariant, we attack it using 989 // the ZIV test. Basically, we test by comparing the two values, 990 // but there are actually three possible results: 991 // 1) the values are equal, so there's a dependence 992 // 2) the values are different, so there's no dependence 993 // 3) the values might be equal, so we have to assume a dependence. 994 // 995 // Return true if dependence disproved. 996 bool DependenceAnalysis::testZIV(const SCEV *Src, 997 const SCEV *Dst, 998 FullDependence &Result) const { 999 DEBUG(dbgs() << " src = " << *Src << "\n"); 1000 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 1001 ++ZIVapplications; 1002 if (isKnownPredicate(CmpInst::ICMP_EQ, Src, Dst)) { 1003 DEBUG(dbgs() << " provably dependent\n"); 1004 return false; // provably dependent 1005 } 1006 if (isKnownPredicate(CmpInst::ICMP_NE, Src, Dst)) { 1007 DEBUG(dbgs() << " provably independent\n"); 1008 ++ZIVindependence; 1009 return true; // provably independent 1010 } 1011 DEBUG(dbgs() << " possibly dependent\n"); 1012 Result.Consistent = false; 1013 return false; // possibly dependent 1014 } 1015 1016 1017 // strongSIVtest - 1018 // From the paper, Practical Dependence Testing, Section 4.2.1 1019 // 1020 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 + a*i], 1021 // where i is an induction variable, c1 and c2 are loop invariant, 1022 // and a is a constant, we can solve it exactly using the Strong SIV test. 1023 // 1024 // Can prove independence. Failing that, can compute distance (and direction). 1025 // In the presence of symbolic terms, we can sometimes make progress. 1026 // 1027 // If there's a dependence, 1028 // 1029 // c1 + a*i = c2 + a*i' 1030 // 1031 // The dependence distance is 1032 // 1033 // d = i' - i = (c1 - c2)/a 1034 // 1035 // A dependence only exists if d is an integer and abs(d) <= U, where U is the 1036 // loop's upper bound. If a dependence exists, the dependence direction is 1037 // defined as 1038 // 1039 // { < if d > 0 1040 // direction = { = if d = 0 1041 // { > if d < 0 1042 // 1043 // Return true if dependence disproved. 1044 bool DependenceAnalysis::strongSIVtest(const SCEV *Coeff, 1045 const SCEV *SrcConst, 1046 const SCEV *DstConst, 1047 const Loop *CurLoop, 1048 unsigned Level, 1049 FullDependence &Result, 1050 Constraint &NewConstraint) const { 1051 DEBUG(dbgs() << "\tStrong SIV test\n"); 1052 DEBUG(dbgs() << "\t Coeff = " << *Coeff); 1053 DEBUG(dbgs() << ", " << *Coeff->getType() << "\n"); 1054 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst); 1055 DEBUG(dbgs() << ", " << *SrcConst->getType() << "\n"); 1056 DEBUG(dbgs() << "\t DstConst = " << *DstConst); 1057 DEBUG(dbgs() << ", " << *DstConst->getType() << "\n"); 1058 ++StrongSIVapplications; 1059 assert(0 < Level && Level <= CommonLevels && "level out of range"); 1060 Level--; 1061 1062 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); 1063 DEBUG(dbgs() << "\t Delta = " << *Delta); 1064 DEBUG(dbgs() << ", " << *Delta->getType() << "\n"); 1065 1066 // check that |Delta| < iteration count 1067 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1068 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound); 1069 DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n"); 1070 const SCEV *AbsDelta = 1071 SE->isKnownNonNegative(Delta) ? Delta : SE->getNegativeSCEV(Delta); 1072 const SCEV *AbsCoeff = 1073 SE->isKnownNonNegative(Coeff) ? Coeff : SE->getNegativeSCEV(Coeff); 1074 const SCEV *Product = SE->getMulExpr(UpperBound, AbsCoeff); 1075 if (isKnownPredicate(CmpInst::ICMP_SGT, AbsDelta, Product)) { 1076 // Distance greater than trip count - no dependence 1077 ++StrongSIVindependence; 1078 ++StrongSIVsuccesses; 1079 return true; 1080 } 1081 } 1082 1083 // Can we compute distance? 1084 if (isa<SCEVConstant>(Delta) && isa<SCEVConstant>(Coeff)) { 1085 APInt ConstDelta = cast<SCEVConstant>(Delta)->getValue()->getValue(); 1086 APInt ConstCoeff = cast<SCEVConstant>(Coeff)->getValue()->getValue(); 1087 APInt Distance = ConstDelta; // these need to be initialized 1088 APInt Remainder = ConstDelta; 1089 APInt::sdivrem(ConstDelta, ConstCoeff, Distance, Remainder); 1090 DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); 1091 DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1092 // Make sure Coeff divides Delta exactly 1093 if (Remainder != 0) { 1094 // Coeff doesn't divide Distance, no dependence 1095 ++StrongSIVindependence; 1096 ++StrongSIVsuccesses; 1097 return true; 1098 } 1099 Result.DV[Level].Distance = SE->getConstant(Distance); 1100 NewConstraint.setDistance(SE->getConstant(Distance), CurLoop); 1101 if (Distance.sgt(0)) 1102 Result.DV[Level].Direction &= Dependence::DVEntry::LT; 1103 else if (Distance.slt(0)) 1104 Result.DV[Level].Direction &= Dependence::DVEntry::GT; 1105 else 1106 Result.DV[Level].Direction &= Dependence::DVEntry::EQ; 1107 ++StrongSIVsuccesses; 1108 } 1109 else if (Delta->isZero()) { 1110 // since 0/X == 0 1111 Result.DV[Level].Distance = Delta; 1112 NewConstraint.setDistance(Delta, CurLoop); 1113 Result.DV[Level].Direction &= Dependence::DVEntry::EQ; 1114 ++StrongSIVsuccesses; 1115 } 1116 else { 1117 if (Coeff->isOne()) { 1118 DEBUG(dbgs() << "\t Distance = " << *Delta << "\n"); 1119 Result.DV[Level].Distance = Delta; // since X/1 == X 1120 NewConstraint.setDistance(Delta, CurLoop); 1121 } 1122 else { 1123 Result.Consistent = false; 1124 NewConstraint.setLine(Coeff, 1125 SE->getNegativeSCEV(Coeff), 1126 SE->getNegativeSCEV(Delta), CurLoop); 1127 } 1128 1129 // maybe we can get a useful direction 1130 bool DeltaMaybeZero = !SE->isKnownNonZero(Delta); 1131 bool DeltaMaybePositive = !SE->isKnownNonPositive(Delta); 1132 bool DeltaMaybeNegative = !SE->isKnownNonNegative(Delta); 1133 bool CoeffMaybePositive = !SE->isKnownNonPositive(Coeff); 1134 bool CoeffMaybeNegative = !SE->isKnownNonNegative(Coeff); 1135 // The double negatives above are confusing. 1136 // It helps to read !SE->isKnownNonZero(Delta) 1137 // as "Delta might be Zero" 1138 unsigned NewDirection = Dependence::DVEntry::NONE; 1139 if ((DeltaMaybePositive && CoeffMaybePositive) || 1140 (DeltaMaybeNegative && CoeffMaybeNegative)) 1141 NewDirection = Dependence::DVEntry::LT; 1142 if (DeltaMaybeZero) 1143 NewDirection |= Dependence::DVEntry::EQ; 1144 if ((DeltaMaybeNegative && CoeffMaybePositive) || 1145 (DeltaMaybePositive && CoeffMaybeNegative)) 1146 NewDirection |= Dependence::DVEntry::GT; 1147 if (NewDirection < Result.DV[Level].Direction) 1148 ++StrongSIVsuccesses; 1149 Result.DV[Level].Direction &= NewDirection; 1150 } 1151 return false; 1152 } 1153 1154 1155 // weakCrossingSIVtest - 1156 // From the paper, Practical Dependence Testing, Section 4.2.2 1157 // 1158 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 - a*i], 1159 // where i is an induction variable, c1 and c2 are loop invariant, 1160 // and a is a constant, we can solve it exactly using the 1161 // Weak-Crossing SIV test. 1162 // 1163 // Given c1 + a*i = c2 - a*i', we can look for the intersection of 1164 // the two lines, where i = i', yielding 1165 // 1166 // c1 + a*i = c2 - a*i 1167 // 2a*i = c2 - c1 1168 // i = (c2 - c1)/2a 1169 // 1170 // If i < 0, there is no dependence. 1171 // If i > upperbound, there is no dependence. 1172 // If i = 0 (i.e., if c1 = c2), there's a dependence with distance = 0. 1173 // If i = upperbound, there's a dependence with distance = 0. 1174 // If i is integral, there's a dependence (all directions). 1175 // If the non-integer part = 1/2, there's a dependence (<> directions). 1176 // Otherwise, there's no dependence. 1177 // 1178 // Can prove independence. Failing that, 1179 // can sometimes refine the directions. 1180 // Can determine iteration for splitting. 1181 // 1182 // Return true if dependence disproved. 1183 bool DependenceAnalysis::weakCrossingSIVtest(const SCEV *Coeff, 1184 const SCEV *SrcConst, 1185 const SCEV *DstConst, 1186 const Loop *CurLoop, 1187 unsigned Level, 1188 FullDependence &Result, 1189 Constraint &NewConstraint, 1190 const SCEV *&SplitIter) const { 1191 DEBUG(dbgs() << "\tWeak-Crossing SIV test\n"); 1192 DEBUG(dbgs() << "\t Coeff = " << *Coeff << "\n"); 1193 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1194 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1195 ++WeakCrossingSIVapplications; 1196 assert(0 < Level && Level <= CommonLevels && "Level out of range"); 1197 Level--; 1198 Result.Consistent = false; 1199 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1200 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1201 NewConstraint.setLine(Coeff, Coeff, Delta, CurLoop); 1202 if (Delta->isZero()) { 1203 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT); 1204 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT); 1205 ++WeakCrossingSIVsuccesses; 1206 if (!Result.DV[Level].Direction) { 1207 ++WeakCrossingSIVindependence; 1208 return true; 1209 } 1210 Result.DV[Level].Distance = Delta; // = 0 1211 return false; 1212 } 1213 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Coeff); 1214 if (!ConstCoeff) 1215 return false; 1216 1217 Result.DV[Level].Splitable = true; 1218 if (SE->isKnownNegative(ConstCoeff)) { 1219 ConstCoeff = dyn_cast<SCEVConstant>(SE->getNegativeSCEV(ConstCoeff)); 1220 assert(ConstCoeff && 1221 "dynamic cast of negative of ConstCoeff should yield constant"); 1222 Delta = SE->getNegativeSCEV(Delta); 1223 } 1224 assert(SE->isKnownPositive(ConstCoeff) && "ConstCoeff should be positive"); 1225 1226 // compute SplitIter for use by DependenceAnalysis::getSplitIteration() 1227 SplitIter = 1228 SE->getUDivExpr(SE->getSMaxExpr(SE->getConstant(Delta->getType(), 0), 1229 Delta), 1230 SE->getMulExpr(SE->getConstant(Delta->getType(), 2), 1231 ConstCoeff)); 1232 DEBUG(dbgs() << "\t Split iter = " << *SplitIter << "\n"); 1233 1234 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1235 if (!ConstDelta) 1236 return false; 1237 1238 // We're certain that ConstCoeff > 0; therefore, 1239 // if Delta < 0, then no dependence. 1240 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1241 DEBUG(dbgs() << "\t ConstCoeff = " << *ConstCoeff << "\n"); 1242 if (SE->isKnownNegative(Delta)) { 1243 // No dependence, Delta < 0 1244 ++WeakCrossingSIVindependence; 1245 ++WeakCrossingSIVsuccesses; 1246 return true; 1247 } 1248 1249 // We're certain that Delta > 0 and ConstCoeff > 0. 1250 // Check Delta/(2*ConstCoeff) against upper loop bound 1251 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1252 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1253 const SCEV *ConstantTwo = SE->getConstant(UpperBound->getType(), 2); 1254 const SCEV *ML = SE->getMulExpr(SE->getMulExpr(ConstCoeff, UpperBound), 1255 ConstantTwo); 1256 DEBUG(dbgs() << "\t ML = " << *ML << "\n"); 1257 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, ML)) { 1258 // Delta too big, no dependence 1259 ++WeakCrossingSIVindependence; 1260 ++WeakCrossingSIVsuccesses; 1261 return true; 1262 } 1263 if (isKnownPredicate(CmpInst::ICMP_EQ, Delta, ML)) { 1264 // i = i' = UB 1265 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT); 1266 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT); 1267 ++WeakCrossingSIVsuccesses; 1268 if (!Result.DV[Level].Direction) { 1269 ++WeakCrossingSIVindependence; 1270 return true; 1271 } 1272 Result.DV[Level].Splitable = false; 1273 Result.DV[Level].Distance = SE->getConstant(Delta->getType(), 0); 1274 return false; 1275 } 1276 } 1277 1278 // check that Coeff divides Delta 1279 APInt APDelta = ConstDelta->getValue()->getValue(); 1280 APInt APCoeff = ConstCoeff->getValue()->getValue(); 1281 APInt Distance = APDelta; // these need to be initialzed 1282 APInt Remainder = APDelta; 1283 APInt::sdivrem(APDelta, APCoeff, Distance, Remainder); 1284 DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1285 if (Remainder != 0) { 1286 // Coeff doesn't divide Delta, no dependence 1287 ++WeakCrossingSIVindependence; 1288 ++WeakCrossingSIVsuccesses; 1289 return true; 1290 } 1291 DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); 1292 1293 // if 2*Coeff doesn't divide Delta, then the equal direction isn't possible 1294 APInt Two = APInt(Distance.getBitWidth(), 2, true); 1295 Remainder = Distance.srem(Two); 1296 DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1297 if (Remainder != 0) { 1298 // Equal direction isn't possible 1299 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::EQ); 1300 ++WeakCrossingSIVsuccesses; 1301 } 1302 return false; 1303 } 1304 1305 1306 // Kirch's algorithm, from 1307 // 1308 // Optimizing Supercompilers for Supercomputers 1309 // Michael Wolfe 1310 // MIT Press, 1989 1311 // 1312 // Program 2.1, page 29. 1313 // Computes the GCD of AM and BM. 1314 // Also finds a solution to the equation ax - by = gcd(a, b). 1315 // Returns true if dependence disproved; i.e., gcd does not divide Delta. 1316 static 1317 bool findGCD(unsigned Bits, APInt AM, APInt BM, APInt Delta, 1318 APInt &G, APInt &X, APInt &Y) { 1319 APInt A0(Bits, 1, true), A1(Bits, 0, true); 1320 APInt B0(Bits, 0, true), B1(Bits, 1, true); 1321 APInt G0 = AM.abs(); 1322 APInt G1 = BM.abs(); 1323 APInt Q = G0; // these need to be initialized 1324 APInt R = G0; 1325 APInt::sdivrem(G0, G1, Q, R); 1326 while (R != 0) { 1327 APInt A2 = A0 - Q*A1; A0 = A1; A1 = A2; 1328 APInt B2 = B0 - Q*B1; B0 = B1; B1 = B2; 1329 G0 = G1; G1 = R; 1330 APInt::sdivrem(G0, G1, Q, R); 1331 } 1332 G = G1; 1333 DEBUG(dbgs() << "\t GCD = " << G << "\n"); 1334 X = AM.slt(0) ? -A1 : A1; 1335 Y = BM.slt(0) ? B1 : -B1; 1336 1337 // make sure gcd divides Delta 1338 R = Delta.srem(G); 1339 if (R != 0) 1340 return true; // gcd doesn't divide Delta, no dependence 1341 Q = Delta.sdiv(G); 1342 X *= Q; 1343 Y *= Q; 1344 return false; 1345 } 1346 1347 1348 static 1349 APInt floorOfQuotient(APInt A, APInt B) { 1350 APInt Q = A; // these need to be initialized 1351 APInt R = A; 1352 APInt::sdivrem(A, B, Q, R); 1353 if (R == 0) 1354 return Q; 1355 if ((A.sgt(0) && B.sgt(0)) || 1356 (A.slt(0) && B.slt(0))) 1357 return Q; 1358 else 1359 return Q - 1; 1360 } 1361 1362 1363 static 1364 APInt ceilingOfQuotient(APInt A, APInt B) { 1365 APInt Q = A; // these need to be initialized 1366 APInt R = A; 1367 APInt::sdivrem(A, B, Q, R); 1368 if (R == 0) 1369 return Q; 1370 if ((A.sgt(0) && B.sgt(0)) || 1371 (A.slt(0) && B.slt(0))) 1372 return Q + 1; 1373 else 1374 return Q; 1375 } 1376 1377 1378 static 1379 APInt maxAPInt(APInt A, APInt B) { 1380 return A.sgt(B) ? A : B; 1381 } 1382 1383 1384 static 1385 APInt minAPInt(APInt A, APInt B) { 1386 return A.slt(B) ? A : B; 1387 } 1388 1389 1390 // exactSIVtest - 1391 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i], 1392 // where i is an induction variable, c1 and c2 are loop invariant, and a1 1393 // and a2 are constant, we can solve it exactly using an algorithm developed 1394 // by Banerjee and Wolfe. See Section 2.5.3 in 1395 // 1396 // Optimizing Supercompilers for Supercomputers 1397 // Michael Wolfe 1398 // MIT Press, 1989 1399 // 1400 // It's slower than the specialized tests (strong SIV, weak-zero SIV, etc), 1401 // so use them if possible. They're also a bit better with symbolics and, 1402 // in the case of the strong SIV test, can compute Distances. 1403 // 1404 // Return true if dependence disproved. 1405 bool DependenceAnalysis::exactSIVtest(const SCEV *SrcCoeff, 1406 const SCEV *DstCoeff, 1407 const SCEV *SrcConst, 1408 const SCEV *DstConst, 1409 const Loop *CurLoop, 1410 unsigned Level, 1411 FullDependence &Result, 1412 Constraint &NewConstraint) const { 1413 DEBUG(dbgs() << "\tExact SIV test\n"); 1414 DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); 1415 DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); 1416 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1417 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1418 ++ExactSIVapplications; 1419 assert(0 < Level && Level <= CommonLevels && "Level out of range"); 1420 Level--; 1421 Result.Consistent = false; 1422 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1423 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1424 NewConstraint.setLine(SrcCoeff, SE->getNegativeSCEV(DstCoeff), 1425 Delta, CurLoop); 1426 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1427 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1428 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1429 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) 1430 return false; 1431 1432 // find gcd 1433 APInt G, X, Y; 1434 APInt AM = ConstSrcCoeff->getValue()->getValue(); 1435 APInt BM = ConstDstCoeff->getValue()->getValue(); 1436 unsigned Bits = AM.getBitWidth(); 1437 if (findGCD(Bits, AM, BM, ConstDelta->getValue()->getValue(), G, X, Y)) { 1438 // gcd doesn't divide Delta, no dependence 1439 ++ExactSIVindependence; 1440 ++ExactSIVsuccesses; 1441 return true; 1442 } 1443 1444 DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); 1445 1446 // since SCEV construction normalizes, LM = 0 1447 APInt UM(Bits, 1, true); 1448 bool UMvalid = false; 1449 // UM is perhaps unavailable, let's check 1450 if (const SCEVConstant *CUB = 1451 collectConstantUpperBound(CurLoop, Delta->getType())) { 1452 UM = CUB->getValue()->getValue(); 1453 DEBUG(dbgs() << "\t UM = " << UM << "\n"); 1454 UMvalid = true; 1455 } 1456 1457 APInt TU(APInt::getSignedMaxValue(Bits)); 1458 APInt TL(APInt::getSignedMinValue(Bits)); 1459 1460 // test(BM/G, LM-X) and test(-BM/G, X-UM) 1461 APInt TMUL = BM.sdiv(G); 1462 if (TMUL.sgt(0)) { 1463 TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL)); 1464 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1465 if (UMvalid) { 1466 TU = minAPInt(TU, floorOfQuotient(UM - X, TMUL)); 1467 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1468 } 1469 } 1470 else { 1471 TU = minAPInt(TU, floorOfQuotient(-X, TMUL)); 1472 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1473 if (UMvalid) { 1474 TL = maxAPInt(TL, ceilingOfQuotient(UM - X, TMUL)); 1475 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1476 } 1477 } 1478 1479 // test(AM/G, LM-Y) and test(-AM/G, Y-UM) 1480 TMUL = AM.sdiv(G); 1481 if (TMUL.sgt(0)) { 1482 TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL)); 1483 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1484 if (UMvalid) { 1485 TU = minAPInt(TU, floorOfQuotient(UM - Y, TMUL)); 1486 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1487 } 1488 } 1489 else { 1490 TU = minAPInt(TU, floorOfQuotient(-Y, TMUL)); 1491 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1492 if (UMvalid) { 1493 TL = maxAPInt(TL, ceilingOfQuotient(UM - Y, TMUL)); 1494 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1495 } 1496 } 1497 if (TL.sgt(TU)) { 1498 ++ExactSIVindependence; 1499 ++ExactSIVsuccesses; 1500 return true; 1501 } 1502 1503 // explore directions 1504 unsigned NewDirection = Dependence::DVEntry::NONE; 1505 1506 // less than 1507 APInt SaveTU(TU); // save these 1508 APInt SaveTL(TL); 1509 DEBUG(dbgs() << "\t exploring LT direction\n"); 1510 TMUL = AM - BM; 1511 if (TMUL.sgt(0)) { 1512 TL = maxAPInt(TL, ceilingOfQuotient(X - Y + 1, TMUL)); 1513 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1514 } 1515 else { 1516 TU = minAPInt(TU, floorOfQuotient(X - Y + 1, TMUL)); 1517 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1518 } 1519 if (TL.sle(TU)) { 1520 NewDirection |= Dependence::DVEntry::LT; 1521 ++ExactSIVsuccesses; 1522 } 1523 1524 // equal 1525 TU = SaveTU; // restore 1526 TL = SaveTL; 1527 DEBUG(dbgs() << "\t exploring EQ direction\n"); 1528 if (TMUL.sgt(0)) { 1529 TL = maxAPInt(TL, ceilingOfQuotient(X - Y, TMUL)); 1530 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1531 } 1532 else { 1533 TU = minAPInt(TU, floorOfQuotient(X - Y, TMUL)); 1534 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1535 } 1536 TMUL = BM - AM; 1537 if (TMUL.sgt(0)) { 1538 TL = maxAPInt(TL, ceilingOfQuotient(Y - X, TMUL)); 1539 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1540 } 1541 else { 1542 TU = minAPInt(TU, floorOfQuotient(Y - X, TMUL)); 1543 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1544 } 1545 if (TL.sle(TU)) { 1546 NewDirection |= Dependence::DVEntry::EQ; 1547 ++ExactSIVsuccesses; 1548 } 1549 1550 // greater than 1551 TU = SaveTU; // restore 1552 TL = SaveTL; 1553 DEBUG(dbgs() << "\t exploring GT direction\n"); 1554 if (TMUL.sgt(0)) { 1555 TL = maxAPInt(TL, ceilingOfQuotient(Y - X + 1, TMUL)); 1556 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1557 } 1558 else { 1559 TU = minAPInt(TU, floorOfQuotient(Y - X + 1, TMUL)); 1560 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1561 } 1562 if (TL.sle(TU)) { 1563 NewDirection |= Dependence::DVEntry::GT; 1564 ++ExactSIVsuccesses; 1565 } 1566 1567 // finished 1568 Result.DV[Level].Direction &= NewDirection; 1569 if (Result.DV[Level].Direction == Dependence::DVEntry::NONE) 1570 ++ExactSIVindependence; 1571 return Result.DV[Level].Direction == Dependence::DVEntry::NONE; 1572 } 1573 1574 1575 1576 // Return true if the divisor evenly divides the dividend. 1577 static 1578 bool isRemainderZero(const SCEVConstant *Dividend, 1579 const SCEVConstant *Divisor) { 1580 APInt ConstDividend = Dividend->getValue()->getValue(); 1581 APInt ConstDivisor = Divisor->getValue()->getValue(); 1582 return ConstDividend.srem(ConstDivisor) == 0; 1583 } 1584 1585 1586 // weakZeroSrcSIVtest - 1587 // From the paper, Practical Dependence Testing, Section 4.2.2 1588 // 1589 // When we have a pair of subscripts of the form [c1] and [c2 + a*i], 1590 // where i is an induction variable, c1 and c2 are loop invariant, 1591 // and a is a constant, we can solve it exactly using the 1592 // Weak-Zero SIV test. 1593 // 1594 // Given 1595 // 1596 // c1 = c2 + a*i 1597 // 1598 // we get 1599 // 1600 // (c1 - c2)/a = i 1601 // 1602 // If i is not an integer, there's no dependence. 1603 // If i < 0 or > UB, there's no dependence. 1604 // If i = 0, the direction is <= and peeling the 1605 // 1st iteration will break the dependence. 1606 // If i = UB, the direction is >= and peeling the 1607 // last iteration will break the dependence. 1608 // Otherwise, the direction is *. 1609 // 1610 // Can prove independence. Failing that, we can sometimes refine 1611 // the directions. Can sometimes show that first or last 1612 // iteration carries all the dependences (so worth peeling). 1613 // 1614 // (see also weakZeroDstSIVtest) 1615 // 1616 // Return true if dependence disproved. 1617 bool DependenceAnalysis::weakZeroSrcSIVtest(const SCEV *DstCoeff, 1618 const SCEV *SrcConst, 1619 const SCEV *DstConst, 1620 const Loop *CurLoop, 1621 unsigned Level, 1622 FullDependence &Result, 1623 Constraint &NewConstraint) const { 1624 // For the WeakSIV test, it's possible the loop isn't common to 1625 // the Src and Dst loops. If it isn't, then there's no need to 1626 // record a direction. 1627 DEBUG(dbgs() << "\tWeak-Zero (src) SIV test\n"); 1628 DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << "\n"); 1629 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1630 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1631 ++WeakZeroSIVapplications; 1632 assert(0 < Level && Level <= MaxLevels && "Level out of range"); 1633 Level--; 1634 Result.Consistent = false; 1635 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); 1636 NewConstraint.setLine(SE->getConstant(Delta->getType(), 0), 1637 DstCoeff, Delta, CurLoop); 1638 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1639 if (isKnownPredicate(CmpInst::ICMP_EQ, SrcConst, DstConst)) { 1640 if (Level < CommonLevels) { 1641 Result.DV[Level].Direction &= Dependence::DVEntry::LE; 1642 Result.DV[Level].PeelFirst = true; 1643 ++WeakZeroSIVsuccesses; 1644 } 1645 return false; // dependences caused by first iteration 1646 } 1647 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1648 if (!ConstCoeff) 1649 return false; 1650 const SCEV *AbsCoeff = 1651 SE->isKnownNegative(ConstCoeff) ? 1652 SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; 1653 const SCEV *NewDelta = 1654 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta; 1655 1656 // check that Delta/SrcCoeff < iteration count 1657 // really check NewDelta < count*AbsCoeff 1658 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1659 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1660 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); 1661 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { 1662 ++WeakZeroSIVindependence; 1663 ++WeakZeroSIVsuccesses; 1664 return true; 1665 } 1666 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { 1667 // dependences caused by last iteration 1668 if (Level < CommonLevels) { 1669 Result.DV[Level].Direction &= Dependence::DVEntry::GE; 1670 Result.DV[Level].PeelLast = true; 1671 ++WeakZeroSIVsuccesses; 1672 } 1673 return false; 1674 } 1675 } 1676 1677 // check that Delta/SrcCoeff >= 0 1678 // really check that NewDelta >= 0 1679 if (SE->isKnownNegative(NewDelta)) { 1680 // No dependence, newDelta < 0 1681 ++WeakZeroSIVindependence; 1682 ++WeakZeroSIVsuccesses; 1683 return true; 1684 } 1685 1686 // if SrcCoeff doesn't divide Delta, then no dependence 1687 if (isa<SCEVConstant>(Delta) && 1688 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { 1689 ++WeakZeroSIVindependence; 1690 ++WeakZeroSIVsuccesses; 1691 return true; 1692 } 1693 return false; 1694 } 1695 1696 1697 // weakZeroDstSIVtest - 1698 // From the paper, Practical Dependence Testing, Section 4.2.2 1699 // 1700 // When we have a pair of subscripts of the form [c1 + a*i] and [c2], 1701 // where i is an induction variable, c1 and c2 are loop invariant, 1702 // and a is a constant, we can solve it exactly using the 1703 // Weak-Zero SIV test. 1704 // 1705 // Given 1706 // 1707 // c1 + a*i = c2 1708 // 1709 // we get 1710 // 1711 // i = (c2 - c1)/a 1712 // 1713 // If i is not an integer, there's no dependence. 1714 // If i < 0 or > UB, there's no dependence. 1715 // If i = 0, the direction is <= and peeling the 1716 // 1st iteration will break the dependence. 1717 // If i = UB, the direction is >= and peeling the 1718 // last iteration will break the dependence. 1719 // Otherwise, the direction is *. 1720 // 1721 // Can prove independence. Failing that, we can sometimes refine 1722 // the directions. Can sometimes show that first or last 1723 // iteration carries all the dependences (so worth peeling). 1724 // 1725 // (see also weakZeroSrcSIVtest) 1726 // 1727 // Return true if dependence disproved. 1728 bool DependenceAnalysis::weakZeroDstSIVtest(const SCEV *SrcCoeff, 1729 const SCEV *SrcConst, 1730 const SCEV *DstConst, 1731 const Loop *CurLoop, 1732 unsigned Level, 1733 FullDependence &Result, 1734 Constraint &NewConstraint) const { 1735 // For the WeakSIV test, it's possible the loop isn't common to the 1736 // Src and Dst loops. If it isn't, then there's no need to record a direction. 1737 DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n"); 1738 DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << "\n"); 1739 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1740 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1741 ++WeakZeroSIVapplications; 1742 assert(0 < Level && Level <= SrcLevels && "Level out of range"); 1743 Level--; 1744 Result.Consistent = false; 1745 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1746 NewConstraint.setLine(SrcCoeff, SE->getConstant(Delta->getType(), 0), 1747 Delta, CurLoop); 1748 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1749 if (isKnownPredicate(CmpInst::ICMP_EQ, DstConst, SrcConst)) { 1750 if (Level < CommonLevels) { 1751 Result.DV[Level].Direction &= Dependence::DVEntry::LE; 1752 Result.DV[Level].PeelFirst = true; 1753 ++WeakZeroSIVsuccesses; 1754 } 1755 return false; // dependences caused by first iteration 1756 } 1757 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1758 if (!ConstCoeff) 1759 return false; 1760 const SCEV *AbsCoeff = 1761 SE->isKnownNegative(ConstCoeff) ? 1762 SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; 1763 const SCEV *NewDelta = 1764 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta; 1765 1766 // check that Delta/SrcCoeff < iteration count 1767 // really check NewDelta < count*AbsCoeff 1768 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1769 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1770 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); 1771 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { 1772 ++WeakZeroSIVindependence; 1773 ++WeakZeroSIVsuccesses; 1774 return true; 1775 } 1776 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { 1777 // dependences caused by last iteration 1778 if (Level < CommonLevels) { 1779 Result.DV[Level].Direction &= Dependence::DVEntry::GE; 1780 Result.DV[Level].PeelLast = true; 1781 ++WeakZeroSIVsuccesses; 1782 } 1783 return false; 1784 } 1785 } 1786 1787 // check that Delta/SrcCoeff >= 0 1788 // really check that NewDelta >= 0 1789 if (SE->isKnownNegative(NewDelta)) { 1790 // No dependence, newDelta < 0 1791 ++WeakZeroSIVindependence; 1792 ++WeakZeroSIVsuccesses; 1793 return true; 1794 } 1795 1796 // if SrcCoeff doesn't divide Delta, then no dependence 1797 if (isa<SCEVConstant>(Delta) && 1798 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { 1799 ++WeakZeroSIVindependence; 1800 ++WeakZeroSIVsuccesses; 1801 return true; 1802 } 1803 return false; 1804 } 1805 1806 1807 // exactRDIVtest - Tests the RDIV subscript pair for dependence. 1808 // Things of the form [c1 + a*i] and [c2 + b*j], 1809 // where i and j are induction variable, c1 and c2 are loop invariant, 1810 // and a and b are constants. 1811 // Returns true if any possible dependence is disproved. 1812 // Marks the result as inconsistent. 1813 // Works in some cases that symbolicRDIVtest doesn't, and vice versa. 1814 bool DependenceAnalysis::exactRDIVtest(const SCEV *SrcCoeff, 1815 const SCEV *DstCoeff, 1816 const SCEV *SrcConst, 1817 const SCEV *DstConst, 1818 const Loop *SrcLoop, 1819 const Loop *DstLoop, 1820 FullDependence &Result) const { 1821 DEBUG(dbgs() << "\tExact RDIV test\n"); 1822 DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); 1823 DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); 1824 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1825 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1826 ++ExactRDIVapplications; 1827 Result.Consistent = false; 1828 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1829 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1830 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1831 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1832 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1833 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) 1834 return false; 1835 1836 // find gcd 1837 APInt G, X, Y; 1838 APInt AM = ConstSrcCoeff->getValue()->getValue(); 1839 APInt BM = ConstDstCoeff->getValue()->getValue(); 1840 unsigned Bits = AM.getBitWidth(); 1841 if (findGCD(Bits, AM, BM, ConstDelta->getValue()->getValue(), G, X, Y)) { 1842 // gcd doesn't divide Delta, no dependence 1843 ++ExactRDIVindependence; 1844 return true; 1845 } 1846 1847 DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); 1848 1849 // since SCEV construction seems to normalize, LM = 0 1850 APInt SrcUM(Bits, 1, true); 1851 bool SrcUMvalid = false; 1852 // SrcUM is perhaps unavailable, let's check 1853 if (const SCEVConstant *UpperBound = 1854 collectConstantUpperBound(SrcLoop, Delta->getType())) { 1855 SrcUM = UpperBound->getValue()->getValue(); 1856 DEBUG(dbgs() << "\t SrcUM = " << SrcUM << "\n"); 1857 SrcUMvalid = true; 1858 } 1859 1860 APInt DstUM(Bits, 1, true); 1861 bool DstUMvalid = false; 1862 // UM is perhaps unavailable, let's check 1863 if (const SCEVConstant *UpperBound = 1864 collectConstantUpperBound(DstLoop, Delta->getType())) { 1865 DstUM = UpperBound->getValue()->getValue(); 1866 DEBUG(dbgs() << "\t DstUM = " << DstUM << "\n"); 1867 DstUMvalid = true; 1868 } 1869 1870 APInt TU(APInt::getSignedMaxValue(Bits)); 1871 APInt TL(APInt::getSignedMinValue(Bits)); 1872 1873 // test(BM/G, LM-X) and test(-BM/G, X-UM) 1874 APInt TMUL = BM.sdiv(G); 1875 if (TMUL.sgt(0)) { 1876 TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL)); 1877 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1878 if (SrcUMvalid) { 1879 TU = minAPInt(TU, floorOfQuotient(SrcUM - X, TMUL)); 1880 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1881 } 1882 } 1883 else { 1884 TU = minAPInt(TU, floorOfQuotient(-X, TMUL)); 1885 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1886 if (SrcUMvalid) { 1887 TL = maxAPInt(TL, ceilingOfQuotient(SrcUM - X, TMUL)); 1888 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1889 } 1890 } 1891 1892 // test(AM/G, LM-Y) and test(-AM/G, Y-UM) 1893 TMUL = AM.sdiv(G); 1894 if (TMUL.sgt(0)) { 1895 TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL)); 1896 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1897 if (DstUMvalid) { 1898 TU = minAPInt(TU, floorOfQuotient(DstUM - Y, TMUL)); 1899 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1900 } 1901 } 1902 else { 1903 TU = minAPInt(TU, floorOfQuotient(-Y, TMUL)); 1904 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1905 if (DstUMvalid) { 1906 TL = maxAPInt(TL, ceilingOfQuotient(DstUM - Y, TMUL)); 1907 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1908 } 1909 } 1910 if (TL.sgt(TU)) 1911 ++ExactRDIVindependence; 1912 return TL.sgt(TU); 1913 } 1914 1915 1916 // symbolicRDIVtest - 1917 // In Section 4.5 of the Practical Dependence Testing paper,the authors 1918 // introduce a special case of Banerjee's Inequalities (also called the 1919 // Extreme-Value Test) that can handle some of the SIV and RDIV cases, 1920 // particularly cases with symbolics. Since it's only able to disprove 1921 // dependence (not compute distances or directions), we'll use it as a 1922 // fall back for the other tests. 1923 // 1924 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] 1925 // where i and j are induction variables and c1 and c2 are loop invariants, 1926 // we can use the symbolic tests to disprove some dependences, serving as a 1927 // backup for the RDIV test. Note that i and j can be the same variable, 1928 // letting this test serve as a backup for the various SIV tests. 1929 // 1930 // For a dependence to exist, c1 + a1*i must equal c2 + a2*j for some 1931 // 0 <= i <= N1 and some 0 <= j <= N2, where N1 and N2 are the (normalized) 1932 // loop bounds for the i and j loops, respectively. So, ... 1933 // 1934 // c1 + a1*i = c2 + a2*j 1935 // a1*i - a2*j = c2 - c1 1936 // 1937 // To test for a dependence, we compute c2 - c1 and make sure it's in the 1938 // range of the maximum and minimum possible values of a1*i - a2*j. 1939 // Considering the signs of a1 and a2, we have 4 possible cases: 1940 // 1941 // 1) If a1 >= 0 and a2 >= 0, then 1942 // a1*0 - a2*N2 <= c2 - c1 <= a1*N1 - a2*0 1943 // -a2*N2 <= c2 - c1 <= a1*N1 1944 // 1945 // 2) If a1 >= 0 and a2 <= 0, then 1946 // a1*0 - a2*0 <= c2 - c1 <= a1*N1 - a2*N2 1947 // 0 <= c2 - c1 <= a1*N1 - a2*N2 1948 // 1949 // 3) If a1 <= 0 and a2 >= 0, then 1950 // a1*N1 - a2*N2 <= c2 - c1 <= a1*0 - a2*0 1951 // a1*N1 - a2*N2 <= c2 - c1 <= 0 1952 // 1953 // 4) If a1 <= 0 and a2 <= 0, then 1954 // a1*N1 - a2*0 <= c2 - c1 <= a1*0 - a2*N2 1955 // a1*N1 <= c2 - c1 <= -a2*N2 1956 // 1957 // return true if dependence disproved 1958 bool DependenceAnalysis::symbolicRDIVtest(const SCEV *A1, 1959 const SCEV *A2, 1960 const SCEV *C1, 1961 const SCEV *C2, 1962 const Loop *Loop1, 1963 const Loop *Loop2) const { 1964 ++SymbolicRDIVapplications; 1965 DEBUG(dbgs() << "\ttry symbolic RDIV test\n"); 1966 DEBUG(dbgs() << "\t A1 = " << *A1); 1967 DEBUG(dbgs() << ", type = " << *A1->getType() << "\n"); 1968 DEBUG(dbgs() << "\t A2 = " << *A2 << "\n"); 1969 DEBUG(dbgs() << "\t C1 = " << *C1 << "\n"); 1970 DEBUG(dbgs() << "\t C2 = " << *C2 << "\n"); 1971 const SCEV *N1 = collectUpperBound(Loop1, A1->getType()); 1972 const SCEV *N2 = collectUpperBound(Loop2, A1->getType()); 1973 DEBUG(if (N1) dbgs() << "\t N1 = " << *N1 << "\n"); 1974 DEBUG(if (N2) dbgs() << "\t N2 = " << *N2 << "\n"); 1975 const SCEV *C2_C1 = SE->getMinusSCEV(C2, C1); 1976 const SCEV *C1_C2 = SE->getMinusSCEV(C1, C2); 1977 DEBUG(dbgs() << "\t C2 - C1 = " << *C2_C1 << "\n"); 1978 DEBUG(dbgs() << "\t C1 - C2 = " << *C1_C2 << "\n"); 1979 if (SE->isKnownNonNegative(A1)) { 1980 if (SE->isKnownNonNegative(A2)) { 1981 // A1 >= 0 && A2 >= 0 1982 if (N1) { 1983 // make sure that c2 - c1 <= a1*N1 1984 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 1985 DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); 1986 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1)) { 1987 ++SymbolicRDIVindependence; 1988 return true; 1989 } 1990 } 1991 if (N2) { 1992 // make sure that -a2*N2 <= c2 - c1, or a2*N2 >= c1 - c2 1993 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 1994 DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); 1995 if (isKnownPredicate(CmpInst::ICMP_SLT, A2N2, C1_C2)) { 1996 ++SymbolicRDIVindependence; 1997 return true; 1998 } 1999 } 2000 } 2001 else if (SE->isKnownNonPositive(A2)) { 2002 // a1 >= 0 && a2 <= 0 2003 if (N1 && N2) { 2004 // make sure that c2 - c1 <= a1*N1 - a2*N2 2005 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2006 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2007 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); 2008 DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); 2009 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1_A2N2)) { 2010 ++SymbolicRDIVindependence; 2011 return true; 2012 } 2013 } 2014 // make sure that 0 <= c2 - c1 2015 if (SE->isKnownNegative(C2_C1)) { 2016 ++SymbolicRDIVindependence; 2017 return true; 2018 } 2019 } 2020 } 2021 else if (SE->isKnownNonPositive(A1)) { 2022 if (SE->isKnownNonNegative(A2)) { 2023 // a1 <= 0 && a2 >= 0 2024 if (N1 && N2) { 2025 // make sure that a1*N1 - a2*N2 <= c2 - c1 2026 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2027 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2028 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); 2029 DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); 2030 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1_A2N2, C2_C1)) { 2031 ++SymbolicRDIVindependence; 2032 return true; 2033 } 2034 } 2035 // make sure that c2 - c1 <= 0 2036 if (SE->isKnownPositive(C2_C1)) { 2037 ++SymbolicRDIVindependence; 2038 return true; 2039 } 2040 } 2041 else if (SE->isKnownNonPositive(A2)) { 2042 // a1 <= 0 && a2 <= 0 2043 if (N1) { 2044 // make sure that a1*N1 <= c2 - c1 2045 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2046 DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); 2047 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1, C2_C1)) { 2048 ++SymbolicRDIVindependence; 2049 return true; 2050 } 2051 } 2052 if (N2) { 2053 // make sure that c2 - c1 <= -a2*N2, or c1 - c2 >= a2*N2 2054 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2055 DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); 2056 if (isKnownPredicate(CmpInst::ICMP_SLT, C1_C2, A2N2)) { 2057 ++SymbolicRDIVindependence; 2058 return true; 2059 } 2060 } 2061 } 2062 } 2063 return false; 2064 } 2065 2066 2067 // testSIV - 2068 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 - a2*i] 2069 // where i is an induction variable, c1 and c2 are loop invariant, and a1 and 2070 // a2 are constant, we attack it with an SIV test. While they can all be 2071 // solved with the Exact SIV test, it's worthwhile to use simpler tests when 2072 // they apply; they're cheaper and sometimes more precise. 2073 // 2074 // Return true if dependence disproved. 2075 bool DependenceAnalysis::testSIV(const SCEV *Src, 2076 const SCEV *Dst, 2077 unsigned &Level, 2078 FullDependence &Result, 2079 Constraint &NewConstraint, 2080 const SCEV *&SplitIter) const { 2081 DEBUG(dbgs() << " src = " << *Src << "\n"); 2082 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2083 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); 2084 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); 2085 if (SrcAddRec && DstAddRec) { 2086 const SCEV *SrcConst = SrcAddRec->getStart(); 2087 const SCEV *DstConst = DstAddRec->getStart(); 2088 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2089 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); 2090 const Loop *CurLoop = SrcAddRec->getLoop(); 2091 assert(CurLoop == DstAddRec->getLoop() && 2092 "both loops in SIV should be same"); 2093 Level = mapSrcLoop(CurLoop); 2094 bool disproven; 2095 if (SrcCoeff == DstCoeff) 2096 disproven = strongSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2097 Level, Result, NewConstraint); 2098 else if (SrcCoeff == SE->getNegativeSCEV(DstCoeff)) 2099 disproven = weakCrossingSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2100 Level, Result, NewConstraint, SplitIter); 2101 else 2102 disproven = exactSIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, 2103 Level, Result, NewConstraint); 2104 return disproven || 2105 gcdMIVtest(Src, Dst, Result) || 2106 symbolicRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, CurLoop); 2107 } 2108 if (SrcAddRec) { 2109 const SCEV *SrcConst = SrcAddRec->getStart(); 2110 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2111 const SCEV *DstConst = Dst; 2112 const Loop *CurLoop = SrcAddRec->getLoop(); 2113 Level = mapSrcLoop(CurLoop); 2114 return weakZeroDstSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2115 Level, Result, NewConstraint) || 2116 gcdMIVtest(Src, Dst, Result); 2117 } 2118 if (DstAddRec) { 2119 const SCEV *DstConst = DstAddRec->getStart(); 2120 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); 2121 const SCEV *SrcConst = Src; 2122 const Loop *CurLoop = DstAddRec->getLoop(); 2123 Level = mapDstLoop(CurLoop); 2124 return weakZeroSrcSIVtest(DstCoeff, SrcConst, DstConst, 2125 CurLoop, Level, Result, NewConstraint) || 2126 gcdMIVtest(Src, Dst, Result); 2127 } 2128 llvm_unreachable("SIV test expected at least one AddRec"); 2129 return false; 2130 } 2131 2132 2133 // testRDIV - 2134 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] 2135 // where i and j are induction variables, c1 and c2 are loop invariant, 2136 // and a1 and a2 are constant, we can solve it exactly with an easy adaptation 2137 // of the Exact SIV test, the Restricted Double Index Variable (RDIV) test. 2138 // It doesn't make sense to talk about distance or direction in this case, 2139 // so there's no point in making special versions of the Strong SIV test or 2140 // the Weak-crossing SIV test. 2141 // 2142 // With minor algebra, this test can also be used for things like 2143 // [c1 + a1*i + a2*j][c2]. 2144 // 2145 // Return true if dependence disproved. 2146 bool DependenceAnalysis::testRDIV(const SCEV *Src, 2147 const SCEV *Dst, 2148 FullDependence &Result) const { 2149 // we have 3 possible situations here: 2150 // 1) [a*i + b] and [c*j + d] 2151 // 2) [a*i + c*j + b] and [d] 2152 // 3) [b] and [a*i + c*j + d] 2153 // We need to find what we've got and get organized 2154 2155 const SCEV *SrcConst, *DstConst; 2156 const SCEV *SrcCoeff, *DstCoeff; 2157 const Loop *SrcLoop, *DstLoop; 2158 2159 DEBUG(dbgs() << " src = " << *Src << "\n"); 2160 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2161 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); 2162 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); 2163 if (SrcAddRec && DstAddRec) { 2164 SrcConst = SrcAddRec->getStart(); 2165 SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2166 SrcLoop = SrcAddRec->getLoop(); 2167 DstConst = DstAddRec->getStart(); 2168 DstCoeff = DstAddRec->getStepRecurrence(*SE); 2169 DstLoop = DstAddRec->getLoop(); 2170 } 2171 else if (SrcAddRec) { 2172 if (const SCEVAddRecExpr *tmpAddRec = 2173 dyn_cast<SCEVAddRecExpr>(SrcAddRec->getStart())) { 2174 SrcConst = tmpAddRec->getStart(); 2175 SrcCoeff = tmpAddRec->getStepRecurrence(*SE); 2176 SrcLoop = tmpAddRec->getLoop(); 2177 DstConst = Dst; 2178 DstCoeff = SE->getNegativeSCEV(SrcAddRec->getStepRecurrence(*SE)); 2179 DstLoop = SrcAddRec->getLoop(); 2180 } 2181 else 2182 llvm_unreachable("RDIV reached by surprising SCEVs"); 2183 } 2184 else if (DstAddRec) { 2185 if (const SCEVAddRecExpr *tmpAddRec = 2186 dyn_cast<SCEVAddRecExpr>(DstAddRec->getStart())) { 2187 DstConst = tmpAddRec->getStart(); 2188 DstCoeff = tmpAddRec->getStepRecurrence(*SE); 2189 DstLoop = tmpAddRec->getLoop(); 2190 SrcConst = Src; 2191 SrcCoeff = SE->getNegativeSCEV(DstAddRec->getStepRecurrence(*SE)); 2192 SrcLoop = DstAddRec->getLoop(); 2193 } 2194 else 2195 llvm_unreachable("RDIV reached by surprising SCEVs"); 2196 } 2197 else 2198 llvm_unreachable("RDIV expected at least one AddRec"); 2199 return exactRDIVtest(SrcCoeff, DstCoeff, 2200 SrcConst, DstConst, 2201 SrcLoop, DstLoop, 2202 Result) || 2203 gcdMIVtest(Src, Dst, Result) || 2204 symbolicRDIVtest(SrcCoeff, DstCoeff, 2205 SrcConst, DstConst, 2206 SrcLoop, DstLoop); 2207 } 2208 2209 2210 // Tests the single-subscript MIV pair (Src and Dst) for dependence. 2211 // Return true if dependence disproved. 2212 // Can sometimes refine direction vectors. 2213 bool DependenceAnalysis::testMIV(const SCEV *Src, 2214 const SCEV *Dst, 2215 const SmallBitVector &Loops, 2216 FullDependence &Result) const { 2217 DEBUG(dbgs() << " src = " << *Src << "\n"); 2218 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2219 Result.Consistent = false; 2220 return gcdMIVtest(Src, Dst, Result) || 2221 banerjeeMIVtest(Src, Dst, Loops, Result); 2222 } 2223 2224 2225 // Given a product, e.g., 10*X*Y, returns the first constant operand, 2226 // in this case 10. If there is no constant part, returns NULL. 2227 static 2228 const SCEVConstant *getConstantPart(const SCEVMulExpr *Product) { 2229 for (unsigned Op = 0, Ops = Product->getNumOperands(); Op < Ops; Op++) { 2230 if (const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Product->getOperand(Op))) 2231 return Constant; 2232 } 2233 return nullptr; 2234 } 2235 2236 2237 //===----------------------------------------------------------------------===// 2238 // gcdMIVtest - 2239 // Tests an MIV subscript pair for dependence. 2240 // Returns true if any possible dependence is disproved. 2241 // Marks the result as inconsistent. 2242 // Can sometimes disprove the equal direction for 1 or more loops, 2243 // as discussed in Michael Wolfe's book, 2244 // High Performance Compilers for Parallel Computing, page 235. 2245 // 2246 // We spend some effort (code!) to handle cases like 2247 // [10*i + 5*N*j + 15*M + 6], where i and j are induction variables, 2248 // but M and N are just loop-invariant variables. 2249 // This should help us handle linearized subscripts; 2250 // also makes this test a useful backup to the various SIV tests. 2251 // 2252 // It occurs to me that the presence of loop-invariant variables 2253 // changes the nature of the test from "greatest common divisor" 2254 // to "a common divisor". 2255 bool DependenceAnalysis::gcdMIVtest(const SCEV *Src, 2256 const SCEV *Dst, 2257 FullDependence &Result) const { 2258 DEBUG(dbgs() << "starting gcd\n"); 2259 ++GCDapplications; 2260 unsigned BitWidth = SE->getTypeSizeInBits(Src->getType()); 2261 APInt RunningGCD = APInt::getNullValue(BitWidth); 2262 2263 // Examine Src coefficients. 2264 // Compute running GCD and record source constant. 2265 // Because we're looking for the constant at the end of the chain, 2266 // we can't quit the loop just because the GCD == 1. 2267 const SCEV *Coefficients = Src; 2268 while (const SCEVAddRecExpr *AddRec = 2269 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2270 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2271 const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Coeff); 2272 if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff)) 2273 // If the coefficient is the product of a constant and other stuff, 2274 // we can use the constant in the GCD computation. 2275 Constant = getConstantPart(Product); 2276 if (!Constant) 2277 return false; 2278 APInt ConstCoeff = Constant->getValue()->getValue(); 2279 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2280 Coefficients = AddRec->getStart(); 2281 } 2282 const SCEV *SrcConst = Coefficients; 2283 2284 // Examine Dst coefficients. 2285 // Compute running GCD and record destination constant. 2286 // Because we're looking for the constant at the end of the chain, 2287 // we can't quit the loop just because the GCD == 1. 2288 Coefficients = Dst; 2289 while (const SCEVAddRecExpr *AddRec = 2290 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2291 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2292 const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Coeff); 2293 if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff)) 2294 // If the coefficient is the product of a constant and other stuff, 2295 // we can use the constant in the GCD computation. 2296 Constant = getConstantPart(Product); 2297 if (!Constant) 2298 return false; 2299 APInt ConstCoeff = Constant->getValue()->getValue(); 2300 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2301 Coefficients = AddRec->getStart(); 2302 } 2303 const SCEV *DstConst = Coefficients; 2304 2305 APInt ExtraGCD = APInt::getNullValue(BitWidth); 2306 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 2307 DEBUG(dbgs() << " Delta = " << *Delta << "\n"); 2308 const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Delta); 2309 if (const SCEVAddExpr *Sum = dyn_cast<SCEVAddExpr>(Delta)) { 2310 // If Delta is a sum of products, we may be able to make further progress. 2311 for (unsigned Op = 0, Ops = Sum->getNumOperands(); Op < Ops; Op++) { 2312 const SCEV *Operand = Sum->getOperand(Op); 2313 if (isa<SCEVConstant>(Operand)) { 2314 assert(!Constant && "Surprised to find multiple constants"); 2315 Constant = cast<SCEVConstant>(Operand); 2316 } 2317 else if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Operand)) { 2318 // Search for constant operand to participate in GCD; 2319 // If none found; return false. 2320 const SCEVConstant *ConstOp = getConstantPart(Product); 2321 if (!ConstOp) 2322 return false; 2323 APInt ConstOpValue = ConstOp->getValue()->getValue(); 2324 ExtraGCD = APIntOps::GreatestCommonDivisor(ExtraGCD, 2325 ConstOpValue.abs()); 2326 } 2327 else 2328 return false; 2329 } 2330 } 2331 if (!Constant) 2332 return false; 2333 APInt ConstDelta = cast<SCEVConstant>(Constant)->getValue()->getValue(); 2334 DEBUG(dbgs() << " ConstDelta = " << ConstDelta << "\n"); 2335 if (ConstDelta == 0) 2336 return false; 2337 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ExtraGCD); 2338 DEBUG(dbgs() << " RunningGCD = " << RunningGCD << "\n"); 2339 APInt Remainder = ConstDelta.srem(RunningGCD); 2340 if (Remainder != 0) { 2341 ++GCDindependence; 2342 return true; 2343 } 2344 2345 // Try to disprove equal directions. 2346 // For example, given a subscript pair [3*i + 2*j] and [i' + 2*j' - 1], 2347 // the code above can't disprove the dependence because the GCD = 1. 2348 // So we consider what happen if i = i' and what happens if j = j'. 2349 // If i = i', we can simplify the subscript to [2*i + 2*j] and [2*j' - 1], 2350 // which is infeasible, so we can disallow the = direction for the i level. 2351 // Setting j = j' doesn't help matters, so we end up with a direction vector 2352 // of [<>, *] 2353 // 2354 // Given A[5*i + 10*j*M + 9*M*N] and A[15*i + 20*j*M - 21*N*M + 5], 2355 // we need to remember that the constant part is 5 and the RunningGCD should 2356 // be initialized to ExtraGCD = 30. 2357 DEBUG(dbgs() << " ExtraGCD = " << ExtraGCD << '\n'); 2358 2359 bool Improved = false; 2360 Coefficients = Src; 2361 while (const SCEVAddRecExpr *AddRec = 2362 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2363 Coefficients = AddRec->getStart(); 2364 const Loop *CurLoop = AddRec->getLoop(); 2365 RunningGCD = ExtraGCD; 2366 const SCEV *SrcCoeff = AddRec->getStepRecurrence(*SE); 2367 const SCEV *DstCoeff = SE->getMinusSCEV(SrcCoeff, SrcCoeff); 2368 const SCEV *Inner = Src; 2369 while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { 2370 AddRec = cast<SCEVAddRecExpr>(Inner); 2371 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2372 if (CurLoop == AddRec->getLoop()) 2373 ; // SrcCoeff == Coeff 2374 else { 2375 if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff)) 2376 // If the coefficient is the product of a constant and other stuff, 2377 // we can use the constant in the GCD computation. 2378 Constant = getConstantPart(Product); 2379 else 2380 Constant = cast<SCEVConstant>(Coeff); 2381 APInt ConstCoeff = Constant->getValue()->getValue(); 2382 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2383 } 2384 Inner = AddRec->getStart(); 2385 } 2386 Inner = Dst; 2387 while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { 2388 AddRec = cast<SCEVAddRecExpr>(Inner); 2389 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2390 if (CurLoop == AddRec->getLoop()) 2391 DstCoeff = Coeff; 2392 else { 2393 if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff)) 2394 // If the coefficient is the product of a constant and other stuff, 2395 // we can use the constant in the GCD computation. 2396 Constant = getConstantPart(Product); 2397 else 2398 Constant = cast<SCEVConstant>(Coeff); 2399 APInt ConstCoeff = Constant->getValue()->getValue(); 2400 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2401 } 2402 Inner = AddRec->getStart(); 2403 } 2404 Delta = SE->getMinusSCEV(SrcCoeff, DstCoeff); 2405 if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Delta)) 2406 // If the coefficient is the product of a constant and other stuff, 2407 // we can use the constant in the GCD computation. 2408 Constant = getConstantPart(Product); 2409 else if (isa<SCEVConstant>(Delta)) 2410 Constant = cast<SCEVConstant>(Delta); 2411 else { 2412 // The difference of the two coefficients might not be a product 2413 // or constant, in which case we give up on this direction. 2414 continue; 2415 } 2416 APInt ConstCoeff = Constant->getValue()->getValue(); 2417 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2418 DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n"); 2419 if (RunningGCD != 0) { 2420 Remainder = ConstDelta.srem(RunningGCD); 2421 DEBUG(dbgs() << "\tRemainder = " << Remainder << "\n"); 2422 if (Remainder != 0) { 2423 unsigned Level = mapSrcLoop(CurLoop); 2424 Result.DV[Level - 1].Direction &= unsigned(~Dependence::DVEntry::EQ); 2425 Improved = true; 2426 } 2427 } 2428 } 2429 if (Improved) 2430 ++GCDsuccesses; 2431 DEBUG(dbgs() << "all done\n"); 2432 return false; 2433 } 2434 2435 2436 //===----------------------------------------------------------------------===// 2437 // banerjeeMIVtest - 2438 // Use Banerjee's Inequalities to test an MIV subscript pair. 2439 // (Wolfe, in the race-car book, calls this the Extreme Value Test.) 2440 // Generally follows the discussion in Section 2.5.2 of 2441 // 2442 // Optimizing Supercompilers for Supercomputers 2443 // Michael Wolfe 2444 // 2445 // The inequalities given on page 25 are simplified in that loops are 2446 // normalized so that the lower bound is always 0 and the stride is always 1. 2447 // For example, Wolfe gives 2448 // 2449 // LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2450 // 2451 // where A_k is the coefficient of the kth index in the source subscript, 2452 // B_k is the coefficient of the kth index in the destination subscript, 2453 // U_k is the upper bound of the kth index, L_k is the lower bound of the Kth 2454 // index, and N_k is the stride of the kth index. Since all loops are normalized 2455 // by the SCEV package, N_k = 1 and L_k = 0, allowing us to simplify the 2456 // equation to 2457 // 2458 // LB^<_k = (A^-_k - B_k)^- (U_k - 0 - 1) + (A_k - B_k)0 - B_k 1 2459 // = (A^-_k - B_k)^- (U_k - 1) - B_k 2460 // 2461 // Similar simplifications are possible for the other equations. 2462 // 2463 // When we can't determine the number of iterations for a loop, 2464 // we use NULL as an indicator for the worst case, infinity. 2465 // When computing the upper bound, NULL denotes +inf; 2466 // for the lower bound, NULL denotes -inf. 2467 // 2468 // Return true if dependence disproved. 2469 bool DependenceAnalysis::banerjeeMIVtest(const SCEV *Src, 2470 const SCEV *Dst, 2471 const SmallBitVector &Loops, 2472 FullDependence &Result) const { 2473 DEBUG(dbgs() << "starting Banerjee\n"); 2474 ++BanerjeeApplications; 2475 DEBUG(dbgs() << " Src = " << *Src << '\n'); 2476 const SCEV *A0; 2477 CoefficientInfo *A = collectCoeffInfo(Src, true, A0); 2478 DEBUG(dbgs() << " Dst = " << *Dst << '\n'); 2479 const SCEV *B0; 2480 CoefficientInfo *B = collectCoeffInfo(Dst, false, B0); 2481 BoundInfo *Bound = new BoundInfo[MaxLevels + 1]; 2482 const SCEV *Delta = SE->getMinusSCEV(B0, A0); 2483 DEBUG(dbgs() << "\tDelta = " << *Delta << '\n'); 2484 2485 // Compute bounds for all the * directions. 2486 DEBUG(dbgs() << "\tBounds[*]\n"); 2487 for (unsigned K = 1; K <= MaxLevels; ++K) { 2488 Bound[K].Iterations = A[K].Iterations ? A[K].Iterations : B[K].Iterations; 2489 Bound[K].Direction = Dependence::DVEntry::ALL; 2490 Bound[K].DirSet = Dependence::DVEntry::NONE; 2491 findBoundsALL(A, B, Bound, K); 2492 #ifndef NDEBUG 2493 DEBUG(dbgs() << "\t " << K << '\t'); 2494 if (Bound[K].Lower[Dependence::DVEntry::ALL]) 2495 DEBUG(dbgs() << *Bound[K].Lower[Dependence::DVEntry::ALL] << '\t'); 2496 else 2497 DEBUG(dbgs() << "-inf\t"); 2498 if (Bound[K].Upper[Dependence::DVEntry::ALL]) 2499 DEBUG(dbgs() << *Bound[K].Upper[Dependence::DVEntry::ALL] << '\n'); 2500 else 2501 DEBUG(dbgs() << "+inf\n"); 2502 #endif 2503 } 2504 2505 // Test the *, *, *, ... case. 2506 bool Disproved = false; 2507 if (testBounds(Dependence::DVEntry::ALL, 0, Bound, Delta)) { 2508 // Explore the direction vector hierarchy. 2509 unsigned DepthExpanded = 0; 2510 unsigned NewDeps = exploreDirections(1, A, B, Bound, 2511 Loops, DepthExpanded, Delta); 2512 if (NewDeps > 0) { 2513 bool Improved = false; 2514 for (unsigned K = 1; K <= CommonLevels; ++K) { 2515 if (Loops[K]) { 2516 unsigned Old = Result.DV[K - 1].Direction; 2517 Result.DV[K - 1].Direction = Old & Bound[K].DirSet; 2518 Improved |= Old != Result.DV[K - 1].Direction; 2519 if (!Result.DV[K - 1].Direction) { 2520 Improved = false; 2521 Disproved = true; 2522 break; 2523 } 2524 } 2525 } 2526 if (Improved) 2527 ++BanerjeeSuccesses; 2528 } 2529 else { 2530 ++BanerjeeIndependence; 2531 Disproved = true; 2532 } 2533 } 2534 else { 2535 ++BanerjeeIndependence; 2536 Disproved = true; 2537 } 2538 delete [] Bound; 2539 delete [] A; 2540 delete [] B; 2541 return Disproved; 2542 } 2543 2544 2545 // Hierarchically expands the direction vector 2546 // search space, combining the directions of discovered dependences 2547 // in the DirSet field of Bound. Returns the number of distinct 2548 // dependences discovered. If the dependence is disproved, 2549 // it will return 0. 2550 unsigned DependenceAnalysis::exploreDirections(unsigned Level, 2551 CoefficientInfo *A, 2552 CoefficientInfo *B, 2553 BoundInfo *Bound, 2554 const SmallBitVector &Loops, 2555 unsigned &DepthExpanded, 2556 const SCEV *Delta) const { 2557 if (Level > CommonLevels) { 2558 // record result 2559 DEBUG(dbgs() << "\t["); 2560 for (unsigned K = 1; K <= CommonLevels; ++K) { 2561 if (Loops[K]) { 2562 Bound[K].DirSet |= Bound[K].Direction; 2563 #ifndef NDEBUG 2564 switch (Bound[K].Direction) { 2565 case Dependence::DVEntry::LT: 2566 DEBUG(dbgs() << " <"); 2567 break; 2568 case Dependence::DVEntry::EQ: 2569 DEBUG(dbgs() << " ="); 2570 break; 2571 case Dependence::DVEntry::GT: 2572 DEBUG(dbgs() << " >"); 2573 break; 2574 case Dependence::DVEntry::ALL: 2575 DEBUG(dbgs() << " *"); 2576 break; 2577 default: 2578 llvm_unreachable("unexpected Bound[K].Direction"); 2579 } 2580 #endif 2581 } 2582 } 2583 DEBUG(dbgs() << " ]\n"); 2584 return 1; 2585 } 2586 if (Loops[Level]) { 2587 if (Level > DepthExpanded) { 2588 DepthExpanded = Level; 2589 // compute bounds for <, =, > at current level 2590 findBoundsLT(A, B, Bound, Level); 2591 findBoundsGT(A, B, Bound, Level); 2592 findBoundsEQ(A, B, Bound, Level); 2593 #ifndef NDEBUG 2594 DEBUG(dbgs() << "\tBound for level = " << Level << '\n'); 2595 DEBUG(dbgs() << "\t <\t"); 2596 if (Bound[Level].Lower[Dependence::DVEntry::LT]) 2597 DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::LT] << '\t'); 2598 else 2599 DEBUG(dbgs() << "-inf\t"); 2600 if (Bound[Level].Upper[Dependence::DVEntry::LT]) 2601 DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::LT] << '\n'); 2602 else 2603 DEBUG(dbgs() << "+inf\n"); 2604 DEBUG(dbgs() << "\t =\t"); 2605 if (Bound[Level].Lower[Dependence::DVEntry::EQ]) 2606 DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::EQ] << '\t'); 2607 else 2608 DEBUG(dbgs() << "-inf\t"); 2609 if (Bound[Level].Upper[Dependence::DVEntry::EQ]) 2610 DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::EQ] << '\n'); 2611 else 2612 DEBUG(dbgs() << "+inf\n"); 2613 DEBUG(dbgs() << "\t >\t"); 2614 if (Bound[Level].Lower[Dependence::DVEntry::GT]) 2615 DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::GT] << '\t'); 2616 else 2617 DEBUG(dbgs() << "-inf\t"); 2618 if (Bound[Level].Upper[Dependence::DVEntry::GT]) 2619 DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::GT] << '\n'); 2620 else 2621 DEBUG(dbgs() << "+inf\n"); 2622 #endif 2623 } 2624 2625 unsigned NewDeps = 0; 2626 2627 // test bounds for <, *, *, ... 2628 if (testBounds(Dependence::DVEntry::LT, Level, Bound, Delta)) 2629 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2630 Loops, DepthExpanded, Delta); 2631 2632 // Test bounds for =, *, *, ... 2633 if (testBounds(Dependence::DVEntry::EQ, Level, Bound, Delta)) 2634 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2635 Loops, DepthExpanded, Delta); 2636 2637 // test bounds for >, *, *, ... 2638 if (testBounds(Dependence::DVEntry::GT, Level, Bound, Delta)) 2639 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2640 Loops, DepthExpanded, Delta); 2641 2642 Bound[Level].Direction = Dependence::DVEntry::ALL; 2643 return NewDeps; 2644 } 2645 else 2646 return exploreDirections(Level + 1, A, B, Bound, Loops, DepthExpanded, Delta); 2647 } 2648 2649 2650 // Returns true iff the current bounds are plausible. 2651 bool DependenceAnalysis::testBounds(unsigned char DirKind, 2652 unsigned Level, 2653 BoundInfo *Bound, 2654 const SCEV *Delta) const { 2655 Bound[Level].Direction = DirKind; 2656 if (const SCEV *LowerBound = getLowerBound(Bound)) 2657 if (isKnownPredicate(CmpInst::ICMP_SGT, LowerBound, Delta)) 2658 return false; 2659 if (const SCEV *UpperBound = getUpperBound(Bound)) 2660 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, UpperBound)) 2661 return false; 2662 return true; 2663 } 2664 2665 2666 // Computes the upper and lower bounds for level K 2667 // using the * direction. Records them in Bound. 2668 // Wolfe gives the equations 2669 // 2670 // LB^*_k = (A^-_k - B^+_k)(U_k - L_k) + (A_k - B_k)L_k 2671 // UB^*_k = (A^+_k - B^-_k)(U_k - L_k) + (A_k - B_k)L_k 2672 // 2673 // Since we normalize loops, we can simplify these equations to 2674 // 2675 // LB^*_k = (A^-_k - B^+_k)U_k 2676 // UB^*_k = (A^+_k - B^-_k)U_k 2677 // 2678 // We must be careful to handle the case where the upper bound is unknown. 2679 // Note that the lower bound is always <= 0 2680 // and the upper bound is always >= 0. 2681 void DependenceAnalysis::findBoundsALL(CoefficientInfo *A, 2682 CoefficientInfo *B, 2683 BoundInfo *Bound, 2684 unsigned K) const { 2685 Bound[K].Lower[Dependence::DVEntry::ALL] = nullptr; // Default value = -infinity. 2686 Bound[K].Upper[Dependence::DVEntry::ALL] = nullptr; // Default value = +infinity. 2687 if (Bound[K].Iterations) { 2688 Bound[K].Lower[Dependence::DVEntry::ALL] = 2689 SE->getMulExpr(SE->getMinusSCEV(A[K].NegPart, B[K].PosPart), 2690 Bound[K].Iterations); 2691 Bound[K].Upper[Dependence::DVEntry::ALL] = 2692 SE->getMulExpr(SE->getMinusSCEV(A[K].PosPart, B[K].NegPart), 2693 Bound[K].Iterations); 2694 } 2695 else { 2696 // If the difference is 0, we won't need to know the number of iterations. 2697 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].NegPart, B[K].PosPart)) 2698 Bound[K].Lower[Dependence::DVEntry::ALL] = 2699 SE->getConstant(A[K].Coeff->getType(), 0); 2700 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].PosPart, B[K].NegPart)) 2701 Bound[K].Upper[Dependence::DVEntry::ALL] = 2702 SE->getConstant(A[K].Coeff->getType(), 0); 2703 } 2704 } 2705 2706 2707 // Computes the upper and lower bounds for level K 2708 // using the = direction. Records them in Bound. 2709 // Wolfe gives the equations 2710 // 2711 // LB^=_k = (A_k - B_k)^- (U_k - L_k) + (A_k - B_k)L_k 2712 // UB^=_k = (A_k - B_k)^+ (U_k - L_k) + (A_k - B_k)L_k 2713 // 2714 // Since we normalize loops, we can simplify these equations to 2715 // 2716 // LB^=_k = (A_k - B_k)^- U_k 2717 // UB^=_k = (A_k - B_k)^+ U_k 2718 // 2719 // We must be careful to handle the case where the upper bound is unknown. 2720 // Note that the lower bound is always <= 0 2721 // and the upper bound is always >= 0. 2722 void DependenceAnalysis::findBoundsEQ(CoefficientInfo *A, 2723 CoefficientInfo *B, 2724 BoundInfo *Bound, 2725 unsigned K) const { 2726 Bound[K].Lower[Dependence::DVEntry::EQ] = nullptr; // Default value = -infinity. 2727 Bound[K].Upper[Dependence::DVEntry::EQ] = nullptr; // Default value = +infinity. 2728 if (Bound[K].Iterations) { 2729 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); 2730 const SCEV *NegativePart = getNegativePart(Delta); 2731 Bound[K].Lower[Dependence::DVEntry::EQ] = 2732 SE->getMulExpr(NegativePart, Bound[K].Iterations); 2733 const SCEV *PositivePart = getPositivePart(Delta); 2734 Bound[K].Upper[Dependence::DVEntry::EQ] = 2735 SE->getMulExpr(PositivePart, Bound[K].Iterations); 2736 } 2737 else { 2738 // If the positive/negative part of the difference is 0, 2739 // we won't need to know the number of iterations. 2740 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); 2741 const SCEV *NegativePart = getNegativePart(Delta); 2742 if (NegativePart->isZero()) 2743 Bound[K].Lower[Dependence::DVEntry::EQ] = NegativePart; // Zero 2744 const SCEV *PositivePart = getPositivePart(Delta); 2745 if (PositivePart->isZero()) 2746 Bound[K].Upper[Dependence::DVEntry::EQ] = PositivePart; // Zero 2747 } 2748 } 2749 2750 2751 // Computes the upper and lower bounds for level K 2752 // using the < direction. Records them in Bound. 2753 // Wolfe gives the equations 2754 // 2755 // LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2756 // UB^<_k = (A^+_k - B_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2757 // 2758 // Since we normalize loops, we can simplify these equations to 2759 // 2760 // LB^<_k = (A^-_k - B_k)^- (U_k - 1) - B_k 2761 // UB^<_k = (A^+_k - B_k)^+ (U_k - 1) - B_k 2762 // 2763 // We must be careful to handle the case where the upper bound is unknown. 2764 void DependenceAnalysis::findBoundsLT(CoefficientInfo *A, 2765 CoefficientInfo *B, 2766 BoundInfo *Bound, 2767 unsigned K) const { 2768 Bound[K].Lower[Dependence::DVEntry::LT] = nullptr; // Default value = -infinity. 2769 Bound[K].Upper[Dependence::DVEntry::LT] = nullptr; // Default value = +infinity. 2770 if (Bound[K].Iterations) { 2771 const SCEV *Iter_1 = 2772 SE->getMinusSCEV(Bound[K].Iterations, 2773 SE->getConstant(Bound[K].Iterations->getType(), 1)); 2774 const SCEV *NegPart = 2775 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); 2776 Bound[K].Lower[Dependence::DVEntry::LT] = 2777 SE->getMinusSCEV(SE->getMulExpr(NegPart, Iter_1), B[K].Coeff); 2778 const SCEV *PosPart = 2779 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); 2780 Bound[K].Upper[Dependence::DVEntry::LT] = 2781 SE->getMinusSCEV(SE->getMulExpr(PosPart, Iter_1), B[K].Coeff); 2782 } 2783 else { 2784 // If the positive/negative part of the difference is 0, 2785 // we won't need to know the number of iterations. 2786 const SCEV *NegPart = 2787 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); 2788 if (NegPart->isZero()) 2789 Bound[K].Lower[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); 2790 const SCEV *PosPart = 2791 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); 2792 if (PosPart->isZero()) 2793 Bound[K].Upper[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); 2794 } 2795 } 2796 2797 2798 // Computes the upper and lower bounds for level K 2799 // using the > direction. Records them in Bound. 2800 // Wolfe gives the equations 2801 // 2802 // LB^>_k = (A_k - B^+_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k 2803 // UB^>_k = (A_k - B^-_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k 2804 // 2805 // Since we normalize loops, we can simplify these equations to 2806 // 2807 // LB^>_k = (A_k - B^+_k)^- (U_k - 1) + A_k 2808 // UB^>_k = (A_k - B^-_k)^+ (U_k - 1) + A_k 2809 // 2810 // We must be careful to handle the case where the upper bound is unknown. 2811 void DependenceAnalysis::findBoundsGT(CoefficientInfo *A, 2812 CoefficientInfo *B, 2813 BoundInfo *Bound, 2814 unsigned K) const { 2815 Bound[K].Lower[Dependence::DVEntry::GT] = nullptr; // Default value = -infinity. 2816 Bound[K].Upper[Dependence::DVEntry::GT] = nullptr; // Default value = +infinity. 2817 if (Bound[K].Iterations) { 2818 const SCEV *Iter_1 = 2819 SE->getMinusSCEV(Bound[K].Iterations, 2820 SE->getConstant(Bound[K].Iterations->getType(), 1)); 2821 const SCEV *NegPart = 2822 getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); 2823 Bound[K].Lower[Dependence::DVEntry::GT] = 2824 SE->getAddExpr(SE->getMulExpr(NegPart, Iter_1), A[K].Coeff); 2825 const SCEV *PosPart = 2826 getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); 2827 Bound[K].Upper[Dependence::DVEntry::GT] = 2828 SE->getAddExpr(SE->getMulExpr(PosPart, Iter_1), A[K].Coeff); 2829 } 2830 else { 2831 // If the positive/negative part of the difference is 0, 2832 // we won't need to know the number of iterations. 2833 const SCEV *NegPart = getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); 2834 if (NegPart->isZero()) 2835 Bound[K].Lower[Dependence::DVEntry::GT] = A[K].Coeff; 2836 const SCEV *PosPart = getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); 2837 if (PosPart->isZero()) 2838 Bound[K].Upper[Dependence::DVEntry::GT] = A[K].Coeff; 2839 } 2840 } 2841 2842 2843 // X^+ = max(X, 0) 2844 const SCEV *DependenceAnalysis::getPositivePart(const SCEV *X) const { 2845 return SE->getSMaxExpr(X, SE->getConstant(X->getType(), 0)); 2846 } 2847 2848 2849 // X^- = min(X, 0) 2850 const SCEV *DependenceAnalysis::getNegativePart(const SCEV *X) const { 2851 return SE->getSMinExpr(X, SE->getConstant(X->getType(), 0)); 2852 } 2853 2854 2855 // Walks through the subscript, 2856 // collecting each coefficient, the associated loop bounds, 2857 // and recording its positive and negative parts for later use. 2858 DependenceAnalysis::CoefficientInfo * 2859 DependenceAnalysis::collectCoeffInfo(const SCEV *Subscript, 2860 bool SrcFlag, 2861 const SCEV *&Constant) const { 2862 const SCEV *Zero = SE->getConstant(Subscript->getType(), 0); 2863 CoefficientInfo *CI = new CoefficientInfo[MaxLevels + 1]; 2864 for (unsigned K = 1; K <= MaxLevels; ++K) { 2865 CI[K].Coeff = Zero; 2866 CI[K].PosPart = Zero; 2867 CI[K].NegPart = Zero; 2868 CI[K].Iterations = nullptr; 2869 } 2870 while (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Subscript)) { 2871 const Loop *L = AddRec->getLoop(); 2872 unsigned K = SrcFlag ? mapSrcLoop(L) : mapDstLoop(L); 2873 CI[K].Coeff = AddRec->getStepRecurrence(*SE); 2874 CI[K].PosPart = getPositivePart(CI[K].Coeff); 2875 CI[K].NegPart = getNegativePart(CI[K].Coeff); 2876 CI[K].Iterations = collectUpperBound(L, Subscript->getType()); 2877 Subscript = AddRec->getStart(); 2878 } 2879 Constant = Subscript; 2880 #ifndef NDEBUG 2881 DEBUG(dbgs() << "\tCoefficient Info\n"); 2882 for (unsigned K = 1; K <= MaxLevels; ++K) { 2883 DEBUG(dbgs() << "\t " << K << "\t" << *CI[K].Coeff); 2884 DEBUG(dbgs() << "\tPos Part = "); 2885 DEBUG(dbgs() << *CI[K].PosPart); 2886 DEBUG(dbgs() << "\tNeg Part = "); 2887 DEBUG(dbgs() << *CI[K].NegPart); 2888 DEBUG(dbgs() << "\tUpper Bound = "); 2889 if (CI[K].Iterations) 2890 DEBUG(dbgs() << *CI[K].Iterations); 2891 else 2892 DEBUG(dbgs() << "+inf"); 2893 DEBUG(dbgs() << '\n'); 2894 } 2895 DEBUG(dbgs() << "\t Constant = " << *Subscript << '\n'); 2896 #endif 2897 return CI; 2898 } 2899 2900 2901 // Looks through all the bounds info and 2902 // computes the lower bound given the current direction settings 2903 // at each level. If the lower bound for any level is -inf, 2904 // the result is -inf. 2905 const SCEV *DependenceAnalysis::getLowerBound(BoundInfo *Bound) const { 2906 const SCEV *Sum = Bound[1].Lower[Bound[1].Direction]; 2907 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { 2908 if (Bound[K].Lower[Bound[K].Direction]) 2909 Sum = SE->getAddExpr(Sum, Bound[K].Lower[Bound[K].Direction]); 2910 else 2911 Sum = nullptr; 2912 } 2913 return Sum; 2914 } 2915 2916 2917 // Looks through all the bounds info and 2918 // computes the upper bound given the current direction settings 2919 // at each level. If the upper bound at any level is +inf, 2920 // the result is +inf. 2921 const SCEV *DependenceAnalysis::getUpperBound(BoundInfo *Bound) const { 2922 const SCEV *Sum = Bound[1].Upper[Bound[1].Direction]; 2923 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { 2924 if (Bound[K].Upper[Bound[K].Direction]) 2925 Sum = SE->getAddExpr(Sum, Bound[K].Upper[Bound[K].Direction]); 2926 else 2927 Sum = nullptr; 2928 } 2929 return Sum; 2930 } 2931 2932 2933 //===----------------------------------------------------------------------===// 2934 // Constraint manipulation for Delta test. 2935 2936 // Given a linear SCEV, 2937 // return the coefficient (the step) 2938 // corresponding to the specified loop. 2939 // If there isn't one, return 0. 2940 // For example, given a*i + b*j + c*k, zeroing the coefficient 2941 // corresponding to the j loop would yield b. 2942 const SCEV *DependenceAnalysis::findCoefficient(const SCEV *Expr, 2943 const Loop *TargetLoop) const { 2944 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 2945 if (!AddRec) 2946 return SE->getConstant(Expr->getType(), 0); 2947 if (AddRec->getLoop() == TargetLoop) 2948 return AddRec->getStepRecurrence(*SE); 2949 return findCoefficient(AddRec->getStart(), TargetLoop); 2950 } 2951 2952 2953 // Given a linear SCEV, 2954 // return the SCEV given by zeroing out the coefficient 2955 // corresponding to the specified loop. 2956 // For example, given a*i + b*j + c*k, zeroing the coefficient 2957 // corresponding to the j loop would yield a*i + c*k. 2958 const SCEV *DependenceAnalysis::zeroCoefficient(const SCEV *Expr, 2959 const Loop *TargetLoop) const { 2960 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 2961 if (!AddRec) 2962 return Expr; // ignore 2963 if (AddRec->getLoop() == TargetLoop) 2964 return AddRec->getStart(); 2965 return SE->getAddRecExpr(zeroCoefficient(AddRec->getStart(), TargetLoop), 2966 AddRec->getStepRecurrence(*SE), 2967 AddRec->getLoop(), 2968 AddRec->getNoWrapFlags()); 2969 } 2970 2971 2972 // Given a linear SCEV Expr, 2973 // return the SCEV given by adding some Value to the 2974 // coefficient corresponding to the specified TargetLoop. 2975 // For example, given a*i + b*j + c*k, adding 1 to the coefficient 2976 // corresponding to the j loop would yield a*i + (b+1)*j + c*k. 2977 const SCEV *DependenceAnalysis::addToCoefficient(const SCEV *Expr, 2978 const Loop *TargetLoop, 2979 const SCEV *Value) const { 2980 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 2981 if (!AddRec) // create a new addRec 2982 return SE->getAddRecExpr(Expr, 2983 Value, 2984 TargetLoop, 2985 SCEV::FlagAnyWrap); // Worst case, with no info. 2986 if (AddRec->getLoop() == TargetLoop) { 2987 const SCEV *Sum = SE->getAddExpr(AddRec->getStepRecurrence(*SE), Value); 2988 if (Sum->isZero()) 2989 return AddRec->getStart(); 2990 return SE->getAddRecExpr(AddRec->getStart(), 2991 Sum, 2992 AddRec->getLoop(), 2993 AddRec->getNoWrapFlags()); 2994 } 2995 if (SE->isLoopInvariant(AddRec, TargetLoop)) 2996 return SE->getAddRecExpr(AddRec, Value, TargetLoop, SCEV::FlagAnyWrap); 2997 return SE->getAddRecExpr( 2998 addToCoefficient(AddRec->getStart(), TargetLoop, Value), 2999 AddRec->getStepRecurrence(*SE), AddRec->getLoop(), 3000 AddRec->getNoWrapFlags()); 3001 } 3002 3003 3004 // Review the constraints, looking for opportunities 3005 // to simplify a subscript pair (Src and Dst). 3006 // Return true if some simplification occurs. 3007 // If the simplification isn't exact (that is, if it is conservative 3008 // in terms of dependence), set consistent to false. 3009 // Corresponds to Figure 5 from the paper 3010 // 3011 // Practical Dependence Testing 3012 // Goff, Kennedy, Tseng 3013 // PLDI 1991 3014 bool DependenceAnalysis::propagate(const SCEV *&Src, 3015 const SCEV *&Dst, 3016 SmallBitVector &Loops, 3017 SmallVectorImpl<Constraint> &Constraints, 3018 bool &Consistent) { 3019 bool Result = false; 3020 for (int LI = Loops.find_first(); LI >= 0; LI = Loops.find_next(LI)) { 3021 DEBUG(dbgs() << "\t Constraint[" << LI << "] is"); 3022 DEBUG(Constraints[LI].dump(dbgs())); 3023 if (Constraints[LI].isDistance()) 3024 Result |= propagateDistance(Src, Dst, Constraints[LI], Consistent); 3025 else if (Constraints[LI].isLine()) 3026 Result |= propagateLine(Src, Dst, Constraints[LI], Consistent); 3027 else if (Constraints[LI].isPoint()) 3028 Result |= propagatePoint(Src, Dst, Constraints[LI]); 3029 } 3030 return Result; 3031 } 3032 3033 3034 // Attempt to propagate a distance 3035 // constraint into a subscript pair (Src and Dst). 3036 // Return true if some simplification occurs. 3037 // If the simplification isn't exact (that is, if it is conservative 3038 // in terms of dependence), set consistent to false. 3039 bool DependenceAnalysis::propagateDistance(const SCEV *&Src, 3040 const SCEV *&Dst, 3041 Constraint &CurConstraint, 3042 bool &Consistent) { 3043 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3044 DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); 3045 const SCEV *A_K = findCoefficient(Src, CurLoop); 3046 if (A_K->isZero()) 3047 return false; 3048 const SCEV *DA_K = SE->getMulExpr(A_K, CurConstraint.getD()); 3049 Src = SE->getMinusSCEV(Src, DA_K); 3050 Src = zeroCoefficient(Src, CurLoop); 3051 DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); 3052 DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); 3053 Dst = addToCoefficient(Dst, CurLoop, SE->getNegativeSCEV(A_K)); 3054 DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); 3055 if (!findCoefficient(Dst, CurLoop)->isZero()) 3056 Consistent = false; 3057 return true; 3058 } 3059 3060 3061 // Attempt to propagate a line 3062 // constraint into a subscript pair (Src and Dst). 3063 // Return true if some simplification occurs. 3064 // If the simplification isn't exact (that is, if it is conservative 3065 // in terms of dependence), set consistent to false. 3066 bool DependenceAnalysis::propagateLine(const SCEV *&Src, 3067 const SCEV *&Dst, 3068 Constraint &CurConstraint, 3069 bool &Consistent) { 3070 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3071 const SCEV *A = CurConstraint.getA(); 3072 const SCEV *B = CurConstraint.getB(); 3073 const SCEV *C = CurConstraint.getC(); 3074 DEBUG(dbgs() << "\t\tA = " << *A << ", B = " << *B << ", C = " << *C << "\n"); 3075 DEBUG(dbgs() << "\t\tSrc = " << *Src << "\n"); 3076 DEBUG(dbgs() << "\t\tDst = " << *Dst << "\n"); 3077 if (A->isZero()) { 3078 const SCEVConstant *Bconst = dyn_cast<SCEVConstant>(B); 3079 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3080 if (!Bconst || !Cconst) return false; 3081 APInt Beta = Bconst->getValue()->getValue(); 3082 APInt Charlie = Cconst->getValue()->getValue(); 3083 APInt CdivB = Charlie.sdiv(Beta); 3084 assert(Charlie.srem(Beta) == 0 && "C should be evenly divisible by B"); 3085 const SCEV *AP_K = findCoefficient(Dst, CurLoop); 3086 // Src = SE->getAddExpr(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); 3087 Src = SE->getMinusSCEV(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); 3088 Dst = zeroCoefficient(Dst, CurLoop); 3089 if (!findCoefficient(Src, CurLoop)->isZero()) 3090 Consistent = false; 3091 } 3092 else if (B->isZero()) { 3093 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); 3094 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3095 if (!Aconst || !Cconst) return false; 3096 APInt Alpha = Aconst->getValue()->getValue(); 3097 APInt Charlie = Cconst->getValue()->getValue(); 3098 APInt CdivA = Charlie.sdiv(Alpha); 3099 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); 3100 const SCEV *A_K = findCoefficient(Src, CurLoop); 3101 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); 3102 Src = zeroCoefficient(Src, CurLoop); 3103 if (!findCoefficient(Dst, CurLoop)->isZero()) 3104 Consistent = false; 3105 } 3106 else if (isKnownPredicate(CmpInst::ICMP_EQ, A, B)) { 3107 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); 3108 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3109 if (!Aconst || !Cconst) return false; 3110 APInt Alpha = Aconst->getValue()->getValue(); 3111 APInt Charlie = Cconst->getValue()->getValue(); 3112 APInt CdivA = Charlie.sdiv(Alpha); 3113 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); 3114 const SCEV *A_K = findCoefficient(Src, CurLoop); 3115 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); 3116 Src = zeroCoefficient(Src, CurLoop); 3117 Dst = addToCoefficient(Dst, CurLoop, A_K); 3118 if (!findCoefficient(Dst, CurLoop)->isZero()) 3119 Consistent = false; 3120 } 3121 else { 3122 // paper is incorrect here, or perhaps just misleading 3123 const SCEV *A_K = findCoefficient(Src, CurLoop); 3124 Src = SE->getMulExpr(Src, A); 3125 Dst = SE->getMulExpr(Dst, A); 3126 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, C)); 3127 Src = zeroCoefficient(Src, CurLoop); 3128 Dst = addToCoefficient(Dst, CurLoop, SE->getMulExpr(A_K, B)); 3129 if (!findCoefficient(Dst, CurLoop)->isZero()) 3130 Consistent = false; 3131 } 3132 DEBUG(dbgs() << "\t\tnew Src = " << *Src << "\n"); 3133 DEBUG(dbgs() << "\t\tnew Dst = " << *Dst << "\n"); 3134 return true; 3135 } 3136 3137 3138 // Attempt to propagate a point 3139 // constraint into a subscript pair (Src and Dst). 3140 // Return true if some simplification occurs. 3141 bool DependenceAnalysis::propagatePoint(const SCEV *&Src, 3142 const SCEV *&Dst, 3143 Constraint &CurConstraint) { 3144 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3145 const SCEV *A_K = findCoefficient(Src, CurLoop); 3146 const SCEV *AP_K = findCoefficient(Dst, CurLoop); 3147 const SCEV *XA_K = SE->getMulExpr(A_K, CurConstraint.getX()); 3148 const SCEV *YAP_K = SE->getMulExpr(AP_K, CurConstraint.getY()); 3149 DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); 3150 Src = SE->getAddExpr(Src, SE->getMinusSCEV(XA_K, YAP_K)); 3151 Src = zeroCoefficient(Src, CurLoop); 3152 DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); 3153 DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); 3154 Dst = zeroCoefficient(Dst, CurLoop); 3155 DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); 3156 return true; 3157 } 3158 3159 3160 // Update direction vector entry based on the current constraint. 3161 void DependenceAnalysis::updateDirection(Dependence::DVEntry &Level, 3162 const Constraint &CurConstraint 3163 ) const { 3164 DEBUG(dbgs() << "\tUpdate direction, constraint ="); 3165 DEBUG(CurConstraint.dump(dbgs())); 3166 if (CurConstraint.isAny()) 3167 ; // use defaults 3168 else if (CurConstraint.isDistance()) { 3169 // this one is consistent, the others aren't 3170 Level.Scalar = false; 3171 Level.Distance = CurConstraint.getD(); 3172 unsigned NewDirection = Dependence::DVEntry::NONE; 3173 if (!SE->isKnownNonZero(Level.Distance)) // if may be zero 3174 NewDirection = Dependence::DVEntry::EQ; 3175 if (!SE->isKnownNonPositive(Level.Distance)) // if may be positive 3176 NewDirection |= Dependence::DVEntry::LT; 3177 if (!SE->isKnownNonNegative(Level.Distance)) // if may be negative 3178 NewDirection |= Dependence::DVEntry::GT; 3179 Level.Direction &= NewDirection; 3180 } 3181 else if (CurConstraint.isLine()) { 3182 Level.Scalar = false; 3183 Level.Distance = nullptr; 3184 // direction should be accurate 3185 } 3186 else if (CurConstraint.isPoint()) { 3187 Level.Scalar = false; 3188 Level.Distance = nullptr; 3189 unsigned NewDirection = Dependence::DVEntry::NONE; 3190 if (!isKnownPredicate(CmpInst::ICMP_NE, 3191 CurConstraint.getY(), 3192 CurConstraint.getX())) 3193 // if X may be = Y 3194 NewDirection |= Dependence::DVEntry::EQ; 3195 if (!isKnownPredicate(CmpInst::ICMP_SLE, 3196 CurConstraint.getY(), 3197 CurConstraint.getX())) 3198 // if Y may be > X 3199 NewDirection |= Dependence::DVEntry::LT; 3200 if (!isKnownPredicate(CmpInst::ICMP_SGE, 3201 CurConstraint.getY(), 3202 CurConstraint.getX())) 3203 // if Y may be < X 3204 NewDirection |= Dependence::DVEntry::GT; 3205 Level.Direction &= NewDirection; 3206 } 3207 else 3208 llvm_unreachable("constraint has unexpected kind"); 3209 } 3210 3211 /// Check if we can delinearize the subscripts. If the SCEVs representing the 3212 /// source and destination array references are recurrences on a nested loop, 3213 /// this function flattens the nested recurrences into separate recurrences 3214 /// for each loop level. 3215 bool DependenceAnalysis::tryDelinearize(const SCEV *SrcSCEV, 3216 const SCEV *DstSCEV, 3217 SmallVectorImpl<Subscript> &Pair, 3218 const SCEV *ElementSize) { 3219 const SCEVUnknown *SrcBase = 3220 dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcSCEV)); 3221 const SCEVUnknown *DstBase = 3222 dyn_cast<SCEVUnknown>(SE->getPointerBase(DstSCEV)); 3223 3224 if (!SrcBase || !DstBase || SrcBase != DstBase) 3225 return false; 3226 3227 SrcSCEV = SE->getMinusSCEV(SrcSCEV, SrcBase); 3228 DstSCEV = SE->getMinusSCEV(DstSCEV, DstBase); 3229 3230 const SCEVAddRecExpr *SrcAR = dyn_cast<SCEVAddRecExpr>(SrcSCEV); 3231 const SCEVAddRecExpr *DstAR = dyn_cast<SCEVAddRecExpr>(DstSCEV); 3232 if (!SrcAR || !DstAR || !SrcAR->isAffine() || !DstAR->isAffine()) 3233 return false; 3234 3235 // First step: collect parametric terms in both array references. 3236 SmallVector<const SCEV *, 4> Terms; 3237 SrcAR->collectParametricTerms(*SE, Terms); 3238 DstAR->collectParametricTerms(*SE, Terms); 3239 3240 // Second step: find subscript sizes. 3241 SmallVector<const SCEV *, 4> Sizes; 3242 SE->findArrayDimensions(Terms, Sizes, ElementSize); 3243 3244 // Third step: compute the access functions for each subscript. 3245 SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts; 3246 SrcAR->computeAccessFunctions(*SE, SrcSubscripts, Sizes); 3247 DstAR->computeAccessFunctions(*SE, DstSubscripts, Sizes); 3248 3249 // Fail when there is only a subscript: that's a linearized access function. 3250 if (SrcSubscripts.size() < 2 || DstSubscripts.size() < 2 || 3251 SrcSubscripts.size() != DstSubscripts.size()) 3252 return false; 3253 3254 int size = SrcSubscripts.size(); 3255 3256 DEBUG({ 3257 dbgs() << "\nSrcSubscripts: "; 3258 for (int i = 0; i < size; i++) 3259 dbgs() << *SrcSubscripts[i]; 3260 dbgs() << "\nDstSubscripts: "; 3261 for (int i = 0; i < size; i++) 3262 dbgs() << *DstSubscripts[i]; 3263 }); 3264 3265 // The delinearization transforms a single-subscript MIV dependence test into 3266 // a multi-subscript SIV dependence test that is easier to compute. So we 3267 // resize Pair to contain as many pairs of subscripts as the delinearization 3268 // has found, and then initialize the pairs following the delinearization. 3269 Pair.resize(size); 3270 for (int i = 0; i < size; ++i) { 3271 Pair[i].Src = SrcSubscripts[i]; 3272 Pair[i].Dst = DstSubscripts[i]; 3273 unifySubscriptType(&Pair[i]); 3274 3275 // FIXME: we should record the bounds SrcSizes[i] and DstSizes[i] that the 3276 // delinearization has found, and add these constraints to the dependence 3277 // check to avoid memory accesses overflow from one dimension into another. 3278 // This is related to the problem of determining the existence of data 3279 // dependences in array accesses using a different number of subscripts: in 3280 // C one can access an array A[100][100]; as A[0][9999], *A[9999], etc. 3281 } 3282 3283 return true; 3284 } 3285 3286 //===----------------------------------------------------------------------===// 3287 3288 #ifndef NDEBUG 3289 // For debugging purposes, dump a small bit vector to dbgs(). 3290 static void dumpSmallBitVector(SmallBitVector &BV) { 3291 dbgs() << "{"; 3292 for (int VI = BV.find_first(); VI >= 0; VI = BV.find_next(VI)) { 3293 dbgs() << VI; 3294 if (BV.find_next(VI) >= 0) 3295 dbgs() << ' '; 3296 } 3297 dbgs() << "}\n"; 3298 } 3299 #endif 3300 3301 3302 // depends - 3303 // Returns NULL if there is no dependence. 3304 // Otherwise, return a Dependence with as many details as possible. 3305 // Corresponds to Section 3.1 in the paper 3306 // 3307 // Practical Dependence Testing 3308 // Goff, Kennedy, Tseng 3309 // PLDI 1991 3310 // 3311 // Care is required to keep the routine below, getSplitIteration(), 3312 // up to date with respect to this routine. 3313 std::unique_ptr<Dependence> 3314 DependenceAnalysis::depends(Instruction *Src, Instruction *Dst, 3315 bool PossiblyLoopIndependent) { 3316 if (Src == Dst) 3317 PossiblyLoopIndependent = false; 3318 3319 if ((!Src->mayReadFromMemory() && !Src->mayWriteToMemory()) || 3320 (!Dst->mayReadFromMemory() && !Dst->mayWriteToMemory())) 3321 // if both instructions don't reference memory, there's no dependence 3322 return nullptr; 3323 3324 if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) { 3325 // can only analyze simple loads and stores, i.e., no calls, invokes, etc. 3326 DEBUG(dbgs() << "can only handle simple loads and stores\n"); 3327 return make_unique<Dependence>(Src, Dst); 3328 } 3329 3330 Value *SrcPtr = getPointerOperand(Src); 3331 Value *DstPtr = getPointerOperand(Dst); 3332 3333 switch (underlyingObjectsAlias(AA, F->getParent()->getDataLayout(), DstPtr, 3334 SrcPtr)) { 3335 case AliasAnalysis::MayAlias: 3336 case AliasAnalysis::PartialAlias: 3337 // cannot analyse objects if we don't understand their aliasing. 3338 DEBUG(dbgs() << "can't analyze may or partial alias\n"); 3339 return make_unique<Dependence>(Src, Dst); 3340 case AliasAnalysis::NoAlias: 3341 // If the objects noalias, they are distinct, accesses are independent. 3342 DEBUG(dbgs() << "no alias\n"); 3343 return nullptr; 3344 case AliasAnalysis::MustAlias: 3345 break; // The underlying objects alias; test accesses for dependence. 3346 } 3347 3348 // establish loop nesting levels 3349 establishNestingLevels(Src, Dst); 3350 DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n"); 3351 DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n"); 3352 3353 FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels); 3354 ++TotalArrayPairs; 3355 3356 // See if there are GEPs we can use. 3357 bool UsefulGEP = false; 3358 GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr); 3359 GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr); 3360 if (SrcGEP && DstGEP && 3361 SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()) { 3362 const SCEV *SrcPtrSCEV = SE->getSCEV(SrcGEP->getPointerOperand()); 3363 const SCEV *DstPtrSCEV = SE->getSCEV(DstGEP->getPointerOperand()); 3364 DEBUG(dbgs() << " SrcPtrSCEV = " << *SrcPtrSCEV << "\n"); 3365 DEBUG(dbgs() << " DstPtrSCEV = " << *DstPtrSCEV << "\n"); 3366 3367 UsefulGEP = isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) && 3368 isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent())) && 3369 (SrcGEP->getNumOperands() == DstGEP->getNumOperands()); 3370 } 3371 unsigned Pairs = UsefulGEP ? SrcGEP->idx_end() - SrcGEP->idx_begin() : 1; 3372 SmallVector<Subscript, 4> Pair(Pairs); 3373 if (UsefulGEP) { 3374 DEBUG(dbgs() << " using GEPs\n"); 3375 unsigned P = 0; 3376 for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(), 3377 SrcEnd = SrcGEP->idx_end(), 3378 DstIdx = DstGEP->idx_begin(); 3379 SrcIdx != SrcEnd; 3380 ++SrcIdx, ++DstIdx, ++P) { 3381 Pair[P].Src = SE->getSCEV(*SrcIdx); 3382 Pair[P].Dst = SE->getSCEV(*DstIdx); 3383 unifySubscriptType(&Pair[P]); 3384 } 3385 } 3386 else { 3387 DEBUG(dbgs() << " ignoring GEPs\n"); 3388 const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); 3389 const SCEV *DstSCEV = SE->getSCEV(DstPtr); 3390 DEBUG(dbgs() << " SrcSCEV = " << *SrcSCEV << "\n"); 3391 DEBUG(dbgs() << " DstSCEV = " << *DstSCEV << "\n"); 3392 Pair[0].Src = SrcSCEV; 3393 Pair[0].Dst = DstSCEV; 3394 } 3395 3396 if (Delinearize && Pairs == 1 && CommonLevels > 1 && 3397 tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair, SE->getElementSize(Src))) { 3398 DEBUG(dbgs() << " delinerized GEP\n"); 3399 Pairs = Pair.size(); 3400 } 3401 3402 for (unsigned P = 0; P < Pairs; ++P) { 3403 Pair[P].Loops.resize(MaxLevels + 1); 3404 Pair[P].GroupLoops.resize(MaxLevels + 1); 3405 Pair[P].Group.resize(Pairs); 3406 removeMatchingExtensions(&Pair[P]); 3407 Pair[P].Classification = 3408 classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), 3409 Pair[P].Dst, LI->getLoopFor(Dst->getParent()), 3410 Pair[P].Loops); 3411 Pair[P].GroupLoops = Pair[P].Loops; 3412 Pair[P].Group.set(P); 3413 DEBUG(dbgs() << " subscript " << P << "\n"); 3414 DEBUG(dbgs() << "\tsrc = " << *Pair[P].Src << "\n"); 3415 DEBUG(dbgs() << "\tdst = " << *Pair[P].Dst << "\n"); 3416 DEBUG(dbgs() << "\tclass = " << Pair[P].Classification << "\n"); 3417 DEBUG(dbgs() << "\tloops = "); 3418 DEBUG(dumpSmallBitVector(Pair[P].Loops)); 3419 } 3420 3421 SmallBitVector Separable(Pairs); 3422 SmallBitVector Coupled(Pairs); 3423 3424 // Partition subscripts into separable and minimally-coupled groups 3425 // Algorithm in paper is algorithmically better; 3426 // this may be faster in practice. Check someday. 3427 // 3428 // Here's an example of how it works. Consider this code: 3429 // 3430 // for (i = ...) { 3431 // for (j = ...) { 3432 // for (k = ...) { 3433 // for (l = ...) { 3434 // for (m = ...) { 3435 // A[i][j][k][m] = ...; 3436 // ... = A[0][j][l][i + j]; 3437 // } 3438 // } 3439 // } 3440 // } 3441 // } 3442 // 3443 // There are 4 subscripts here: 3444 // 0 [i] and [0] 3445 // 1 [j] and [j] 3446 // 2 [k] and [l] 3447 // 3 [m] and [i + j] 3448 // 3449 // We've already classified each subscript pair as ZIV, SIV, etc., 3450 // and collected all the loops mentioned by pair P in Pair[P].Loops. 3451 // In addition, we've initialized Pair[P].GroupLoops to Pair[P].Loops 3452 // and set Pair[P].Group = {P}. 3453 // 3454 // Src Dst Classification Loops GroupLoops Group 3455 // 0 [i] [0] SIV {1} {1} {0} 3456 // 1 [j] [j] SIV {2} {2} {1} 3457 // 2 [k] [l] RDIV {3,4} {3,4} {2} 3458 // 3 [m] [i + j] MIV {1,2,5} {1,2,5} {3} 3459 // 3460 // For each subscript SI 0 .. 3, we consider each remaining subscript, SJ. 3461 // So, 0 is compared against 1, 2, and 3; 1 is compared against 2 and 3, etc. 3462 // 3463 // We begin by comparing 0 and 1. The intersection of the GroupLoops is empty. 3464 // Next, 0 and 2. Again, the intersection of their GroupLoops is empty. 3465 // Next 0 and 3. The intersection of their GroupLoop = {1}, not empty, 3466 // so Pair[3].Group = {0,3} and Done = false (that is, 0 will not be added 3467 // to either Separable or Coupled). 3468 // 3469 // Next, we consider 1 and 2. The intersection of the GroupLoops is empty. 3470 // Next, 1 and 3. The intersectionof their GroupLoops = {2}, not empty, 3471 // so Pair[3].Group = {0, 1, 3} and Done = false. 3472 // 3473 // Next, we compare 2 against 3. The intersection of the GroupLoops is empty. 3474 // Since Done remains true, we add 2 to the set of Separable pairs. 3475 // 3476 // Finally, we consider 3. There's nothing to compare it with, 3477 // so Done remains true and we add it to the Coupled set. 3478 // Pair[3].Group = {0, 1, 3} and GroupLoops = {1, 2, 5}. 3479 // 3480 // In the end, we've got 1 separable subscript and 1 coupled group. 3481 for (unsigned SI = 0; SI < Pairs; ++SI) { 3482 if (Pair[SI].Classification == Subscript::NonLinear) { 3483 // ignore these, but collect loops for later 3484 ++NonlinearSubscriptPairs; 3485 collectCommonLoops(Pair[SI].Src, 3486 LI->getLoopFor(Src->getParent()), 3487 Pair[SI].Loops); 3488 collectCommonLoops(Pair[SI].Dst, 3489 LI->getLoopFor(Dst->getParent()), 3490 Pair[SI].Loops); 3491 Result.Consistent = false; 3492 } else if (Pair[SI].Classification == Subscript::ZIV) { 3493 // always separable 3494 Separable.set(SI); 3495 } 3496 else { 3497 // SIV, RDIV, or MIV, so check for coupled group 3498 bool Done = true; 3499 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { 3500 SmallBitVector Intersection = Pair[SI].GroupLoops; 3501 Intersection &= Pair[SJ].GroupLoops; 3502 if (Intersection.any()) { 3503 // accumulate set of all the loops in group 3504 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; 3505 // accumulate set of all subscripts in group 3506 Pair[SJ].Group |= Pair[SI].Group; 3507 Done = false; 3508 } 3509 } 3510 if (Done) { 3511 if (Pair[SI].Group.count() == 1) { 3512 Separable.set(SI); 3513 ++SeparableSubscriptPairs; 3514 } 3515 else { 3516 Coupled.set(SI); 3517 ++CoupledSubscriptPairs; 3518 } 3519 } 3520 } 3521 } 3522 3523 DEBUG(dbgs() << " Separable = "); 3524 DEBUG(dumpSmallBitVector(Separable)); 3525 DEBUG(dbgs() << " Coupled = "); 3526 DEBUG(dumpSmallBitVector(Coupled)); 3527 3528 Constraint NewConstraint; 3529 NewConstraint.setAny(SE); 3530 3531 // test separable subscripts 3532 for (int SI = Separable.find_first(); SI >= 0; SI = Separable.find_next(SI)) { 3533 DEBUG(dbgs() << "testing subscript " << SI); 3534 switch (Pair[SI].Classification) { 3535 case Subscript::ZIV: 3536 DEBUG(dbgs() << ", ZIV\n"); 3537 if (testZIV(Pair[SI].Src, Pair[SI].Dst, Result)) 3538 return nullptr; 3539 break; 3540 case Subscript::SIV: { 3541 DEBUG(dbgs() << ", SIV\n"); 3542 unsigned Level; 3543 const SCEV *SplitIter = nullptr; 3544 if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level, Result, NewConstraint, 3545 SplitIter)) 3546 return nullptr; 3547 break; 3548 } 3549 case Subscript::RDIV: 3550 DEBUG(dbgs() << ", RDIV\n"); 3551 if (testRDIV(Pair[SI].Src, Pair[SI].Dst, Result)) 3552 return nullptr; 3553 break; 3554 case Subscript::MIV: 3555 DEBUG(dbgs() << ", MIV\n"); 3556 if (testMIV(Pair[SI].Src, Pair[SI].Dst, Pair[SI].Loops, Result)) 3557 return nullptr; 3558 break; 3559 default: 3560 llvm_unreachable("subscript has unexpected classification"); 3561 } 3562 } 3563 3564 if (Coupled.count()) { 3565 // test coupled subscript groups 3566 DEBUG(dbgs() << "starting on coupled subscripts\n"); 3567 DEBUG(dbgs() << "MaxLevels + 1 = " << MaxLevels + 1 << "\n"); 3568 SmallVector<Constraint, 4> Constraints(MaxLevels + 1); 3569 for (unsigned II = 0; II <= MaxLevels; ++II) 3570 Constraints[II].setAny(SE); 3571 for (int SI = Coupled.find_first(); SI >= 0; SI = Coupled.find_next(SI)) { 3572 DEBUG(dbgs() << "testing subscript group " << SI << " { "); 3573 SmallBitVector Group(Pair[SI].Group); 3574 SmallBitVector Sivs(Pairs); 3575 SmallBitVector Mivs(Pairs); 3576 SmallBitVector ConstrainedLevels(MaxLevels + 1); 3577 for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) { 3578 DEBUG(dbgs() << SJ << " "); 3579 if (Pair[SJ].Classification == Subscript::SIV) 3580 Sivs.set(SJ); 3581 else 3582 Mivs.set(SJ); 3583 } 3584 DEBUG(dbgs() << "}\n"); 3585 while (Sivs.any()) { 3586 bool Changed = false; 3587 for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) { 3588 DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n"); 3589 // SJ is an SIV subscript that's part of the current coupled group 3590 unsigned Level; 3591 const SCEV *SplitIter = nullptr; 3592 DEBUG(dbgs() << "SIV\n"); 3593 if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, Result, NewConstraint, 3594 SplitIter)) 3595 return nullptr; 3596 ConstrainedLevels.set(Level); 3597 if (intersectConstraints(&Constraints[Level], &NewConstraint)) { 3598 if (Constraints[Level].isEmpty()) { 3599 ++DeltaIndependence; 3600 return nullptr; 3601 } 3602 Changed = true; 3603 } 3604 Sivs.reset(SJ); 3605 } 3606 if (Changed) { 3607 // propagate, possibly creating new SIVs and ZIVs 3608 DEBUG(dbgs() << " propagating\n"); 3609 DEBUG(dbgs() << "\tMivs = "); 3610 DEBUG(dumpSmallBitVector(Mivs)); 3611 for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { 3612 // SJ is an MIV subscript that's part of the current coupled group 3613 DEBUG(dbgs() << "\tSJ = " << SJ << "\n"); 3614 if (propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, 3615 Constraints, Result.Consistent)) { 3616 DEBUG(dbgs() << "\t Changed\n"); 3617 ++DeltaPropagations; 3618 Pair[SJ].Classification = 3619 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), 3620 Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), 3621 Pair[SJ].Loops); 3622 switch (Pair[SJ].Classification) { 3623 case Subscript::ZIV: 3624 DEBUG(dbgs() << "ZIV\n"); 3625 if (testZIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) 3626 return nullptr; 3627 Mivs.reset(SJ); 3628 break; 3629 case Subscript::SIV: 3630 Sivs.set(SJ); 3631 Mivs.reset(SJ); 3632 break; 3633 case Subscript::RDIV: 3634 case Subscript::MIV: 3635 break; 3636 default: 3637 llvm_unreachable("bad subscript classification"); 3638 } 3639 } 3640 } 3641 } 3642 } 3643 3644 // test & propagate remaining RDIVs 3645 for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { 3646 if (Pair[SJ].Classification == Subscript::RDIV) { 3647 DEBUG(dbgs() << "RDIV test\n"); 3648 if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) 3649 return nullptr; 3650 // I don't yet understand how to propagate RDIV results 3651 Mivs.reset(SJ); 3652 } 3653 } 3654 3655 // test remaining MIVs 3656 // This code is temporary. 3657 // Better to somehow test all remaining subscripts simultaneously. 3658 for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { 3659 if (Pair[SJ].Classification == Subscript::MIV) { 3660 DEBUG(dbgs() << "MIV test\n"); 3661 if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Result)) 3662 return nullptr; 3663 } 3664 else 3665 llvm_unreachable("expected only MIV subscripts at this point"); 3666 } 3667 3668 // update Result.DV from constraint vector 3669 DEBUG(dbgs() << " updating\n"); 3670 for (int SJ = ConstrainedLevels.find_first(); SJ >= 0; 3671 SJ = ConstrainedLevels.find_next(SJ)) { 3672 if (SJ > (int)CommonLevels) 3673 break; 3674 updateDirection(Result.DV[SJ - 1], Constraints[SJ]); 3675 if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE) 3676 return nullptr; 3677 } 3678 } 3679 } 3680 3681 // Make sure the Scalar flags are set correctly. 3682 SmallBitVector CompleteLoops(MaxLevels + 1); 3683 for (unsigned SI = 0; SI < Pairs; ++SI) 3684 CompleteLoops |= Pair[SI].Loops; 3685 for (unsigned II = 1; II <= CommonLevels; ++II) 3686 if (CompleteLoops[II]) 3687 Result.DV[II - 1].Scalar = false; 3688 3689 if (PossiblyLoopIndependent) { 3690 // Make sure the LoopIndependent flag is set correctly. 3691 // All directions must include equal, otherwise no 3692 // loop-independent dependence is possible. 3693 for (unsigned II = 1; II <= CommonLevels; ++II) { 3694 if (!(Result.getDirection(II) & Dependence::DVEntry::EQ)) { 3695 Result.LoopIndependent = false; 3696 break; 3697 } 3698 } 3699 } 3700 else { 3701 // On the other hand, if all directions are equal and there's no 3702 // loop-independent dependence possible, then no dependence exists. 3703 bool AllEqual = true; 3704 for (unsigned II = 1; II <= CommonLevels; ++II) { 3705 if (Result.getDirection(II) != Dependence::DVEntry::EQ) { 3706 AllEqual = false; 3707 break; 3708 } 3709 } 3710 if (AllEqual) 3711 return nullptr; 3712 } 3713 3714 auto Final = make_unique<FullDependence>(Result); 3715 Result.DV = nullptr; 3716 return std::move(Final); 3717 } 3718 3719 3720 3721 //===----------------------------------------------------------------------===// 3722 // getSplitIteration - 3723 // Rather than spend rarely-used space recording the splitting iteration 3724 // during the Weak-Crossing SIV test, we re-compute it on demand. 3725 // The re-computation is basically a repeat of the entire dependence test, 3726 // though simplified since we know that the dependence exists. 3727 // It's tedious, since we must go through all propagations, etc. 3728 // 3729 // Care is required to keep this code up to date with respect to the routine 3730 // above, depends(). 3731 // 3732 // Generally, the dependence analyzer will be used to build 3733 // a dependence graph for a function (basically a map from instructions 3734 // to dependences). Looking for cycles in the graph shows us loops 3735 // that cannot be trivially vectorized/parallelized. 3736 // 3737 // We can try to improve the situation by examining all the dependences 3738 // that make up the cycle, looking for ones we can break. 3739 // Sometimes, peeling the first or last iteration of a loop will break 3740 // dependences, and we've got flags for those possibilities. 3741 // Sometimes, splitting a loop at some other iteration will do the trick, 3742 // and we've got a flag for that case. Rather than waste the space to 3743 // record the exact iteration (since we rarely know), we provide 3744 // a method that calculates the iteration. It's a drag that it must work 3745 // from scratch, but wonderful in that it's possible. 3746 // 3747 // Here's an example: 3748 // 3749 // for (i = 0; i < 10; i++) 3750 // A[i] = ... 3751 // ... = A[11 - i] 3752 // 3753 // There's a loop-carried flow dependence from the store to the load, 3754 // found by the weak-crossing SIV test. The dependence will have a flag, 3755 // indicating that the dependence can be broken by splitting the loop. 3756 // Calling getSplitIteration will return 5. 3757 // Splitting the loop breaks the dependence, like so: 3758 // 3759 // for (i = 0; i <= 5; i++) 3760 // A[i] = ... 3761 // ... = A[11 - i] 3762 // for (i = 6; i < 10; i++) 3763 // A[i] = ... 3764 // ... = A[11 - i] 3765 // 3766 // breaks the dependence and allows us to vectorize/parallelize 3767 // both loops. 3768 const SCEV *DependenceAnalysis::getSplitIteration(const Dependence &Dep, 3769 unsigned SplitLevel) { 3770 assert(Dep.isSplitable(SplitLevel) && 3771 "Dep should be splitable at SplitLevel"); 3772 Instruction *Src = Dep.getSrc(); 3773 Instruction *Dst = Dep.getDst(); 3774 assert(Src->mayReadFromMemory() || Src->mayWriteToMemory()); 3775 assert(Dst->mayReadFromMemory() || Dst->mayWriteToMemory()); 3776 assert(isLoadOrStore(Src)); 3777 assert(isLoadOrStore(Dst)); 3778 Value *SrcPtr = getPointerOperand(Src); 3779 Value *DstPtr = getPointerOperand(Dst); 3780 assert(underlyingObjectsAlias(AA, F->getParent()->getDataLayout(), DstPtr, 3781 SrcPtr) == AliasAnalysis::MustAlias); 3782 3783 // establish loop nesting levels 3784 establishNestingLevels(Src, Dst); 3785 3786 FullDependence Result(Src, Dst, false, CommonLevels); 3787 3788 // See if there are GEPs we can use. 3789 bool UsefulGEP = false; 3790 GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr); 3791 GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr); 3792 if (SrcGEP && DstGEP && 3793 SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()) { 3794 const SCEV *SrcPtrSCEV = SE->getSCEV(SrcGEP->getPointerOperand()); 3795 const SCEV *DstPtrSCEV = SE->getSCEV(DstGEP->getPointerOperand()); 3796 UsefulGEP = isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) && 3797 isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent())) && 3798 (SrcGEP->getNumOperands() == DstGEP->getNumOperands()); 3799 } 3800 unsigned Pairs = UsefulGEP ? SrcGEP->idx_end() - SrcGEP->idx_begin() : 1; 3801 SmallVector<Subscript, 4> Pair(Pairs); 3802 if (UsefulGEP) { 3803 unsigned P = 0; 3804 for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(), 3805 SrcEnd = SrcGEP->idx_end(), 3806 DstIdx = DstGEP->idx_begin(); 3807 SrcIdx != SrcEnd; 3808 ++SrcIdx, ++DstIdx, ++P) { 3809 Pair[P].Src = SE->getSCEV(*SrcIdx); 3810 Pair[P].Dst = SE->getSCEV(*DstIdx); 3811 } 3812 } 3813 else { 3814 const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); 3815 const SCEV *DstSCEV = SE->getSCEV(DstPtr); 3816 Pair[0].Src = SrcSCEV; 3817 Pair[0].Dst = DstSCEV; 3818 } 3819 3820 if (Delinearize && Pairs == 1 && CommonLevels > 1 && 3821 tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair, SE->getElementSize(Src))) { 3822 DEBUG(dbgs() << " delinerized GEP\n"); 3823 Pairs = Pair.size(); 3824 } 3825 3826 for (unsigned P = 0; P < Pairs; ++P) { 3827 Pair[P].Loops.resize(MaxLevels + 1); 3828 Pair[P].GroupLoops.resize(MaxLevels + 1); 3829 Pair[P].Group.resize(Pairs); 3830 removeMatchingExtensions(&Pair[P]); 3831 Pair[P].Classification = 3832 classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), 3833 Pair[P].Dst, LI->getLoopFor(Dst->getParent()), 3834 Pair[P].Loops); 3835 Pair[P].GroupLoops = Pair[P].Loops; 3836 Pair[P].Group.set(P); 3837 } 3838 3839 SmallBitVector Separable(Pairs); 3840 SmallBitVector Coupled(Pairs); 3841 3842 // partition subscripts into separable and minimally-coupled groups 3843 for (unsigned SI = 0; SI < Pairs; ++SI) { 3844 if (Pair[SI].Classification == Subscript::NonLinear) { 3845 // ignore these, but collect loops for later 3846 collectCommonLoops(Pair[SI].Src, 3847 LI->getLoopFor(Src->getParent()), 3848 Pair[SI].Loops); 3849 collectCommonLoops(Pair[SI].Dst, 3850 LI->getLoopFor(Dst->getParent()), 3851 Pair[SI].Loops); 3852 Result.Consistent = false; 3853 } 3854 else if (Pair[SI].Classification == Subscript::ZIV) 3855 Separable.set(SI); 3856 else { 3857 // SIV, RDIV, or MIV, so check for coupled group 3858 bool Done = true; 3859 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { 3860 SmallBitVector Intersection = Pair[SI].GroupLoops; 3861 Intersection &= Pair[SJ].GroupLoops; 3862 if (Intersection.any()) { 3863 // accumulate set of all the loops in group 3864 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; 3865 // accumulate set of all subscripts in group 3866 Pair[SJ].Group |= Pair[SI].Group; 3867 Done = false; 3868 } 3869 } 3870 if (Done) { 3871 if (Pair[SI].Group.count() == 1) 3872 Separable.set(SI); 3873 else 3874 Coupled.set(SI); 3875 } 3876 } 3877 } 3878 3879 Constraint NewConstraint; 3880 NewConstraint.setAny(SE); 3881 3882 // test separable subscripts 3883 for (int SI = Separable.find_first(); SI >= 0; SI = Separable.find_next(SI)) { 3884 switch (Pair[SI].Classification) { 3885 case Subscript::SIV: { 3886 unsigned Level; 3887 const SCEV *SplitIter = nullptr; 3888 (void) testSIV(Pair[SI].Src, Pair[SI].Dst, Level, 3889 Result, NewConstraint, SplitIter); 3890 if (Level == SplitLevel) { 3891 assert(SplitIter != nullptr); 3892 return SplitIter; 3893 } 3894 break; 3895 } 3896 case Subscript::ZIV: 3897 case Subscript::RDIV: 3898 case Subscript::MIV: 3899 break; 3900 default: 3901 llvm_unreachable("subscript has unexpected classification"); 3902 } 3903 } 3904 3905 if (Coupled.count()) { 3906 // test coupled subscript groups 3907 SmallVector<Constraint, 4> Constraints(MaxLevels + 1); 3908 for (unsigned II = 0; II <= MaxLevels; ++II) 3909 Constraints[II].setAny(SE); 3910 for (int SI = Coupled.find_first(); SI >= 0; SI = Coupled.find_next(SI)) { 3911 SmallBitVector Group(Pair[SI].Group); 3912 SmallBitVector Sivs(Pairs); 3913 SmallBitVector Mivs(Pairs); 3914 SmallBitVector ConstrainedLevels(MaxLevels + 1); 3915 for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) { 3916 if (Pair[SJ].Classification == Subscript::SIV) 3917 Sivs.set(SJ); 3918 else 3919 Mivs.set(SJ); 3920 } 3921 while (Sivs.any()) { 3922 bool Changed = false; 3923 for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) { 3924 // SJ is an SIV subscript that's part of the current coupled group 3925 unsigned Level; 3926 const SCEV *SplitIter = nullptr; 3927 (void) testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, 3928 Result, NewConstraint, SplitIter); 3929 if (Level == SplitLevel && SplitIter) 3930 return SplitIter; 3931 ConstrainedLevels.set(Level); 3932 if (intersectConstraints(&Constraints[Level], &NewConstraint)) 3933 Changed = true; 3934 Sivs.reset(SJ); 3935 } 3936 if (Changed) { 3937 // propagate, possibly creating new SIVs and ZIVs 3938 for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { 3939 // SJ is an MIV subscript that's part of the current coupled group 3940 if (propagate(Pair[SJ].Src, Pair[SJ].Dst, 3941 Pair[SJ].Loops, Constraints, Result.Consistent)) { 3942 Pair[SJ].Classification = 3943 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), 3944 Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), 3945 Pair[SJ].Loops); 3946 switch (Pair[SJ].Classification) { 3947 case Subscript::ZIV: 3948 Mivs.reset(SJ); 3949 break; 3950 case Subscript::SIV: 3951 Sivs.set(SJ); 3952 Mivs.reset(SJ); 3953 break; 3954 case Subscript::RDIV: 3955 case Subscript::MIV: 3956 break; 3957 default: 3958 llvm_unreachable("bad subscript classification"); 3959 } 3960 } 3961 } 3962 } 3963 } 3964 } 3965 } 3966 llvm_unreachable("somehow reached end of routine"); 3967 return nullptr; 3968 } 3969