1 //===-- DependenceAnalysis.cpp - DA Implementation --------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // DependenceAnalysis is an LLVM pass that analyses dependences between memory 11 // accesses. Currently, it is an (incomplete) implementation of the approach 12 // described in 13 // 14 // Practical Dependence Testing 15 // Goff, Kennedy, Tseng 16 // PLDI 1991 17 // 18 // There's a single entry point that analyzes the dependence between a pair 19 // of memory references in a function, returning either NULL, for no dependence, 20 // or a more-or-less detailed description of the dependence between them. 21 // 22 // Currently, the implementation cannot propagate constraints between 23 // coupled RDIV subscripts and lacks a multi-subscript MIV test. 24 // Both of these are conservative weaknesses; 25 // that is, not a source of correctness problems. 26 // 27 // The implementation depends on the GEP instruction to differentiate 28 // subscripts. Since Clang linearizes some array subscripts, the dependence 29 // analysis is using SCEV->delinearize to recover the representation of multiple 30 // subscripts, and thus avoid the more expensive and less precise MIV tests. The 31 // delinearization is controlled by the flag -da-delinearize. 32 // 33 // We should pay some careful attention to the possibility of integer overflow 34 // in the implementation of the various tests. This could happen with Add, 35 // Subtract, or Multiply, with both APInt's and SCEV's. 36 // 37 // Some non-linear subscript pairs can be handled by the GCD test 38 // (and perhaps other tests). 39 // Should explore how often these things occur. 40 // 41 // Finally, it seems like certain test cases expose weaknesses in the SCEV 42 // simplification, especially in the handling of sign and zero extensions. 43 // It could be useful to spend time exploring these. 44 // 45 // Please note that this is work in progress and the interface is subject to 46 // change. 47 // 48 //===----------------------------------------------------------------------===// 49 // // 50 // In memory of Ken Kennedy, 1945 - 2007 // 51 // // 52 //===----------------------------------------------------------------------===// 53 54 #include "llvm/Analysis/DependenceAnalysis.h" 55 #include "llvm/ADT/STLExtras.h" 56 #include "llvm/ADT/Statistic.h" 57 #include "llvm/Analysis/AliasAnalysis.h" 58 #include "llvm/Analysis/LoopInfo.h" 59 #include "llvm/Analysis/ScalarEvolution.h" 60 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 61 #include "llvm/Analysis/ValueTracking.h" 62 #include "llvm/IR/InstIterator.h" 63 #include "llvm/IR/Module.h" 64 #include "llvm/IR/Operator.h" 65 #include "llvm/Support/CommandLine.h" 66 #include "llvm/Support/Debug.h" 67 #include "llvm/Support/ErrorHandling.h" 68 #include "llvm/Support/raw_ostream.h" 69 70 using namespace llvm; 71 72 #define DEBUG_TYPE "da" 73 74 //===----------------------------------------------------------------------===// 75 // statistics 76 77 STATISTIC(TotalArrayPairs, "Array pairs tested"); 78 STATISTIC(SeparableSubscriptPairs, "Separable subscript pairs"); 79 STATISTIC(CoupledSubscriptPairs, "Coupled subscript pairs"); 80 STATISTIC(NonlinearSubscriptPairs, "Nonlinear subscript pairs"); 81 STATISTIC(ZIVapplications, "ZIV applications"); 82 STATISTIC(ZIVindependence, "ZIV independence"); 83 STATISTIC(StrongSIVapplications, "Strong SIV applications"); 84 STATISTIC(StrongSIVsuccesses, "Strong SIV successes"); 85 STATISTIC(StrongSIVindependence, "Strong SIV independence"); 86 STATISTIC(WeakCrossingSIVapplications, "Weak-Crossing SIV applications"); 87 STATISTIC(WeakCrossingSIVsuccesses, "Weak-Crossing SIV successes"); 88 STATISTIC(WeakCrossingSIVindependence, "Weak-Crossing SIV independence"); 89 STATISTIC(ExactSIVapplications, "Exact SIV applications"); 90 STATISTIC(ExactSIVsuccesses, "Exact SIV successes"); 91 STATISTIC(ExactSIVindependence, "Exact SIV independence"); 92 STATISTIC(WeakZeroSIVapplications, "Weak-Zero SIV applications"); 93 STATISTIC(WeakZeroSIVsuccesses, "Weak-Zero SIV successes"); 94 STATISTIC(WeakZeroSIVindependence, "Weak-Zero SIV independence"); 95 STATISTIC(ExactRDIVapplications, "Exact RDIV applications"); 96 STATISTIC(ExactRDIVindependence, "Exact RDIV independence"); 97 STATISTIC(SymbolicRDIVapplications, "Symbolic RDIV applications"); 98 STATISTIC(SymbolicRDIVindependence, "Symbolic RDIV independence"); 99 STATISTIC(DeltaApplications, "Delta applications"); 100 STATISTIC(DeltaSuccesses, "Delta successes"); 101 STATISTIC(DeltaIndependence, "Delta independence"); 102 STATISTIC(DeltaPropagations, "Delta propagations"); 103 STATISTIC(GCDapplications, "GCD applications"); 104 STATISTIC(GCDsuccesses, "GCD successes"); 105 STATISTIC(GCDindependence, "GCD independence"); 106 STATISTIC(BanerjeeApplications, "Banerjee applications"); 107 STATISTIC(BanerjeeIndependence, "Banerjee independence"); 108 STATISTIC(BanerjeeSuccesses, "Banerjee successes"); 109 110 static cl::opt<bool> 111 Delinearize("da-delinearize", cl::init(false), cl::Hidden, cl::ZeroOrMore, 112 cl::desc("Try to delinearize array references.")); 113 114 //===----------------------------------------------------------------------===// 115 // basics 116 117 INITIALIZE_PASS_BEGIN(DependenceAnalysis, "da", 118 "Dependence Analysis", true, true) 119 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 120 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) 121 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 122 INITIALIZE_PASS_END(DependenceAnalysis, "da", 123 "Dependence Analysis", true, true) 124 125 char DependenceAnalysis::ID = 0; 126 127 128 FunctionPass *llvm::createDependenceAnalysisPass() { 129 return new DependenceAnalysis(); 130 } 131 132 133 bool DependenceAnalysis::runOnFunction(Function &F) { 134 this->F = &F; 135 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); 136 SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 137 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 138 return false; 139 } 140 141 142 void DependenceAnalysis::releaseMemory() { 143 } 144 145 146 void DependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { 147 AU.setPreservesAll(); 148 AU.addRequiredTransitive<AAResultsWrapperPass>(); 149 AU.addRequiredTransitive<ScalarEvolutionWrapperPass>(); 150 AU.addRequiredTransitive<LoopInfoWrapperPass>(); 151 } 152 153 154 // Used to test the dependence analyzer. 155 // Looks through the function, noting loads and stores. 156 // Calls depends() on every possible pair and prints out the result. 157 // Ignores all other instructions. 158 static 159 void dumpExampleDependence(raw_ostream &OS, Function *F, 160 DependenceAnalysis *DA) { 161 for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F); 162 SrcI != SrcE; ++SrcI) { 163 if (isa<StoreInst>(*SrcI) || isa<LoadInst>(*SrcI)) { 164 for (inst_iterator DstI = SrcI, DstE = inst_end(F); 165 DstI != DstE; ++DstI) { 166 if (isa<StoreInst>(*DstI) || isa<LoadInst>(*DstI)) { 167 OS << "da analyze - "; 168 if (auto D = DA->depends(&*SrcI, &*DstI, true)) { 169 D->dump(OS); 170 for (unsigned Level = 1; Level <= D->getLevels(); Level++) { 171 if (D->isSplitable(Level)) { 172 OS << "da analyze - split level = " << Level; 173 OS << ", iteration = " << *DA->getSplitIteration(*D, Level); 174 OS << "!\n"; 175 } 176 } 177 } 178 else 179 OS << "none!\n"; 180 } 181 } 182 } 183 } 184 } 185 186 187 void DependenceAnalysis::print(raw_ostream &OS, const Module*) const { 188 dumpExampleDependence(OS, F, const_cast<DependenceAnalysis *>(this)); 189 } 190 191 //===----------------------------------------------------------------------===// 192 // Dependence methods 193 194 // Returns true if this is an input dependence. 195 bool Dependence::isInput() const { 196 return Src->mayReadFromMemory() && Dst->mayReadFromMemory(); 197 } 198 199 200 // Returns true if this is an output dependence. 201 bool Dependence::isOutput() const { 202 return Src->mayWriteToMemory() && Dst->mayWriteToMemory(); 203 } 204 205 206 // Returns true if this is an flow (aka true) dependence. 207 bool Dependence::isFlow() const { 208 return Src->mayWriteToMemory() && Dst->mayReadFromMemory(); 209 } 210 211 212 // Returns true if this is an anti dependence. 213 bool Dependence::isAnti() const { 214 return Src->mayReadFromMemory() && Dst->mayWriteToMemory(); 215 } 216 217 218 // Returns true if a particular level is scalar; that is, 219 // if no subscript in the source or destination mention the induction 220 // variable associated with the loop at this level. 221 // Leave this out of line, so it will serve as a virtual method anchor 222 bool Dependence::isScalar(unsigned level) const { 223 return false; 224 } 225 226 227 //===----------------------------------------------------------------------===// 228 // FullDependence methods 229 230 FullDependence::FullDependence(Instruction *Source, Instruction *Destination, 231 bool PossiblyLoopIndependent, 232 unsigned CommonLevels) 233 : Dependence(Source, Destination), Levels(CommonLevels), 234 LoopIndependent(PossiblyLoopIndependent) { 235 Consistent = true; 236 if (CommonLevels) 237 DV = make_unique<DVEntry[]>(CommonLevels); 238 } 239 240 // The rest are simple getters that hide the implementation. 241 242 // getDirection - Returns the direction associated with a particular level. 243 unsigned FullDependence::getDirection(unsigned Level) const { 244 assert(0 < Level && Level <= Levels && "Level out of range"); 245 return DV[Level - 1].Direction; 246 } 247 248 249 // Returns the distance (or NULL) associated with a particular level. 250 const SCEV *FullDependence::getDistance(unsigned Level) const { 251 assert(0 < Level && Level <= Levels && "Level out of range"); 252 return DV[Level - 1].Distance; 253 } 254 255 256 // Returns true if a particular level is scalar; that is, 257 // if no subscript in the source or destination mention the induction 258 // variable associated with the loop at this level. 259 bool FullDependence::isScalar(unsigned Level) const { 260 assert(0 < Level && Level <= Levels && "Level out of range"); 261 return DV[Level - 1].Scalar; 262 } 263 264 265 // Returns true if peeling the first iteration from this loop 266 // will break this dependence. 267 bool FullDependence::isPeelFirst(unsigned Level) const { 268 assert(0 < Level && Level <= Levels && "Level out of range"); 269 return DV[Level - 1].PeelFirst; 270 } 271 272 273 // Returns true if peeling the last iteration from this loop 274 // will break this dependence. 275 bool FullDependence::isPeelLast(unsigned Level) const { 276 assert(0 < Level && Level <= Levels && "Level out of range"); 277 return DV[Level - 1].PeelLast; 278 } 279 280 281 // Returns true if splitting this loop will break the dependence. 282 bool FullDependence::isSplitable(unsigned Level) const { 283 assert(0 < Level && Level <= Levels && "Level out of range"); 284 return DV[Level - 1].Splitable; 285 } 286 287 288 //===----------------------------------------------------------------------===// 289 // DependenceAnalysis::Constraint methods 290 291 // If constraint is a point <X, Y>, returns X. 292 // Otherwise assert. 293 const SCEV *DependenceAnalysis::Constraint::getX() const { 294 assert(Kind == Point && "Kind should be Point"); 295 return A; 296 } 297 298 299 // If constraint is a point <X, Y>, returns Y. 300 // Otherwise assert. 301 const SCEV *DependenceAnalysis::Constraint::getY() const { 302 assert(Kind == Point && "Kind should be Point"); 303 return B; 304 } 305 306 307 // If constraint is a line AX + BY = C, returns A. 308 // Otherwise assert. 309 const SCEV *DependenceAnalysis::Constraint::getA() const { 310 assert((Kind == Line || Kind == Distance) && 311 "Kind should be Line (or Distance)"); 312 return A; 313 } 314 315 316 // If constraint is a line AX + BY = C, returns B. 317 // Otherwise assert. 318 const SCEV *DependenceAnalysis::Constraint::getB() const { 319 assert((Kind == Line || Kind == Distance) && 320 "Kind should be Line (or Distance)"); 321 return B; 322 } 323 324 325 // If constraint is a line AX + BY = C, returns C. 326 // Otherwise assert. 327 const SCEV *DependenceAnalysis::Constraint::getC() const { 328 assert((Kind == Line || Kind == Distance) && 329 "Kind should be Line (or Distance)"); 330 return C; 331 } 332 333 334 // If constraint is a distance, returns D. 335 // Otherwise assert. 336 const SCEV *DependenceAnalysis::Constraint::getD() const { 337 assert(Kind == Distance && "Kind should be Distance"); 338 return SE->getNegativeSCEV(C); 339 } 340 341 342 // Returns the loop associated with this constraint. 343 const Loop *DependenceAnalysis::Constraint::getAssociatedLoop() const { 344 assert((Kind == Distance || Kind == Line || Kind == Point) && 345 "Kind should be Distance, Line, or Point"); 346 return AssociatedLoop; 347 } 348 349 350 void DependenceAnalysis::Constraint::setPoint(const SCEV *X, 351 const SCEV *Y, 352 const Loop *CurLoop) { 353 Kind = Point; 354 A = X; 355 B = Y; 356 AssociatedLoop = CurLoop; 357 } 358 359 360 void DependenceAnalysis::Constraint::setLine(const SCEV *AA, 361 const SCEV *BB, 362 const SCEV *CC, 363 const Loop *CurLoop) { 364 Kind = Line; 365 A = AA; 366 B = BB; 367 C = CC; 368 AssociatedLoop = CurLoop; 369 } 370 371 372 void DependenceAnalysis::Constraint::setDistance(const SCEV *D, 373 const Loop *CurLoop) { 374 Kind = Distance; 375 A = SE->getConstant(D->getType(), 1); 376 B = SE->getNegativeSCEV(A); 377 C = SE->getNegativeSCEV(D); 378 AssociatedLoop = CurLoop; 379 } 380 381 382 void DependenceAnalysis::Constraint::setEmpty() { 383 Kind = Empty; 384 } 385 386 387 void DependenceAnalysis::Constraint::setAny(ScalarEvolution *NewSE) { 388 SE = NewSE; 389 Kind = Any; 390 } 391 392 393 // For debugging purposes. Dumps the constraint out to OS. 394 void DependenceAnalysis::Constraint::dump(raw_ostream &OS) const { 395 if (isEmpty()) 396 OS << " Empty\n"; 397 else if (isAny()) 398 OS << " Any\n"; 399 else if (isPoint()) 400 OS << " Point is <" << *getX() << ", " << *getY() << ">\n"; 401 else if (isDistance()) 402 OS << " Distance is " << *getD() << 403 " (" << *getA() << "*X + " << *getB() << "*Y = " << *getC() << ")\n"; 404 else if (isLine()) 405 OS << " Line is " << *getA() << "*X + " << 406 *getB() << "*Y = " << *getC() << "\n"; 407 else 408 llvm_unreachable("unknown constraint type in Constraint::dump"); 409 } 410 411 412 // Updates X with the intersection 413 // of the Constraints X and Y. Returns true if X has changed. 414 // Corresponds to Figure 4 from the paper 415 // 416 // Practical Dependence Testing 417 // Goff, Kennedy, Tseng 418 // PLDI 1991 419 bool DependenceAnalysis::intersectConstraints(Constraint *X, 420 const Constraint *Y) { 421 ++DeltaApplications; 422 DEBUG(dbgs() << "\tintersect constraints\n"); 423 DEBUG(dbgs() << "\t X ="; X->dump(dbgs())); 424 DEBUG(dbgs() << "\t Y ="; Y->dump(dbgs())); 425 assert(!Y->isPoint() && "Y must not be a Point"); 426 if (X->isAny()) { 427 if (Y->isAny()) 428 return false; 429 *X = *Y; 430 return true; 431 } 432 if (X->isEmpty()) 433 return false; 434 if (Y->isEmpty()) { 435 X->setEmpty(); 436 return true; 437 } 438 439 if (X->isDistance() && Y->isDistance()) { 440 DEBUG(dbgs() << "\t intersect 2 distances\n"); 441 if (isKnownPredicate(CmpInst::ICMP_EQ, X->getD(), Y->getD())) 442 return false; 443 if (isKnownPredicate(CmpInst::ICMP_NE, X->getD(), Y->getD())) { 444 X->setEmpty(); 445 ++DeltaSuccesses; 446 return true; 447 } 448 // Hmmm, interesting situation. 449 // I guess if either is constant, keep it and ignore the other. 450 if (isa<SCEVConstant>(Y->getD())) { 451 *X = *Y; 452 return true; 453 } 454 return false; 455 } 456 457 // At this point, the pseudo-code in Figure 4 of the paper 458 // checks if (X->isPoint() && Y->isPoint()). 459 // This case can't occur in our implementation, 460 // since a Point can only arise as the result of intersecting 461 // two Line constraints, and the right-hand value, Y, is never 462 // the result of an intersection. 463 assert(!(X->isPoint() && Y->isPoint()) && 464 "We shouldn't ever see X->isPoint() && Y->isPoint()"); 465 466 if (X->isLine() && Y->isLine()) { 467 DEBUG(dbgs() << "\t intersect 2 lines\n"); 468 const SCEV *Prod1 = SE->getMulExpr(X->getA(), Y->getB()); 469 const SCEV *Prod2 = SE->getMulExpr(X->getB(), Y->getA()); 470 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) { 471 // slopes are equal, so lines are parallel 472 DEBUG(dbgs() << "\t\tsame slope\n"); 473 Prod1 = SE->getMulExpr(X->getC(), Y->getB()); 474 Prod2 = SE->getMulExpr(X->getB(), Y->getC()); 475 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) 476 return false; 477 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { 478 X->setEmpty(); 479 ++DeltaSuccesses; 480 return true; 481 } 482 return false; 483 } 484 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { 485 // slopes differ, so lines intersect 486 DEBUG(dbgs() << "\t\tdifferent slopes\n"); 487 const SCEV *C1B2 = SE->getMulExpr(X->getC(), Y->getB()); 488 const SCEV *C1A2 = SE->getMulExpr(X->getC(), Y->getA()); 489 const SCEV *C2B1 = SE->getMulExpr(Y->getC(), X->getB()); 490 const SCEV *C2A1 = SE->getMulExpr(Y->getC(), X->getA()); 491 const SCEV *A1B2 = SE->getMulExpr(X->getA(), Y->getB()); 492 const SCEV *A2B1 = SE->getMulExpr(Y->getA(), X->getB()); 493 const SCEVConstant *C1A2_C2A1 = 494 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1A2, C2A1)); 495 const SCEVConstant *C1B2_C2B1 = 496 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1B2, C2B1)); 497 const SCEVConstant *A1B2_A2B1 = 498 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A1B2, A2B1)); 499 const SCEVConstant *A2B1_A1B2 = 500 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A2B1, A1B2)); 501 if (!C1B2_C2B1 || !C1A2_C2A1 || 502 !A1B2_A2B1 || !A2B1_A1B2) 503 return false; 504 APInt Xtop = C1B2_C2B1->getValue()->getValue(); 505 APInt Xbot = A1B2_A2B1->getValue()->getValue(); 506 APInt Ytop = C1A2_C2A1->getValue()->getValue(); 507 APInt Ybot = A2B1_A1B2->getValue()->getValue(); 508 DEBUG(dbgs() << "\t\tXtop = " << Xtop << "\n"); 509 DEBUG(dbgs() << "\t\tXbot = " << Xbot << "\n"); 510 DEBUG(dbgs() << "\t\tYtop = " << Ytop << "\n"); 511 DEBUG(dbgs() << "\t\tYbot = " << Ybot << "\n"); 512 APInt Xq = Xtop; // these need to be initialized, even 513 APInt Xr = Xtop; // though they're just going to be overwritten 514 APInt::sdivrem(Xtop, Xbot, Xq, Xr); 515 APInt Yq = Ytop; 516 APInt Yr = Ytop; 517 APInt::sdivrem(Ytop, Ybot, Yq, Yr); 518 if (Xr != 0 || Yr != 0) { 519 X->setEmpty(); 520 ++DeltaSuccesses; 521 return true; 522 } 523 DEBUG(dbgs() << "\t\tX = " << Xq << ", Y = " << Yq << "\n"); 524 if (Xq.slt(0) || Yq.slt(0)) { 525 X->setEmpty(); 526 ++DeltaSuccesses; 527 return true; 528 } 529 if (const SCEVConstant *CUB = 530 collectConstantUpperBound(X->getAssociatedLoop(), Prod1->getType())) { 531 APInt UpperBound = CUB->getValue()->getValue(); 532 DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n"); 533 if (Xq.sgt(UpperBound) || Yq.sgt(UpperBound)) { 534 X->setEmpty(); 535 ++DeltaSuccesses; 536 return true; 537 } 538 } 539 X->setPoint(SE->getConstant(Xq), 540 SE->getConstant(Yq), 541 X->getAssociatedLoop()); 542 ++DeltaSuccesses; 543 return true; 544 } 545 return false; 546 } 547 548 // if (X->isLine() && Y->isPoint()) This case can't occur. 549 assert(!(X->isLine() && Y->isPoint()) && "This case should never occur"); 550 551 if (X->isPoint() && Y->isLine()) { 552 DEBUG(dbgs() << "\t intersect Point and Line\n"); 553 const SCEV *A1X1 = SE->getMulExpr(Y->getA(), X->getX()); 554 const SCEV *B1Y1 = SE->getMulExpr(Y->getB(), X->getY()); 555 const SCEV *Sum = SE->getAddExpr(A1X1, B1Y1); 556 if (isKnownPredicate(CmpInst::ICMP_EQ, Sum, Y->getC())) 557 return false; 558 if (isKnownPredicate(CmpInst::ICMP_NE, Sum, Y->getC())) { 559 X->setEmpty(); 560 ++DeltaSuccesses; 561 return true; 562 } 563 return false; 564 } 565 566 llvm_unreachable("shouldn't reach the end of Constraint intersection"); 567 return false; 568 } 569 570 571 //===----------------------------------------------------------------------===// 572 // DependenceAnalysis methods 573 574 // For debugging purposes. Dumps a dependence to OS. 575 void Dependence::dump(raw_ostream &OS) const { 576 bool Splitable = false; 577 if (isConfused()) 578 OS << "confused"; 579 else { 580 if (isConsistent()) 581 OS << "consistent "; 582 if (isFlow()) 583 OS << "flow"; 584 else if (isOutput()) 585 OS << "output"; 586 else if (isAnti()) 587 OS << "anti"; 588 else if (isInput()) 589 OS << "input"; 590 unsigned Levels = getLevels(); 591 OS << " ["; 592 for (unsigned II = 1; II <= Levels; ++II) { 593 if (isSplitable(II)) 594 Splitable = true; 595 if (isPeelFirst(II)) 596 OS << 'p'; 597 const SCEV *Distance = getDistance(II); 598 if (Distance) 599 OS << *Distance; 600 else if (isScalar(II)) 601 OS << "S"; 602 else { 603 unsigned Direction = getDirection(II); 604 if (Direction == DVEntry::ALL) 605 OS << "*"; 606 else { 607 if (Direction & DVEntry::LT) 608 OS << "<"; 609 if (Direction & DVEntry::EQ) 610 OS << "="; 611 if (Direction & DVEntry::GT) 612 OS << ">"; 613 } 614 } 615 if (isPeelLast(II)) 616 OS << 'p'; 617 if (II < Levels) 618 OS << " "; 619 } 620 if (isLoopIndependent()) 621 OS << "|<"; 622 OS << "]"; 623 if (Splitable) 624 OS << " splitable"; 625 } 626 OS << "!\n"; 627 } 628 629 static AliasResult underlyingObjectsAlias(AliasAnalysis *AA, 630 const DataLayout &DL, const Value *A, 631 const Value *B) { 632 const Value *AObj = GetUnderlyingObject(A, DL); 633 const Value *BObj = GetUnderlyingObject(B, DL); 634 return AA->alias(AObj, DL.getTypeStoreSize(AObj->getType()), 635 BObj, DL.getTypeStoreSize(BObj->getType())); 636 } 637 638 639 // Returns true if the load or store can be analyzed. Atomic and volatile 640 // operations have properties which this analysis does not understand. 641 static 642 bool isLoadOrStore(const Instruction *I) { 643 if (const LoadInst *LI = dyn_cast<LoadInst>(I)) 644 return LI->isUnordered(); 645 else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) 646 return SI->isUnordered(); 647 return false; 648 } 649 650 651 static 652 Value *getPointerOperand(Instruction *I) { 653 if (LoadInst *LI = dyn_cast<LoadInst>(I)) 654 return LI->getPointerOperand(); 655 if (StoreInst *SI = dyn_cast<StoreInst>(I)) 656 return SI->getPointerOperand(); 657 llvm_unreachable("Value is not load or store instruction"); 658 return nullptr; 659 } 660 661 662 // Examines the loop nesting of the Src and Dst 663 // instructions and establishes their shared loops. Sets the variables 664 // CommonLevels, SrcLevels, and MaxLevels. 665 // The source and destination instructions needn't be contained in the same 666 // loop. The routine establishNestingLevels finds the level of most deeply 667 // nested loop that contains them both, CommonLevels. An instruction that's 668 // not contained in a loop is at level = 0. MaxLevels is equal to the level 669 // of the source plus the level of the destination, minus CommonLevels. 670 // This lets us allocate vectors MaxLevels in length, with room for every 671 // distinct loop referenced in both the source and destination subscripts. 672 // The variable SrcLevels is the nesting depth of the source instruction. 673 // It's used to help calculate distinct loops referenced by the destination. 674 // Here's the map from loops to levels: 675 // 0 - unused 676 // 1 - outermost common loop 677 // ... - other common loops 678 // CommonLevels - innermost common loop 679 // ... - loops containing Src but not Dst 680 // SrcLevels - innermost loop containing Src but not Dst 681 // ... - loops containing Dst but not Src 682 // MaxLevels - innermost loops containing Dst but not Src 683 // Consider the follow code fragment: 684 // for (a = ...) { 685 // for (b = ...) { 686 // for (c = ...) { 687 // for (d = ...) { 688 // A[] = ...; 689 // } 690 // } 691 // for (e = ...) { 692 // for (f = ...) { 693 // for (g = ...) { 694 // ... = A[]; 695 // } 696 // } 697 // } 698 // } 699 // } 700 // If we're looking at the possibility of a dependence between the store 701 // to A (the Src) and the load from A (the Dst), we'll note that they 702 // have 2 loops in common, so CommonLevels will equal 2 and the direction 703 // vector for Result will have 2 entries. SrcLevels = 4 and MaxLevels = 7. 704 // A map from loop names to loop numbers would look like 705 // a - 1 706 // b - 2 = CommonLevels 707 // c - 3 708 // d - 4 = SrcLevels 709 // e - 5 710 // f - 6 711 // g - 7 = MaxLevels 712 void DependenceAnalysis::establishNestingLevels(const Instruction *Src, 713 const Instruction *Dst) { 714 const BasicBlock *SrcBlock = Src->getParent(); 715 const BasicBlock *DstBlock = Dst->getParent(); 716 unsigned SrcLevel = LI->getLoopDepth(SrcBlock); 717 unsigned DstLevel = LI->getLoopDepth(DstBlock); 718 const Loop *SrcLoop = LI->getLoopFor(SrcBlock); 719 const Loop *DstLoop = LI->getLoopFor(DstBlock); 720 SrcLevels = SrcLevel; 721 MaxLevels = SrcLevel + DstLevel; 722 while (SrcLevel > DstLevel) { 723 SrcLoop = SrcLoop->getParentLoop(); 724 SrcLevel--; 725 } 726 while (DstLevel > SrcLevel) { 727 DstLoop = DstLoop->getParentLoop(); 728 DstLevel--; 729 } 730 while (SrcLoop != DstLoop) { 731 SrcLoop = SrcLoop->getParentLoop(); 732 DstLoop = DstLoop->getParentLoop(); 733 SrcLevel--; 734 } 735 CommonLevels = SrcLevel; 736 MaxLevels -= CommonLevels; 737 } 738 739 740 // Given one of the loops containing the source, return 741 // its level index in our numbering scheme. 742 unsigned DependenceAnalysis::mapSrcLoop(const Loop *SrcLoop) const { 743 return SrcLoop->getLoopDepth(); 744 } 745 746 747 // Given one of the loops containing the destination, 748 // return its level index in our numbering scheme. 749 unsigned DependenceAnalysis::mapDstLoop(const Loop *DstLoop) const { 750 unsigned D = DstLoop->getLoopDepth(); 751 if (D > CommonLevels) 752 return D - CommonLevels + SrcLevels; 753 else 754 return D; 755 } 756 757 758 // Returns true if Expression is loop invariant in LoopNest. 759 bool DependenceAnalysis::isLoopInvariant(const SCEV *Expression, 760 const Loop *LoopNest) const { 761 if (!LoopNest) 762 return true; 763 return SE->isLoopInvariant(Expression, LoopNest) && 764 isLoopInvariant(Expression, LoopNest->getParentLoop()); 765 } 766 767 768 769 // Finds the set of loops from the LoopNest that 770 // have a level <= CommonLevels and are referred to by the SCEV Expression. 771 void DependenceAnalysis::collectCommonLoops(const SCEV *Expression, 772 const Loop *LoopNest, 773 SmallBitVector &Loops) const { 774 while (LoopNest) { 775 unsigned Level = LoopNest->getLoopDepth(); 776 if (Level <= CommonLevels && !SE->isLoopInvariant(Expression, LoopNest)) 777 Loops.set(Level); 778 LoopNest = LoopNest->getParentLoop(); 779 } 780 } 781 782 void DependenceAnalysis::unifySubscriptType(ArrayRef<Subscript *> Pairs) { 783 784 unsigned widestWidthSeen = 0; 785 Type *widestType; 786 787 // Go through each pair and find the widest bit to which we need 788 // to extend all of them. 789 for (unsigned i = 0; i < Pairs.size(); i++) { 790 const SCEV *Src = Pairs[i]->Src; 791 const SCEV *Dst = Pairs[i]->Dst; 792 IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType()); 793 IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType()); 794 if (SrcTy == nullptr || DstTy == nullptr) { 795 assert(SrcTy == DstTy && "This function only unify integer types and " 796 "expect Src and Dst share the same type " 797 "otherwise."); 798 continue; 799 } 800 if (SrcTy->getBitWidth() > widestWidthSeen) { 801 widestWidthSeen = SrcTy->getBitWidth(); 802 widestType = SrcTy; 803 } 804 if (DstTy->getBitWidth() > widestWidthSeen) { 805 widestWidthSeen = DstTy->getBitWidth(); 806 widestType = DstTy; 807 } 808 } 809 810 811 assert(widestWidthSeen > 0); 812 813 // Now extend each pair to the widest seen. 814 for (unsigned i = 0; i < Pairs.size(); i++) { 815 const SCEV *Src = Pairs[i]->Src; 816 const SCEV *Dst = Pairs[i]->Dst; 817 IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType()); 818 IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType()); 819 if (SrcTy == nullptr || DstTy == nullptr) { 820 assert(SrcTy == DstTy && "This function only unify integer types and " 821 "expect Src and Dst share the same type " 822 "otherwise."); 823 continue; 824 } 825 if (SrcTy->getBitWidth() < widestWidthSeen) 826 // Sign-extend Src to widestType 827 Pairs[i]->Src = SE->getSignExtendExpr(Src, widestType); 828 if (DstTy->getBitWidth() < widestWidthSeen) { 829 // Sign-extend Dst to widestType 830 Pairs[i]->Dst = SE->getSignExtendExpr(Dst, widestType); 831 } 832 } 833 } 834 835 // removeMatchingExtensions - Examines a subscript pair. 836 // If the source and destination are identically sign (or zero) 837 // extended, it strips off the extension in an effect to simplify 838 // the actual analysis. 839 void DependenceAnalysis::removeMatchingExtensions(Subscript *Pair) { 840 const SCEV *Src = Pair->Src; 841 const SCEV *Dst = Pair->Dst; 842 if ((isa<SCEVZeroExtendExpr>(Src) && isa<SCEVZeroExtendExpr>(Dst)) || 843 (isa<SCEVSignExtendExpr>(Src) && isa<SCEVSignExtendExpr>(Dst))) { 844 const SCEVCastExpr *SrcCast = cast<SCEVCastExpr>(Src); 845 const SCEVCastExpr *DstCast = cast<SCEVCastExpr>(Dst); 846 const SCEV *SrcCastOp = SrcCast->getOperand(); 847 const SCEV *DstCastOp = DstCast->getOperand(); 848 if (SrcCastOp->getType() == DstCastOp->getType()) { 849 Pair->Src = SrcCastOp; 850 Pair->Dst = DstCastOp; 851 } 852 } 853 } 854 855 856 // Examine the scev and return true iff it's linear. 857 // Collect any loops mentioned in the set of "Loops". 858 bool DependenceAnalysis::checkSrcSubscript(const SCEV *Src, 859 const Loop *LoopNest, 860 SmallBitVector &Loops) { 861 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Src); 862 if (!AddRec) 863 return isLoopInvariant(Src, LoopNest); 864 const SCEV *Start = AddRec->getStart(); 865 const SCEV *Step = AddRec->getStepRecurrence(*SE); 866 const SCEV *UB = SE->getBackedgeTakenCount(AddRec->getLoop()); 867 if (!isa<SCEVCouldNotCompute>(UB)) { 868 if (SE->getTypeSizeInBits(Start->getType()) < 869 SE->getTypeSizeInBits(UB->getType())) { 870 if (!AddRec->getNoWrapFlags()) 871 return false; 872 } 873 } 874 if (!isLoopInvariant(Step, LoopNest)) 875 return false; 876 Loops.set(mapSrcLoop(AddRec->getLoop())); 877 return checkSrcSubscript(Start, LoopNest, Loops); 878 } 879 880 881 882 // Examine the scev and return true iff it's linear. 883 // Collect any loops mentioned in the set of "Loops". 884 bool DependenceAnalysis::checkDstSubscript(const SCEV *Dst, 885 const Loop *LoopNest, 886 SmallBitVector &Loops) { 887 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Dst); 888 if (!AddRec) 889 return isLoopInvariant(Dst, LoopNest); 890 const SCEV *Start = AddRec->getStart(); 891 const SCEV *Step = AddRec->getStepRecurrence(*SE); 892 const SCEV *UB = SE->getBackedgeTakenCount(AddRec->getLoop()); 893 if (!isa<SCEVCouldNotCompute>(UB)) { 894 if (SE->getTypeSizeInBits(Start->getType()) < 895 SE->getTypeSizeInBits(UB->getType())) { 896 if (!AddRec->getNoWrapFlags()) 897 return false; 898 } 899 } 900 if (!isLoopInvariant(Step, LoopNest)) 901 return false; 902 Loops.set(mapDstLoop(AddRec->getLoop())); 903 return checkDstSubscript(Start, LoopNest, Loops); 904 } 905 906 907 // Examines the subscript pair (the Src and Dst SCEVs) 908 // and classifies it as either ZIV, SIV, RDIV, MIV, or Nonlinear. 909 // Collects the associated loops in a set. 910 DependenceAnalysis::Subscript::ClassificationKind 911 DependenceAnalysis::classifyPair(const SCEV *Src, const Loop *SrcLoopNest, 912 const SCEV *Dst, const Loop *DstLoopNest, 913 SmallBitVector &Loops) { 914 SmallBitVector SrcLoops(MaxLevels + 1); 915 SmallBitVector DstLoops(MaxLevels + 1); 916 if (!checkSrcSubscript(Src, SrcLoopNest, SrcLoops)) 917 return Subscript::NonLinear; 918 if (!checkDstSubscript(Dst, DstLoopNest, DstLoops)) 919 return Subscript::NonLinear; 920 Loops = SrcLoops; 921 Loops |= DstLoops; 922 unsigned N = Loops.count(); 923 if (N == 0) 924 return Subscript::ZIV; 925 if (N == 1) 926 return Subscript::SIV; 927 if (N == 2 && (SrcLoops.count() == 0 || 928 DstLoops.count() == 0 || 929 (SrcLoops.count() == 1 && DstLoops.count() == 1))) 930 return Subscript::RDIV; 931 return Subscript::MIV; 932 } 933 934 935 // A wrapper around SCEV::isKnownPredicate. 936 // Looks for cases where we're interested in comparing for equality. 937 // If both X and Y have been identically sign or zero extended, 938 // it strips off the (confusing) extensions before invoking 939 // SCEV::isKnownPredicate. Perhaps, someday, the ScalarEvolution package 940 // will be similarly updated. 941 // 942 // If SCEV::isKnownPredicate can't prove the predicate, 943 // we try simple subtraction, which seems to help in some cases 944 // involving symbolics. 945 bool DependenceAnalysis::isKnownPredicate(ICmpInst::Predicate Pred, 946 const SCEV *X, 947 const SCEV *Y) const { 948 if (Pred == CmpInst::ICMP_EQ || 949 Pred == CmpInst::ICMP_NE) { 950 if ((isa<SCEVSignExtendExpr>(X) && 951 isa<SCEVSignExtendExpr>(Y)) || 952 (isa<SCEVZeroExtendExpr>(X) && 953 isa<SCEVZeroExtendExpr>(Y))) { 954 const SCEVCastExpr *CX = cast<SCEVCastExpr>(X); 955 const SCEVCastExpr *CY = cast<SCEVCastExpr>(Y); 956 const SCEV *Xop = CX->getOperand(); 957 const SCEV *Yop = CY->getOperand(); 958 if (Xop->getType() == Yop->getType()) { 959 X = Xop; 960 Y = Yop; 961 } 962 } 963 } 964 if (SE->isKnownPredicate(Pred, X, Y)) 965 return true; 966 // If SE->isKnownPredicate can't prove the condition, 967 // we try the brute-force approach of subtracting 968 // and testing the difference. 969 // By testing with SE->isKnownPredicate first, we avoid 970 // the possibility of overflow when the arguments are constants. 971 const SCEV *Delta = SE->getMinusSCEV(X, Y); 972 switch (Pred) { 973 case CmpInst::ICMP_EQ: 974 return Delta->isZero(); 975 case CmpInst::ICMP_NE: 976 return SE->isKnownNonZero(Delta); 977 case CmpInst::ICMP_SGE: 978 return SE->isKnownNonNegative(Delta); 979 case CmpInst::ICMP_SLE: 980 return SE->isKnownNonPositive(Delta); 981 case CmpInst::ICMP_SGT: 982 return SE->isKnownPositive(Delta); 983 case CmpInst::ICMP_SLT: 984 return SE->isKnownNegative(Delta); 985 default: 986 llvm_unreachable("unexpected predicate in isKnownPredicate"); 987 } 988 } 989 990 991 // All subscripts are all the same type. 992 // Loop bound may be smaller (e.g., a char). 993 // Should zero extend loop bound, since it's always >= 0. 994 // This routine collects upper bound and extends or truncates if needed. 995 // Truncating is safe when subscripts are known not to wrap. Cases without 996 // nowrap flags should have been rejected earlier. 997 // Return null if no bound available. 998 const SCEV *DependenceAnalysis::collectUpperBound(const Loop *L, 999 Type *T) const { 1000 if (SE->hasLoopInvariantBackedgeTakenCount(L)) { 1001 const SCEV *UB = SE->getBackedgeTakenCount(L); 1002 return SE->getTruncateOrZeroExtend(UB, T); 1003 } 1004 return nullptr; 1005 } 1006 1007 1008 // Calls collectUpperBound(), then attempts to cast it to SCEVConstant. 1009 // If the cast fails, returns NULL. 1010 const SCEVConstant *DependenceAnalysis::collectConstantUpperBound(const Loop *L, 1011 Type *T 1012 ) const { 1013 if (const SCEV *UB = collectUpperBound(L, T)) 1014 return dyn_cast<SCEVConstant>(UB); 1015 return nullptr; 1016 } 1017 1018 1019 // testZIV - 1020 // When we have a pair of subscripts of the form [c1] and [c2], 1021 // where c1 and c2 are both loop invariant, we attack it using 1022 // the ZIV test. Basically, we test by comparing the two values, 1023 // but there are actually three possible results: 1024 // 1) the values are equal, so there's a dependence 1025 // 2) the values are different, so there's no dependence 1026 // 3) the values might be equal, so we have to assume a dependence. 1027 // 1028 // Return true if dependence disproved. 1029 bool DependenceAnalysis::testZIV(const SCEV *Src, 1030 const SCEV *Dst, 1031 FullDependence &Result) const { 1032 DEBUG(dbgs() << " src = " << *Src << "\n"); 1033 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 1034 ++ZIVapplications; 1035 if (isKnownPredicate(CmpInst::ICMP_EQ, Src, Dst)) { 1036 DEBUG(dbgs() << " provably dependent\n"); 1037 return false; // provably dependent 1038 } 1039 if (isKnownPredicate(CmpInst::ICMP_NE, Src, Dst)) { 1040 DEBUG(dbgs() << " provably independent\n"); 1041 ++ZIVindependence; 1042 return true; // provably independent 1043 } 1044 DEBUG(dbgs() << " possibly dependent\n"); 1045 Result.Consistent = false; 1046 return false; // possibly dependent 1047 } 1048 1049 1050 // strongSIVtest - 1051 // From the paper, Practical Dependence Testing, Section 4.2.1 1052 // 1053 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 + a*i], 1054 // where i is an induction variable, c1 and c2 are loop invariant, 1055 // and a is a constant, we can solve it exactly using the Strong SIV test. 1056 // 1057 // Can prove independence. Failing that, can compute distance (and direction). 1058 // In the presence of symbolic terms, we can sometimes make progress. 1059 // 1060 // If there's a dependence, 1061 // 1062 // c1 + a*i = c2 + a*i' 1063 // 1064 // The dependence distance is 1065 // 1066 // d = i' - i = (c1 - c2)/a 1067 // 1068 // A dependence only exists if d is an integer and abs(d) <= U, where U is the 1069 // loop's upper bound. If a dependence exists, the dependence direction is 1070 // defined as 1071 // 1072 // { < if d > 0 1073 // direction = { = if d = 0 1074 // { > if d < 0 1075 // 1076 // Return true if dependence disproved. 1077 bool DependenceAnalysis::strongSIVtest(const SCEV *Coeff, 1078 const SCEV *SrcConst, 1079 const SCEV *DstConst, 1080 const Loop *CurLoop, 1081 unsigned Level, 1082 FullDependence &Result, 1083 Constraint &NewConstraint) const { 1084 DEBUG(dbgs() << "\tStrong SIV test\n"); 1085 DEBUG(dbgs() << "\t Coeff = " << *Coeff); 1086 DEBUG(dbgs() << ", " << *Coeff->getType() << "\n"); 1087 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst); 1088 DEBUG(dbgs() << ", " << *SrcConst->getType() << "\n"); 1089 DEBUG(dbgs() << "\t DstConst = " << *DstConst); 1090 DEBUG(dbgs() << ", " << *DstConst->getType() << "\n"); 1091 ++StrongSIVapplications; 1092 assert(0 < Level && Level <= CommonLevels && "level out of range"); 1093 Level--; 1094 1095 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); 1096 DEBUG(dbgs() << "\t Delta = " << *Delta); 1097 DEBUG(dbgs() << ", " << *Delta->getType() << "\n"); 1098 1099 // check that |Delta| < iteration count 1100 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1101 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound); 1102 DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n"); 1103 const SCEV *AbsDelta = 1104 SE->isKnownNonNegative(Delta) ? Delta : SE->getNegativeSCEV(Delta); 1105 const SCEV *AbsCoeff = 1106 SE->isKnownNonNegative(Coeff) ? Coeff : SE->getNegativeSCEV(Coeff); 1107 const SCEV *Product = SE->getMulExpr(UpperBound, AbsCoeff); 1108 if (isKnownPredicate(CmpInst::ICMP_SGT, AbsDelta, Product)) { 1109 // Distance greater than trip count - no dependence 1110 ++StrongSIVindependence; 1111 ++StrongSIVsuccesses; 1112 return true; 1113 } 1114 } 1115 1116 // Can we compute distance? 1117 if (isa<SCEVConstant>(Delta) && isa<SCEVConstant>(Coeff)) { 1118 APInt ConstDelta = cast<SCEVConstant>(Delta)->getValue()->getValue(); 1119 APInt ConstCoeff = cast<SCEVConstant>(Coeff)->getValue()->getValue(); 1120 APInt Distance = ConstDelta; // these need to be initialized 1121 APInt Remainder = ConstDelta; 1122 APInt::sdivrem(ConstDelta, ConstCoeff, Distance, Remainder); 1123 DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); 1124 DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1125 // Make sure Coeff divides Delta exactly 1126 if (Remainder != 0) { 1127 // Coeff doesn't divide Distance, no dependence 1128 ++StrongSIVindependence; 1129 ++StrongSIVsuccesses; 1130 return true; 1131 } 1132 Result.DV[Level].Distance = SE->getConstant(Distance); 1133 NewConstraint.setDistance(SE->getConstant(Distance), CurLoop); 1134 if (Distance.sgt(0)) 1135 Result.DV[Level].Direction &= Dependence::DVEntry::LT; 1136 else if (Distance.slt(0)) 1137 Result.DV[Level].Direction &= Dependence::DVEntry::GT; 1138 else 1139 Result.DV[Level].Direction &= Dependence::DVEntry::EQ; 1140 ++StrongSIVsuccesses; 1141 } 1142 else if (Delta->isZero()) { 1143 // since 0/X == 0 1144 Result.DV[Level].Distance = Delta; 1145 NewConstraint.setDistance(Delta, CurLoop); 1146 Result.DV[Level].Direction &= Dependence::DVEntry::EQ; 1147 ++StrongSIVsuccesses; 1148 } 1149 else { 1150 if (Coeff->isOne()) { 1151 DEBUG(dbgs() << "\t Distance = " << *Delta << "\n"); 1152 Result.DV[Level].Distance = Delta; // since X/1 == X 1153 NewConstraint.setDistance(Delta, CurLoop); 1154 } 1155 else { 1156 Result.Consistent = false; 1157 NewConstraint.setLine(Coeff, 1158 SE->getNegativeSCEV(Coeff), 1159 SE->getNegativeSCEV(Delta), CurLoop); 1160 } 1161 1162 // maybe we can get a useful direction 1163 bool DeltaMaybeZero = !SE->isKnownNonZero(Delta); 1164 bool DeltaMaybePositive = !SE->isKnownNonPositive(Delta); 1165 bool DeltaMaybeNegative = !SE->isKnownNonNegative(Delta); 1166 bool CoeffMaybePositive = !SE->isKnownNonPositive(Coeff); 1167 bool CoeffMaybeNegative = !SE->isKnownNonNegative(Coeff); 1168 // The double negatives above are confusing. 1169 // It helps to read !SE->isKnownNonZero(Delta) 1170 // as "Delta might be Zero" 1171 unsigned NewDirection = Dependence::DVEntry::NONE; 1172 if ((DeltaMaybePositive && CoeffMaybePositive) || 1173 (DeltaMaybeNegative && CoeffMaybeNegative)) 1174 NewDirection = Dependence::DVEntry::LT; 1175 if (DeltaMaybeZero) 1176 NewDirection |= Dependence::DVEntry::EQ; 1177 if ((DeltaMaybeNegative && CoeffMaybePositive) || 1178 (DeltaMaybePositive && CoeffMaybeNegative)) 1179 NewDirection |= Dependence::DVEntry::GT; 1180 if (NewDirection < Result.DV[Level].Direction) 1181 ++StrongSIVsuccesses; 1182 Result.DV[Level].Direction &= NewDirection; 1183 } 1184 return false; 1185 } 1186 1187 1188 // weakCrossingSIVtest - 1189 // From the paper, Practical Dependence Testing, Section 4.2.2 1190 // 1191 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 - a*i], 1192 // where i is an induction variable, c1 and c2 are loop invariant, 1193 // and a is a constant, we can solve it exactly using the 1194 // Weak-Crossing SIV test. 1195 // 1196 // Given c1 + a*i = c2 - a*i', we can look for the intersection of 1197 // the two lines, where i = i', yielding 1198 // 1199 // c1 + a*i = c2 - a*i 1200 // 2a*i = c2 - c1 1201 // i = (c2 - c1)/2a 1202 // 1203 // If i < 0, there is no dependence. 1204 // If i > upperbound, there is no dependence. 1205 // If i = 0 (i.e., if c1 = c2), there's a dependence with distance = 0. 1206 // If i = upperbound, there's a dependence with distance = 0. 1207 // If i is integral, there's a dependence (all directions). 1208 // If the non-integer part = 1/2, there's a dependence (<> directions). 1209 // Otherwise, there's no dependence. 1210 // 1211 // Can prove independence. Failing that, 1212 // can sometimes refine the directions. 1213 // Can determine iteration for splitting. 1214 // 1215 // Return true if dependence disproved. 1216 bool DependenceAnalysis::weakCrossingSIVtest(const SCEV *Coeff, 1217 const SCEV *SrcConst, 1218 const SCEV *DstConst, 1219 const Loop *CurLoop, 1220 unsigned Level, 1221 FullDependence &Result, 1222 Constraint &NewConstraint, 1223 const SCEV *&SplitIter) const { 1224 DEBUG(dbgs() << "\tWeak-Crossing SIV test\n"); 1225 DEBUG(dbgs() << "\t Coeff = " << *Coeff << "\n"); 1226 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1227 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1228 ++WeakCrossingSIVapplications; 1229 assert(0 < Level && Level <= CommonLevels && "Level out of range"); 1230 Level--; 1231 Result.Consistent = false; 1232 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1233 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1234 NewConstraint.setLine(Coeff, Coeff, Delta, CurLoop); 1235 if (Delta->isZero()) { 1236 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT); 1237 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT); 1238 ++WeakCrossingSIVsuccesses; 1239 if (!Result.DV[Level].Direction) { 1240 ++WeakCrossingSIVindependence; 1241 return true; 1242 } 1243 Result.DV[Level].Distance = Delta; // = 0 1244 return false; 1245 } 1246 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Coeff); 1247 if (!ConstCoeff) 1248 return false; 1249 1250 Result.DV[Level].Splitable = true; 1251 if (SE->isKnownNegative(ConstCoeff)) { 1252 ConstCoeff = dyn_cast<SCEVConstant>(SE->getNegativeSCEV(ConstCoeff)); 1253 assert(ConstCoeff && 1254 "dynamic cast of negative of ConstCoeff should yield constant"); 1255 Delta = SE->getNegativeSCEV(Delta); 1256 } 1257 assert(SE->isKnownPositive(ConstCoeff) && "ConstCoeff should be positive"); 1258 1259 // compute SplitIter for use by DependenceAnalysis::getSplitIteration() 1260 SplitIter = 1261 SE->getUDivExpr(SE->getSMaxExpr(SE->getConstant(Delta->getType(), 0), 1262 Delta), 1263 SE->getMulExpr(SE->getConstant(Delta->getType(), 2), 1264 ConstCoeff)); 1265 DEBUG(dbgs() << "\t Split iter = " << *SplitIter << "\n"); 1266 1267 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1268 if (!ConstDelta) 1269 return false; 1270 1271 // We're certain that ConstCoeff > 0; therefore, 1272 // if Delta < 0, then no dependence. 1273 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1274 DEBUG(dbgs() << "\t ConstCoeff = " << *ConstCoeff << "\n"); 1275 if (SE->isKnownNegative(Delta)) { 1276 // No dependence, Delta < 0 1277 ++WeakCrossingSIVindependence; 1278 ++WeakCrossingSIVsuccesses; 1279 return true; 1280 } 1281 1282 // We're certain that Delta > 0 and ConstCoeff > 0. 1283 // Check Delta/(2*ConstCoeff) against upper loop bound 1284 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1285 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1286 const SCEV *ConstantTwo = SE->getConstant(UpperBound->getType(), 2); 1287 const SCEV *ML = SE->getMulExpr(SE->getMulExpr(ConstCoeff, UpperBound), 1288 ConstantTwo); 1289 DEBUG(dbgs() << "\t ML = " << *ML << "\n"); 1290 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, ML)) { 1291 // Delta too big, no dependence 1292 ++WeakCrossingSIVindependence; 1293 ++WeakCrossingSIVsuccesses; 1294 return true; 1295 } 1296 if (isKnownPredicate(CmpInst::ICMP_EQ, Delta, ML)) { 1297 // i = i' = UB 1298 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT); 1299 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT); 1300 ++WeakCrossingSIVsuccesses; 1301 if (!Result.DV[Level].Direction) { 1302 ++WeakCrossingSIVindependence; 1303 return true; 1304 } 1305 Result.DV[Level].Splitable = false; 1306 Result.DV[Level].Distance = SE->getConstant(Delta->getType(), 0); 1307 return false; 1308 } 1309 } 1310 1311 // check that Coeff divides Delta 1312 APInt APDelta = ConstDelta->getValue()->getValue(); 1313 APInt APCoeff = ConstCoeff->getValue()->getValue(); 1314 APInt Distance = APDelta; // these need to be initialzed 1315 APInt Remainder = APDelta; 1316 APInt::sdivrem(APDelta, APCoeff, Distance, Remainder); 1317 DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1318 if (Remainder != 0) { 1319 // Coeff doesn't divide Delta, no dependence 1320 ++WeakCrossingSIVindependence; 1321 ++WeakCrossingSIVsuccesses; 1322 return true; 1323 } 1324 DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); 1325 1326 // if 2*Coeff doesn't divide Delta, then the equal direction isn't possible 1327 APInt Two = APInt(Distance.getBitWidth(), 2, true); 1328 Remainder = Distance.srem(Two); 1329 DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1330 if (Remainder != 0) { 1331 // Equal direction isn't possible 1332 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::EQ); 1333 ++WeakCrossingSIVsuccesses; 1334 } 1335 return false; 1336 } 1337 1338 1339 // Kirch's algorithm, from 1340 // 1341 // Optimizing Supercompilers for Supercomputers 1342 // Michael Wolfe 1343 // MIT Press, 1989 1344 // 1345 // Program 2.1, page 29. 1346 // Computes the GCD of AM and BM. 1347 // Also finds a solution to the equation ax - by = gcd(a, b). 1348 // Returns true if dependence disproved; i.e., gcd does not divide Delta. 1349 static 1350 bool findGCD(unsigned Bits, APInt AM, APInt BM, APInt Delta, 1351 APInt &G, APInt &X, APInt &Y) { 1352 APInt A0(Bits, 1, true), A1(Bits, 0, true); 1353 APInt B0(Bits, 0, true), B1(Bits, 1, true); 1354 APInt G0 = AM.abs(); 1355 APInt G1 = BM.abs(); 1356 APInt Q = G0; // these need to be initialized 1357 APInt R = G0; 1358 APInt::sdivrem(G0, G1, Q, R); 1359 while (R != 0) { 1360 APInt A2 = A0 - Q*A1; A0 = A1; A1 = A2; 1361 APInt B2 = B0 - Q*B1; B0 = B1; B1 = B2; 1362 G0 = G1; G1 = R; 1363 APInt::sdivrem(G0, G1, Q, R); 1364 } 1365 G = G1; 1366 DEBUG(dbgs() << "\t GCD = " << G << "\n"); 1367 X = AM.slt(0) ? -A1 : A1; 1368 Y = BM.slt(0) ? B1 : -B1; 1369 1370 // make sure gcd divides Delta 1371 R = Delta.srem(G); 1372 if (R != 0) 1373 return true; // gcd doesn't divide Delta, no dependence 1374 Q = Delta.sdiv(G); 1375 X *= Q; 1376 Y *= Q; 1377 return false; 1378 } 1379 1380 1381 static 1382 APInt floorOfQuotient(APInt A, APInt B) { 1383 APInt Q = A; // these need to be initialized 1384 APInt R = A; 1385 APInt::sdivrem(A, B, Q, R); 1386 if (R == 0) 1387 return Q; 1388 if ((A.sgt(0) && B.sgt(0)) || 1389 (A.slt(0) && B.slt(0))) 1390 return Q; 1391 else 1392 return Q - 1; 1393 } 1394 1395 1396 static 1397 APInt ceilingOfQuotient(APInt A, APInt B) { 1398 APInt Q = A; // these need to be initialized 1399 APInt R = A; 1400 APInt::sdivrem(A, B, Q, R); 1401 if (R == 0) 1402 return Q; 1403 if ((A.sgt(0) && B.sgt(0)) || 1404 (A.slt(0) && B.slt(0))) 1405 return Q + 1; 1406 else 1407 return Q; 1408 } 1409 1410 1411 static 1412 APInt maxAPInt(APInt A, APInt B) { 1413 return A.sgt(B) ? A : B; 1414 } 1415 1416 1417 static 1418 APInt minAPInt(APInt A, APInt B) { 1419 return A.slt(B) ? A : B; 1420 } 1421 1422 1423 // exactSIVtest - 1424 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i], 1425 // where i is an induction variable, c1 and c2 are loop invariant, and a1 1426 // and a2 are constant, we can solve it exactly using an algorithm developed 1427 // by Banerjee and Wolfe. See Section 2.5.3 in 1428 // 1429 // Optimizing Supercompilers for Supercomputers 1430 // Michael Wolfe 1431 // MIT Press, 1989 1432 // 1433 // It's slower than the specialized tests (strong SIV, weak-zero SIV, etc), 1434 // so use them if possible. They're also a bit better with symbolics and, 1435 // in the case of the strong SIV test, can compute Distances. 1436 // 1437 // Return true if dependence disproved. 1438 bool DependenceAnalysis::exactSIVtest(const SCEV *SrcCoeff, 1439 const SCEV *DstCoeff, 1440 const SCEV *SrcConst, 1441 const SCEV *DstConst, 1442 const Loop *CurLoop, 1443 unsigned Level, 1444 FullDependence &Result, 1445 Constraint &NewConstraint) const { 1446 DEBUG(dbgs() << "\tExact SIV test\n"); 1447 DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); 1448 DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); 1449 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1450 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1451 ++ExactSIVapplications; 1452 assert(0 < Level && Level <= CommonLevels && "Level out of range"); 1453 Level--; 1454 Result.Consistent = false; 1455 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1456 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1457 NewConstraint.setLine(SrcCoeff, SE->getNegativeSCEV(DstCoeff), 1458 Delta, CurLoop); 1459 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1460 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1461 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1462 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) 1463 return false; 1464 1465 // find gcd 1466 APInt G, X, Y; 1467 APInt AM = ConstSrcCoeff->getValue()->getValue(); 1468 APInt BM = ConstDstCoeff->getValue()->getValue(); 1469 unsigned Bits = AM.getBitWidth(); 1470 if (findGCD(Bits, AM, BM, ConstDelta->getValue()->getValue(), G, X, Y)) { 1471 // gcd doesn't divide Delta, no dependence 1472 ++ExactSIVindependence; 1473 ++ExactSIVsuccesses; 1474 return true; 1475 } 1476 1477 DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); 1478 1479 // since SCEV construction normalizes, LM = 0 1480 APInt UM(Bits, 1, true); 1481 bool UMvalid = false; 1482 // UM is perhaps unavailable, let's check 1483 if (const SCEVConstant *CUB = 1484 collectConstantUpperBound(CurLoop, Delta->getType())) { 1485 UM = CUB->getValue()->getValue(); 1486 DEBUG(dbgs() << "\t UM = " << UM << "\n"); 1487 UMvalid = true; 1488 } 1489 1490 APInt TU(APInt::getSignedMaxValue(Bits)); 1491 APInt TL(APInt::getSignedMinValue(Bits)); 1492 1493 // test(BM/G, LM-X) and test(-BM/G, X-UM) 1494 APInt TMUL = BM.sdiv(G); 1495 if (TMUL.sgt(0)) { 1496 TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL)); 1497 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1498 if (UMvalid) { 1499 TU = minAPInt(TU, floorOfQuotient(UM - X, TMUL)); 1500 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1501 } 1502 } 1503 else { 1504 TU = minAPInt(TU, floorOfQuotient(-X, TMUL)); 1505 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1506 if (UMvalid) { 1507 TL = maxAPInt(TL, ceilingOfQuotient(UM - X, TMUL)); 1508 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1509 } 1510 } 1511 1512 // test(AM/G, LM-Y) and test(-AM/G, Y-UM) 1513 TMUL = AM.sdiv(G); 1514 if (TMUL.sgt(0)) { 1515 TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL)); 1516 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1517 if (UMvalid) { 1518 TU = minAPInt(TU, floorOfQuotient(UM - Y, TMUL)); 1519 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1520 } 1521 } 1522 else { 1523 TU = minAPInt(TU, floorOfQuotient(-Y, TMUL)); 1524 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1525 if (UMvalid) { 1526 TL = maxAPInt(TL, ceilingOfQuotient(UM - Y, TMUL)); 1527 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1528 } 1529 } 1530 if (TL.sgt(TU)) { 1531 ++ExactSIVindependence; 1532 ++ExactSIVsuccesses; 1533 return true; 1534 } 1535 1536 // explore directions 1537 unsigned NewDirection = Dependence::DVEntry::NONE; 1538 1539 // less than 1540 APInt SaveTU(TU); // save these 1541 APInt SaveTL(TL); 1542 DEBUG(dbgs() << "\t exploring LT direction\n"); 1543 TMUL = AM - BM; 1544 if (TMUL.sgt(0)) { 1545 TL = maxAPInt(TL, ceilingOfQuotient(X - Y + 1, TMUL)); 1546 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1547 } 1548 else { 1549 TU = minAPInt(TU, floorOfQuotient(X - Y + 1, TMUL)); 1550 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1551 } 1552 if (TL.sle(TU)) { 1553 NewDirection |= Dependence::DVEntry::LT; 1554 ++ExactSIVsuccesses; 1555 } 1556 1557 // equal 1558 TU = SaveTU; // restore 1559 TL = SaveTL; 1560 DEBUG(dbgs() << "\t exploring EQ direction\n"); 1561 if (TMUL.sgt(0)) { 1562 TL = maxAPInt(TL, ceilingOfQuotient(X - Y, TMUL)); 1563 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1564 } 1565 else { 1566 TU = minAPInt(TU, floorOfQuotient(X - Y, TMUL)); 1567 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1568 } 1569 TMUL = BM - AM; 1570 if (TMUL.sgt(0)) { 1571 TL = maxAPInt(TL, ceilingOfQuotient(Y - X, TMUL)); 1572 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1573 } 1574 else { 1575 TU = minAPInt(TU, floorOfQuotient(Y - X, TMUL)); 1576 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1577 } 1578 if (TL.sle(TU)) { 1579 NewDirection |= Dependence::DVEntry::EQ; 1580 ++ExactSIVsuccesses; 1581 } 1582 1583 // greater than 1584 TU = SaveTU; // restore 1585 TL = SaveTL; 1586 DEBUG(dbgs() << "\t exploring GT direction\n"); 1587 if (TMUL.sgt(0)) { 1588 TL = maxAPInt(TL, ceilingOfQuotient(Y - X + 1, TMUL)); 1589 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1590 } 1591 else { 1592 TU = minAPInt(TU, floorOfQuotient(Y - X + 1, TMUL)); 1593 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1594 } 1595 if (TL.sle(TU)) { 1596 NewDirection |= Dependence::DVEntry::GT; 1597 ++ExactSIVsuccesses; 1598 } 1599 1600 // finished 1601 Result.DV[Level].Direction &= NewDirection; 1602 if (Result.DV[Level].Direction == Dependence::DVEntry::NONE) 1603 ++ExactSIVindependence; 1604 return Result.DV[Level].Direction == Dependence::DVEntry::NONE; 1605 } 1606 1607 1608 1609 // Return true if the divisor evenly divides the dividend. 1610 static 1611 bool isRemainderZero(const SCEVConstant *Dividend, 1612 const SCEVConstant *Divisor) { 1613 APInt ConstDividend = Dividend->getValue()->getValue(); 1614 APInt ConstDivisor = Divisor->getValue()->getValue(); 1615 return ConstDividend.srem(ConstDivisor) == 0; 1616 } 1617 1618 1619 // weakZeroSrcSIVtest - 1620 // From the paper, Practical Dependence Testing, Section 4.2.2 1621 // 1622 // When we have a pair of subscripts of the form [c1] and [c2 + a*i], 1623 // where i is an induction variable, c1 and c2 are loop invariant, 1624 // and a is a constant, we can solve it exactly using the 1625 // Weak-Zero SIV test. 1626 // 1627 // Given 1628 // 1629 // c1 = c2 + a*i 1630 // 1631 // we get 1632 // 1633 // (c1 - c2)/a = i 1634 // 1635 // If i is not an integer, there's no dependence. 1636 // If i < 0 or > UB, there's no dependence. 1637 // If i = 0, the direction is <= and peeling the 1638 // 1st iteration will break the dependence. 1639 // If i = UB, the direction is >= and peeling the 1640 // last iteration will break the dependence. 1641 // Otherwise, the direction is *. 1642 // 1643 // Can prove independence. Failing that, we can sometimes refine 1644 // the directions. Can sometimes show that first or last 1645 // iteration carries all the dependences (so worth peeling). 1646 // 1647 // (see also weakZeroDstSIVtest) 1648 // 1649 // Return true if dependence disproved. 1650 bool DependenceAnalysis::weakZeroSrcSIVtest(const SCEV *DstCoeff, 1651 const SCEV *SrcConst, 1652 const SCEV *DstConst, 1653 const Loop *CurLoop, 1654 unsigned Level, 1655 FullDependence &Result, 1656 Constraint &NewConstraint) const { 1657 // For the WeakSIV test, it's possible the loop isn't common to 1658 // the Src and Dst loops. If it isn't, then there's no need to 1659 // record a direction. 1660 DEBUG(dbgs() << "\tWeak-Zero (src) SIV test\n"); 1661 DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << "\n"); 1662 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1663 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1664 ++WeakZeroSIVapplications; 1665 assert(0 < Level && Level <= MaxLevels && "Level out of range"); 1666 Level--; 1667 Result.Consistent = false; 1668 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); 1669 NewConstraint.setLine(SE->getConstant(Delta->getType(), 0), 1670 DstCoeff, Delta, CurLoop); 1671 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1672 if (isKnownPredicate(CmpInst::ICMP_EQ, SrcConst, DstConst)) { 1673 if (Level < CommonLevels) { 1674 Result.DV[Level].Direction &= Dependence::DVEntry::LE; 1675 Result.DV[Level].PeelFirst = true; 1676 ++WeakZeroSIVsuccesses; 1677 } 1678 return false; // dependences caused by first iteration 1679 } 1680 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1681 if (!ConstCoeff) 1682 return false; 1683 const SCEV *AbsCoeff = 1684 SE->isKnownNegative(ConstCoeff) ? 1685 SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; 1686 const SCEV *NewDelta = 1687 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta; 1688 1689 // check that Delta/SrcCoeff < iteration count 1690 // really check NewDelta < count*AbsCoeff 1691 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1692 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1693 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); 1694 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { 1695 ++WeakZeroSIVindependence; 1696 ++WeakZeroSIVsuccesses; 1697 return true; 1698 } 1699 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { 1700 // dependences caused by last iteration 1701 if (Level < CommonLevels) { 1702 Result.DV[Level].Direction &= Dependence::DVEntry::GE; 1703 Result.DV[Level].PeelLast = true; 1704 ++WeakZeroSIVsuccesses; 1705 } 1706 return false; 1707 } 1708 } 1709 1710 // check that Delta/SrcCoeff >= 0 1711 // really check that NewDelta >= 0 1712 if (SE->isKnownNegative(NewDelta)) { 1713 // No dependence, newDelta < 0 1714 ++WeakZeroSIVindependence; 1715 ++WeakZeroSIVsuccesses; 1716 return true; 1717 } 1718 1719 // if SrcCoeff doesn't divide Delta, then no dependence 1720 if (isa<SCEVConstant>(Delta) && 1721 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { 1722 ++WeakZeroSIVindependence; 1723 ++WeakZeroSIVsuccesses; 1724 return true; 1725 } 1726 return false; 1727 } 1728 1729 1730 // weakZeroDstSIVtest - 1731 // From the paper, Practical Dependence Testing, Section 4.2.2 1732 // 1733 // When we have a pair of subscripts of the form [c1 + a*i] and [c2], 1734 // where i is an induction variable, c1 and c2 are loop invariant, 1735 // and a is a constant, we can solve it exactly using the 1736 // Weak-Zero SIV test. 1737 // 1738 // Given 1739 // 1740 // c1 + a*i = c2 1741 // 1742 // we get 1743 // 1744 // i = (c2 - c1)/a 1745 // 1746 // If i is not an integer, there's no dependence. 1747 // If i < 0 or > UB, there's no dependence. 1748 // If i = 0, the direction is <= and peeling the 1749 // 1st iteration will break the dependence. 1750 // If i = UB, the direction is >= and peeling the 1751 // last iteration will break the dependence. 1752 // Otherwise, the direction is *. 1753 // 1754 // Can prove independence. Failing that, we can sometimes refine 1755 // the directions. Can sometimes show that first or last 1756 // iteration carries all the dependences (so worth peeling). 1757 // 1758 // (see also weakZeroSrcSIVtest) 1759 // 1760 // Return true if dependence disproved. 1761 bool DependenceAnalysis::weakZeroDstSIVtest(const SCEV *SrcCoeff, 1762 const SCEV *SrcConst, 1763 const SCEV *DstConst, 1764 const Loop *CurLoop, 1765 unsigned Level, 1766 FullDependence &Result, 1767 Constraint &NewConstraint) const { 1768 // For the WeakSIV test, it's possible the loop isn't common to the 1769 // Src and Dst loops. If it isn't, then there's no need to record a direction. 1770 DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n"); 1771 DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << "\n"); 1772 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1773 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1774 ++WeakZeroSIVapplications; 1775 assert(0 < Level && Level <= SrcLevels && "Level out of range"); 1776 Level--; 1777 Result.Consistent = false; 1778 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1779 NewConstraint.setLine(SrcCoeff, SE->getConstant(Delta->getType(), 0), 1780 Delta, CurLoop); 1781 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1782 if (isKnownPredicate(CmpInst::ICMP_EQ, DstConst, SrcConst)) { 1783 if (Level < CommonLevels) { 1784 Result.DV[Level].Direction &= Dependence::DVEntry::LE; 1785 Result.DV[Level].PeelFirst = true; 1786 ++WeakZeroSIVsuccesses; 1787 } 1788 return false; // dependences caused by first iteration 1789 } 1790 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1791 if (!ConstCoeff) 1792 return false; 1793 const SCEV *AbsCoeff = 1794 SE->isKnownNegative(ConstCoeff) ? 1795 SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; 1796 const SCEV *NewDelta = 1797 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta; 1798 1799 // check that Delta/SrcCoeff < iteration count 1800 // really check NewDelta < count*AbsCoeff 1801 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1802 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1803 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); 1804 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { 1805 ++WeakZeroSIVindependence; 1806 ++WeakZeroSIVsuccesses; 1807 return true; 1808 } 1809 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { 1810 // dependences caused by last iteration 1811 if (Level < CommonLevels) { 1812 Result.DV[Level].Direction &= Dependence::DVEntry::GE; 1813 Result.DV[Level].PeelLast = true; 1814 ++WeakZeroSIVsuccesses; 1815 } 1816 return false; 1817 } 1818 } 1819 1820 // check that Delta/SrcCoeff >= 0 1821 // really check that NewDelta >= 0 1822 if (SE->isKnownNegative(NewDelta)) { 1823 // No dependence, newDelta < 0 1824 ++WeakZeroSIVindependence; 1825 ++WeakZeroSIVsuccesses; 1826 return true; 1827 } 1828 1829 // if SrcCoeff doesn't divide Delta, then no dependence 1830 if (isa<SCEVConstant>(Delta) && 1831 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { 1832 ++WeakZeroSIVindependence; 1833 ++WeakZeroSIVsuccesses; 1834 return true; 1835 } 1836 return false; 1837 } 1838 1839 1840 // exactRDIVtest - Tests the RDIV subscript pair for dependence. 1841 // Things of the form [c1 + a*i] and [c2 + b*j], 1842 // where i and j are induction variable, c1 and c2 are loop invariant, 1843 // and a and b are constants. 1844 // Returns true if any possible dependence is disproved. 1845 // Marks the result as inconsistent. 1846 // Works in some cases that symbolicRDIVtest doesn't, and vice versa. 1847 bool DependenceAnalysis::exactRDIVtest(const SCEV *SrcCoeff, 1848 const SCEV *DstCoeff, 1849 const SCEV *SrcConst, 1850 const SCEV *DstConst, 1851 const Loop *SrcLoop, 1852 const Loop *DstLoop, 1853 FullDependence &Result) const { 1854 DEBUG(dbgs() << "\tExact RDIV test\n"); 1855 DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); 1856 DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); 1857 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1858 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1859 ++ExactRDIVapplications; 1860 Result.Consistent = false; 1861 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1862 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1863 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1864 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1865 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1866 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) 1867 return false; 1868 1869 // find gcd 1870 APInt G, X, Y; 1871 APInt AM = ConstSrcCoeff->getValue()->getValue(); 1872 APInt BM = ConstDstCoeff->getValue()->getValue(); 1873 unsigned Bits = AM.getBitWidth(); 1874 if (findGCD(Bits, AM, BM, ConstDelta->getValue()->getValue(), G, X, Y)) { 1875 // gcd doesn't divide Delta, no dependence 1876 ++ExactRDIVindependence; 1877 return true; 1878 } 1879 1880 DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); 1881 1882 // since SCEV construction seems to normalize, LM = 0 1883 APInt SrcUM(Bits, 1, true); 1884 bool SrcUMvalid = false; 1885 // SrcUM is perhaps unavailable, let's check 1886 if (const SCEVConstant *UpperBound = 1887 collectConstantUpperBound(SrcLoop, Delta->getType())) { 1888 SrcUM = UpperBound->getValue()->getValue(); 1889 DEBUG(dbgs() << "\t SrcUM = " << SrcUM << "\n"); 1890 SrcUMvalid = true; 1891 } 1892 1893 APInt DstUM(Bits, 1, true); 1894 bool DstUMvalid = false; 1895 // UM is perhaps unavailable, let's check 1896 if (const SCEVConstant *UpperBound = 1897 collectConstantUpperBound(DstLoop, Delta->getType())) { 1898 DstUM = UpperBound->getValue()->getValue(); 1899 DEBUG(dbgs() << "\t DstUM = " << DstUM << "\n"); 1900 DstUMvalid = true; 1901 } 1902 1903 APInt TU(APInt::getSignedMaxValue(Bits)); 1904 APInt TL(APInt::getSignedMinValue(Bits)); 1905 1906 // test(BM/G, LM-X) and test(-BM/G, X-UM) 1907 APInt TMUL = BM.sdiv(G); 1908 if (TMUL.sgt(0)) { 1909 TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL)); 1910 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1911 if (SrcUMvalid) { 1912 TU = minAPInt(TU, floorOfQuotient(SrcUM - X, TMUL)); 1913 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1914 } 1915 } 1916 else { 1917 TU = minAPInt(TU, floorOfQuotient(-X, TMUL)); 1918 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1919 if (SrcUMvalid) { 1920 TL = maxAPInt(TL, ceilingOfQuotient(SrcUM - X, TMUL)); 1921 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1922 } 1923 } 1924 1925 // test(AM/G, LM-Y) and test(-AM/G, Y-UM) 1926 TMUL = AM.sdiv(G); 1927 if (TMUL.sgt(0)) { 1928 TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL)); 1929 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1930 if (DstUMvalid) { 1931 TU = minAPInt(TU, floorOfQuotient(DstUM - Y, TMUL)); 1932 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1933 } 1934 } 1935 else { 1936 TU = minAPInt(TU, floorOfQuotient(-Y, TMUL)); 1937 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1938 if (DstUMvalid) { 1939 TL = maxAPInt(TL, ceilingOfQuotient(DstUM - Y, TMUL)); 1940 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1941 } 1942 } 1943 if (TL.sgt(TU)) 1944 ++ExactRDIVindependence; 1945 return TL.sgt(TU); 1946 } 1947 1948 1949 // symbolicRDIVtest - 1950 // In Section 4.5 of the Practical Dependence Testing paper,the authors 1951 // introduce a special case of Banerjee's Inequalities (also called the 1952 // Extreme-Value Test) that can handle some of the SIV and RDIV cases, 1953 // particularly cases with symbolics. Since it's only able to disprove 1954 // dependence (not compute distances or directions), we'll use it as a 1955 // fall back for the other tests. 1956 // 1957 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] 1958 // where i and j are induction variables and c1 and c2 are loop invariants, 1959 // we can use the symbolic tests to disprove some dependences, serving as a 1960 // backup for the RDIV test. Note that i and j can be the same variable, 1961 // letting this test serve as a backup for the various SIV tests. 1962 // 1963 // For a dependence to exist, c1 + a1*i must equal c2 + a2*j for some 1964 // 0 <= i <= N1 and some 0 <= j <= N2, where N1 and N2 are the (normalized) 1965 // loop bounds for the i and j loops, respectively. So, ... 1966 // 1967 // c1 + a1*i = c2 + a2*j 1968 // a1*i - a2*j = c2 - c1 1969 // 1970 // To test for a dependence, we compute c2 - c1 and make sure it's in the 1971 // range of the maximum and minimum possible values of a1*i - a2*j. 1972 // Considering the signs of a1 and a2, we have 4 possible cases: 1973 // 1974 // 1) If a1 >= 0 and a2 >= 0, then 1975 // a1*0 - a2*N2 <= c2 - c1 <= a1*N1 - a2*0 1976 // -a2*N2 <= c2 - c1 <= a1*N1 1977 // 1978 // 2) If a1 >= 0 and a2 <= 0, then 1979 // a1*0 - a2*0 <= c2 - c1 <= a1*N1 - a2*N2 1980 // 0 <= c2 - c1 <= a1*N1 - a2*N2 1981 // 1982 // 3) If a1 <= 0 and a2 >= 0, then 1983 // a1*N1 - a2*N2 <= c2 - c1 <= a1*0 - a2*0 1984 // a1*N1 - a2*N2 <= c2 - c1 <= 0 1985 // 1986 // 4) If a1 <= 0 and a2 <= 0, then 1987 // a1*N1 - a2*0 <= c2 - c1 <= a1*0 - a2*N2 1988 // a1*N1 <= c2 - c1 <= -a2*N2 1989 // 1990 // return true if dependence disproved 1991 bool DependenceAnalysis::symbolicRDIVtest(const SCEV *A1, 1992 const SCEV *A2, 1993 const SCEV *C1, 1994 const SCEV *C2, 1995 const Loop *Loop1, 1996 const Loop *Loop2) const { 1997 ++SymbolicRDIVapplications; 1998 DEBUG(dbgs() << "\ttry symbolic RDIV test\n"); 1999 DEBUG(dbgs() << "\t A1 = " << *A1); 2000 DEBUG(dbgs() << ", type = " << *A1->getType() << "\n"); 2001 DEBUG(dbgs() << "\t A2 = " << *A2 << "\n"); 2002 DEBUG(dbgs() << "\t C1 = " << *C1 << "\n"); 2003 DEBUG(dbgs() << "\t C2 = " << *C2 << "\n"); 2004 const SCEV *N1 = collectUpperBound(Loop1, A1->getType()); 2005 const SCEV *N2 = collectUpperBound(Loop2, A1->getType()); 2006 DEBUG(if (N1) dbgs() << "\t N1 = " << *N1 << "\n"); 2007 DEBUG(if (N2) dbgs() << "\t N2 = " << *N2 << "\n"); 2008 const SCEV *C2_C1 = SE->getMinusSCEV(C2, C1); 2009 const SCEV *C1_C2 = SE->getMinusSCEV(C1, C2); 2010 DEBUG(dbgs() << "\t C2 - C1 = " << *C2_C1 << "\n"); 2011 DEBUG(dbgs() << "\t C1 - C2 = " << *C1_C2 << "\n"); 2012 if (SE->isKnownNonNegative(A1)) { 2013 if (SE->isKnownNonNegative(A2)) { 2014 // A1 >= 0 && A2 >= 0 2015 if (N1) { 2016 // make sure that c2 - c1 <= a1*N1 2017 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2018 DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); 2019 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1)) { 2020 ++SymbolicRDIVindependence; 2021 return true; 2022 } 2023 } 2024 if (N2) { 2025 // make sure that -a2*N2 <= c2 - c1, or a2*N2 >= c1 - c2 2026 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2027 DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); 2028 if (isKnownPredicate(CmpInst::ICMP_SLT, A2N2, C1_C2)) { 2029 ++SymbolicRDIVindependence; 2030 return true; 2031 } 2032 } 2033 } 2034 else if (SE->isKnownNonPositive(A2)) { 2035 // a1 >= 0 && a2 <= 0 2036 if (N1 && N2) { 2037 // make sure that c2 - c1 <= a1*N1 - a2*N2 2038 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2039 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2040 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); 2041 DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); 2042 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1_A2N2)) { 2043 ++SymbolicRDIVindependence; 2044 return true; 2045 } 2046 } 2047 // make sure that 0 <= c2 - c1 2048 if (SE->isKnownNegative(C2_C1)) { 2049 ++SymbolicRDIVindependence; 2050 return true; 2051 } 2052 } 2053 } 2054 else if (SE->isKnownNonPositive(A1)) { 2055 if (SE->isKnownNonNegative(A2)) { 2056 // a1 <= 0 && a2 >= 0 2057 if (N1 && N2) { 2058 // make sure that a1*N1 - a2*N2 <= c2 - c1 2059 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2060 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2061 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); 2062 DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); 2063 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1_A2N2, C2_C1)) { 2064 ++SymbolicRDIVindependence; 2065 return true; 2066 } 2067 } 2068 // make sure that c2 - c1 <= 0 2069 if (SE->isKnownPositive(C2_C1)) { 2070 ++SymbolicRDIVindependence; 2071 return true; 2072 } 2073 } 2074 else if (SE->isKnownNonPositive(A2)) { 2075 // a1 <= 0 && a2 <= 0 2076 if (N1) { 2077 // make sure that a1*N1 <= c2 - c1 2078 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2079 DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); 2080 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1, C2_C1)) { 2081 ++SymbolicRDIVindependence; 2082 return true; 2083 } 2084 } 2085 if (N2) { 2086 // make sure that c2 - c1 <= -a2*N2, or c1 - c2 >= a2*N2 2087 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2088 DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); 2089 if (isKnownPredicate(CmpInst::ICMP_SLT, C1_C2, A2N2)) { 2090 ++SymbolicRDIVindependence; 2091 return true; 2092 } 2093 } 2094 } 2095 } 2096 return false; 2097 } 2098 2099 2100 // testSIV - 2101 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 - a2*i] 2102 // where i is an induction variable, c1 and c2 are loop invariant, and a1 and 2103 // a2 are constant, we attack it with an SIV test. While they can all be 2104 // solved with the Exact SIV test, it's worthwhile to use simpler tests when 2105 // they apply; they're cheaper and sometimes more precise. 2106 // 2107 // Return true if dependence disproved. 2108 bool DependenceAnalysis::testSIV(const SCEV *Src, 2109 const SCEV *Dst, 2110 unsigned &Level, 2111 FullDependence &Result, 2112 Constraint &NewConstraint, 2113 const SCEV *&SplitIter) const { 2114 DEBUG(dbgs() << " src = " << *Src << "\n"); 2115 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2116 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); 2117 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); 2118 if (SrcAddRec && DstAddRec) { 2119 const SCEV *SrcConst = SrcAddRec->getStart(); 2120 const SCEV *DstConst = DstAddRec->getStart(); 2121 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2122 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); 2123 const Loop *CurLoop = SrcAddRec->getLoop(); 2124 assert(CurLoop == DstAddRec->getLoop() && 2125 "both loops in SIV should be same"); 2126 Level = mapSrcLoop(CurLoop); 2127 bool disproven; 2128 if (SrcCoeff == DstCoeff) 2129 disproven = strongSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2130 Level, Result, NewConstraint); 2131 else if (SrcCoeff == SE->getNegativeSCEV(DstCoeff)) 2132 disproven = weakCrossingSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2133 Level, Result, NewConstraint, SplitIter); 2134 else 2135 disproven = exactSIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, 2136 Level, Result, NewConstraint); 2137 return disproven || 2138 gcdMIVtest(Src, Dst, Result) || 2139 symbolicRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, CurLoop); 2140 } 2141 if (SrcAddRec) { 2142 const SCEV *SrcConst = SrcAddRec->getStart(); 2143 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2144 const SCEV *DstConst = Dst; 2145 const Loop *CurLoop = SrcAddRec->getLoop(); 2146 Level = mapSrcLoop(CurLoop); 2147 return weakZeroDstSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2148 Level, Result, NewConstraint) || 2149 gcdMIVtest(Src, Dst, Result); 2150 } 2151 if (DstAddRec) { 2152 const SCEV *DstConst = DstAddRec->getStart(); 2153 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); 2154 const SCEV *SrcConst = Src; 2155 const Loop *CurLoop = DstAddRec->getLoop(); 2156 Level = mapDstLoop(CurLoop); 2157 return weakZeroSrcSIVtest(DstCoeff, SrcConst, DstConst, 2158 CurLoop, Level, Result, NewConstraint) || 2159 gcdMIVtest(Src, Dst, Result); 2160 } 2161 llvm_unreachable("SIV test expected at least one AddRec"); 2162 return false; 2163 } 2164 2165 2166 // testRDIV - 2167 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] 2168 // where i and j are induction variables, c1 and c2 are loop invariant, 2169 // and a1 and a2 are constant, we can solve it exactly with an easy adaptation 2170 // of the Exact SIV test, the Restricted Double Index Variable (RDIV) test. 2171 // It doesn't make sense to talk about distance or direction in this case, 2172 // so there's no point in making special versions of the Strong SIV test or 2173 // the Weak-crossing SIV test. 2174 // 2175 // With minor algebra, this test can also be used for things like 2176 // [c1 + a1*i + a2*j][c2]. 2177 // 2178 // Return true if dependence disproved. 2179 bool DependenceAnalysis::testRDIV(const SCEV *Src, 2180 const SCEV *Dst, 2181 FullDependence &Result) const { 2182 // we have 3 possible situations here: 2183 // 1) [a*i + b] and [c*j + d] 2184 // 2) [a*i + c*j + b] and [d] 2185 // 3) [b] and [a*i + c*j + d] 2186 // We need to find what we've got and get organized 2187 2188 const SCEV *SrcConst, *DstConst; 2189 const SCEV *SrcCoeff, *DstCoeff; 2190 const Loop *SrcLoop, *DstLoop; 2191 2192 DEBUG(dbgs() << " src = " << *Src << "\n"); 2193 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2194 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); 2195 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); 2196 if (SrcAddRec && DstAddRec) { 2197 SrcConst = SrcAddRec->getStart(); 2198 SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2199 SrcLoop = SrcAddRec->getLoop(); 2200 DstConst = DstAddRec->getStart(); 2201 DstCoeff = DstAddRec->getStepRecurrence(*SE); 2202 DstLoop = DstAddRec->getLoop(); 2203 } 2204 else if (SrcAddRec) { 2205 if (const SCEVAddRecExpr *tmpAddRec = 2206 dyn_cast<SCEVAddRecExpr>(SrcAddRec->getStart())) { 2207 SrcConst = tmpAddRec->getStart(); 2208 SrcCoeff = tmpAddRec->getStepRecurrence(*SE); 2209 SrcLoop = tmpAddRec->getLoop(); 2210 DstConst = Dst; 2211 DstCoeff = SE->getNegativeSCEV(SrcAddRec->getStepRecurrence(*SE)); 2212 DstLoop = SrcAddRec->getLoop(); 2213 } 2214 else 2215 llvm_unreachable("RDIV reached by surprising SCEVs"); 2216 } 2217 else if (DstAddRec) { 2218 if (const SCEVAddRecExpr *tmpAddRec = 2219 dyn_cast<SCEVAddRecExpr>(DstAddRec->getStart())) { 2220 DstConst = tmpAddRec->getStart(); 2221 DstCoeff = tmpAddRec->getStepRecurrence(*SE); 2222 DstLoop = tmpAddRec->getLoop(); 2223 SrcConst = Src; 2224 SrcCoeff = SE->getNegativeSCEV(DstAddRec->getStepRecurrence(*SE)); 2225 SrcLoop = DstAddRec->getLoop(); 2226 } 2227 else 2228 llvm_unreachable("RDIV reached by surprising SCEVs"); 2229 } 2230 else 2231 llvm_unreachable("RDIV expected at least one AddRec"); 2232 return exactRDIVtest(SrcCoeff, DstCoeff, 2233 SrcConst, DstConst, 2234 SrcLoop, DstLoop, 2235 Result) || 2236 gcdMIVtest(Src, Dst, Result) || 2237 symbolicRDIVtest(SrcCoeff, DstCoeff, 2238 SrcConst, DstConst, 2239 SrcLoop, DstLoop); 2240 } 2241 2242 2243 // Tests the single-subscript MIV pair (Src and Dst) for dependence. 2244 // Return true if dependence disproved. 2245 // Can sometimes refine direction vectors. 2246 bool DependenceAnalysis::testMIV(const SCEV *Src, 2247 const SCEV *Dst, 2248 const SmallBitVector &Loops, 2249 FullDependence &Result) const { 2250 DEBUG(dbgs() << " src = " << *Src << "\n"); 2251 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2252 Result.Consistent = false; 2253 return gcdMIVtest(Src, Dst, Result) || 2254 banerjeeMIVtest(Src, Dst, Loops, Result); 2255 } 2256 2257 2258 // Given a product, e.g., 10*X*Y, returns the first constant operand, 2259 // in this case 10. If there is no constant part, returns NULL. 2260 static 2261 const SCEVConstant *getConstantPart(const SCEVMulExpr *Product) { 2262 for (unsigned Op = 0, Ops = Product->getNumOperands(); Op < Ops; Op++) { 2263 if (const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Product->getOperand(Op))) 2264 return Constant; 2265 } 2266 return nullptr; 2267 } 2268 2269 2270 //===----------------------------------------------------------------------===// 2271 // gcdMIVtest - 2272 // Tests an MIV subscript pair for dependence. 2273 // Returns true if any possible dependence is disproved. 2274 // Marks the result as inconsistent. 2275 // Can sometimes disprove the equal direction for 1 or more loops, 2276 // as discussed in Michael Wolfe's book, 2277 // High Performance Compilers for Parallel Computing, page 235. 2278 // 2279 // We spend some effort (code!) to handle cases like 2280 // [10*i + 5*N*j + 15*M + 6], where i and j are induction variables, 2281 // but M and N are just loop-invariant variables. 2282 // This should help us handle linearized subscripts; 2283 // also makes this test a useful backup to the various SIV tests. 2284 // 2285 // It occurs to me that the presence of loop-invariant variables 2286 // changes the nature of the test from "greatest common divisor" 2287 // to "a common divisor". 2288 bool DependenceAnalysis::gcdMIVtest(const SCEV *Src, 2289 const SCEV *Dst, 2290 FullDependence &Result) const { 2291 DEBUG(dbgs() << "starting gcd\n"); 2292 ++GCDapplications; 2293 unsigned BitWidth = SE->getTypeSizeInBits(Src->getType()); 2294 APInt RunningGCD = APInt::getNullValue(BitWidth); 2295 2296 // Examine Src coefficients. 2297 // Compute running GCD and record source constant. 2298 // Because we're looking for the constant at the end of the chain, 2299 // we can't quit the loop just because the GCD == 1. 2300 const SCEV *Coefficients = Src; 2301 while (const SCEVAddRecExpr *AddRec = 2302 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2303 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2304 const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Coeff); 2305 if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff)) 2306 // If the coefficient is the product of a constant and other stuff, 2307 // we can use the constant in the GCD computation. 2308 Constant = getConstantPart(Product); 2309 if (!Constant) 2310 return false; 2311 APInt ConstCoeff = Constant->getValue()->getValue(); 2312 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2313 Coefficients = AddRec->getStart(); 2314 } 2315 const SCEV *SrcConst = Coefficients; 2316 2317 // Examine Dst coefficients. 2318 // Compute running GCD and record destination constant. 2319 // Because we're looking for the constant at the end of the chain, 2320 // we can't quit the loop just because the GCD == 1. 2321 Coefficients = Dst; 2322 while (const SCEVAddRecExpr *AddRec = 2323 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2324 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2325 const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Coeff); 2326 if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff)) 2327 // If the coefficient is the product of a constant and other stuff, 2328 // we can use the constant in the GCD computation. 2329 Constant = getConstantPart(Product); 2330 if (!Constant) 2331 return false; 2332 APInt ConstCoeff = Constant->getValue()->getValue(); 2333 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2334 Coefficients = AddRec->getStart(); 2335 } 2336 const SCEV *DstConst = Coefficients; 2337 2338 APInt ExtraGCD = APInt::getNullValue(BitWidth); 2339 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 2340 DEBUG(dbgs() << " Delta = " << *Delta << "\n"); 2341 const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Delta); 2342 if (const SCEVAddExpr *Sum = dyn_cast<SCEVAddExpr>(Delta)) { 2343 // If Delta is a sum of products, we may be able to make further progress. 2344 for (unsigned Op = 0, Ops = Sum->getNumOperands(); Op < Ops; Op++) { 2345 const SCEV *Operand = Sum->getOperand(Op); 2346 if (isa<SCEVConstant>(Operand)) { 2347 assert(!Constant && "Surprised to find multiple constants"); 2348 Constant = cast<SCEVConstant>(Operand); 2349 } 2350 else if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Operand)) { 2351 // Search for constant operand to participate in GCD; 2352 // If none found; return false. 2353 const SCEVConstant *ConstOp = getConstantPart(Product); 2354 if (!ConstOp) 2355 return false; 2356 APInt ConstOpValue = ConstOp->getValue()->getValue(); 2357 ExtraGCD = APIntOps::GreatestCommonDivisor(ExtraGCD, 2358 ConstOpValue.abs()); 2359 } 2360 else 2361 return false; 2362 } 2363 } 2364 if (!Constant) 2365 return false; 2366 APInt ConstDelta = cast<SCEVConstant>(Constant)->getValue()->getValue(); 2367 DEBUG(dbgs() << " ConstDelta = " << ConstDelta << "\n"); 2368 if (ConstDelta == 0) 2369 return false; 2370 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ExtraGCD); 2371 DEBUG(dbgs() << " RunningGCD = " << RunningGCD << "\n"); 2372 APInt Remainder = ConstDelta.srem(RunningGCD); 2373 if (Remainder != 0) { 2374 ++GCDindependence; 2375 return true; 2376 } 2377 2378 // Try to disprove equal directions. 2379 // For example, given a subscript pair [3*i + 2*j] and [i' + 2*j' - 1], 2380 // the code above can't disprove the dependence because the GCD = 1. 2381 // So we consider what happen if i = i' and what happens if j = j'. 2382 // If i = i', we can simplify the subscript to [2*i + 2*j] and [2*j' - 1], 2383 // which is infeasible, so we can disallow the = direction for the i level. 2384 // Setting j = j' doesn't help matters, so we end up with a direction vector 2385 // of [<>, *] 2386 // 2387 // Given A[5*i + 10*j*M + 9*M*N] and A[15*i + 20*j*M - 21*N*M + 5], 2388 // we need to remember that the constant part is 5 and the RunningGCD should 2389 // be initialized to ExtraGCD = 30. 2390 DEBUG(dbgs() << " ExtraGCD = " << ExtraGCD << '\n'); 2391 2392 bool Improved = false; 2393 Coefficients = Src; 2394 while (const SCEVAddRecExpr *AddRec = 2395 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2396 Coefficients = AddRec->getStart(); 2397 const Loop *CurLoop = AddRec->getLoop(); 2398 RunningGCD = ExtraGCD; 2399 const SCEV *SrcCoeff = AddRec->getStepRecurrence(*SE); 2400 const SCEV *DstCoeff = SE->getMinusSCEV(SrcCoeff, SrcCoeff); 2401 const SCEV *Inner = Src; 2402 while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { 2403 AddRec = cast<SCEVAddRecExpr>(Inner); 2404 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2405 if (CurLoop == AddRec->getLoop()) 2406 ; // SrcCoeff == Coeff 2407 else { 2408 if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff)) 2409 // If the coefficient is the product of a constant and other stuff, 2410 // we can use the constant in the GCD computation. 2411 Constant = getConstantPart(Product); 2412 else 2413 Constant = cast<SCEVConstant>(Coeff); 2414 APInt ConstCoeff = Constant->getValue()->getValue(); 2415 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2416 } 2417 Inner = AddRec->getStart(); 2418 } 2419 Inner = Dst; 2420 while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { 2421 AddRec = cast<SCEVAddRecExpr>(Inner); 2422 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2423 if (CurLoop == AddRec->getLoop()) 2424 DstCoeff = Coeff; 2425 else { 2426 if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff)) 2427 // If the coefficient is the product of a constant and other stuff, 2428 // we can use the constant in the GCD computation. 2429 Constant = getConstantPart(Product); 2430 else 2431 Constant = cast<SCEVConstant>(Coeff); 2432 APInt ConstCoeff = Constant->getValue()->getValue(); 2433 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2434 } 2435 Inner = AddRec->getStart(); 2436 } 2437 Delta = SE->getMinusSCEV(SrcCoeff, DstCoeff); 2438 if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Delta)) 2439 // If the coefficient is the product of a constant and other stuff, 2440 // we can use the constant in the GCD computation. 2441 Constant = getConstantPart(Product); 2442 else if (isa<SCEVConstant>(Delta)) 2443 Constant = cast<SCEVConstant>(Delta); 2444 else { 2445 // The difference of the two coefficients might not be a product 2446 // or constant, in which case we give up on this direction. 2447 continue; 2448 } 2449 APInt ConstCoeff = Constant->getValue()->getValue(); 2450 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2451 DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n"); 2452 if (RunningGCD != 0) { 2453 Remainder = ConstDelta.srem(RunningGCD); 2454 DEBUG(dbgs() << "\tRemainder = " << Remainder << "\n"); 2455 if (Remainder != 0) { 2456 unsigned Level = mapSrcLoop(CurLoop); 2457 Result.DV[Level - 1].Direction &= unsigned(~Dependence::DVEntry::EQ); 2458 Improved = true; 2459 } 2460 } 2461 } 2462 if (Improved) 2463 ++GCDsuccesses; 2464 DEBUG(dbgs() << "all done\n"); 2465 return false; 2466 } 2467 2468 2469 //===----------------------------------------------------------------------===// 2470 // banerjeeMIVtest - 2471 // Use Banerjee's Inequalities to test an MIV subscript pair. 2472 // (Wolfe, in the race-car book, calls this the Extreme Value Test.) 2473 // Generally follows the discussion in Section 2.5.2 of 2474 // 2475 // Optimizing Supercompilers for Supercomputers 2476 // Michael Wolfe 2477 // 2478 // The inequalities given on page 25 are simplified in that loops are 2479 // normalized so that the lower bound is always 0 and the stride is always 1. 2480 // For example, Wolfe gives 2481 // 2482 // LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2483 // 2484 // where A_k is the coefficient of the kth index in the source subscript, 2485 // B_k is the coefficient of the kth index in the destination subscript, 2486 // U_k is the upper bound of the kth index, L_k is the lower bound of the Kth 2487 // index, and N_k is the stride of the kth index. Since all loops are normalized 2488 // by the SCEV package, N_k = 1 and L_k = 0, allowing us to simplify the 2489 // equation to 2490 // 2491 // LB^<_k = (A^-_k - B_k)^- (U_k - 0 - 1) + (A_k - B_k)0 - B_k 1 2492 // = (A^-_k - B_k)^- (U_k - 1) - B_k 2493 // 2494 // Similar simplifications are possible for the other equations. 2495 // 2496 // When we can't determine the number of iterations for a loop, 2497 // we use NULL as an indicator for the worst case, infinity. 2498 // When computing the upper bound, NULL denotes +inf; 2499 // for the lower bound, NULL denotes -inf. 2500 // 2501 // Return true if dependence disproved. 2502 bool DependenceAnalysis::banerjeeMIVtest(const SCEV *Src, 2503 const SCEV *Dst, 2504 const SmallBitVector &Loops, 2505 FullDependence &Result) const { 2506 DEBUG(dbgs() << "starting Banerjee\n"); 2507 ++BanerjeeApplications; 2508 DEBUG(dbgs() << " Src = " << *Src << '\n'); 2509 const SCEV *A0; 2510 CoefficientInfo *A = collectCoeffInfo(Src, true, A0); 2511 DEBUG(dbgs() << " Dst = " << *Dst << '\n'); 2512 const SCEV *B0; 2513 CoefficientInfo *B = collectCoeffInfo(Dst, false, B0); 2514 BoundInfo *Bound = new BoundInfo[MaxLevels + 1]; 2515 const SCEV *Delta = SE->getMinusSCEV(B0, A0); 2516 DEBUG(dbgs() << "\tDelta = " << *Delta << '\n'); 2517 2518 // Compute bounds for all the * directions. 2519 DEBUG(dbgs() << "\tBounds[*]\n"); 2520 for (unsigned K = 1; K <= MaxLevels; ++K) { 2521 Bound[K].Iterations = A[K].Iterations ? A[K].Iterations : B[K].Iterations; 2522 Bound[K].Direction = Dependence::DVEntry::ALL; 2523 Bound[K].DirSet = Dependence::DVEntry::NONE; 2524 findBoundsALL(A, B, Bound, K); 2525 #ifndef NDEBUG 2526 DEBUG(dbgs() << "\t " << K << '\t'); 2527 if (Bound[K].Lower[Dependence::DVEntry::ALL]) 2528 DEBUG(dbgs() << *Bound[K].Lower[Dependence::DVEntry::ALL] << '\t'); 2529 else 2530 DEBUG(dbgs() << "-inf\t"); 2531 if (Bound[K].Upper[Dependence::DVEntry::ALL]) 2532 DEBUG(dbgs() << *Bound[K].Upper[Dependence::DVEntry::ALL] << '\n'); 2533 else 2534 DEBUG(dbgs() << "+inf\n"); 2535 #endif 2536 } 2537 2538 // Test the *, *, *, ... case. 2539 bool Disproved = false; 2540 if (testBounds(Dependence::DVEntry::ALL, 0, Bound, Delta)) { 2541 // Explore the direction vector hierarchy. 2542 unsigned DepthExpanded = 0; 2543 unsigned NewDeps = exploreDirections(1, A, B, Bound, 2544 Loops, DepthExpanded, Delta); 2545 if (NewDeps > 0) { 2546 bool Improved = false; 2547 for (unsigned K = 1; K <= CommonLevels; ++K) { 2548 if (Loops[K]) { 2549 unsigned Old = Result.DV[K - 1].Direction; 2550 Result.DV[K - 1].Direction = Old & Bound[K].DirSet; 2551 Improved |= Old != Result.DV[K - 1].Direction; 2552 if (!Result.DV[K - 1].Direction) { 2553 Improved = false; 2554 Disproved = true; 2555 break; 2556 } 2557 } 2558 } 2559 if (Improved) 2560 ++BanerjeeSuccesses; 2561 } 2562 else { 2563 ++BanerjeeIndependence; 2564 Disproved = true; 2565 } 2566 } 2567 else { 2568 ++BanerjeeIndependence; 2569 Disproved = true; 2570 } 2571 delete [] Bound; 2572 delete [] A; 2573 delete [] B; 2574 return Disproved; 2575 } 2576 2577 2578 // Hierarchically expands the direction vector 2579 // search space, combining the directions of discovered dependences 2580 // in the DirSet field of Bound. Returns the number of distinct 2581 // dependences discovered. If the dependence is disproved, 2582 // it will return 0. 2583 unsigned DependenceAnalysis::exploreDirections(unsigned Level, 2584 CoefficientInfo *A, 2585 CoefficientInfo *B, 2586 BoundInfo *Bound, 2587 const SmallBitVector &Loops, 2588 unsigned &DepthExpanded, 2589 const SCEV *Delta) const { 2590 if (Level > CommonLevels) { 2591 // record result 2592 DEBUG(dbgs() << "\t["); 2593 for (unsigned K = 1; K <= CommonLevels; ++K) { 2594 if (Loops[K]) { 2595 Bound[K].DirSet |= Bound[K].Direction; 2596 #ifndef NDEBUG 2597 switch (Bound[K].Direction) { 2598 case Dependence::DVEntry::LT: 2599 DEBUG(dbgs() << " <"); 2600 break; 2601 case Dependence::DVEntry::EQ: 2602 DEBUG(dbgs() << " ="); 2603 break; 2604 case Dependence::DVEntry::GT: 2605 DEBUG(dbgs() << " >"); 2606 break; 2607 case Dependence::DVEntry::ALL: 2608 DEBUG(dbgs() << " *"); 2609 break; 2610 default: 2611 llvm_unreachable("unexpected Bound[K].Direction"); 2612 } 2613 #endif 2614 } 2615 } 2616 DEBUG(dbgs() << " ]\n"); 2617 return 1; 2618 } 2619 if (Loops[Level]) { 2620 if (Level > DepthExpanded) { 2621 DepthExpanded = Level; 2622 // compute bounds for <, =, > at current level 2623 findBoundsLT(A, B, Bound, Level); 2624 findBoundsGT(A, B, Bound, Level); 2625 findBoundsEQ(A, B, Bound, Level); 2626 #ifndef NDEBUG 2627 DEBUG(dbgs() << "\tBound for level = " << Level << '\n'); 2628 DEBUG(dbgs() << "\t <\t"); 2629 if (Bound[Level].Lower[Dependence::DVEntry::LT]) 2630 DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::LT] << '\t'); 2631 else 2632 DEBUG(dbgs() << "-inf\t"); 2633 if (Bound[Level].Upper[Dependence::DVEntry::LT]) 2634 DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::LT] << '\n'); 2635 else 2636 DEBUG(dbgs() << "+inf\n"); 2637 DEBUG(dbgs() << "\t =\t"); 2638 if (Bound[Level].Lower[Dependence::DVEntry::EQ]) 2639 DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::EQ] << '\t'); 2640 else 2641 DEBUG(dbgs() << "-inf\t"); 2642 if (Bound[Level].Upper[Dependence::DVEntry::EQ]) 2643 DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::EQ] << '\n'); 2644 else 2645 DEBUG(dbgs() << "+inf\n"); 2646 DEBUG(dbgs() << "\t >\t"); 2647 if (Bound[Level].Lower[Dependence::DVEntry::GT]) 2648 DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::GT] << '\t'); 2649 else 2650 DEBUG(dbgs() << "-inf\t"); 2651 if (Bound[Level].Upper[Dependence::DVEntry::GT]) 2652 DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::GT] << '\n'); 2653 else 2654 DEBUG(dbgs() << "+inf\n"); 2655 #endif 2656 } 2657 2658 unsigned NewDeps = 0; 2659 2660 // test bounds for <, *, *, ... 2661 if (testBounds(Dependence::DVEntry::LT, Level, Bound, Delta)) 2662 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2663 Loops, DepthExpanded, Delta); 2664 2665 // Test bounds for =, *, *, ... 2666 if (testBounds(Dependence::DVEntry::EQ, Level, Bound, Delta)) 2667 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2668 Loops, DepthExpanded, Delta); 2669 2670 // test bounds for >, *, *, ... 2671 if (testBounds(Dependence::DVEntry::GT, Level, Bound, Delta)) 2672 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2673 Loops, DepthExpanded, Delta); 2674 2675 Bound[Level].Direction = Dependence::DVEntry::ALL; 2676 return NewDeps; 2677 } 2678 else 2679 return exploreDirections(Level + 1, A, B, Bound, Loops, DepthExpanded, Delta); 2680 } 2681 2682 2683 // Returns true iff the current bounds are plausible. 2684 bool DependenceAnalysis::testBounds(unsigned char DirKind, 2685 unsigned Level, 2686 BoundInfo *Bound, 2687 const SCEV *Delta) const { 2688 Bound[Level].Direction = DirKind; 2689 if (const SCEV *LowerBound = getLowerBound(Bound)) 2690 if (isKnownPredicate(CmpInst::ICMP_SGT, LowerBound, Delta)) 2691 return false; 2692 if (const SCEV *UpperBound = getUpperBound(Bound)) 2693 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, UpperBound)) 2694 return false; 2695 return true; 2696 } 2697 2698 2699 // Computes the upper and lower bounds for level K 2700 // using the * direction. Records them in Bound. 2701 // Wolfe gives the equations 2702 // 2703 // LB^*_k = (A^-_k - B^+_k)(U_k - L_k) + (A_k - B_k)L_k 2704 // UB^*_k = (A^+_k - B^-_k)(U_k - L_k) + (A_k - B_k)L_k 2705 // 2706 // Since we normalize loops, we can simplify these equations to 2707 // 2708 // LB^*_k = (A^-_k - B^+_k)U_k 2709 // UB^*_k = (A^+_k - B^-_k)U_k 2710 // 2711 // We must be careful to handle the case where the upper bound is unknown. 2712 // Note that the lower bound is always <= 0 2713 // and the upper bound is always >= 0. 2714 void DependenceAnalysis::findBoundsALL(CoefficientInfo *A, 2715 CoefficientInfo *B, 2716 BoundInfo *Bound, 2717 unsigned K) const { 2718 Bound[K].Lower[Dependence::DVEntry::ALL] = nullptr; // Default value = -infinity. 2719 Bound[K].Upper[Dependence::DVEntry::ALL] = nullptr; // Default value = +infinity. 2720 if (Bound[K].Iterations) { 2721 Bound[K].Lower[Dependence::DVEntry::ALL] = 2722 SE->getMulExpr(SE->getMinusSCEV(A[K].NegPart, B[K].PosPart), 2723 Bound[K].Iterations); 2724 Bound[K].Upper[Dependence::DVEntry::ALL] = 2725 SE->getMulExpr(SE->getMinusSCEV(A[K].PosPart, B[K].NegPart), 2726 Bound[K].Iterations); 2727 } 2728 else { 2729 // If the difference is 0, we won't need to know the number of iterations. 2730 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].NegPart, B[K].PosPart)) 2731 Bound[K].Lower[Dependence::DVEntry::ALL] = 2732 SE->getConstant(A[K].Coeff->getType(), 0); 2733 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].PosPart, B[K].NegPart)) 2734 Bound[K].Upper[Dependence::DVEntry::ALL] = 2735 SE->getConstant(A[K].Coeff->getType(), 0); 2736 } 2737 } 2738 2739 2740 // Computes the upper and lower bounds for level K 2741 // using the = direction. Records them in Bound. 2742 // Wolfe gives the equations 2743 // 2744 // LB^=_k = (A_k - B_k)^- (U_k - L_k) + (A_k - B_k)L_k 2745 // UB^=_k = (A_k - B_k)^+ (U_k - L_k) + (A_k - B_k)L_k 2746 // 2747 // Since we normalize loops, we can simplify these equations to 2748 // 2749 // LB^=_k = (A_k - B_k)^- U_k 2750 // UB^=_k = (A_k - B_k)^+ U_k 2751 // 2752 // We must be careful to handle the case where the upper bound is unknown. 2753 // Note that the lower bound is always <= 0 2754 // and the upper bound is always >= 0. 2755 void DependenceAnalysis::findBoundsEQ(CoefficientInfo *A, 2756 CoefficientInfo *B, 2757 BoundInfo *Bound, 2758 unsigned K) const { 2759 Bound[K].Lower[Dependence::DVEntry::EQ] = nullptr; // Default value = -infinity. 2760 Bound[K].Upper[Dependence::DVEntry::EQ] = nullptr; // Default value = +infinity. 2761 if (Bound[K].Iterations) { 2762 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); 2763 const SCEV *NegativePart = getNegativePart(Delta); 2764 Bound[K].Lower[Dependence::DVEntry::EQ] = 2765 SE->getMulExpr(NegativePart, Bound[K].Iterations); 2766 const SCEV *PositivePart = getPositivePart(Delta); 2767 Bound[K].Upper[Dependence::DVEntry::EQ] = 2768 SE->getMulExpr(PositivePart, Bound[K].Iterations); 2769 } 2770 else { 2771 // If the positive/negative part of the difference is 0, 2772 // we won't need to know the number of iterations. 2773 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); 2774 const SCEV *NegativePart = getNegativePart(Delta); 2775 if (NegativePart->isZero()) 2776 Bound[K].Lower[Dependence::DVEntry::EQ] = NegativePart; // Zero 2777 const SCEV *PositivePart = getPositivePart(Delta); 2778 if (PositivePart->isZero()) 2779 Bound[K].Upper[Dependence::DVEntry::EQ] = PositivePart; // Zero 2780 } 2781 } 2782 2783 2784 // Computes the upper and lower bounds for level K 2785 // using the < direction. Records them in Bound. 2786 // Wolfe gives the equations 2787 // 2788 // LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2789 // UB^<_k = (A^+_k - B_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2790 // 2791 // Since we normalize loops, we can simplify these equations to 2792 // 2793 // LB^<_k = (A^-_k - B_k)^- (U_k - 1) - B_k 2794 // UB^<_k = (A^+_k - B_k)^+ (U_k - 1) - B_k 2795 // 2796 // We must be careful to handle the case where the upper bound is unknown. 2797 void DependenceAnalysis::findBoundsLT(CoefficientInfo *A, 2798 CoefficientInfo *B, 2799 BoundInfo *Bound, 2800 unsigned K) const { 2801 Bound[K].Lower[Dependence::DVEntry::LT] = nullptr; // Default value = -infinity. 2802 Bound[K].Upper[Dependence::DVEntry::LT] = nullptr; // Default value = +infinity. 2803 if (Bound[K].Iterations) { 2804 const SCEV *Iter_1 = 2805 SE->getMinusSCEV(Bound[K].Iterations, 2806 SE->getConstant(Bound[K].Iterations->getType(), 1)); 2807 const SCEV *NegPart = 2808 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); 2809 Bound[K].Lower[Dependence::DVEntry::LT] = 2810 SE->getMinusSCEV(SE->getMulExpr(NegPart, Iter_1), B[K].Coeff); 2811 const SCEV *PosPart = 2812 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); 2813 Bound[K].Upper[Dependence::DVEntry::LT] = 2814 SE->getMinusSCEV(SE->getMulExpr(PosPart, Iter_1), B[K].Coeff); 2815 } 2816 else { 2817 // If the positive/negative part of the difference is 0, 2818 // we won't need to know the number of iterations. 2819 const SCEV *NegPart = 2820 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); 2821 if (NegPart->isZero()) 2822 Bound[K].Lower[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); 2823 const SCEV *PosPart = 2824 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); 2825 if (PosPart->isZero()) 2826 Bound[K].Upper[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); 2827 } 2828 } 2829 2830 2831 // Computes the upper and lower bounds for level K 2832 // using the > direction. Records them in Bound. 2833 // Wolfe gives the equations 2834 // 2835 // LB^>_k = (A_k - B^+_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k 2836 // UB^>_k = (A_k - B^-_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k 2837 // 2838 // Since we normalize loops, we can simplify these equations to 2839 // 2840 // LB^>_k = (A_k - B^+_k)^- (U_k - 1) + A_k 2841 // UB^>_k = (A_k - B^-_k)^+ (U_k - 1) + A_k 2842 // 2843 // We must be careful to handle the case where the upper bound is unknown. 2844 void DependenceAnalysis::findBoundsGT(CoefficientInfo *A, 2845 CoefficientInfo *B, 2846 BoundInfo *Bound, 2847 unsigned K) const { 2848 Bound[K].Lower[Dependence::DVEntry::GT] = nullptr; // Default value = -infinity. 2849 Bound[K].Upper[Dependence::DVEntry::GT] = nullptr; // Default value = +infinity. 2850 if (Bound[K].Iterations) { 2851 const SCEV *Iter_1 = 2852 SE->getMinusSCEV(Bound[K].Iterations, 2853 SE->getConstant(Bound[K].Iterations->getType(), 1)); 2854 const SCEV *NegPart = 2855 getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); 2856 Bound[K].Lower[Dependence::DVEntry::GT] = 2857 SE->getAddExpr(SE->getMulExpr(NegPart, Iter_1), A[K].Coeff); 2858 const SCEV *PosPart = 2859 getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); 2860 Bound[K].Upper[Dependence::DVEntry::GT] = 2861 SE->getAddExpr(SE->getMulExpr(PosPart, Iter_1), A[K].Coeff); 2862 } 2863 else { 2864 // If the positive/negative part of the difference is 0, 2865 // we won't need to know the number of iterations. 2866 const SCEV *NegPart = getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); 2867 if (NegPart->isZero()) 2868 Bound[K].Lower[Dependence::DVEntry::GT] = A[K].Coeff; 2869 const SCEV *PosPart = getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); 2870 if (PosPart->isZero()) 2871 Bound[K].Upper[Dependence::DVEntry::GT] = A[K].Coeff; 2872 } 2873 } 2874 2875 2876 // X^+ = max(X, 0) 2877 const SCEV *DependenceAnalysis::getPositivePart(const SCEV *X) const { 2878 return SE->getSMaxExpr(X, SE->getConstant(X->getType(), 0)); 2879 } 2880 2881 2882 // X^- = min(X, 0) 2883 const SCEV *DependenceAnalysis::getNegativePart(const SCEV *X) const { 2884 return SE->getSMinExpr(X, SE->getConstant(X->getType(), 0)); 2885 } 2886 2887 2888 // Walks through the subscript, 2889 // collecting each coefficient, the associated loop bounds, 2890 // and recording its positive and negative parts for later use. 2891 DependenceAnalysis::CoefficientInfo * 2892 DependenceAnalysis::collectCoeffInfo(const SCEV *Subscript, 2893 bool SrcFlag, 2894 const SCEV *&Constant) const { 2895 const SCEV *Zero = SE->getConstant(Subscript->getType(), 0); 2896 CoefficientInfo *CI = new CoefficientInfo[MaxLevels + 1]; 2897 for (unsigned K = 1; K <= MaxLevels; ++K) { 2898 CI[K].Coeff = Zero; 2899 CI[K].PosPart = Zero; 2900 CI[K].NegPart = Zero; 2901 CI[K].Iterations = nullptr; 2902 } 2903 while (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Subscript)) { 2904 const Loop *L = AddRec->getLoop(); 2905 unsigned K = SrcFlag ? mapSrcLoop(L) : mapDstLoop(L); 2906 CI[K].Coeff = AddRec->getStepRecurrence(*SE); 2907 CI[K].PosPart = getPositivePart(CI[K].Coeff); 2908 CI[K].NegPart = getNegativePart(CI[K].Coeff); 2909 CI[K].Iterations = collectUpperBound(L, Subscript->getType()); 2910 Subscript = AddRec->getStart(); 2911 } 2912 Constant = Subscript; 2913 #ifndef NDEBUG 2914 DEBUG(dbgs() << "\tCoefficient Info\n"); 2915 for (unsigned K = 1; K <= MaxLevels; ++K) { 2916 DEBUG(dbgs() << "\t " << K << "\t" << *CI[K].Coeff); 2917 DEBUG(dbgs() << "\tPos Part = "); 2918 DEBUG(dbgs() << *CI[K].PosPart); 2919 DEBUG(dbgs() << "\tNeg Part = "); 2920 DEBUG(dbgs() << *CI[K].NegPart); 2921 DEBUG(dbgs() << "\tUpper Bound = "); 2922 if (CI[K].Iterations) 2923 DEBUG(dbgs() << *CI[K].Iterations); 2924 else 2925 DEBUG(dbgs() << "+inf"); 2926 DEBUG(dbgs() << '\n'); 2927 } 2928 DEBUG(dbgs() << "\t Constant = " << *Subscript << '\n'); 2929 #endif 2930 return CI; 2931 } 2932 2933 2934 // Looks through all the bounds info and 2935 // computes the lower bound given the current direction settings 2936 // at each level. If the lower bound for any level is -inf, 2937 // the result is -inf. 2938 const SCEV *DependenceAnalysis::getLowerBound(BoundInfo *Bound) const { 2939 const SCEV *Sum = Bound[1].Lower[Bound[1].Direction]; 2940 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { 2941 if (Bound[K].Lower[Bound[K].Direction]) 2942 Sum = SE->getAddExpr(Sum, Bound[K].Lower[Bound[K].Direction]); 2943 else 2944 Sum = nullptr; 2945 } 2946 return Sum; 2947 } 2948 2949 2950 // Looks through all the bounds info and 2951 // computes the upper bound given the current direction settings 2952 // at each level. If the upper bound at any level is +inf, 2953 // the result is +inf. 2954 const SCEV *DependenceAnalysis::getUpperBound(BoundInfo *Bound) const { 2955 const SCEV *Sum = Bound[1].Upper[Bound[1].Direction]; 2956 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { 2957 if (Bound[K].Upper[Bound[K].Direction]) 2958 Sum = SE->getAddExpr(Sum, Bound[K].Upper[Bound[K].Direction]); 2959 else 2960 Sum = nullptr; 2961 } 2962 return Sum; 2963 } 2964 2965 2966 //===----------------------------------------------------------------------===// 2967 // Constraint manipulation for Delta test. 2968 2969 // Given a linear SCEV, 2970 // return the coefficient (the step) 2971 // corresponding to the specified loop. 2972 // If there isn't one, return 0. 2973 // For example, given a*i + b*j + c*k, finding the coefficient 2974 // corresponding to the j loop would yield b. 2975 const SCEV *DependenceAnalysis::findCoefficient(const SCEV *Expr, 2976 const Loop *TargetLoop) const { 2977 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 2978 if (!AddRec) 2979 return SE->getConstant(Expr->getType(), 0); 2980 if (AddRec->getLoop() == TargetLoop) 2981 return AddRec->getStepRecurrence(*SE); 2982 return findCoefficient(AddRec->getStart(), TargetLoop); 2983 } 2984 2985 2986 // Given a linear SCEV, 2987 // return the SCEV given by zeroing out the coefficient 2988 // corresponding to the specified loop. 2989 // For example, given a*i + b*j + c*k, zeroing the coefficient 2990 // corresponding to the j loop would yield a*i + c*k. 2991 const SCEV *DependenceAnalysis::zeroCoefficient(const SCEV *Expr, 2992 const Loop *TargetLoop) const { 2993 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 2994 if (!AddRec) 2995 return Expr; // ignore 2996 if (AddRec->getLoop() == TargetLoop) 2997 return AddRec->getStart(); 2998 return SE->getAddRecExpr(zeroCoefficient(AddRec->getStart(), TargetLoop), 2999 AddRec->getStepRecurrence(*SE), 3000 AddRec->getLoop(), 3001 AddRec->getNoWrapFlags()); 3002 } 3003 3004 3005 // Given a linear SCEV Expr, 3006 // return the SCEV given by adding some Value to the 3007 // coefficient corresponding to the specified TargetLoop. 3008 // For example, given a*i + b*j + c*k, adding 1 to the coefficient 3009 // corresponding to the j loop would yield a*i + (b+1)*j + c*k. 3010 const SCEV *DependenceAnalysis::addToCoefficient(const SCEV *Expr, 3011 const Loop *TargetLoop, 3012 const SCEV *Value) const { 3013 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 3014 if (!AddRec) // create a new addRec 3015 return SE->getAddRecExpr(Expr, 3016 Value, 3017 TargetLoop, 3018 SCEV::FlagAnyWrap); // Worst case, with no info. 3019 if (AddRec->getLoop() == TargetLoop) { 3020 const SCEV *Sum = SE->getAddExpr(AddRec->getStepRecurrence(*SE), Value); 3021 if (Sum->isZero()) 3022 return AddRec->getStart(); 3023 return SE->getAddRecExpr(AddRec->getStart(), 3024 Sum, 3025 AddRec->getLoop(), 3026 AddRec->getNoWrapFlags()); 3027 } 3028 if (SE->isLoopInvariant(AddRec, TargetLoop)) 3029 return SE->getAddRecExpr(AddRec, Value, TargetLoop, SCEV::FlagAnyWrap); 3030 return SE->getAddRecExpr( 3031 addToCoefficient(AddRec->getStart(), TargetLoop, Value), 3032 AddRec->getStepRecurrence(*SE), AddRec->getLoop(), 3033 AddRec->getNoWrapFlags()); 3034 } 3035 3036 3037 // Review the constraints, looking for opportunities 3038 // to simplify a subscript pair (Src and Dst). 3039 // Return true if some simplification occurs. 3040 // If the simplification isn't exact (that is, if it is conservative 3041 // in terms of dependence), set consistent to false. 3042 // Corresponds to Figure 5 from the paper 3043 // 3044 // Practical Dependence Testing 3045 // Goff, Kennedy, Tseng 3046 // PLDI 1991 3047 bool DependenceAnalysis::propagate(const SCEV *&Src, 3048 const SCEV *&Dst, 3049 SmallBitVector &Loops, 3050 SmallVectorImpl<Constraint> &Constraints, 3051 bool &Consistent) { 3052 bool Result = false; 3053 for (int LI = Loops.find_first(); LI >= 0; LI = Loops.find_next(LI)) { 3054 DEBUG(dbgs() << "\t Constraint[" << LI << "] is"); 3055 DEBUG(Constraints[LI].dump(dbgs())); 3056 if (Constraints[LI].isDistance()) 3057 Result |= propagateDistance(Src, Dst, Constraints[LI], Consistent); 3058 else if (Constraints[LI].isLine()) 3059 Result |= propagateLine(Src, Dst, Constraints[LI], Consistent); 3060 else if (Constraints[LI].isPoint()) 3061 Result |= propagatePoint(Src, Dst, Constraints[LI]); 3062 } 3063 return Result; 3064 } 3065 3066 3067 // Attempt to propagate a distance 3068 // constraint into a subscript pair (Src and Dst). 3069 // Return true if some simplification occurs. 3070 // If the simplification isn't exact (that is, if it is conservative 3071 // in terms of dependence), set consistent to false. 3072 bool DependenceAnalysis::propagateDistance(const SCEV *&Src, 3073 const SCEV *&Dst, 3074 Constraint &CurConstraint, 3075 bool &Consistent) { 3076 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3077 DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); 3078 const SCEV *A_K = findCoefficient(Src, CurLoop); 3079 if (A_K->isZero()) 3080 return false; 3081 const SCEV *DA_K = SE->getMulExpr(A_K, CurConstraint.getD()); 3082 Src = SE->getMinusSCEV(Src, DA_K); 3083 Src = zeroCoefficient(Src, CurLoop); 3084 DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); 3085 DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); 3086 Dst = addToCoefficient(Dst, CurLoop, SE->getNegativeSCEV(A_K)); 3087 DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); 3088 if (!findCoefficient(Dst, CurLoop)->isZero()) 3089 Consistent = false; 3090 return true; 3091 } 3092 3093 3094 // Attempt to propagate a line 3095 // constraint into a subscript pair (Src and Dst). 3096 // Return true if some simplification occurs. 3097 // If the simplification isn't exact (that is, if it is conservative 3098 // in terms of dependence), set consistent to false. 3099 bool DependenceAnalysis::propagateLine(const SCEV *&Src, 3100 const SCEV *&Dst, 3101 Constraint &CurConstraint, 3102 bool &Consistent) { 3103 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3104 const SCEV *A = CurConstraint.getA(); 3105 const SCEV *B = CurConstraint.getB(); 3106 const SCEV *C = CurConstraint.getC(); 3107 DEBUG(dbgs() << "\t\tA = " << *A << ", B = " << *B << ", C = " << *C << "\n"); 3108 DEBUG(dbgs() << "\t\tSrc = " << *Src << "\n"); 3109 DEBUG(dbgs() << "\t\tDst = " << *Dst << "\n"); 3110 if (A->isZero()) { 3111 const SCEVConstant *Bconst = dyn_cast<SCEVConstant>(B); 3112 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3113 if (!Bconst || !Cconst) return false; 3114 APInt Beta = Bconst->getValue()->getValue(); 3115 APInt Charlie = Cconst->getValue()->getValue(); 3116 APInt CdivB = Charlie.sdiv(Beta); 3117 assert(Charlie.srem(Beta) == 0 && "C should be evenly divisible by B"); 3118 const SCEV *AP_K = findCoefficient(Dst, CurLoop); 3119 // Src = SE->getAddExpr(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); 3120 Src = SE->getMinusSCEV(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); 3121 Dst = zeroCoefficient(Dst, CurLoop); 3122 if (!findCoefficient(Src, CurLoop)->isZero()) 3123 Consistent = false; 3124 } 3125 else if (B->isZero()) { 3126 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); 3127 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3128 if (!Aconst || !Cconst) return false; 3129 APInt Alpha = Aconst->getValue()->getValue(); 3130 APInt Charlie = Cconst->getValue()->getValue(); 3131 APInt CdivA = Charlie.sdiv(Alpha); 3132 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); 3133 const SCEV *A_K = findCoefficient(Src, CurLoop); 3134 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); 3135 Src = zeroCoefficient(Src, CurLoop); 3136 if (!findCoefficient(Dst, CurLoop)->isZero()) 3137 Consistent = false; 3138 } 3139 else if (isKnownPredicate(CmpInst::ICMP_EQ, A, B)) { 3140 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); 3141 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3142 if (!Aconst || !Cconst) return false; 3143 APInt Alpha = Aconst->getValue()->getValue(); 3144 APInt Charlie = Cconst->getValue()->getValue(); 3145 APInt CdivA = Charlie.sdiv(Alpha); 3146 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); 3147 const SCEV *A_K = findCoefficient(Src, CurLoop); 3148 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); 3149 Src = zeroCoefficient(Src, CurLoop); 3150 Dst = addToCoefficient(Dst, CurLoop, A_K); 3151 if (!findCoefficient(Dst, CurLoop)->isZero()) 3152 Consistent = false; 3153 } 3154 else { 3155 // paper is incorrect here, or perhaps just misleading 3156 const SCEV *A_K = findCoefficient(Src, CurLoop); 3157 Src = SE->getMulExpr(Src, A); 3158 Dst = SE->getMulExpr(Dst, A); 3159 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, C)); 3160 Src = zeroCoefficient(Src, CurLoop); 3161 Dst = addToCoefficient(Dst, CurLoop, SE->getMulExpr(A_K, B)); 3162 if (!findCoefficient(Dst, CurLoop)->isZero()) 3163 Consistent = false; 3164 } 3165 DEBUG(dbgs() << "\t\tnew Src = " << *Src << "\n"); 3166 DEBUG(dbgs() << "\t\tnew Dst = " << *Dst << "\n"); 3167 return true; 3168 } 3169 3170 3171 // Attempt to propagate a point 3172 // constraint into a subscript pair (Src and Dst). 3173 // Return true if some simplification occurs. 3174 bool DependenceAnalysis::propagatePoint(const SCEV *&Src, 3175 const SCEV *&Dst, 3176 Constraint &CurConstraint) { 3177 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3178 const SCEV *A_K = findCoefficient(Src, CurLoop); 3179 const SCEV *AP_K = findCoefficient(Dst, CurLoop); 3180 const SCEV *XA_K = SE->getMulExpr(A_K, CurConstraint.getX()); 3181 const SCEV *YAP_K = SE->getMulExpr(AP_K, CurConstraint.getY()); 3182 DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); 3183 Src = SE->getAddExpr(Src, SE->getMinusSCEV(XA_K, YAP_K)); 3184 Src = zeroCoefficient(Src, CurLoop); 3185 DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); 3186 DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); 3187 Dst = zeroCoefficient(Dst, CurLoop); 3188 DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); 3189 return true; 3190 } 3191 3192 3193 // Update direction vector entry based on the current constraint. 3194 void DependenceAnalysis::updateDirection(Dependence::DVEntry &Level, 3195 const Constraint &CurConstraint 3196 ) const { 3197 DEBUG(dbgs() << "\tUpdate direction, constraint ="); 3198 DEBUG(CurConstraint.dump(dbgs())); 3199 if (CurConstraint.isAny()) 3200 ; // use defaults 3201 else if (CurConstraint.isDistance()) { 3202 // this one is consistent, the others aren't 3203 Level.Scalar = false; 3204 Level.Distance = CurConstraint.getD(); 3205 unsigned NewDirection = Dependence::DVEntry::NONE; 3206 if (!SE->isKnownNonZero(Level.Distance)) // if may be zero 3207 NewDirection = Dependence::DVEntry::EQ; 3208 if (!SE->isKnownNonPositive(Level.Distance)) // if may be positive 3209 NewDirection |= Dependence::DVEntry::LT; 3210 if (!SE->isKnownNonNegative(Level.Distance)) // if may be negative 3211 NewDirection |= Dependence::DVEntry::GT; 3212 Level.Direction &= NewDirection; 3213 } 3214 else if (CurConstraint.isLine()) { 3215 Level.Scalar = false; 3216 Level.Distance = nullptr; 3217 // direction should be accurate 3218 } 3219 else if (CurConstraint.isPoint()) { 3220 Level.Scalar = false; 3221 Level.Distance = nullptr; 3222 unsigned NewDirection = Dependence::DVEntry::NONE; 3223 if (!isKnownPredicate(CmpInst::ICMP_NE, 3224 CurConstraint.getY(), 3225 CurConstraint.getX())) 3226 // if X may be = Y 3227 NewDirection |= Dependence::DVEntry::EQ; 3228 if (!isKnownPredicate(CmpInst::ICMP_SLE, 3229 CurConstraint.getY(), 3230 CurConstraint.getX())) 3231 // if Y may be > X 3232 NewDirection |= Dependence::DVEntry::LT; 3233 if (!isKnownPredicate(CmpInst::ICMP_SGE, 3234 CurConstraint.getY(), 3235 CurConstraint.getX())) 3236 // if Y may be < X 3237 NewDirection |= Dependence::DVEntry::GT; 3238 Level.Direction &= NewDirection; 3239 } 3240 else 3241 llvm_unreachable("constraint has unexpected kind"); 3242 } 3243 3244 /// Check if we can delinearize the subscripts. If the SCEVs representing the 3245 /// source and destination array references are recurrences on a nested loop, 3246 /// this function flattens the nested recurrences into separate recurrences 3247 /// for each loop level. 3248 bool DependenceAnalysis::tryDelinearize(Instruction *Src, 3249 Instruction *Dst, 3250 SmallVectorImpl<Subscript> &Pair) 3251 { 3252 Value *SrcPtr = getPointerOperand(Src); 3253 Value *DstPtr = getPointerOperand(Dst); 3254 3255 Loop *SrcLoop = LI->getLoopFor(Src->getParent()); 3256 Loop *DstLoop = LI->getLoopFor(Dst->getParent()); 3257 3258 // Below code mimics the code in Delinearization.cpp 3259 const SCEV *SrcAccessFn = 3260 SE->getSCEVAtScope(SrcPtr, SrcLoop); 3261 const SCEV *DstAccessFn = 3262 SE->getSCEVAtScope(DstPtr, DstLoop); 3263 3264 const SCEVUnknown *SrcBase = 3265 dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn)); 3266 const SCEVUnknown *DstBase = 3267 dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn)); 3268 3269 if (!SrcBase || !DstBase || SrcBase != DstBase) 3270 return false; 3271 3272 const SCEV *ElementSize = SE->getElementSize(Src); 3273 if (ElementSize != SE->getElementSize(Dst)) 3274 return false; 3275 3276 const SCEV *SrcSCEV = SE->getMinusSCEV(SrcAccessFn, SrcBase); 3277 const SCEV *DstSCEV = SE->getMinusSCEV(DstAccessFn, DstBase); 3278 3279 const SCEVAddRecExpr *SrcAR = dyn_cast<SCEVAddRecExpr>(SrcSCEV); 3280 const SCEVAddRecExpr *DstAR = dyn_cast<SCEVAddRecExpr>(DstSCEV); 3281 if (!SrcAR || !DstAR || !SrcAR->isAffine() || !DstAR->isAffine()) 3282 return false; 3283 3284 // First step: collect parametric terms in both array references. 3285 SmallVector<const SCEV *, 4> Terms; 3286 SE->collectParametricTerms(SrcAR, Terms); 3287 SE->collectParametricTerms(DstAR, Terms); 3288 3289 // Second step: find subscript sizes. 3290 SmallVector<const SCEV *, 4> Sizes; 3291 SE->findArrayDimensions(Terms, Sizes, ElementSize); 3292 3293 // Third step: compute the access functions for each subscript. 3294 SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts; 3295 SE->computeAccessFunctions(SrcAR, SrcSubscripts, Sizes); 3296 SE->computeAccessFunctions(DstAR, DstSubscripts, Sizes); 3297 3298 // Fail when there is only a subscript: that's a linearized access function. 3299 if (SrcSubscripts.size() < 2 || DstSubscripts.size() < 2 || 3300 SrcSubscripts.size() != DstSubscripts.size()) 3301 return false; 3302 3303 int size = SrcSubscripts.size(); 3304 3305 DEBUG({ 3306 dbgs() << "\nSrcSubscripts: "; 3307 for (int i = 0; i < size; i++) 3308 dbgs() << *SrcSubscripts[i]; 3309 dbgs() << "\nDstSubscripts: "; 3310 for (int i = 0; i < size; i++) 3311 dbgs() << *DstSubscripts[i]; 3312 }); 3313 3314 // The delinearization transforms a single-subscript MIV dependence test into 3315 // a multi-subscript SIV dependence test that is easier to compute. So we 3316 // resize Pair to contain as many pairs of subscripts as the delinearization 3317 // has found, and then initialize the pairs following the delinearization. 3318 Pair.resize(size); 3319 for (int i = 0; i < size; ++i) { 3320 Pair[i].Src = SrcSubscripts[i]; 3321 Pair[i].Dst = DstSubscripts[i]; 3322 unifySubscriptType(&Pair[i]); 3323 3324 // FIXME: we should record the bounds SrcSizes[i] and DstSizes[i] that the 3325 // delinearization has found, and add these constraints to the dependence 3326 // check to avoid memory accesses overflow from one dimension into another. 3327 // This is related to the problem of determining the existence of data 3328 // dependences in array accesses using a different number of subscripts: in 3329 // C one can access an array A[100][100]; as A[0][9999], *A[9999], etc. 3330 } 3331 3332 return true; 3333 } 3334 3335 //===----------------------------------------------------------------------===// 3336 3337 #ifndef NDEBUG 3338 // For debugging purposes, dump a small bit vector to dbgs(). 3339 static void dumpSmallBitVector(SmallBitVector &BV) { 3340 dbgs() << "{"; 3341 for (int VI = BV.find_first(); VI >= 0; VI = BV.find_next(VI)) { 3342 dbgs() << VI; 3343 if (BV.find_next(VI) >= 0) 3344 dbgs() << ' '; 3345 } 3346 dbgs() << "}\n"; 3347 } 3348 #endif 3349 3350 // depends - 3351 // Returns NULL if there is no dependence. 3352 // Otherwise, return a Dependence with as many details as possible. 3353 // Corresponds to Section 3.1 in the paper 3354 // 3355 // Practical Dependence Testing 3356 // Goff, Kennedy, Tseng 3357 // PLDI 1991 3358 // 3359 // Care is required to keep the routine below, getSplitIteration(), 3360 // up to date with respect to this routine. 3361 std::unique_ptr<Dependence> 3362 DependenceAnalysis::depends(Instruction *Src, Instruction *Dst, 3363 bool PossiblyLoopIndependent) { 3364 if (Src == Dst) 3365 PossiblyLoopIndependent = false; 3366 3367 if ((!Src->mayReadFromMemory() && !Src->mayWriteToMemory()) || 3368 (!Dst->mayReadFromMemory() && !Dst->mayWriteToMemory())) 3369 // if both instructions don't reference memory, there's no dependence 3370 return nullptr; 3371 3372 if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) { 3373 // can only analyze simple loads and stores, i.e., no calls, invokes, etc. 3374 DEBUG(dbgs() << "can only handle simple loads and stores\n"); 3375 return make_unique<Dependence>(Src, Dst); 3376 } 3377 3378 Value *SrcPtr = getPointerOperand(Src); 3379 Value *DstPtr = getPointerOperand(Dst); 3380 3381 switch (underlyingObjectsAlias(AA, F->getParent()->getDataLayout(), DstPtr, 3382 SrcPtr)) { 3383 case MayAlias: 3384 case PartialAlias: 3385 // cannot analyse objects if we don't understand their aliasing. 3386 DEBUG(dbgs() << "can't analyze may or partial alias\n"); 3387 return make_unique<Dependence>(Src, Dst); 3388 case NoAlias: 3389 // If the objects noalias, they are distinct, accesses are independent. 3390 DEBUG(dbgs() << "no alias\n"); 3391 return nullptr; 3392 case MustAlias: 3393 break; // The underlying objects alias; test accesses for dependence. 3394 } 3395 3396 // establish loop nesting levels 3397 establishNestingLevels(Src, Dst); 3398 DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n"); 3399 DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n"); 3400 3401 FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels); 3402 ++TotalArrayPairs; 3403 3404 // See if there are GEPs we can use. 3405 bool UsefulGEP = false; 3406 GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr); 3407 GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr); 3408 if (SrcGEP && DstGEP && 3409 SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()) { 3410 const SCEV *SrcPtrSCEV = SE->getSCEV(SrcGEP->getPointerOperand()); 3411 const SCEV *DstPtrSCEV = SE->getSCEV(DstGEP->getPointerOperand()); 3412 DEBUG(dbgs() << " SrcPtrSCEV = " << *SrcPtrSCEV << "\n"); 3413 DEBUG(dbgs() << " DstPtrSCEV = " << *DstPtrSCEV << "\n"); 3414 3415 UsefulGEP = isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) && 3416 isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent())) && 3417 (SrcGEP->getNumOperands() == DstGEP->getNumOperands()); 3418 } 3419 unsigned Pairs = UsefulGEP ? SrcGEP->idx_end() - SrcGEP->idx_begin() : 1; 3420 SmallVector<Subscript, 4> Pair(Pairs); 3421 if (UsefulGEP) { 3422 DEBUG(dbgs() << " using GEPs\n"); 3423 unsigned P = 0; 3424 for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(), 3425 SrcEnd = SrcGEP->idx_end(), 3426 DstIdx = DstGEP->idx_begin(); 3427 SrcIdx != SrcEnd; 3428 ++SrcIdx, ++DstIdx, ++P) { 3429 Pair[P].Src = SE->getSCEV(*SrcIdx); 3430 Pair[P].Dst = SE->getSCEV(*DstIdx); 3431 unifySubscriptType(&Pair[P]); 3432 } 3433 } 3434 else { 3435 DEBUG(dbgs() << " ignoring GEPs\n"); 3436 const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); 3437 const SCEV *DstSCEV = SE->getSCEV(DstPtr); 3438 DEBUG(dbgs() << " SrcSCEV = " << *SrcSCEV << "\n"); 3439 DEBUG(dbgs() << " DstSCEV = " << *DstSCEV << "\n"); 3440 Pair[0].Src = SrcSCEV; 3441 Pair[0].Dst = DstSCEV; 3442 } 3443 3444 if (Delinearize && CommonLevels > 1) { 3445 if (tryDelinearize(Src, Dst, Pair)) { 3446 DEBUG(dbgs() << " delinerized GEP\n"); 3447 Pairs = Pair.size(); 3448 } 3449 } 3450 3451 for (unsigned P = 0; P < Pairs; ++P) { 3452 Pair[P].Loops.resize(MaxLevels + 1); 3453 Pair[P].GroupLoops.resize(MaxLevels + 1); 3454 Pair[P].Group.resize(Pairs); 3455 removeMatchingExtensions(&Pair[P]); 3456 Pair[P].Classification = 3457 classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), 3458 Pair[P].Dst, LI->getLoopFor(Dst->getParent()), 3459 Pair[P].Loops); 3460 Pair[P].GroupLoops = Pair[P].Loops; 3461 Pair[P].Group.set(P); 3462 DEBUG(dbgs() << " subscript " << P << "\n"); 3463 DEBUG(dbgs() << "\tsrc = " << *Pair[P].Src << "\n"); 3464 DEBUG(dbgs() << "\tdst = " << *Pair[P].Dst << "\n"); 3465 DEBUG(dbgs() << "\tclass = " << Pair[P].Classification << "\n"); 3466 DEBUG(dbgs() << "\tloops = "); 3467 DEBUG(dumpSmallBitVector(Pair[P].Loops)); 3468 } 3469 3470 SmallBitVector Separable(Pairs); 3471 SmallBitVector Coupled(Pairs); 3472 3473 // Partition subscripts into separable and minimally-coupled groups 3474 // Algorithm in paper is algorithmically better; 3475 // this may be faster in practice. Check someday. 3476 // 3477 // Here's an example of how it works. Consider this code: 3478 // 3479 // for (i = ...) { 3480 // for (j = ...) { 3481 // for (k = ...) { 3482 // for (l = ...) { 3483 // for (m = ...) { 3484 // A[i][j][k][m] = ...; 3485 // ... = A[0][j][l][i + j]; 3486 // } 3487 // } 3488 // } 3489 // } 3490 // } 3491 // 3492 // There are 4 subscripts here: 3493 // 0 [i] and [0] 3494 // 1 [j] and [j] 3495 // 2 [k] and [l] 3496 // 3 [m] and [i + j] 3497 // 3498 // We've already classified each subscript pair as ZIV, SIV, etc., 3499 // and collected all the loops mentioned by pair P in Pair[P].Loops. 3500 // In addition, we've initialized Pair[P].GroupLoops to Pair[P].Loops 3501 // and set Pair[P].Group = {P}. 3502 // 3503 // Src Dst Classification Loops GroupLoops Group 3504 // 0 [i] [0] SIV {1} {1} {0} 3505 // 1 [j] [j] SIV {2} {2} {1} 3506 // 2 [k] [l] RDIV {3,4} {3,4} {2} 3507 // 3 [m] [i + j] MIV {1,2,5} {1,2,5} {3} 3508 // 3509 // For each subscript SI 0 .. 3, we consider each remaining subscript, SJ. 3510 // So, 0 is compared against 1, 2, and 3; 1 is compared against 2 and 3, etc. 3511 // 3512 // We begin by comparing 0 and 1. The intersection of the GroupLoops is empty. 3513 // Next, 0 and 2. Again, the intersection of their GroupLoops is empty. 3514 // Next 0 and 3. The intersection of their GroupLoop = {1}, not empty, 3515 // so Pair[3].Group = {0,3} and Done = false (that is, 0 will not be added 3516 // to either Separable or Coupled). 3517 // 3518 // Next, we consider 1 and 2. The intersection of the GroupLoops is empty. 3519 // Next, 1 and 3. The intersectionof their GroupLoops = {2}, not empty, 3520 // so Pair[3].Group = {0, 1, 3} and Done = false. 3521 // 3522 // Next, we compare 2 against 3. The intersection of the GroupLoops is empty. 3523 // Since Done remains true, we add 2 to the set of Separable pairs. 3524 // 3525 // Finally, we consider 3. There's nothing to compare it with, 3526 // so Done remains true and we add it to the Coupled set. 3527 // Pair[3].Group = {0, 1, 3} and GroupLoops = {1, 2, 5}. 3528 // 3529 // In the end, we've got 1 separable subscript and 1 coupled group. 3530 for (unsigned SI = 0; SI < Pairs; ++SI) { 3531 if (Pair[SI].Classification == Subscript::NonLinear) { 3532 // ignore these, but collect loops for later 3533 ++NonlinearSubscriptPairs; 3534 collectCommonLoops(Pair[SI].Src, 3535 LI->getLoopFor(Src->getParent()), 3536 Pair[SI].Loops); 3537 collectCommonLoops(Pair[SI].Dst, 3538 LI->getLoopFor(Dst->getParent()), 3539 Pair[SI].Loops); 3540 Result.Consistent = false; 3541 } else if (Pair[SI].Classification == Subscript::ZIV) { 3542 // always separable 3543 Separable.set(SI); 3544 } 3545 else { 3546 // SIV, RDIV, or MIV, so check for coupled group 3547 bool Done = true; 3548 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { 3549 SmallBitVector Intersection = Pair[SI].GroupLoops; 3550 Intersection &= Pair[SJ].GroupLoops; 3551 if (Intersection.any()) { 3552 // accumulate set of all the loops in group 3553 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; 3554 // accumulate set of all subscripts in group 3555 Pair[SJ].Group |= Pair[SI].Group; 3556 Done = false; 3557 } 3558 } 3559 if (Done) { 3560 if (Pair[SI].Group.count() == 1) { 3561 Separable.set(SI); 3562 ++SeparableSubscriptPairs; 3563 } 3564 else { 3565 Coupled.set(SI); 3566 ++CoupledSubscriptPairs; 3567 } 3568 } 3569 } 3570 } 3571 3572 DEBUG(dbgs() << " Separable = "); 3573 DEBUG(dumpSmallBitVector(Separable)); 3574 DEBUG(dbgs() << " Coupled = "); 3575 DEBUG(dumpSmallBitVector(Coupled)); 3576 3577 Constraint NewConstraint; 3578 NewConstraint.setAny(SE); 3579 3580 // test separable subscripts 3581 for (int SI = Separable.find_first(); SI >= 0; SI = Separable.find_next(SI)) { 3582 DEBUG(dbgs() << "testing subscript " << SI); 3583 switch (Pair[SI].Classification) { 3584 case Subscript::ZIV: 3585 DEBUG(dbgs() << ", ZIV\n"); 3586 if (testZIV(Pair[SI].Src, Pair[SI].Dst, Result)) 3587 return nullptr; 3588 break; 3589 case Subscript::SIV: { 3590 DEBUG(dbgs() << ", SIV\n"); 3591 unsigned Level; 3592 const SCEV *SplitIter = nullptr; 3593 if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level, Result, NewConstraint, 3594 SplitIter)) 3595 return nullptr; 3596 break; 3597 } 3598 case Subscript::RDIV: 3599 DEBUG(dbgs() << ", RDIV\n"); 3600 if (testRDIV(Pair[SI].Src, Pair[SI].Dst, Result)) 3601 return nullptr; 3602 break; 3603 case Subscript::MIV: 3604 DEBUG(dbgs() << ", MIV\n"); 3605 if (testMIV(Pair[SI].Src, Pair[SI].Dst, Pair[SI].Loops, Result)) 3606 return nullptr; 3607 break; 3608 default: 3609 llvm_unreachable("subscript has unexpected classification"); 3610 } 3611 } 3612 3613 if (Coupled.count()) { 3614 // test coupled subscript groups 3615 DEBUG(dbgs() << "starting on coupled subscripts\n"); 3616 DEBUG(dbgs() << "MaxLevels + 1 = " << MaxLevels + 1 << "\n"); 3617 SmallVector<Constraint, 4> Constraints(MaxLevels + 1); 3618 for (unsigned II = 0; II <= MaxLevels; ++II) 3619 Constraints[II].setAny(SE); 3620 for (int SI = Coupled.find_first(); SI >= 0; SI = Coupled.find_next(SI)) { 3621 DEBUG(dbgs() << "testing subscript group " << SI << " { "); 3622 SmallBitVector Group(Pair[SI].Group); 3623 SmallBitVector Sivs(Pairs); 3624 SmallBitVector Mivs(Pairs); 3625 SmallBitVector ConstrainedLevels(MaxLevels + 1); 3626 SmallVector<Subscript *, 4> PairsInGroup; 3627 for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) { 3628 DEBUG(dbgs() << SJ << " "); 3629 if (Pair[SJ].Classification == Subscript::SIV) 3630 Sivs.set(SJ); 3631 else 3632 Mivs.set(SJ); 3633 PairsInGroup.push_back(&Pair[SJ]); 3634 } 3635 unifySubscriptType(PairsInGroup); 3636 DEBUG(dbgs() << "}\n"); 3637 while (Sivs.any()) { 3638 bool Changed = false; 3639 for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) { 3640 DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n"); 3641 // SJ is an SIV subscript that's part of the current coupled group 3642 unsigned Level; 3643 const SCEV *SplitIter = nullptr; 3644 DEBUG(dbgs() << "SIV\n"); 3645 if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, Result, NewConstraint, 3646 SplitIter)) 3647 return nullptr; 3648 ConstrainedLevels.set(Level); 3649 if (intersectConstraints(&Constraints[Level], &NewConstraint)) { 3650 if (Constraints[Level].isEmpty()) { 3651 ++DeltaIndependence; 3652 return nullptr; 3653 } 3654 Changed = true; 3655 } 3656 Sivs.reset(SJ); 3657 } 3658 if (Changed) { 3659 // propagate, possibly creating new SIVs and ZIVs 3660 DEBUG(dbgs() << " propagating\n"); 3661 DEBUG(dbgs() << "\tMivs = "); 3662 DEBUG(dumpSmallBitVector(Mivs)); 3663 for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { 3664 // SJ is an MIV subscript that's part of the current coupled group 3665 DEBUG(dbgs() << "\tSJ = " << SJ << "\n"); 3666 if (propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, 3667 Constraints, Result.Consistent)) { 3668 DEBUG(dbgs() << "\t Changed\n"); 3669 ++DeltaPropagations; 3670 Pair[SJ].Classification = 3671 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), 3672 Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), 3673 Pair[SJ].Loops); 3674 switch (Pair[SJ].Classification) { 3675 case Subscript::ZIV: 3676 DEBUG(dbgs() << "ZIV\n"); 3677 if (testZIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) 3678 return nullptr; 3679 Mivs.reset(SJ); 3680 break; 3681 case Subscript::SIV: 3682 Sivs.set(SJ); 3683 Mivs.reset(SJ); 3684 break; 3685 case Subscript::RDIV: 3686 case Subscript::MIV: 3687 break; 3688 default: 3689 llvm_unreachable("bad subscript classification"); 3690 } 3691 } 3692 } 3693 } 3694 } 3695 3696 // test & propagate remaining RDIVs 3697 for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { 3698 if (Pair[SJ].Classification == Subscript::RDIV) { 3699 DEBUG(dbgs() << "RDIV test\n"); 3700 if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) 3701 return nullptr; 3702 // I don't yet understand how to propagate RDIV results 3703 Mivs.reset(SJ); 3704 } 3705 } 3706 3707 // test remaining MIVs 3708 // This code is temporary. 3709 // Better to somehow test all remaining subscripts simultaneously. 3710 for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { 3711 if (Pair[SJ].Classification == Subscript::MIV) { 3712 DEBUG(dbgs() << "MIV test\n"); 3713 if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Result)) 3714 return nullptr; 3715 } 3716 else 3717 llvm_unreachable("expected only MIV subscripts at this point"); 3718 } 3719 3720 // update Result.DV from constraint vector 3721 DEBUG(dbgs() << " updating\n"); 3722 for (int SJ = ConstrainedLevels.find_first(); SJ >= 0; 3723 SJ = ConstrainedLevels.find_next(SJ)) { 3724 if (SJ > (int)CommonLevels) 3725 break; 3726 updateDirection(Result.DV[SJ - 1], Constraints[SJ]); 3727 if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE) 3728 return nullptr; 3729 } 3730 } 3731 } 3732 3733 // Make sure the Scalar flags are set correctly. 3734 SmallBitVector CompleteLoops(MaxLevels + 1); 3735 for (unsigned SI = 0; SI < Pairs; ++SI) 3736 CompleteLoops |= Pair[SI].Loops; 3737 for (unsigned II = 1; II <= CommonLevels; ++II) 3738 if (CompleteLoops[II]) 3739 Result.DV[II - 1].Scalar = false; 3740 3741 if (PossiblyLoopIndependent) { 3742 // Make sure the LoopIndependent flag is set correctly. 3743 // All directions must include equal, otherwise no 3744 // loop-independent dependence is possible. 3745 for (unsigned II = 1; II <= CommonLevels; ++II) { 3746 if (!(Result.getDirection(II) & Dependence::DVEntry::EQ)) { 3747 Result.LoopIndependent = false; 3748 break; 3749 } 3750 } 3751 } 3752 else { 3753 // On the other hand, if all directions are equal and there's no 3754 // loop-independent dependence possible, then no dependence exists. 3755 bool AllEqual = true; 3756 for (unsigned II = 1; II <= CommonLevels; ++II) { 3757 if (Result.getDirection(II) != Dependence::DVEntry::EQ) { 3758 AllEqual = false; 3759 break; 3760 } 3761 } 3762 if (AllEqual) 3763 return nullptr; 3764 } 3765 3766 return make_unique<FullDependence>(std::move(Result)); 3767 } 3768 3769 3770 3771 //===----------------------------------------------------------------------===// 3772 // getSplitIteration - 3773 // Rather than spend rarely-used space recording the splitting iteration 3774 // during the Weak-Crossing SIV test, we re-compute it on demand. 3775 // The re-computation is basically a repeat of the entire dependence test, 3776 // though simplified since we know that the dependence exists. 3777 // It's tedious, since we must go through all propagations, etc. 3778 // 3779 // Care is required to keep this code up to date with respect to the routine 3780 // above, depends(). 3781 // 3782 // Generally, the dependence analyzer will be used to build 3783 // a dependence graph for a function (basically a map from instructions 3784 // to dependences). Looking for cycles in the graph shows us loops 3785 // that cannot be trivially vectorized/parallelized. 3786 // 3787 // We can try to improve the situation by examining all the dependences 3788 // that make up the cycle, looking for ones we can break. 3789 // Sometimes, peeling the first or last iteration of a loop will break 3790 // dependences, and we've got flags for those possibilities. 3791 // Sometimes, splitting a loop at some other iteration will do the trick, 3792 // and we've got a flag for that case. Rather than waste the space to 3793 // record the exact iteration (since we rarely know), we provide 3794 // a method that calculates the iteration. It's a drag that it must work 3795 // from scratch, but wonderful in that it's possible. 3796 // 3797 // Here's an example: 3798 // 3799 // for (i = 0; i < 10; i++) 3800 // A[i] = ... 3801 // ... = A[11 - i] 3802 // 3803 // There's a loop-carried flow dependence from the store to the load, 3804 // found by the weak-crossing SIV test. The dependence will have a flag, 3805 // indicating that the dependence can be broken by splitting the loop. 3806 // Calling getSplitIteration will return 5. 3807 // Splitting the loop breaks the dependence, like so: 3808 // 3809 // for (i = 0; i <= 5; i++) 3810 // A[i] = ... 3811 // ... = A[11 - i] 3812 // for (i = 6; i < 10; i++) 3813 // A[i] = ... 3814 // ... = A[11 - i] 3815 // 3816 // breaks the dependence and allows us to vectorize/parallelize 3817 // both loops. 3818 const SCEV *DependenceAnalysis::getSplitIteration(const Dependence &Dep, 3819 unsigned SplitLevel) { 3820 assert(Dep.isSplitable(SplitLevel) && 3821 "Dep should be splitable at SplitLevel"); 3822 Instruction *Src = Dep.getSrc(); 3823 Instruction *Dst = Dep.getDst(); 3824 assert(Src->mayReadFromMemory() || Src->mayWriteToMemory()); 3825 assert(Dst->mayReadFromMemory() || Dst->mayWriteToMemory()); 3826 assert(isLoadOrStore(Src)); 3827 assert(isLoadOrStore(Dst)); 3828 Value *SrcPtr = getPointerOperand(Src); 3829 Value *DstPtr = getPointerOperand(Dst); 3830 assert(underlyingObjectsAlias(AA, F->getParent()->getDataLayout(), DstPtr, 3831 SrcPtr) == MustAlias); 3832 3833 // establish loop nesting levels 3834 establishNestingLevels(Src, Dst); 3835 3836 FullDependence Result(Src, Dst, false, CommonLevels); 3837 3838 // See if there are GEPs we can use. 3839 bool UsefulGEP = false; 3840 GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr); 3841 GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr); 3842 if (SrcGEP && DstGEP && 3843 SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()) { 3844 const SCEV *SrcPtrSCEV = SE->getSCEV(SrcGEP->getPointerOperand()); 3845 const SCEV *DstPtrSCEV = SE->getSCEV(DstGEP->getPointerOperand()); 3846 UsefulGEP = isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) && 3847 isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent())) && 3848 (SrcGEP->getNumOperands() == DstGEP->getNumOperands()); 3849 } 3850 unsigned Pairs = UsefulGEP ? SrcGEP->idx_end() - SrcGEP->idx_begin() : 1; 3851 SmallVector<Subscript, 4> Pair(Pairs); 3852 if (UsefulGEP) { 3853 unsigned P = 0; 3854 for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(), 3855 SrcEnd = SrcGEP->idx_end(), 3856 DstIdx = DstGEP->idx_begin(); 3857 SrcIdx != SrcEnd; 3858 ++SrcIdx, ++DstIdx, ++P) { 3859 Pair[P].Src = SE->getSCEV(*SrcIdx); 3860 Pair[P].Dst = SE->getSCEV(*DstIdx); 3861 } 3862 } 3863 else { 3864 const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); 3865 const SCEV *DstSCEV = SE->getSCEV(DstPtr); 3866 Pair[0].Src = SrcSCEV; 3867 Pair[0].Dst = DstSCEV; 3868 } 3869 3870 if (Delinearize && CommonLevels > 1) { 3871 if (tryDelinearize(Src, Dst, Pair)) { 3872 DEBUG(dbgs() << " delinerized GEP\n"); 3873 Pairs = Pair.size(); 3874 } 3875 } 3876 3877 for (unsigned P = 0; P < Pairs; ++P) { 3878 Pair[P].Loops.resize(MaxLevels + 1); 3879 Pair[P].GroupLoops.resize(MaxLevels + 1); 3880 Pair[P].Group.resize(Pairs); 3881 removeMatchingExtensions(&Pair[P]); 3882 Pair[P].Classification = 3883 classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), 3884 Pair[P].Dst, LI->getLoopFor(Dst->getParent()), 3885 Pair[P].Loops); 3886 Pair[P].GroupLoops = Pair[P].Loops; 3887 Pair[P].Group.set(P); 3888 } 3889 3890 SmallBitVector Separable(Pairs); 3891 SmallBitVector Coupled(Pairs); 3892 3893 // partition subscripts into separable and minimally-coupled groups 3894 for (unsigned SI = 0; SI < Pairs; ++SI) { 3895 if (Pair[SI].Classification == Subscript::NonLinear) { 3896 // ignore these, but collect loops for later 3897 collectCommonLoops(Pair[SI].Src, 3898 LI->getLoopFor(Src->getParent()), 3899 Pair[SI].Loops); 3900 collectCommonLoops(Pair[SI].Dst, 3901 LI->getLoopFor(Dst->getParent()), 3902 Pair[SI].Loops); 3903 Result.Consistent = false; 3904 } 3905 else if (Pair[SI].Classification == Subscript::ZIV) 3906 Separable.set(SI); 3907 else { 3908 // SIV, RDIV, or MIV, so check for coupled group 3909 bool Done = true; 3910 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { 3911 SmallBitVector Intersection = Pair[SI].GroupLoops; 3912 Intersection &= Pair[SJ].GroupLoops; 3913 if (Intersection.any()) { 3914 // accumulate set of all the loops in group 3915 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; 3916 // accumulate set of all subscripts in group 3917 Pair[SJ].Group |= Pair[SI].Group; 3918 Done = false; 3919 } 3920 } 3921 if (Done) { 3922 if (Pair[SI].Group.count() == 1) 3923 Separable.set(SI); 3924 else 3925 Coupled.set(SI); 3926 } 3927 } 3928 } 3929 3930 Constraint NewConstraint; 3931 NewConstraint.setAny(SE); 3932 3933 // test separable subscripts 3934 for (int SI = Separable.find_first(); SI >= 0; SI = Separable.find_next(SI)) { 3935 switch (Pair[SI].Classification) { 3936 case Subscript::SIV: { 3937 unsigned Level; 3938 const SCEV *SplitIter = nullptr; 3939 (void) testSIV(Pair[SI].Src, Pair[SI].Dst, Level, 3940 Result, NewConstraint, SplitIter); 3941 if (Level == SplitLevel) { 3942 assert(SplitIter != nullptr); 3943 return SplitIter; 3944 } 3945 break; 3946 } 3947 case Subscript::ZIV: 3948 case Subscript::RDIV: 3949 case Subscript::MIV: 3950 break; 3951 default: 3952 llvm_unreachable("subscript has unexpected classification"); 3953 } 3954 } 3955 3956 if (Coupled.count()) { 3957 // test coupled subscript groups 3958 SmallVector<Constraint, 4> Constraints(MaxLevels + 1); 3959 for (unsigned II = 0; II <= MaxLevels; ++II) 3960 Constraints[II].setAny(SE); 3961 for (int SI = Coupled.find_first(); SI >= 0; SI = Coupled.find_next(SI)) { 3962 SmallBitVector Group(Pair[SI].Group); 3963 SmallBitVector Sivs(Pairs); 3964 SmallBitVector Mivs(Pairs); 3965 SmallBitVector ConstrainedLevels(MaxLevels + 1); 3966 for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) { 3967 if (Pair[SJ].Classification == Subscript::SIV) 3968 Sivs.set(SJ); 3969 else 3970 Mivs.set(SJ); 3971 } 3972 while (Sivs.any()) { 3973 bool Changed = false; 3974 for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) { 3975 // SJ is an SIV subscript that's part of the current coupled group 3976 unsigned Level; 3977 const SCEV *SplitIter = nullptr; 3978 (void) testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, 3979 Result, NewConstraint, SplitIter); 3980 if (Level == SplitLevel && SplitIter) 3981 return SplitIter; 3982 ConstrainedLevels.set(Level); 3983 if (intersectConstraints(&Constraints[Level], &NewConstraint)) 3984 Changed = true; 3985 Sivs.reset(SJ); 3986 } 3987 if (Changed) { 3988 // propagate, possibly creating new SIVs and ZIVs 3989 for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { 3990 // SJ is an MIV subscript that's part of the current coupled group 3991 if (propagate(Pair[SJ].Src, Pair[SJ].Dst, 3992 Pair[SJ].Loops, Constraints, Result.Consistent)) { 3993 Pair[SJ].Classification = 3994 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), 3995 Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), 3996 Pair[SJ].Loops); 3997 switch (Pair[SJ].Classification) { 3998 case Subscript::ZIV: 3999 Mivs.reset(SJ); 4000 break; 4001 case Subscript::SIV: 4002 Sivs.set(SJ); 4003 Mivs.reset(SJ); 4004 break; 4005 case Subscript::RDIV: 4006 case Subscript::MIV: 4007 break; 4008 default: 4009 llvm_unreachable("bad subscript classification"); 4010 } 4011 } 4012 } 4013 } 4014 } 4015 } 4016 } 4017 llvm_unreachable("somehow reached end of routine"); 4018 return nullptr; 4019 } 4020