1 //===-- DependenceAnalysis.cpp - DA Implementation --------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // DependenceAnalysis is an LLVM pass that analyses dependences between memory 11 // accesses. Currently, it is an (incomplete) implementation of the approach 12 // described in 13 // 14 // Practical Dependence Testing 15 // Goff, Kennedy, Tseng 16 // PLDI 1991 17 // 18 // There's a single entry point that analyzes the dependence between a pair 19 // of memory references in a function, returning either NULL, for no dependence, 20 // or a more-or-less detailed description of the dependence between them. 21 // 22 // Currently, the implementation cannot propagate constraints between 23 // coupled RDIV subscripts and lacks a multi-subscript MIV test. 24 // Both of these are conservative weaknesses; 25 // that is, not a source of correctness problems. 26 // 27 // The implementation depends on the GEP instruction to differentiate 28 // subscripts. Since Clang linearizes some array subscripts, the dependence 29 // analysis is using SCEV->delinearize to recover the representation of multiple 30 // subscripts, and thus avoid the more expensive and less precise MIV tests. The 31 // delinearization is controlled by the flag -da-delinearize. 32 // 33 // We should pay some careful attention to the possibility of integer overflow 34 // in the implementation of the various tests. This could happen with Add, 35 // Subtract, or Multiply, with both APInt's and SCEV's. 36 // 37 // Some non-linear subscript pairs can be handled by the GCD test 38 // (and perhaps other tests). 39 // Should explore how often these things occur. 40 // 41 // Finally, it seems like certain test cases expose weaknesses in the SCEV 42 // simplification, especially in the handling of sign and zero extensions. 43 // It could be useful to spend time exploring these. 44 // 45 // Please note that this is work in progress and the interface is subject to 46 // change. 47 // 48 //===----------------------------------------------------------------------===// 49 // // 50 // In memory of Ken Kennedy, 1945 - 2007 // 51 // // 52 //===----------------------------------------------------------------------===// 53 54 #include "llvm/Analysis/DependenceAnalysis.h" 55 #include "llvm/ADT/STLExtras.h" 56 #include "llvm/ADT/Statistic.h" 57 #include "llvm/Analysis/AliasAnalysis.h" 58 #include "llvm/Analysis/LoopInfo.h" 59 #include "llvm/Analysis/ScalarEvolution.h" 60 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 61 #include "llvm/Analysis/ValueTracking.h" 62 #include "llvm/IR/InstIterator.h" 63 #include "llvm/IR/Operator.h" 64 #include "llvm/Support/CommandLine.h" 65 #include "llvm/Support/Debug.h" 66 #include "llvm/Support/ErrorHandling.h" 67 #include "llvm/Support/raw_ostream.h" 68 69 using namespace llvm; 70 71 #define DEBUG_TYPE "da" 72 73 //===----------------------------------------------------------------------===// 74 // statistics 75 76 STATISTIC(TotalArrayPairs, "Array pairs tested"); 77 STATISTIC(SeparableSubscriptPairs, "Separable subscript pairs"); 78 STATISTIC(CoupledSubscriptPairs, "Coupled subscript pairs"); 79 STATISTIC(NonlinearSubscriptPairs, "Nonlinear subscript pairs"); 80 STATISTIC(ZIVapplications, "ZIV applications"); 81 STATISTIC(ZIVindependence, "ZIV independence"); 82 STATISTIC(StrongSIVapplications, "Strong SIV applications"); 83 STATISTIC(StrongSIVsuccesses, "Strong SIV successes"); 84 STATISTIC(StrongSIVindependence, "Strong SIV independence"); 85 STATISTIC(WeakCrossingSIVapplications, "Weak-Crossing SIV applications"); 86 STATISTIC(WeakCrossingSIVsuccesses, "Weak-Crossing SIV successes"); 87 STATISTIC(WeakCrossingSIVindependence, "Weak-Crossing SIV independence"); 88 STATISTIC(ExactSIVapplications, "Exact SIV applications"); 89 STATISTIC(ExactSIVsuccesses, "Exact SIV successes"); 90 STATISTIC(ExactSIVindependence, "Exact SIV independence"); 91 STATISTIC(WeakZeroSIVapplications, "Weak-Zero SIV applications"); 92 STATISTIC(WeakZeroSIVsuccesses, "Weak-Zero SIV successes"); 93 STATISTIC(WeakZeroSIVindependence, "Weak-Zero SIV independence"); 94 STATISTIC(ExactRDIVapplications, "Exact RDIV applications"); 95 STATISTIC(ExactRDIVindependence, "Exact RDIV independence"); 96 STATISTIC(SymbolicRDIVapplications, "Symbolic RDIV applications"); 97 STATISTIC(SymbolicRDIVindependence, "Symbolic RDIV independence"); 98 STATISTIC(DeltaApplications, "Delta applications"); 99 STATISTIC(DeltaSuccesses, "Delta successes"); 100 STATISTIC(DeltaIndependence, "Delta independence"); 101 STATISTIC(DeltaPropagations, "Delta propagations"); 102 STATISTIC(GCDapplications, "GCD applications"); 103 STATISTIC(GCDsuccesses, "GCD successes"); 104 STATISTIC(GCDindependence, "GCD independence"); 105 STATISTIC(BanerjeeApplications, "Banerjee applications"); 106 STATISTIC(BanerjeeIndependence, "Banerjee independence"); 107 STATISTIC(BanerjeeSuccesses, "Banerjee successes"); 108 109 static cl::opt<bool> 110 Delinearize("da-delinearize", cl::init(false), cl::Hidden, cl::ZeroOrMore, 111 cl::desc("Try to delinearize array references.")); 112 113 //===----------------------------------------------------------------------===// 114 // basics 115 116 INITIALIZE_PASS_BEGIN(DependenceAnalysis, "da", 117 "Dependence Analysis", true, true) 118 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 119 INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) 120 INITIALIZE_AG_DEPENDENCY(AliasAnalysis) 121 INITIALIZE_PASS_END(DependenceAnalysis, "da", 122 "Dependence Analysis", true, true) 123 124 char DependenceAnalysis::ID = 0; 125 126 127 FunctionPass *llvm::createDependenceAnalysisPass() { 128 return new DependenceAnalysis(); 129 } 130 131 132 bool DependenceAnalysis::runOnFunction(Function &F) { 133 this->F = &F; 134 AA = &getAnalysis<AliasAnalysis>(); 135 SE = &getAnalysis<ScalarEvolution>(); 136 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 137 return false; 138 } 139 140 141 void DependenceAnalysis::releaseMemory() { 142 } 143 144 145 void DependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { 146 AU.setPreservesAll(); 147 AU.addRequiredTransitive<AliasAnalysis>(); 148 AU.addRequiredTransitive<ScalarEvolution>(); 149 AU.addRequiredTransitive<LoopInfoWrapperPass>(); 150 } 151 152 153 // Used to test the dependence analyzer. 154 // Looks through the function, noting loads and stores. 155 // Calls depends() on every possible pair and prints out the result. 156 // Ignores all other instructions. 157 static 158 void dumpExampleDependence(raw_ostream &OS, Function *F, 159 DependenceAnalysis *DA) { 160 for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F); 161 SrcI != SrcE; ++SrcI) { 162 if (isa<StoreInst>(*SrcI) || isa<LoadInst>(*SrcI)) { 163 for (inst_iterator DstI = SrcI, DstE = inst_end(F); 164 DstI != DstE; ++DstI) { 165 if (isa<StoreInst>(*DstI) || isa<LoadInst>(*DstI)) { 166 OS << "da analyze - "; 167 if (auto D = DA->depends(&*SrcI, &*DstI, true)) { 168 D->dump(OS); 169 for (unsigned Level = 1; Level <= D->getLevels(); Level++) { 170 if (D->isSplitable(Level)) { 171 OS << "da analyze - split level = " << Level; 172 OS << ", iteration = " << *DA->getSplitIteration(*D, Level); 173 OS << "!\n"; 174 } 175 } 176 } 177 else 178 OS << "none!\n"; 179 } 180 } 181 } 182 } 183 } 184 185 186 void DependenceAnalysis::print(raw_ostream &OS, const Module*) const { 187 dumpExampleDependence(OS, F, const_cast<DependenceAnalysis *>(this)); 188 } 189 190 //===----------------------------------------------------------------------===// 191 // Dependence methods 192 193 // Returns true if this is an input dependence. 194 bool Dependence::isInput() const { 195 return Src->mayReadFromMemory() && Dst->mayReadFromMemory(); 196 } 197 198 199 // Returns true if this is an output dependence. 200 bool Dependence::isOutput() const { 201 return Src->mayWriteToMemory() && Dst->mayWriteToMemory(); 202 } 203 204 205 // Returns true if this is an flow (aka true) dependence. 206 bool Dependence::isFlow() const { 207 return Src->mayWriteToMemory() && Dst->mayReadFromMemory(); 208 } 209 210 211 // Returns true if this is an anti dependence. 212 bool Dependence::isAnti() const { 213 return Src->mayReadFromMemory() && Dst->mayWriteToMemory(); 214 } 215 216 217 // Returns true if a particular level is scalar; that is, 218 // if no subscript in the source or destination mention the induction 219 // variable associated with the loop at this level. 220 // Leave this out of line, so it will serve as a virtual method anchor 221 bool Dependence::isScalar(unsigned level) const { 222 return false; 223 } 224 225 226 //===----------------------------------------------------------------------===// 227 // FullDependence methods 228 229 FullDependence::FullDependence(Instruction *Source, Instruction *Destination, 230 bool PossiblyLoopIndependent, 231 unsigned CommonLevels) 232 : Dependence(Source, Destination), Levels(CommonLevels), 233 LoopIndependent(PossiblyLoopIndependent), Consistent(true), 234 DV(CommonLevels ? new DVEntry[CommonLevels] : nullptr) {} 235 236 // The rest are simple getters that hide the implementation. 237 238 // getDirection - Returns the direction associated with a particular level. 239 unsigned FullDependence::getDirection(unsigned Level) const { 240 assert(0 < Level && Level <= Levels && "Level out of range"); 241 return DV[Level - 1].Direction; 242 } 243 244 245 // Returns the distance (or NULL) associated with a particular level. 246 const SCEV *FullDependence::getDistance(unsigned Level) const { 247 assert(0 < Level && Level <= Levels && "Level out of range"); 248 return DV[Level - 1].Distance; 249 } 250 251 252 // Returns true if a particular level is scalar; that is, 253 // if no subscript in the source or destination mention the induction 254 // variable associated with the loop at this level. 255 bool FullDependence::isScalar(unsigned Level) const { 256 assert(0 < Level && Level <= Levels && "Level out of range"); 257 return DV[Level - 1].Scalar; 258 } 259 260 261 // Returns true if peeling the first iteration from this loop 262 // will break this dependence. 263 bool FullDependence::isPeelFirst(unsigned Level) const { 264 assert(0 < Level && Level <= Levels && "Level out of range"); 265 return DV[Level - 1].PeelFirst; 266 } 267 268 269 // Returns true if peeling the last iteration from this loop 270 // will break this dependence. 271 bool FullDependence::isPeelLast(unsigned Level) const { 272 assert(0 < Level && Level <= Levels && "Level out of range"); 273 return DV[Level - 1].PeelLast; 274 } 275 276 277 // Returns true if splitting this loop will break the dependence. 278 bool FullDependence::isSplitable(unsigned Level) const { 279 assert(0 < Level && Level <= Levels && "Level out of range"); 280 return DV[Level - 1].Splitable; 281 } 282 283 284 //===----------------------------------------------------------------------===// 285 // DependenceAnalysis::Constraint methods 286 287 // If constraint is a point <X, Y>, returns X. 288 // Otherwise assert. 289 const SCEV *DependenceAnalysis::Constraint::getX() const { 290 assert(Kind == Point && "Kind should be Point"); 291 return A; 292 } 293 294 295 // If constraint is a point <X, Y>, returns Y. 296 // Otherwise assert. 297 const SCEV *DependenceAnalysis::Constraint::getY() const { 298 assert(Kind == Point && "Kind should be Point"); 299 return B; 300 } 301 302 303 // If constraint is a line AX + BY = C, returns A. 304 // Otherwise assert. 305 const SCEV *DependenceAnalysis::Constraint::getA() const { 306 assert((Kind == Line || Kind == Distance) && 307 "Kind should be Line (or Distance)"); 308 return A; 309 } 310 311 312 // If constraint is a line AX + BY = C, returns B. 313 // Otherwise assert. 314 const SCEV *DependenceAnalysis::Constraint::getB() const { 315 assert((Kind == Line || Kind == Distance) && 316 "Kind should be Line (or Distance)"); 317 return B; 318 } 319 320 321 // If constraint is a line AX + BY = C, returns C. 322 // Otherwise assert. 323 const SCEV *DependenceAnalysis::Constraint::getC() const { 324 assert((Kind == Line || Kind == Distance) && 325 "Kind should be Line (or Distance)"); 326 return C; 327 } 328 329 330 // If constraint is a distance, returns D. 331 // Otherwise assert. 332 const SCEV *DependenceAnalysis::Constraint::getD() const { 333 assert(Kind == Distance && "Kind should be Distance"); 334 return SE->getNegativeSCEV(C); 335 } 336 337 338 // Returns the loop associated with this constraint. 339 const Loop *DependenceAnalysis::Constraint::getAssociatedLoop() const { 340 assert((Kind == Distance || Kind == Line || Kind == Point) && 341 "Kind should be Distance, Line, or Point"); 342 return AssociatedLoop; 343 } 344 345 346 void DependenceAnalysis::Constraint::setPoint(const SCEV *X, 347 const SCEV *Y, 348 const Loop *CurLoop) { 349 Kind = Point; 350 A = X; 351 B = Y; 352 AssociatedLoop = CurLoop; 353 } 354 355 356 void DependenceAnalysis::Constraint::setLine(const SCEV *AA, 357 const SCEV *BB, 358 const SCEV *CC, 359 const Loop *CurLoop) { 360 Kind = Line; 361 A = AA; 362 B = BB; 363 C = CC; 364 AssociatedLoop = CurLoop; 365 } 366 367 368 void DependenceAnalysis::Constraint::setDistance(const SCEV *D, 369 const Loop *CurLoop) { 370 Kind = Distance; 371 A = SE->getConstant(D->getType(), 1); 372 B = SE->getNegativeSCEV(A); 373 C = SE->getNegativeSCEV(D); 374 AssociatedLoop = CurLoop; 375 } 376 377 378 void DependenceAnalysis::Constraint::setEmpty() { 379 Kind = Empty; 380 } 381 382 383 void DependenceAnalysis::Constraint::setAny(ScalarEvolution *NewSE) { 384 SE = NewSE; 385 Kind = Any; 386 } 387 388 389 // For debugging purposes. Dumps the constraint out to OS. 390 void DependenceAnalysis::Constraint::dump(raw_ostream &OS) const { 391 if (isEmpty()) 392 OS << " Empty\n"; 393 else if (isAny()) 394 OS << " Any\n"; 395 else if (isPoint()) 396 OS << " Point is <" << *getX() << ", " << *getY() << ">\n"; 397 else if (isDistance()) 398 OS << " Distance is " << *getD() << 399 " (" << *getA() << "*X + " << *getB() << "*Y = " << *getC() << ")\n"; 400 else if (isLine()) 401 OS << " Line is " << *getA() << "*X + " << 402 *getB() << "*Y = " << *getC() << "\n"; 403 else 404 llvm_unreachable("unknown constraint type in Constraint::dump"); 405 } 406 407 408 // Updates X with the intersection 409 // of the Constraints X and Y. Returns true if X has changed. 410 // Corresponds to Figure 4 from the paper 411 // 412 // Practical Dependence Testing 413 // Goff, Kennedy, Tseng 414 // PLDI 1991 415 bool DependenceAnalysis::intersectConstraints(Constraint *X, 416 const Constraint *Y) { 417 ++DeltaApplications; 418 DEBUG(dbgs() << "\tintersect constraints\n"); 419 DEBUG(dbgs() << "\t X ="; X->dump(dbgs())); 420 DEBUG(dbgs() << "\t Y ="; Y->dump(dbgs())); 421 assert(!Y->isPoint() && "Y must not be a Point"); 422 if (X->isAny()) { 423 if (Y->isAny()) 424 return false; 425 *X = *Y; 426 return true; 427 } 428 if (X->isEmpty()) 429 return false; 430 if (Y->isEmpty()) { 431 X->setEmpty(); 432 return true; 433 } 434 435 if (X->isDistance() && Y->isDistance()) { 436 DEBUG(dbgs() << "\t intersect 2 distances\n"); 437 if (isKnownPredicate(CmpInst::ICMP_EQ, X->getD(), Y->getD())) 438 return false; 439 if (isKnownPredicate(CmpInst::ICMP_NE, X->getD(), Y->getD())) { 440 X->setEmpty(); 441 ++DeltaSuccesses; 442 return true; 443 } 444 // Hmmm, interesting situation. 445 // I guess if either is constant, keep it and ignore the other. 446 if (isa<SCEVConstant>(Y->getD())) { 447 *X = *Y; 448 return true; 449 } 450 return false; 451 } 452 453 // At this point, the pseudo-code in Figure 4 of the paper 454 // checks if (X->isPoint() && Y->isPoint()). 455 // This case can't occur in our implementation, 456 // since a Point can only arise as the result of intersecting 457 // two Line constraints, and the right-hand value, Y, is never 458 // the result of an intersection. 459 assert(!(X->isPoint() && Y->isPoint()) && 460 "We shouldn't ever see X->isPoint() && Y->isPoint()"); 461 462 if (X->isLine() && Y->isLine()) { 463 DEBUG(dbgs() << "\t intersect 2 lines\n"); 464 const SCEV *Prod1 = SE->getMulExpr(X->getA(), Y->getB()); 465 const SCEV *Prod2 = SE->getMulExpr(X->getB(), Y->getA()); 466 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) { 467 // slopes are equal, so lines are parallel 468 DEBUG(dbgs() << "\t\tsame slope\n"); 469 Prod1 = SE->getMulExpr(X->getC(), Y->getB()); 470 Prod2 = SE->getMulExpr(X->getB(), Y->getC()); 471 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) 472 return false; 473 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { 474 X->setEmpty(); 475 ++DeltaSuccesses; 476 return true; 477 } 478 return false; 479 } 480 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { 481 // slopes differ, so lines intersect 482 DEBUG(dbgs() << "\t\tdifferent slopes\n"); 483 const SCEV *C1B2 = SE->getMulExpr(X->getC(), Y->getB()); 484 const SCEV *C1A2 = SE->getMulExpr(X->getC(), Y->getA()); 485 const SCEV *C2B1 = SE->getMulExpr(Y->getC(), X->getB()); 486 const SCEV *C2A1 = SE->getMulExpr(Y->getC(), X->getA()); 487 const SCEV *A1B2 = SE->getMulExpr(X->getA(), Y->getB()); 488 const SCEV *A2B1 = SE->getMulExpr(Y->getA(), X->getB()); 489 const SCEVConstant *C1A2_C2A1 = 490 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1A2, C2A1)); 491 const SCEVConstant *C1B2_C2B1 = 492 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1B2, C2B1)); 493 const SCEVConstant *A1B2_A2B1 = 494 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A1B2, A2B1)); 495 const SCEVConstant *A2B1_A1B2 = 496 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A2B1, A1B2)); 497 if (!C1B2_C2B1 || !C1A2_C2A1 || 498 !A1B2_A2B1 || !A2B1_A1B2) 499 return false; 500 APInt Xtop = C1B2_C2B1->getValue()->getValue(); 501 APInt Xbot = A1B2_A2B1->getValue()->getValue(); 502 APInt Ytop = C1A2_C2A1->getValue()->getValue(); 503 APInt Ybot = A2B1_A1B2->getValue()->getValue(); 504 DEBUG(dbgs() << "\t\tXtop = " << Xtop << "\n"); 505 DEBUG(dbgs() << "\t\tXbot = " << Xbot << "\n"); 506 DEBUG(dbgs() << "\t\tYtop = " << Ytop << "\n"); 507 DEBUG(dbgs() << "\t\tYbot = " << Ybot << "\n"); 508 APInt Xq = Xtop; // these need to be initialized, even 509 APInt Xr = Xtop; // though they're just going to be overwritten 510 APInt::sdivrem(Xtop, Xbot, Xq, Xr); 511 APInt Yq = Ytop; 512 APInt Yr = Ytop; 513 APInt::sdivrem(Ytop, Ybot, Yq, Yr); 514 if (Xr != 0 || Yr != 0) { 515 X->setEmpty(); 516 ++DeltaSuccesses; 517 return true; 518 } 519 DEBUG(dbgs() << "\t\tX = " << Xq << ", Y = " << Yq << "\n"); 520 if (Xq.slt(0) || Yq.slt(0)) { 521 X->setEmpty(); 522 ++DeltaSuccesses; 523 return true; 524 } 525 if (const SCEVConstant *CUB = 526 collectConstantUpperBound(X->getAssociatedLoop(), Prod1->getType())) { 527 APInt UpperBound = CUB->getValue()->getValue(); 528 DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n"); 529 if (Xq.sgt(UpperBound) || Yq.sgt(UpperBound)) { 530 X->setEmpty(); 531 ++DeltaSuccesses; 532 return true; 533 } 534 } 535 X->setPoint(SE->getConstant(Xq), 536 SE->getConstant(Yq), 537 X->getAssociatedLoop()); 538 ++DeltaSuccesses; 539 return true; 540 } 541 return false; 542 } 543 544 // if (X->isLine() && Y->isPoint()) This case can't occur. 545 assert(!(X->isLine() && Y->isPoint()) && "This case should never occur"); 546 547 if (X->isPoint() && Y->isLine()) { 548 DEBUG(dbgs() << "\t intersect Point and Line\n"); 549 const SCEV *A1X1 = SE->getMulExpr(Y->getA(), X->getX()); 550 const SCEV *B1Y1 = SE->getMulExpr(Y->getB(), X->getY()); 551 const SCEV *Sum = SE->getAddExpr(A1X1, B1Y1); 552 if (isKnownPredicate(CmpInst::ICMP_EQ, Sum, Y->getC())) 553 return false; 554 if (isKnownPredicate(CmpInst::ICMP_NE, Sum, Y->getC())) { 555 X->setEmpty(); 556 ++DeltaSuccesses; 557 return true; 558 } 559 return false; 560 } 561 562 llvm_unreachable("shouldn't reach the end of Constraint intersection"); 563 return false; 564 } 565 566 567 //===----------------------------------------------------------------------===// 568 // DependenceAnalysis methods 569 570 // For debugging purposes. Dumps a dependence to OS. 571 void Dependence::dump(raw_ostream &OS) const { 572 bool Splitable = false; 573 if (isConfused()) 574 OS << "confused"; 575 else { 576 if (isConsistent()) 577 OS << "consistent "; 578 if (isFlow()) 579 OS << "flow"; 580 else if (isOutput()) 581 OS << "output"; 582 else if (isAnti()) 583 OS << "anti"; 584 else if (isInput()) 585 OS << "input"; 586 unsigned Levels = getLevels(); 587 OS << " ["; 588 for (unsigned II = 1; II <= Levels; ++II) { 589 if (isSplitable(II)) 590 Splitable = true; 591 if (isPeelFirst(II)) 592 OS << 'p'; 593 const SCEV *Distance = getDistance(II); 594 if (Distance) 595 OS << *Distance; 596 else if (isScalar(II)) 597 OS << "S"; 598 else { 599 unsigned Direction = getDirection(II); 600 if (Direction == DVEntry::ALL) 601 OS << "*"; 602 else { 603 if (Direction & DVEntry::LT) 604 OS << "<"; 605 if (Direction & DVEntry::EQ) 606 OS << "="; 607 if (Direction & DVEntry::GT) 608 OS << ">"; 609 } 610 } 611 if (isPeelLast(II)) 612 OS << 'p'; 613 if (II < Levels) 614 OS << " "; 615 } 616 if (isLoopIndependent()) 617 OS << "|<"; 618 OS << "]"; 619 if (Splitable) 620 OS << " splitable"; 621 } 622 OS << "!\n"; 623 } 624 625 626 627 static 628 AliasAnalysis::AliasResult underlyingObjectsAlias(AliasAnalysis *AA, 629 const Value *A, 630 const Value *B) { 631 const Value *AObj = GetUnderlyingObject(A); 632 const Value *BObj = GetUnderlyingObject(B); 633 return AA->alias(AObj, AA->getTypeStoreSize(AObj->getType()), 634 BObj, AA->getTypeStoreSize(BObj->getType())); 635 } 636 637 638 // Returns true if the load or store can be analyzed. Atomic and volatile 639 // operations have properties which this analysis does not understand. 640 static 641 bool isLoadOrStore(const Instruction *I) { 642 if (const LoadInst *LI = dyn_cast<LoadInst>(I)) 643 return LI->isUnordered(); 644 else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) 645 return SI->isUnordered(); 646 return false; 647 } 648 649 650 static 651 Value *getPointerOperand(Instruction *I) { 652 if (LoadInst *LI = dyn_cast<LoadInst>(I)) 653 return LI->getPointerOperand(); 654 if (StoreInst *SI = dyn_cast<StoreInst>(I)) 655 return SI->getPointerOperand(); 656 llvm_unreachable("Value is not load or store instruction"); 657 return nullptr; 658 } 659 660 661 // Examines the loop nesting of the Src and Dst 662 // instructions and establishes their shared loops. Sets the variables 663 // CommonLevels, SrcLevels, and MaxLevels. 664 // The source and destination instructions needn't be contained in the same 665 // loop. The routine establishNestingLevels finds the level of most deeply 666 // nested loop that contains them both, CommonLevels. An instruction that's 667 // not contained in a loop is at level = 0. MaxLevels is equal to the level 668 // of the source plus the level of the destination, minus CommonLevels. 669 // This lets us allocate vectors MaxLevels in length, with room for every 670 // distinct loop referenced in both the source and destination subscripts. 671 // The variable SrcLevels is the nesting depth of the source instruction. 672 // It's used to help calculate distinct loops referenced by the destination. 673 // Here's the map from loops to levels: 674 // 0 - unused 675 // 1 - outermost common loop 676 // ... - other common loops 677 // CommonLevels - innermost common loop 678 // ... - loops containing Src but not Dst 679 // SrcLevels - innermost loop containing Src but not Dst 680 // ... - loops containing Dst but not Src 681 // MaxLevels - innermost loops containing Dst but not Src 682 // Consider the follow code fragment: 683 // for (a = ...) { 684 // for (b = ...) { 685 // for (c = ...) { 686 // for (d = ...) { 687 // A[] = ...; 688 // } 689 // } 690 // for (e = ...) { 691 // for (f = ...) { 692 // for (g = ...) { 693 // ... = A[]; 694 // } 695 // } 696 // } 697 // } 698 // } 699 // If we're looking at the possibility of a dependence between the store 700 // to A (the Src) and the load from A (the Dst), we'll note that they 701 // have 2 loops in common, so CommonLevels will equal 2 and the direction 702 // vector for Result will have 2 entries. SrcLevels = 4 and MaxLevels = 7. 703 // A map from loop names to loop numbers would look like 704 // a - 1 705 // b - 2 = CommonLevels 706 // c - 3 707 // d - 4 = SrcLevels 708 // e - 5 709 // f - 6 710 // g - 7 = MaxLevels 711 void DependenceAnalysis::establishNestingLevels(const Instruction *Src, 712 const Instruction *Dst) { 713 const BasicBlock *SrcBlock = Src->getParent(); 714 const BasicBlock *DstBlock = Dst->getParent(); 715 unsigned SrcLevel = LI->getLoopDepth(SrcBlock); 716 unsigned DstLevel = LI->getLoopDepth(DstBlock); 717 const Loop *SrcLoop = LI->getLoopFor(SrcBlock); 718 const Loop *DstLoop = LI->getLoopFor(DstBlock); 719 SrcLevels = SrcLevel; 720 MaxLevels = SrcLevel + DstLevel; 721 while (SrcLevel > DstLevel) { 722 SrcLoop = SrcLoop->getParentLoop(); 723 SrcLevel--; 724 } 725 while (DstLevel > SrcLevel) { 726 DstLoop = DstLoop->getParentLoop(); 727 DstLevel--; 728 } 729 while (SrcLoop != DstLoop) { 730 SrcLoop = SrcLoop->getParentLoop(); 731 DstLoop = DstLoop->getParentLoop(); 732 SrcLevel--; 733 } 734 CommonLevels = SrcLevel; 735 MaxLevels -= CommonLevels; 736 } 737 738 739 // Given one of the loops containing the source, return 740 // its level index in our numbering scheme. 741 unsigned DependenceAnalysis::mapSrcLoop(const Loop *SrcLoop) const { 742 return SrcLoop->getLoopDepth(); 743 } 744 745 746 // Given one of the loops containing the destination, 747 // return its level index in our numbering scheme. 748 unsigned DependenceAnalysis::mapDstLoop(const Loop *DstLoop) const { 749 unsigned D = DstLoop->getLoopDepth(); 750 if (D > CommonLevels) 751 return D - CommonLevels + SrcLevels; 752 else 753 return D; 754 } 755 756 757 // Returns true if Expression is loop invariant in LoopNest. 758 bool DependenceAnalysis::isLoopInvariant(const SCEV *Expression, 759 const Loop *LoopNest) const { 760 if (!LoopNest) 761 return true; 762 return SE->isLoopInvariant(Expression, LoopNest) && 763 isLoopInvariant(Expression, LoopNest->getParentLoop()); 764 } 765 766 767 768 // Finds the set of loops from the LoopNest that 769 // have a level <= CommonLevels and are referred to by the SCEV Expression. 770 void DependenceAnalysis::collectCommonLoops(const SCEV *Expression, 771 const Loop *LoopNest, 772 SmallBitVector &Loops) const { 773 while (LoopNest) { 774 unsigned Level = LoopNest->getLoopDepth(); 775 if (Level <= CommonLevels && !SE->isLoopInvariant(Expression, LoopNest)) 776 Loops.set(Level); 777 LoopNest = LoopNest->getParentLoop(); 778 } 779 } 780 781 void DependenceAnalysis::unifySubscriptType(Subscript *Pair) { 782 const SCEV *Src = Pair->Src; 783 const SCEV *Dst = Pair->Dst; 784 IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType()); 785 IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType()); 786 if (SrcTy == nullptr || DstTy == nullptr) { 787 assert(SrcTy == DstTy && "This function only unify integer types and " 788 "expect Src and Dst share the same type " 789 "otherwise."); 790 return; 791 } 792 if (SrcTy->getBitWidth() > DstTy->getBitWidth()) { 793 // Sign-extend Dst to typeof(Src) if typeof(Src) is wider than typeof(Dst). 794 Pair->Dst = SE->getSignExtendExpr(Dst, SrcTy); 795 } else if (SrcTy->getBitWidth() < DstTy->getBitWidth()) { 796 // Sign-extend Src to typeof(Dst) if typeof(Dst) is wider than typeof(Src). 797 Pair->Src = SE->getSignExtendExpr(Src, DstTy); 798 } 799 } 800 801 // removeMatchingExtensions - Examines a subscript pair. 802 // If the source and destination are identically sign (or zero) 803 // extended, it strips off the extension in an effect to simplify 804 // the actual analysis. 805 void DependenceAnalysis::removeMatchingExtensions(Subscript *Pair) { 806 const SCEV *Src = Pair->Src; 807 const SCEV *Dst = Pair->Dst; 808 if ((isa<SCEVZeroExtendExpr>(Src) && isa<SCEVZeroExtendExpr>(Dst)) || 809 (isa<SCEVSignExtendExpr>(Src) && isa<SCEVSignExtendExpr>(Dst))) { 810 const SCEVCastExpr *SrcCast = cast<SCEVCastExpr>(Src); 811 const SCEVCastExpr *DstCast = cast<SCEVCastExpr>(Dst); 812 const SCEV *SrcCastOp = SrcCast->getOperand(); 813 const SCEV *DstCastOp = DstCast->getOperand(); 814 if (SrcCastOp->getType() == DstCastOp->getType()) { 815 Pair->Src = SrcCastOp; 816 Pair->Dst = DstCastOp; 817 } 818 } 819 } 820 821 822 // Examine the scev and return true iff it's linear. 823 // Collect any loops mentioned in the set of "Loops". 824 bool DependenceAnalysis::checkSrcSubscript(const SCEV *Src, 825 const Loop *LoopNest, 826 SmallBitVector &Loops) { 827 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Src); 828 if (!AddRec) 829 return isLoopInvariant(Src, LoopNest); 830 const SCEV *Start = AddRec->getStart(); 831 const SCEV *Step = AddRec->getStepRecurrence(*SE); 832 if (!isLoopInvariant(Step, LoopNest)) 833 return false; 834 Loops.set(mapSrcLoop(AddRec->getLoop())); 835 return checkSrcSubscript(Start, LoopNest, Loops); 836 } 837 838 839 840 // Examine the scev and return true iff it's linear. 841 // Collect any loops mentioned in the set of "Loops". 842 bool DependenceAnalysis::checkDstSubscript(const SCEV *Dst, 843 const Loop *LoopNest, 844 SmallBitVector &Loops) { 845 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Dst); 846 if (!AddRec) 847 return isLoopInvariant(Dst, LoopNest); 848 const SCEV *Start = AddRec->getStart(); 849 const SCEV *Step = AddRec->getStepRecurrence(*SE); 850 if (!isLoopInvariant(Step, LoopNest)) 851 return false; 852 Loops.set(mapDstLoop(AddRec->getLoop())); 853 return checkDstSubscript(Start, LoopNest, Loops); 854 } 855 856 857 // Examines the subscript pair (the Src and Dst SCEVs) 858 // and classifies it as either ZIV, SIV, RDIV, MIV, or Nonlinear. 859 // Collects the associated loops in a set. 860 DependenceAnalysis::Subscript::ClassificationKind 861 DependenceAnalysis::classifyPair(const SCEV *Src, const Loop *SrcLoopNest, 862 const SCEV *Dst, const Loop *DstLoopNest, 863 SmallBitVector &Loops) { 864 SmallBitVector SrcLoops(MaxLevels + 1); 865 SmallBitVector DstLoops(MaxLevels + 1); 866 if (!checkSrcSubscript(Src, SrcLoopNest, SrcLoops)) 867 return Subscript::NonLinear; 868 if (!checkDstSubscript(Dst, DstLoopNest, DstLoops)) 869 return Subscript::NonLinear; 870 Loops = SrcLoops; 871 Loops |= DstLoops; 872 unsigned N = Loops.count(); 873 if (N == 0) 874 return Subscript::ZIV; 875 if (N == 1) 876 return Subscript::SIV; 877 if (N == 2 && (SrcLoops.count() == 0 || 878 DstLoops.count() == 0 || 879 (SrcLoops.count() == 1 && DstLoops.count() == 1))) 880 return Subscript::RDIV; 881 return Subscript::MIV; 882 } 883 884 885 // A wrapper around SCEV::isKnownPredicate. 886 // Looks for cases where we're interested in comparing for equality. 887 // If both X and Y have been identically sign or zero extended, 888 // it strips off the (confusing) extensions before invoking 889 // SCEV::isKnownPredicate. Perhaps, someday, the ScalarEvolution package 890 // will be similarly updated. 891 // 892 // If SCEV::isKnownPredicate can't prove the predicate, 893 // we try simple subtraction, which seems to help in some cases 894 // involving symbolics. 895 bool DependenceAnalysis::isKnownPredicate(ICmpInst::Predicate Pred, 896 const SCEV *X, 897 const SCEV *Y) const { 898 if (Pred == CmpInst::ICMP_EQ || 899 Pred == CmpInst::ICMP_NE) { 900 if ((isa<SCEVSignExtendExpr>(X) && 901 isa<SCEVSignExtendExpr>(Y)) || 902 (isa<SCEVZeroExtendExpr>(X) && 903 isa<SCEVZeroExtendExpr>(Y))) { 904 const SCEVCastExpr *CX = cast<SCEVCastExpr>(X); 905 const SCEVCastExpr *CY = cast<SCEVCastExpr>(Y); 906 const SCEV *Xop = CX->getOperand(); 907 const SCEV *Yop = CY->getOperand(); 908 if (Xop->getType() == Yop->getType()) { 909 X = Xop; 910 Y = Yop; 911 } 912 } 913 } 914 if (SE->isKnownPredicate(Pred, X, Y)) 915 return true; 916 // If SE->isKnownPredicate can't prove the condition, 917 // we try the brute-force approach of subtracting 918 // and testing the difference. 919 // By testing with SE->isKnownPredicate first, we avoid 920 // the possibility of overflow when the arguments are constants. 921 const SCEV *Delta = SE->getMinusSCEV(X, Y); 922 switch (Pred) { 923 case CmpInst::ICMP_EQ: 924 return Delta->isZero(); 925 case CmpInst::ICMP_NE: 926 return SE->isKnownNonZero(Delta); 927 case CmpInst::ICMP_SGE: 928 return SE->isKnownNonNegative(Delta); 929 case CmpInst::ICMP_SLE: 930 return SE->isKnownNonPositive(Delta); 931 case CmpInst::ICMP_SGT: 932 return SE->isKnownPositive(Delta); 933 case CmpInst::ICMP_SLT: 934 return SE->isKnownNegative(Delta); 935 default: 936 llvm_unreachable("unexpected predicate in isKnownPredicate"); 937 } 938 } 939 940 941 // All subscripts are all the same type. 942 // Loop bound may be smaller (e.g., a char). 943 // Should zero extend loop bound, since it's always >= 0. 944 // This routine collects upper bound and extends if needed. 945 // Return null if no bound available. 946 const SCEV *DependenceAnalysis::collectUpperBound(const Loop *L, 947 Type *T) const { 948 if (SE->hasLoopInvariantBackedgeTakenCount(L)) { 949 const SCEV *UB = SE->getBackedgeTakenCount(L); 950 return SE->getNoopOrZeroExtend(UB, T); 951 } 952 return nullptr; 953 } 954 955 956 // Calls collectUpperBound(), then attempts to cast it to SCEVConstant. 957 // If the cast fails, returns NULL. 958 const SCEVConstant *DependenceAnalysis::collectConstantUpperBound(const Loop *L, 959 Type *T 960 ) const { 961 if (const SCEV *UB = collectUpperBound(L, T)) 962 return dyn_cast<SCEVConstant>(UB); 963 return nullptr; 964 } 965 966 967 // testZIV - 968 // When we have a pair of subscripts of the form [c1] and [c2], 969 // where c1 and c2 are both loop invariant, we attack it using 970 // the ZIV test. Basically, we test by comparing the two values, 971 // but there are actually three possible results: 972 // 1) the values are equal, so there's a dependence 973 // 2) the values are different, so there's no dependence 974 // 3) the values might be equal, so we have to assume a dependence. 975 // 976 // Return true if dependence disproved. 977 bool DependenceAnalysis::testZIV(const SCEV *Src, 978 const SCEV *Dst, 979 FullDependence &Result) const { 980 DEBUG(dbgs() << " src = " << *Src << "\n"); 981 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 982 ++ZIVapplications; 983 if (isKnownPredicate(CmpInst::ICMP_EQ, Src, Dst)) { 984 DEBUG(dbgs() << " provably dependent\n"); 985 return false; // provably dependent 986 } 987 if (isKnownPredicate(CmpInst::ICMP_NE, Src, Dst)) { 988 DEBUG(dbgs() << " provably independent\n"); 989 ++ZIVindependence; 990 return true; // provably independent 991 } 992 DEBUG(dbgs() << " possibly dependent\n"); 993 Result.Consistent = false; 994 return false; // possibly dependent 995 } 996 997 998 // strongSIVtest - 999 // From the paper, Practical Dependence Testing, Section 4.2.1 1000 // 1001 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 + a*i], 1002 // where i is an induction variable, c1 and c2 are loop invariant, 1003 // and a is a constant, we can solve it exactly using the Strong SIV test. 1004 // 1005 // Can prove independence. Failing that, can compute distance (and direction). 1006 // In the presence of symbolic terms, we can sometimes make progress. 1007 // 1008 // If there's a dependence, 1009 // 1010 // c1 + a*i = c2 + a*i' 1011 // 1012 // The dependence distance is 1013 // 1014 // d = i' - i = (c1 - c2)/a 1015 // 1016 // A dependence only exists if d is an integer and abs(d) <= U, where U is the 1017 // loop's upper bound. If a dependence exists, the dependence direction is 1018 // defined as 1019 // 1020 // { < if d > 0 1021 // direction = { = if d = 0 1022 // { > if d < 0 1023 // 1024 // Return true if dependence disproved. 1025 bool DependenceAnalysis::strongSIVtest(const SCEV *Coeff, 1026 const SCEV *SrcConst, 1027 const SCEV *DstConst, 1028 const Loop *CurLoop, 1029 unsigned Level, 1030 FullDependence &Result, 1031 Constraint &NewConstraint) const { 1032 DEBUG(dbgs() << "\tStrong SIV test\n"); 1033 DEBUG(dbgs() << "\t Coeff = " << *Coeff); 1034 DEBUG(dbgs() << ", " << *Coeff->getType() << "\n"); 1035 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst); 1036 DEBUG(dbgs() << ", " << *SrcConst->getType() << "\n"); 1037 DEBUG(dbgs() << "\t DstConst = " << *DstConst); 1038 DEBUG(dbgs() << ", " << *DstConst->getType() << "\n"); 1039 ++StrongSIVapplications; 1040 assert(0 < Level && Level <= CommonLevels && "level out of range"); 1041 Level--; 1042 1043 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); 1044 DEBUG(dbgs() << "\t Delta = " << *Delta); 1045 DEBUG(dbgs() << ", " << *Delta->getType() << "\n"); 1046 1047 // check that |Delta| < iteration count 1048 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1049 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound); 1050 DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n"); 1051 const SCEV *AbsDelta = 1052 SE->isKnownNonNegative(Delta) ? Delta : SE->getNegativeSCEV(Delta); 1053 const SCEV *AbsCoeff = 1054 SE->isKnownNonNegative(Coeff) ? Coeff : SE->getNegativeSCEV(Coeff); 1055 const SCEV *Product = SE->getMulExpr(UpperBound, AbsCoeff); 1056 if (isKnownPredicate(CmpInst::ICMP_SGT, AbsDelta, Product)) { 1057 // Distance greater than trip count - no dependence 1058 ++StrongSIVindependence; 1059 ++StrongSIVsuccesses; 1060 return true; 1061 } 1062 } 1063 1064 // Can we compute distance? 1065 if (isa<SCEVConstant>(Delta) && isa<SCEVConstant>(Coeff)) { 1066 APInt ConstDelta = cast<SCEVConstant>(Delta)->getValue()->getValue(); 1067 APInt ConstCoeff = cast<SCEVConstant>(Coeff)->getValue()->getValue(); 1068 APInt Distance = ConstDelta; // these need to be initialized 1069 APInt Remainder = ConstDelta; 1070 APInt::sdivrem(ConstDelta, ConstCoeff, Distance, Remainder); 1071 DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); 1072 DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1073 // Make sure Coeff divides Delta exactly 1074 if (Remainder != 0) { 1075 // Coeff doesn't divide Distance, no dependence 1076 ++StrongSIVindependence; 1077 ++StrongSIVsuccesses; 1078 return true; 1079 } 1080 Result.DV[Level].Distance = SE->getConstant(Distance); 1081 NewConstraint.setDistance(SE->getConstant(Distance), CurLoop); 1082 if (Distance.sgt(0)) 1083 Result.DV[Level].Direction &= Dependence::DVEntry::LT; 1084 else if (Distance.slt(0)) 1085 Result.DV[Level].Direction &= Dependence::DVEntry::GT; 1086 else 1087 Result.DV[Level].Direction &= Dependence::DVEntry::EQ; 1088 ++StrongSIVsuccesses; 1089 } 1090 else if (Delta->isZero()) { 1091 // since 0/X == 0 1092 Result.DV[Level].Distance = Delta; 1093 NewConstraint.setDistance(Delta, CurLoop); 1094 Result.DV[Level].Direction &= Dependence::DVEntry::EQ; 1095 ++StrongSIVsuccesses; 1096 } 1097 else { 1098 if (Coeff->isOne()) { 1099 DEBUG(dbgs() << "\t Distance = " << *Delta << "\n"); 1100 Result.DV[Level].Distance = Delta; // since X/1 == X 1101 NewConstraint.setDistance(Delta, CurLoop); 1102 } 1103 else { 1104 Result.Consistent = false; 1105 NewConstraint.setLine(Coeff, 1106 SE->getNegativeSCEV(Coeff), 1107 SE->getNegativeSCEV(Delta), CurLoop); 1108 } 1109 1110 // maybe we can get a useful direction 1111 bool DeltaMaybeZero = !SE->isKnownNonZero(Delta); 1112 bool DeltaMaybePositive = !SE->isKnownNonPositive(Delta); 1113 bool DeltaMaybeNegative = !SE->isKnownNonNegative(Delta); 1114 bool CoeffMaybePositive = !SE->isKnownNonPositive(Coeff); 1115 bool CoeffMaybeNegative = !SE->isKnownNonNegative(Coeff); 1116 // The double negatives above are confusing. 1117 // It helps to read !SE->isKnownNonZero(Delta) 1118 // as "Delta might be Zero" 1119 unsigned NewDirection = Dependence::DVEntry::NONE; 1120 if ((DeltaMaybePositive && CoeffMaybePositive) || 1121 (DeltaMaybeNegative && CoeffMaybeNegative)) 1122 NewDirection = Dependence::DVEntry::LT; 1123 if (DeltaMaybeZero) 1124 NewDirection |= Dependence::DVEntry::EQ; 1125 if ((DeltaMaybeNegative && CoeffMaybePositive) || 1126 (DeltaMaybePositive && CoeffMaybeNegative)) 1127 NewDirection |= Dependence::DVEntry::GT; 1128 if (NewDirection < Result.DV[Level].Direction) 1129 ++StrongSIVsuccesses; 1130 Result.DV[Level].Direction &= NewDirection; 1131 } 1132 return false; 1133 } 1134 1135 1136 // weakCrossingSIVtest - 1137 // From the paper, Practical Dependence Testing, Section 4.2.2 1138 // 1139 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 - a*i], 1140 // where i is an induction variable, c1 and c2 are loop invariant, 1141 // and a is a constant, we can solve it exactly using the 1142 // Weak-Crossing SIV test. 1143 // 1144 // Given c1 + a*i = c2 - a*i', we can look for the intersection of 1145 // the two lines, where i = i', yielding 1146 // 1147 // c1 + a*i = c2 - a*i 1148 // 2a*i = c2 - c1 1149 // i = (c2 - c1)/2a 1150 // 1151 // If i < 0, there is no dependence. 1152 // If i > upperbound, there is no dependence. 1153 // If i = 0 (i.e., if c1 = c2), there's a dependence with distance = 0. 1154 // If i = upperbound, there's a dependence with distance = 0. 1155 // If i is integral, there's a dependence (all directions). 1156 // If the non-integer part = 1/2, there's a dependence (<> directions). 1157 // Otherwise, there's no dependence. 1158 // 1159 // Can prove independence. Failing that, 1160 // can sometimes refine the directions. 1161 // Can determine iteration for splitting. 1162 // 1163 // Return true if dependence disproved. 1164 bool DependenceAnalysis::weakCrossingSIVtest(const SCEV *Coeff, 1165 const SCEV *SrcConst, 1166 const SCEV *DstConst, 1167 const Loop *CurLoop, 1168 unsigned Level, 1169 FullDependence &Result, 1170 Constraint &NewConstraint, 1171 const SCEV *&SplitIter) const { 1172 DEBUG(dbgs() << "\tWeak-Crossing SIV test\n"); 1173 DEBUG(dbgs() << "\t Coeff = " << *Coeff << "\n"); 1174 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1175 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1176 ++WeakCrossingSIVapplications; 1177 assert(0 < Level && Level <= CommonLevels && "Level out of range"); 1178 Level--; 1179 Result.Consistent = false; 1180 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1181 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1182 NewConstraint.setLine(Coeff, Coeff, Delta, CurLoop); 1183 if (Delta->isZero()) { 1184 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT); 1185 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT); 1186 ++WeakCrossingSIVsuccesses; 1187 if (!Result.DV[Level].Direction) { 1188 ++WeakCrossingSIVindependence; 1189 return true; 1190 } 1191 Result.DV[Level].Distance = Delta; // = 0 1192 return false; 1193 } 1194 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Coeff); 1195 if (!ConstCoeff) 1196 return false; 1197 1198 Result.DV[Level].Splitable = true; 1199 if (SE->isKnownNegative(ConstCoeff)) { 1200 ConstCoeff = dyn_cast<SCEVConstant>(SE->getNegativeSCEV(ConstCoeff)); 1201 assert(ConstCoeff && 1202 "dynamic cast of negative of ConstCoeff should yield constant"); 1203 Delta = SE->getNegativeSCEV(Delta); 1204 } 1205 assert(SE->isKnownPositive(ConstCoeff) && "ConstCoeff should be positive"); 1206 1207 // compute SplitIter for use by DependenceAnalysis::getSplitIteration() 1208 SplitIter = 1209 SE->getUDivExpr(SE->getSMaxExpr(SE->getConstant(Delta->getType(), 0), 1210 Delta), 1211 SE->getMulExpr(SE->getConstant(Delta->getType(), 2), 1212 ConstCoeff)); 1213 DEBUG(dbgs() << "\t Split iter = " << *SplitIter << "\n"); 1214 1215 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1216 if (!ConstDelta) 1217 return false; 1218 1219 // We're certain that ConstCoeff > 0; therefore, 1220 // if Delta < 0, then no dependence. 1221 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1222 DEBUG(dbgs() << "\t ConstCoeff = " << *ConstCoeff << "\n"); 1223 if (SE->isKnownNegative(Delta)) { 1224 // No dependence, Delta < 0 1225 ++WeakCrossingSIVindependence; 1226 ++WeakCrossingSIVsuccesses; 1227 return true; 1228 } 1229 1230 // We're certain that Delta > 0 and ConstCoeff > 0. 1231 // Check Delta/(2*ConstCoeff) against upper loop bound 1232 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1233 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1234 const SCEV *ConstantTwo = SE->getConstant(UpperBound->getType(), 2); 1235 const SCEV *ML = SE->getMulExpr(SE->getMulExpr(ConstCoeff, UpperBound), 1236 ConstantTwo); 1237 DEBUG(dbgs() << "\t ML = " << *ML << "\n"); 1238 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, ML)) { 1239 // Delta too big, no dependence 1240 ++WeakCrossingSIVindependence; 1241 ++WeakCrossingSIVsuccesses; 1242 return true; 1243 } 1244 if (isKnownPredicate(CmpInst::ICMP_EQ, Delta, ML)) { 1245 // i = i' = UB 1246 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT); 1247 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT); 1248 ++WeakCrossingSIVsuccesses; 1249 if (!Result.DV[Level].Direction) { 1250 ++WeakCrossingSIVindependence; 1251 return true; 1252 } 1253 Result.DV[Level].Splitable = false; 1254 Result.DV[Level].Distance = SE->getConstant(Delta->getType(), 0); 1255 return false; 1256 } 1257 } 1258 1259 // check that Coeff divides Delta 1260 APInt APDelta = ConstDelta->getValue()->getValue(); 1261 APInt APCoeff = ConstCoeff->getValue()->getValue(); 1262 APInt Distance = APDelta; // these need to be initialzed 1263 APInt Remainder = APDelta; 1264 APInt::sdivrem(APDelta, APCoeff, Distance, Remainder); 1265 DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1266 if (Remainder != 0) { 1267 // Coeff doesn't divide Delta, no dependence 1268 ++WeakCrossingSIVindependence; 1269 ++WeakCrossingSIVsuccesses; 1270 return true; 1271 } 1272 DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); 1273 1274 // if 2*Coeff doesn't divide Delta, then the equal direction isn't possible 1275 APInt Two = APInt(Distance.getBitWidth(), 2, true); 1276 Remainder = Distance.srem(Two); 1277 DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1278 if (Remainder != 0) { 1279 // Equal direction isn't possible 1280 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::EQ); 1281 ++WeakCrossingSIVsuccesses; 1282 } 1283 return false; 1284 } 1285 1286 1287 // Kirch's algorithm, from 1288 // 1289 // Optimizing Supercompilers for Supercomputers 1290 // Michael Wolfe 1291 // MIT Press, 1989 1292 // 1293 // Program 2.1, page 29. 1294 // Computes the GCD of AM and BM. 1295 // Also finds a solution to the equation ax - by = gcd(a, b). 1296 // Returns true if dependence disproved; i.e., gcd does not divide Delta. 1297 static 1298 bool findGCD(unsigned Bits, APInt AM, APInt BM, APInt Delta, 1299 APInt &G, APInt &X, APInt &Y) { 1300 APInt A0(Bits, 1, true), A1(Bits, 0, true); 1301 APInt B0(Bits, 0, true), B1(Bits, 1, true); 1302 APInt G0 = AM.abs(); 1303 APInt G1 = BM.abs(); 1304 APInt Q = G0; // these need to be initialized 1305 APInt R = G0; 1306 APInt::sdivrem(G0, G1, Q, R); 1307 while (R != 0) { 1308 APInt A2 = A0 - Q*A1; A0 = A1; A1 = A2; 1309 APInt B2 = B0 - Q*B1; B0 = B1; B1 = B2; 1310 G0 = G1; G1 = R; 1311 APInt::sdivrem(G0, G1, Q, R); 1312 } 1313 G = G1; 1314 DEBUG(dbgs() << "\t GCD = " << G << "\n"); 1315 X = AM.slt(0) ? -A1 : A1; 1316 Y = BM.slt(0) ? B1 : -B1; 1317 1318 // make sure gcd divides Delta 1319 R = Delta.srem(G); 1320 if (R != 0) 1321 return true; // gcd doesn't divide Delta, no dependence 1322 Q = Delta.sdiv(G); 1323 X *= Q; 1324 Y *= Q; 1325 return false; 1326 } 1327 1328 1329 static 1330 APInt floorOfQuotient(APInt A, APInt B) { 1331 APInt Q = A; // these need to be initialized 1332 APInt R = A; 1333 APInt::sdivrem(A, B, Q, R); 1334 if (R == 0) 1335 return Q; 1336 if ((A.sgt(0) && B.sgt(0)) || 1337 (A.slt(0) && B.slt(0))) 1338 return Q; 1339 else 1340 return Q - 1; 1341 } 1342 1343 1344 static 1345 APInt ceilingOfQuotient(APInt A, APInt B) { 1346 APInt Q = A; // these need to be initialized 1347 APInt R = A; 1348 APInt::sdivrem(A, B, Q, R); 1349 if (R == 0) 1350 return Q; 1351 if ((A.sgt(0) && B.sgt(0)) || 1352 (A.slt(0) && B.slt(0))) 1353 return Q + 1; 1354 else 1355 return Q; 1356 } 1357 1358 1359 static 1360 APInt maxAPInt(APInt A, APInt B) { 1361 return A.sgt(B) ? A : B; 1362 } 1363 1364 1365 static 1366 APInt minAPInt(APInt A, APInt B) { 1367 return A.slt(B) ? A : B; 1368 } 1369 1370 1371 // exactSIVtest - 1372 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i], 1373 // where i is an induction variable, c1 and c2 are loop invariant, and a1 1374 // and a2 are constant, we can solve it exactly using an algorithm developed 1375 // by Banerjee and Wolfe. See Section 2.5.3 in 1376 // 1377 // Optimizing Supercompilers for Supercomputers 1378 // Michael Wolfe 1379 // MIT Press, 1989 1380 // 1381 // It's slower than the specialized tests (strong SIV, weak-zero SIV, etc), 1382 // so use them if possible. They're also a bit better with symbolics and, 1383 // in the case of the strong SIV test, can compute Distances. 1384 // 1385 // Return true if dependence disproved. 1386 bool DependenceAnalysis::exactSIVtest(const SCEV *SrcCoeff, 1387 const SCEV *DstCoeff, 1388 const SCEV *SrcConst, 1389 const SCEV *DstConst, 1390 const Loop *CurLoop, 1391 unsigned Level, 1392 FullDependence &Result, 1393 Constraint &NewConstraint) const { 1394 DEBUG(dbgs() << "\tExact SIV test\n"); 1395 DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); 1396 DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); 1397 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1398 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1399 ++ExactSIVapplications; 1400 assert(0 < Level && Level <= CommonLevels && "Level out of range"); 1401 Level--; 1402 Result.Consistent = false; 1403 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1404 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1405 NewConstraint.setLine(SrcCoeff, SE->getNegativeSCEV(DstCoeff), 1406 Delta, CurLoop); 1407 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1408 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1409 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1410 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) 1411 return false; 1412 1413 // find gcd 1414 APInt G, X, Y; 1415 APInt AM = ConstSrcCoeff->getValue()->getValue(); 1416 APInt BM = ConstDstCoeff->getValue()->getValue(); 1417 unsigned Bits = AM.getBitWidth(); 1418 if (findGCD(Bits, AM, BM, ConstDelta->getValue()->getValue(), G, X, Y)) { 1419 // gcd doesn't divide Delta, no dependence 1420 ++ExactSIVindependence; 1421 ++ExactSIVsuccesses; 1422 return true; 1423 } 1424 1425 DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); 1426 1427 // since SCEV construction normalizes, LM = 0 1428 APInt UM(Bits, 1, true); 1429 bool UMvalid = false; 1430 // UM is perhaps unavailable, let's check 1431 if (const SCEVConstant *CUB = 1432 collectConstantUpperBound(CurLoop, Delta->getType())) { 1433 UM = CUB->getValue()->getValue(); 1434 DEBUG(dbgs() << "\t UM = " << UM << "\n"); 1435 UMvalid = true; 1436 } 1437 1438 APInt TU(APInt::getSignedMaxValue(Bits)); 1439 APInt TL(APInt::getSignedMinValue(Bits)); 1440 1441 // test(BM/G, LM-X) and test(-BM/G, X-UM) 1442 APInt TMUL = BM.sdiv(G); 1443 if (TMUL.sgt(0)) { 1444 TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL)); 1445 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1446 if (UMvalid) { 1447 TU = minAPInt(TU, floorOfQuotient(UM - X, TMUL)); 1448 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1449 } 1450 } 1451 else { 1452 TU = minAPInt(TU, floorOfQuotient(-X, TMUL)); 1453 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1454 if (UMvalid) { 1455 TL = maxAPInt(TL, ceilingOfQuotient(UM - X, TMUL)); 1456 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1457 } 1458 } 1459 1460 // test(AM/G, LM-Y) and test(-AM/G, Y-UM) 1461 TMUL = AM.sdiv(G); 1462 if (TMUL.sgt(0)) { 1463 TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL)); 1464 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1465 if (UMvalid) { 1466 TU = minAPInt(TU, floorOfQuotient(UM - Y, TMUL)); 1467 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1468 } 1469 } 1470 else { 1471 TU = minAPInt(TU, floorOfQuotient(-Y, TMUL)); 1472 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1473 if (UMvalid) { 1474 TL = maxAPInt(TL, ceilingOfQuotient(UM - Y, TMUL)); 1475 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1476 } 1477 } 1478 if (TL.sgt(TU)) { 1479 ++ExactSIVindependence; 1480 ++ExactSIVsuccesses; 1481 return true; 1482 } 1483 1484 // explore directions 1485 unsigned NewDirection = Dependence::DVEntry::NONE; 1486 1487 // less than 1488 APInt SaveTU(TU); // save these 1489 APInt SaveTL(TL); 1490 DEBUG(dbgs() << "\t exploring LT direction\n"); 1491 TMUL = AM - BM; 1492 if (TMUL.sgt(0)) { 1493 TL = maxAPInt(TL, ceilingOfQuotient(X - Y + 1, TMUL)); 1494 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1495 } 1496 else { 1497 TU = minAPInt(TU, floorOfQuotient(X - Y + 1, TMUL)); 1498 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1499 } 1500 if (TL.sle(TU)) { 1501 NewDirection |= Dependence::DVEntry::LT; 1502 ++ExactSIVsuccesses; 1503 } 1504 1505 // equal 1506 TU = SaveTU; // restore 1507 TL = SaveTL; 1508 DEBUG(dbgs() << "\t exploring EQ direction\n"); 1509 if (TMUL.sgt(0)) { 1510 TL = maxAPInt(TL, ceilingOfQuotient(X - Y, TMUL)); 1511 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1512 } 1513 else { 1514 TU = minAPInt(TU, floorOfQuotient(X - Y, TMUL)); 1515 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1516 } 1517 TMUL = BM - AM; 1518 if (TMUL.sgt(0)) { 1519 TL = maxAPInt(TL, ceilingOfQuotient(Y - X, TMUL)); 1520 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1521 } 1522 else { 1523 TU = minAPInt(TU, floorOfQuotient(Y - X, TMUL)); 1524 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1525 } 1526 if (TL.sle(TU)) { 1527 NewDirection |= Dependence::DVEntry::EQ; 1528 ++ExactSIVsuccesses; 1529 } 1530 1531 // greater than 1532 TU = SaveTU; // restore 1533 TL = SaveTL; 1534 DEBUG(dbgs() << "\t exploring GT direction\n"); 1535 if (TMUL.sgt(0)) { 1536 TL = maxAPInt(TL, ceilingOfQuotient(Y - X + 1, TMUL)); 1537 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1538 } 1539 else { 1540 TU = minAPInt(TU, floorOfQuotient(Y - X + 1, TMUL)); 1541 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1542 } 1543 if (TL.sle(TU)) { 1544 NewDirection |= Dependence::DVEntry::GT; 1545 ++ExactSIVsuccesses; 1546 } 1547 1548 // finished 1549 Result.DV[Level].Direction &= NewDirection; 1550 if (Result.DV[Level].Direction == Dependence::DVEntry::NONE) 1551 ++ExactSIVindependence; 1552 return Result.DV[Level].Direction == Dependence::DVEntry::NONE; 1553 } 1554 1555 1556 1557 // Return true if the divisor evenly divides the dividend. 1558 static 1559 bool isRemainderZero(const SCEVConstant *Dividend, 1560 const SCEVConstant *Divisor) { 1561 APInt ConstDividend = Dividend->getValue()->getValue(); 1562 APInt ConstDivisor = Divisor->getValue()->getValue(); 1563 return ConstDividend.srem(ConstDivisor) == 0; 1564 } 1565 1566 1567 // weakZeroSrcSIVtest - 1568 // From the paper, Practical Dependence Testing, Section 4.2.2 1569 // 1570 // When we have a pair of subscripts of the form [c1] and [c2 + a*i], 1571 // where i is an induction variable, c1 and c2 are loop invariant, 1572 // and a is a constant, we can solve it exactly using the 1573 // Weak-Zero SIV test. 1574 // 1575 // Given 1576 // 1577 // c1 = c2 + a*i 1578 // 1579 // we get 1580 // 1581 // (c1 - c2)/a = i 1582 // 1583 // If i is not an integer, there's no dependence. 1584 // If i < 0 or > UB, there's no dependence. 1585 // If i = 0, the direction is <= and peeling the 1586 // 1st iteration will break the dependence. 1587 // If i = UB, the direction is >= and peeling the 1588 // last iteration will break the dependence. 1589 // Otherwise, the direction is *. 1590 // 1591 // Can prove independence. Failing that, we can sometimes refine 1592 // the directions. Can sometimes show that first or last 1593 // iteration carries all the dependences (so worth peeling). 1594 // 1595 // (see also weakZeroDstSIVtest) 1596 // 1597 // Return true if dependence disproved. 1598 bool DependenceAnalysis::weakZeroSrcSIVtest(const SCEV *DstCoeff, 1599 const SCEV *SrcConst, 1600 const SCEV *DstConst, 1601 const Loop *CurLoop, 1602 unsigned Level, 1603 FullDependence &Result, 1604 Constraint &NewConstraint) const { 1605 // For the WeakSIV test, it's possible the loop isn't common to 1606 // the Src and Dst loops. If it isn't, then there's no need to 1607 // record a direction. 1608 DEBUG(dbgs() << "\tWeak-Zero (src) SIV test\n"); 1609 DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << "\n"); 1610 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1611 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1612 ++WeakZeroSIVapplications; 1613 assert(0 < Level && Level <= MaxLevels && "Level out of range"); 1614 Level--; 1615 Result.Consistent = false; 1616 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); 1617 NewConstraint.setLine(SE->getConstant(Delta->getType(), 0), 1618 DstCoeff, Delta, CurLoop); 1619 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1620 if (isKnownPredicate(CmpInst::ICMP_EQ, SrcConst, DstConst)) { 1621 if (Level < CommonLevels) { 1622 Result.DV[Level].Direction &= Dependence::DVEntry::LE; 1623 Result.DV[Level].PeelFirst = true; 1624 ++WeakZeroSIVsuccesses; 1625 } 1626 return false; // dependences caused by first iteration 1627 } 1628 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1629 if (!ConstCoeff) 1630 return false; 1631 const SCEV *AbsCoeff = 1632 SE->isKnownNegative(ConstCoeff) ? 1633 SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; 1634 const SCEV *NewDelta = 1635 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta; 1636 1637 // check that Delta/SrcCoeff < iteration count 1638 // really check NewDelta < count*AbsCoeff 1639 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1640 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1641 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); 1642 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { 1643 ++WeakZeroSIVindependence; 1644 ++WeakZeroSIVsuccesses; 1645 return true; 1646 } 1647 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { 1648 // dependences caused by last iteration 1649 if (Level < CommonLevels) { 1650 Result.DV[Level].Direction &= Dependence::DVEntry::GE; 1651 Result.DV[Level].PeelLast = true; 1652 ++WeakZeroSIVsuccesses; 1653 } 1654 return false; 1655 } 1656 } 1657 1658 // check that Delta/SrcCoeff >= 0 1659 // really check that NewDelta >= 0 1660 if (SE->isKnownNegative(NewDelta)) { 1661 // No dependence, newDelta < 0 1662 ++WeakZeroSIVindependence; 1663 ++WeakZeroSIVsuccesses; 1664 return true; 1665 } 1666 1667 // if SrcCoeff doesn't divide Delta, then no dependence 1668 if (isa<SCEVConstant>(Delta) && 1669 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { 1670 ++WeakZeroSIVindependence; 1671 ++WeakZeroSIVsuccesses; 1672 return true; 1673 } 1674 return false; 1675 } 1676 1677 1678 // weakZeroDstSIVtest - 1679 // From the paper, Practical Dependence Testing, Section 4.2.2 1680 // 1681 // When we have a pair of subscripts of the form [c1 + a*i] and [c2], 1682 // where i is an induction variable, c1 and c2 are loop invariant, 1683 // and a is a constant, we can solve it exactly using the 1684 // Weak-Zero SIV test. 1685 // 1686 // Given 1687 // 1688 // c1 + a*i = c2 1689 // 1690 // we get 1691 // 1692 // i = (c2 - c1)/a 1693 // 1694 // If i is not an integer, there's no dependence. 1695 // If i < 0 or > UB, there's no dependence. 1696 // If i = 0, the direction is <= and peeling the 1697 // 1st iteration will break the dependence. 1698 // If i = UB, the direction is >= and peeling the 1699 // last iteration will break the dependence. 1700 // Otherwise, the direction is *. 1701 // 1702 // Can prove independence. Failing that, we can sometimes refine 1703 // the directions. Can sometimes show that first or last 1704 // iteration carries all the dependences (so worth peeling). 1705 // 1706 // (see also weakZeroSrcSIVtest) 1707 // 1708 // Return true if dependence disproved. 1709 bool DependenceAnalysis::weakZeroDstSIVtest(const SCEV *SrcCoeff, 1710 const SCEV *SrcConst, 1711 const SCEV *DstConst, 1712 const Loop *CurLoop, 1713 unsigned Level, 1714 FullDependence &Result, 1715 Constraint &NewConstraint) const { 1716 // For the WeakSIV test, it's possible the loop isn't common to the 1717 // Src and Dst loops. If it isn't, then there's no need to record a direction. 1718 DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n"); 1719 DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << "\n"); 1720 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1721 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1722 ++WeakZeroSIVapplications; 1723 assert(0 < Level && Level <= SrcLevels && "Level out of range"); 1724 Level--; 1725 Result.Consistent = false; 1726 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1727 NewConstraint.setLine(SrcCoeff, SE->getConstant(Delta->getType(), 0), 1728 Delta, CurLoop); 1729 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1730 if (isKnownPredicate(CmpInst::ICMP_EQ, DstConst, SrcConst)) { 1731 if (Level < CommonLevels) { 1732 Result.DV[Level].Direction &= Dependence::DVEntry::LE; 1733 Result.DV[Level].PeelFirst = true; 1734 ++WeakZeroSIVsuccesses; 1735 } 1736 return false; // dependences caused by first iteration 1737 } 1738 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1739 if (!ConstCoeff) 1740 return false; 1741 const SCEV *AbsCoeff = 1742 SE->isKnownNegative(ConstCoeff) ? 1743 SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; 1744 const SCEV *NewDelta = 1745 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta; 1746 1747 // check that Delta/SrcCoeff < iteration count 1748 // really check NewDelta < count*AbsCoeff 1749 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1750 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1751 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); 1752 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { 1753 ++WeakZeroSIVindependence; 1754 ++WeakZeroSIVsuccesses; 1755 return true; 1756 } 1757 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { 1758 // dependences caused by last iteration 1759 if (Level < CommonLevels) { 1760 Result.DV[Level].Direction &= Dependence::DVEntry::GE; 1761 Result.DV[Level].PeelLast = true; 1762 ++WeakZeroSIVsuccesses; 1763 } 1764 return false; 1765 } 1766 } 1767 1768 // check that Delta/SrcCoeff >= 0 1769 // really check that NewDelta >= 0 1770 if (SE->isKnownNegative(NewDelta)) { 1771 // No dependence, newDelta < 0 1772 ++WeakZeroSIVindependence; 1773 ++WeakZeroSIVsuccesses; 1774 return true; 1775 } 1776 1777 // if SrcCoeff doesn't divide Delta, then no dependence 1778 if (isa<SCEVConstant>(Delta) && 1779 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { 1780 ++WeakZeroSIVindependence; 1781 ++WeakZeroSIVsuccesses; 1782 return true; 1783 } 1784 return false; 1785 } 1786 1787 1788 // exactRDIVtest - Tests the RDIV subscript pair for dependence. 1789 // Things of the form [c1 + a*i] and [c2 + b*j], 1790 // where i and j are induction variable, c1 and c2 are loop invariant, 1791 // and a and b are constants. 1792 // Returns true if any possible dependence is disproved. 1793 // Marks the result as inconsistent. 1794 // Works in some cases that symbolicRDIVtest doesn't, and vice versa. 1795 bool DependenceAnalysis::exactRDIVtest(const SCEV *SrcCoeff, 1796 const SCEV *DstCoeff, 1797 const SCEV *SrcConst, 1798 const SCEV *DstConst, 1799 const Loop *SrcLoop, 1800 const Loop *DstLoop, 1801 FullDependence &Result) const { 1802 DEBUG(dbgs() << "\tExact RDIV test\n"); 1803 DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); 1804 DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); 1805 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1806 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1807 ++ExactRDIVapplications; 1808 Result.Consistent = false; 1809 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1810 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1811 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1812 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1813 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1814 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) 1815 return false; 1816 1817 // find gcd 1818 APInt G, X, Y; 1819 APInt AM = ConstSrcCoeff->getValue()->getValue(); 1820 APInt BM = ConstDstCoeff->getValue()->getValue(); 1821 unsigned Bits = AM.getBitWidth(); 1822 if (findGCD(Bits, AM, BM, ConstDelta->getValue()->getValue(), G, X, Y)) { 1823 // gcd doesn't divide Delta, no dependence 1824 ++ExactRDIVindependence; 1825 return true; 1826 } 1827 1828 DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); 1829 1830 // since SCEV construction seems to normalize, LM = 0 1831 APInt SrcUM(Bits, 1, true); 1832 bool SrcUMvalid = false; 1833 // SrcUM is perhaps unavailable, let's check 1834 if (const SCEVConstant *UpperBound = 1835 collectConstantUpperBound(SrcLoop, Delta->getType())) { 1836 SrcUM = UpperBound->getValue()->getValue(); 1837 DEBUG(dbgs() << "\t SrcUM = " << SrcUM << "\n"); 1838 SrcUMvalid = true; 1839 } 1840 1841 APInt DstUM(Bits, 1, true); 1842 bool DstUMvalid = false; 1843 // UM is perhaps unavailable, let's check 1844 if (const SCEVConstant *UpperBound = 1845 collectConstantUpperBound(DstLoop, Delta->getType())) { 1846 DstUM = UpperBound->getValue()->getValue(); 1847 DEBUG(dbgs() << "\t DstUM = " << DstUM << "\n"); 1848 DstUMvalid = true; 1849 } 1850 1851 APInt TU(APInt::getSignedMaxValue(Bits)); 1852 APInt TL(APInt::getSignedMinValue(Bits)); 1853 1854 // test(BM/G, LM-X) and test(-BM/G, X-UM) 1855 APInt TMUL = BM.sdiv(G); 1856 if (TMUL.sgt(0)) { 1857 TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL)); 1858 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1859 if (SrcUMvalid) { 1860 TU = minAPInt(TU, floorOfQuotient(SrcUM - X, TMUL)); 1861 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1862 } 1863 } 1864 else { 1865 TU = minAPInt(TU, floorOfQuotient(-X, TMUL)); 1866 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1867 if (SrcUMvalid) { 1868 TL = maxAPInt(TL, ceilingOfQuotient(SrcUM - X, TMUL)); 1869 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1870 } 1871 } 1872 1873 // test(AM/G, LM-Y) and test(-AM/G, Y-UM) 1874 TMUL = AM.sdiv(G); 1875 if (TMUL.sgt(0)) { 1876 TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL)); 1877 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1878 if (DstUMvalid) { 1879 TU = minAPInt(TU, floorOfQuotient(DstUM - Y, TMUL)); 1880 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1881 } 1882 } 1883 else { 1884 TU = minAPInt(TU, floorOfQuotient(-Y, TMUL)); 1885 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1886 if (DstUMvalid) { 1887 TL = maxAPInt(TL, ceilingOfQuotient(DstUM - Y, TMUL)); 1888 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1889 } 1890 } 1891 if (TL.sgt(TU)) 1892 ++ExactRDIVindependence; 1893 return TL.sgt(TU); 1894 } 1895 1896 1897 // symbolicRDIVtest - 1898 // In Section 4.5 of the Practical Dependence Testing paper,the authors 1899 // introduce a special case of Banerjee's Inequalities (also called the 1900 // Extreme-Value Test) that can handle some of the SIV and RDIV cases, 1901 // particularly cases with symbolics. Since it's only able to disprove 1902 // dependence (not compute distances or directions), we'll use it as a 1903 // fall back for the other tests. 1904 // 1905 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] 1906 // where i and j are induction variables and c1 and c2 are loop invariants, 1907 // we can use the symbolic tests to disprove some dependences, serving as a 1908 // backup for the RDIV test. Note that i and j can be the same variable, 1909 // letting this test serve as a backup for the various SIV tests. 1910 // 1911 // For a dependence to exist, c1 + a1*i must equal c2 + a2*j for some 1912 // 0 <= i <= N1 and some 0 <= j <= N2, where N1 and N2 are the (normalized) 1913 // loop bounds for the i and j loops, respectively. So, ... 1914 // 1915 // c1 + a1*i = c2 + a2*j 1916 // a1*i - a2*j = c2 - c1 1917 // 1918 // To test for a dependence, we compute c2 - c1 and make sure it's in the 1919 // range of the maximum and minimum possible values of a1*i - a2*j. 1920 // Considering the signs of a1 and a2, we have 4 possible cases: 1921 // 1922 // 1) If a1 >= 0 and a2 >= 0, then 1923 // a1*0 - a2*N2 <= c2 - c1 <= a1*N1 - a2*0 1924 // -a2*N2 <= c2 - c1 <= a1*N1 1925 // 1926 // 2) If a1 >= 0 and a2 <= 0, then 1927 // a1*0 - a2*0 <= c2 - c1 <= a1*N1 - a2*N2 1928 // 0 <= c2 - c1 <= a1*N1 - a2*N2 1929 // 1930 // 3) If a1 <= 0 and a2 >= 0, then 1931 // a1*N1 - a2*N2 <= c2 - c1 <= a1*0 - a2*0 1932 // a1*N1 - a2*N2 <= c2 - c1 <= 0 1933 // 1934 // 4) If a1 <= 0 and a2 <= 0, then 1935 // a1*N1 - a2*0 <= c2 - c1 <= a1*0 - a2*N2 1936 // a1*N1 <= c2 - c1 <= -a2*N2 1937 // 1938 // return true if dependence disproved 1939 bool DependenceAnalysis::symbolicRDIVtest(const SCEV *A1, 1940 const SCEV *A2, 1941 const SCEV *C1, 1942 const SCEV *C2, 1943 const Loop *Loop1, 1944 const Loop *Loop2) const { 1945 ++SymbolicRDIVapplications; 1946 DEBUG(dbgs() << "\ttry symbolic RDIV test\n"); 1947 DEBUG(dbgs() << "\t A1 = " << *A1); 1948 DEBUG(dbgs() << ", type = " << *A1->getType() << "\n"); 1949 DEBUG(dbgs() << "\t A2 = " << *A2 << "\n"); 1950 DEBUG(dbgs() << "\t C1 = " << *C1 << "\n"); 1951 DEBUG(dbgs() << "\t C2 = " << *C2 << "\n"); 1952 const SCEV *N1 = collectUpperBound(Loop1, A1->getType()); 1953 const SCEV *N2 = collectUpperBound(Loop2, A1->getType()); 1954 DEBUG(if (N1) dbgs() << "\t N1 = " << *N1 << "\n"); 1955 DEBUG(if (N2) dbgs() << "\t N2 = " << *N2 << "\n"); 1956 const SCEV *C2_C1 = SE->getMinusSCEV(C2, C1); 1957 const SCEV *C1_C2 = SE->getMinusSCEV(C1, C2); 1958 DEBUG(dbgs() << "\t C2 - C1 = " << *C2_C1 << "\n"); 1959 DEBUG(dbgs() << "\t C1 - C2 = " << *C1_C2 << "\n"); 1960 if (SE->isKnownNonNegative(A1)) { 1961 if (SE->isKnownNonNegative(A2)) { 1962 // A1 >= 0 && A2 >= 0 1963 if (N1) { 1964 // make sure that c2 - c1 <= a1*N1 1965 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 1966 DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); 1967 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1)) { 1968 ++SymbolicRDIVindependence; 1969 return true; 1970 } 1971 } 1972 if (N2) { 1973 // make sure that -a2*N2 <= c2 - c1, or a2*N2 >= c1 - c2 1974 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 1975 DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); 1976 if (isKnownPredicate(CmpInst::ICMP_SLT, A2N2, C1_C2)) { 1977 ++SymbolicRDIVindependence; 1978 return true; 1979 } 1980 } 1981 } 1982 else if (SE->isKnownNonPositive(A2)) { 1983 // a1 >= 0 && a2 <= 0 1984 if (N1 && N2) { 1985 // make sure that c2 - c1 <= a1*N1 - a2*N2 1986 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 1987 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 1988 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); 1989 DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); 1990 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1_A2N2)) { 1991 ++SymbolicRDIVindependence; 1992 return true; 1993 } 1994 } 1995 // make sure that 0 <= c2 - c1 1996 if (SE->isKnownNegative(C2_C1)) { 1997 ++SymbolicRDIVindependence; 1998 return true; 1999 } 2000 } 2001 } 2002 else if (SE->isKnownNonPositive(A1)) { 2003 if (SE->isKnownNonNegative(A2)) { 2004 // a1 <= 0 && a2 >= 0 2005 if (N1 && N2) { 2006 // make sure that a1*N1 - a2*N2 <= c2 - c1 2007 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2008 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2009 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); 2010 DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); 2011 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1_A2N2, C2_C1)) { 2012 ++SymbolicRDIVindependence; 2013 return true; 2014 } 2015 } 2016 // make sure that c2 - c1 <= 0 2017 if (SE->isKnownPositive(C2_C1)) { 2018 ++SymbolicRDIVindependence; 2019 return true; 2020 } 2021 } 2022 else if (SE->isKnownNonPositive(A2)) { 2023 // a1 <= 0 && a2 <= 0 2024 if (N1) { 2025 // make sure that a1*N1 <= c2 - c1 2026 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2027 DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); 2028 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1, C2_C1)) { 2029 ++SymbolicRDIVindependence; 2030 return true; 2031 } 2032 } 2033 if (N2) { 2034 // make sure that c2 - c1 <= -a2*N2, or c1 - c2 >= a2*N2 2035 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2036 DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); 2037 if (isKnownPredicate(CmpInst::ICMP_SLT, C1_C2, A2N2)) { 2038 ++SymbolicRDIVindependence; 2039 return true; 2040 } 2041 } 2042 } 2043 } 2044 return false; 2045 } 2046 2047 2048 // testSIV - 2049 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 - a2*i] 2050 // where i is an induction variable, c1 and c2 are loop invariant, and a1 and 2051 // a2 are constant, we attack it with an SIV test. While they can all be 2052 // solved with the Exact SIV test, it's worthwhile to use simpler tests when 2053 // they apply; they're cheaper and sometimes more precise. 2054 // 2055 // Return true if dependence disproved. 2056 bool DependenceAnalysis::testSIV(const SCEV *Src, 2057 const SCEV *Dst, 2058 unsigned &Level, 2059 FullDependence &Result, 2060 Constraint &NewConstraint, 2061 const SCEV *&SplitIter) const { 2062 DEBUG(dbgs() << " src = " << *Src << "\n"); 2063 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2064 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); 2065 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); 2066 if (SrcAddRec && DstAddRec) { 2067 const SCEV *SrcConst = SrcAddRec->getStart(); 2068 const SCEV *DstConst = DstAddRec->getStart(); 2069 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2070 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); 2071 const Loop *CurLoop = SrcAddRec->getLoop(); 2072 assert(CurLoop == DstAddRec->getLoop() && 2073 "both loops in SIV should be same"); 2074 Level = mapSrcLoop(CurLoop); 2075 bool disproven; 2076 if (SrcCoeff == DstCoeff) 2077 disproven = strongSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2078 Level, Result, NewConstraint); 2079 else if (SrcCoeff == SE->getNegativeSCEV(DstCoeff)) 2080 disproven = weakCrossingSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2081 Level, Result, NewConstraint, SplitIter); 2082 else 2083 disproven = exactSIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, 2084 Level, Result, NewConstraint); 2085 return disproven || 2086 gcdMIVtest(Src, Dst, Result) || 2087 symbolicRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, CurLoop); 2088 } 2089 if (SrcAddRec) { 2090 const SCEV *SrcConst = SrcAddRec->getStart(); 2091 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2092 const SCEV *DstConst = Dst; 2093 const Loop *CurLoop = SrcAddRec->getLoop(); 2094 Level = mapSrcLoop(CurLoop); 2095 return weakZeroDstSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2096 Level, Result, NewConstraint) || 2097 gcdMIVtest(Src, Dst, Result); 2098 } 2099 if (DstAddRec) { 2100 const SCEV *DstConst = DstAddRec->getStart(); 2101 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); 2102 const SCEV *SrcConst = Src; 2103 const Loop *CurLoop = DstAddRec->getLoop(); 2104 Level = mapDstLoop(CurLoop); 2105 return weakZeroSrcSIVtest(DstCoeff, SrcConst, DstConst, 2106 CurLoop, Level, Result, NewConstraint) || 2107 gcdMIVtest(Src, Dst, Result); 2108 } 2109 llvm_unreachable("SIV test expected at least one AddRec"); 2110 return false; 2111 } 2112 2113 2114 // testRDIV - 2115 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] 2116 // where i and j are induction variables, c1 and c2 are loop invariant, 2117 // and a1 and a2 are constant, we can solve it exactly with an easy adaptation 2118 // of the Exact SIV test, the Restricted Double Index Variable (RDIV) test. 2119 // It doesn't make sense to talk about distance or direction in this case, 2120 // so there's no point in making special versions of the Strong SIV test or 2121 // the Weak-crossing SIV test. 2122 // 2123 // With minor algebra, this test can also be used for things like 2124 // [c1 + a1*i + a2*j][c2]. 2125 // 2126 // Return true if dependence disproved. 2127 bool DependenceAnalysis::testRDIV(const SCEV *Src, 2128 const SCEV *Dst, 2129 FullDependence &Result) const { 2130 // we have 3 possible situations here: 2131 // 1) [a*i + b] and [c*j + d] 2132 // 2) [a*i + c*j + b] and [d] 2133 // 3) [b] and [a*i + c*j + d] 2134 // We need to find what we've got and get organized 2135 2136 const SCEV *SrcConst, *DstConst; 2137 const SCEV *SrcCoeff, *DstCoeff; 2138 const Loop *SrcLoop, *DstLoop; 2139 2140 DEBUG(dbgs() << " src = " << *Src << "\n"); 2141 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2142 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); 2143 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); 2144 if (SrcAddRec && DstAddRec) { 2145 SrcConst = SrcAddRec->getStart(); 2146 SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2147 SrcLoop = SrcAddRec->getLoop(); 2148 DstConst = DstAddRec->getStart(); 2149 DstCoeff = DstAddRec->getStepRecurrence(*SE); 2150 DstLoop = DstAddRec->getLoop(); 2151 } 2152 else if (SrcAddRec) { 2153 if (const SCEVAddRecExpr *tmpAddRec = 2154 dyn_cast<SCEVAddRecExpr>(SrcAddRec->getStart())) { 2155 SrcConst = tmpAddRec->getStart(); 2156 SrcCoeff = tmpAddRec->getStepRecurrence(*SE); 2157 SrcLoop = tmpAddRec->getLoop(); 2158 DstConst = Dst; 2159 DstCoeff = SE->getNegativeSCEV(SrcAddRec->getStepRecurrence(*SE)); 2160 DstLoop = SrcAddRec->getLoop(); 2161 } 2162 else 2163 llvm_unreachable("RDIV reached by surprising SCEVs"); 2164 } 2165 else if (DstAddRec) { 2166 if (const SCEVAddRecExpr *tmpAddRec = 2167 dyn_cast<SCEVAddRecExpr>(DstAddRec->getStart())) { 2168 DstConst = tmpAddRec->getStart(); 2169 DstCoeff = tmpAddRec->getStepRecurrence(*SE); 2170 DstLoop = tmpAddRec->getLoop(); 2171 SrcConst = Src; 2172 SrcCoeff = SE->getNegativeSCEV(DstAddRec->getStepRecurrence(*SE)); 2173 SrcLoop = DstAddRec->getLoop(); 2174 } 2175 else 2176 llvm_unreachable("RDIV reached by surprising SCEVs"); 2177 } 2178 else 2179 llvm_unreachable("RDIV expected at least one AddRec"); 2180 return exactRDIVtest(SrcCoeff, DstCoeff, 2181 SrcConst, DstConst, 2182 SrcLoop, DstLoop, 2183 Result) || 2184 gcdMIVtest(Src, Dst, Result) || 2185 symbolicRDIVtest(SrcCoeff, DstCoeff, 2186 SrcConst, DstConst, 2187 SrcLoop, DstLoop); 2188 } 2189 2190 2191 // Tests the single-subscript MIV pair (Src and Dst) for dependence. 2192 // Return true if dependence disproved. 2193 // Can sometimes refine direction vectors. 2194 bool DependenceAnalysis::testMIV(const SCEV *Src, 2195 const SCEV *Dst, 2196 const SmallBitVector &Loops, 2197 FullDependence &Result) const { 2198 DEBUG(dbgs() << " src = " << *Src << "\n"); 2199 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2200 Result.Consistent = false; 2201 return gcdMIVtest(Src, Dst, Result) || 2202 banerjeeMIVtest(Src, Dst, Loops, Result); 2203 } 2204 2205 2206 // Given a product, e.g., 10*X*Y, returns the first constant operand, 2207 // in this case 10. If there is no constant part, returns NULL. 2208 static 2209 const SCEVConstant *getConstantPart(const SCEVMulExpr *Product) { 2210 for (unsigned Op = 0, Ops = Product->getNumOperands(); Op < Ops; Op++) { 2211 if (const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Product->getOperand(Op))) 2212 return Constant; 2213 } 2214 return nullptr; 2215 } 2216 2217 2218 //===----------------------------------------------------------------------===// 2219 // gcdMIVtest - 2220 // Tests an MIV subscript pair for dependence. 2221 // Returns true if any possible dependence is disproved. 2222 // Marks the result as inconsistent. 2223 // Can sometimes disprove the equal direction for 1 or more loops, 2224 // as discussed in Michael Wolfe's book, 2225 // High Performance Compilers for Parallel Computing, page 235. 2226 // 2227 // We spend some effort (code!) to handle cases like 2228 // [10*i + 5*N*j + 15*M + 6], where i and j are induction variables, 2229 // but M and N are just loop-invariant variables. 2230 // This should help us handle linearized subscripts; 2231 // also makes this test a useful backup to the various SIV tests. 2232 // 2233 // It occurs to me that the presence of loop-invariant variables 2234 // changes the nature of the test from "greatest common divisor" 2235 // to "a common divisor". 2236 bool DependenceAnalysis::gcdMIVtest(const SCEV *Src, 2237 const SCEV *Dst, 2238 FullDependence &Result) const { 2239 DEBUG(dbgs() << "starting gcd\n"); 2240 ++GCDapplications; 2241 unsigned BitWidth = SE->getTypeSizeInBits(Src->getType()); 2242 APInt RunningGCD = APInt::getNullValue(BitWidth); 2243 2244 // Examine Src coefficients. 2245 // Compute running GCD and record source constant. 2246 // Because we're looking for the constant at the end of the chain, 2247 // we can't quit the loop just because the GCD == 1. 2248 const SCEV *Coefficients = Src; 2249 while (const SCEVAddRecExpr *AddRec = 2250 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2251 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2252 const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Coeff); 2253 if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff)) 2254 // If the coefficient is the product of a constant and other stuff, 2255 // we can use the constant in the GCD computation. 2256 Constant = getConstantPart(Product); 2257 if (!Constant) 2258 return false; 2259 APInt ConstCoeff = Constant->getValue()->getValue(); 2260 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2261 Coefficients = AddRec->getStart(); 2262 } 2263 const SCEV *SrcConst = Coefficients; 2264 2265 // Examine Dst coefficients. 2266 // Compute running GCD and record destination constant. 2267 // Because we're looking for the constant at the end of the chain, 2268 // we can't quit the loop just because the GCD == 1. 2269 Coefficients = Dst; 2270 while (const SCEVAddRecExpr *AddRec = 2271 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2272 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2273 const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Coeff); 2274 if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff)) 2275 // If the coefficient is the product of a constant and other stuff, 2276 // we can use the constant in the GCD computation. 2277 Constant = getConstantPart(Product); 2278 if (!Constant) 2279 return false; 2280 APInt ConstCoeff = Constant->getValue()->getValue(); 2281 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2282 Coefficients = AddRec->getStart(); 2283 } 2284 const SCEV *DstConst = Coefficients; 2285 2286 APInt ExtraGCD = APInt::getNullValue(BitWidth); 2287 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 2288 DEBUG(dbgs() << " Delta = " << *Delta << "\n"); 2289 const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Delta); 2290 if (const SCEVAddExpr *Sum = dyn_cast<SCEVAddExpr>(Delta)) { 2291 // If Delta is a sum of products, we may be able to make further progress. 2292 for (unsigned Op = 0, Ops = Sum->getNumOperands(); Op < Ops; Op++) { 2293 const SCEV *Operand = Sum->getOperand(Op); 2294 if (isa<SCEVConstant>(Operand)) { 2295 assert(!Constant && "Surprised to find multiple constants"); 2296 Constant = cast<SCEVConstant>(Operand); 2297 } 2298 else if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Operand)) { 2299 // Search for constant operand to participate in GCD; 2300 // If none found; return false. 2301 const SCEVConstant *ConstOp = getConstantPart(Product); 2302 if (!ConstOp) 2303 return false; 2304 APInt ConstOpValue = ConstOp->getValue()->getValue(); 2305 ExtraGCD = APIntOps::GreatestCommonDivisor(ExtraGCD, 2306 ConstOpValue.abs()); 2307 } 2308 else 2309 return false; 2310 } 2311 } 2312 if (!Constant) 2313 return false; 2314 APInt ConstDelta = cast<SCEVConstant>(Constant)->getValue()->getValue(); 2315 DEBUG(dbgs() << " ConstDelta = " << ConstDelta << "\n"); 2316 if (ConstDelta == 0) 2317 return false; 2318 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ExtraGCD); 2319 DEBUG(dbgs() << " RunningGCD = " << RunningGCD << "\n"); 2320 APInt Remainder = ConstDelta.srem(RunningGCD); 2321 if (Remainder != 0) { 2322 ++GCDindependence; 2323 return true; 2324 } 2325 2326 // Try to disprove equal directions. 2327 // For example, given a subscript pair [3*i + 2*j] and [i' + 2*j' - 1], 2328 // the code above can't disprove the dependence because the GCD = 1. 2329 // So we consider what happen if i = i' and what happens if j = j'. 2330 // If i = i', we can simplify the subscript to [2*i + 2*j] and [2*j' - 1], 2331 // which is infeasible, so we can disallow the = direction for the i level. 2332 // Setting j = j' doesn't help matters, so we end up with a direction vector 2333 // of [<>, *] 2334 // 2335 // Given A[5*i + 10*j*M + 9*M*N] and A[15*i + 20*j*M - 21*N*M + 5], 2336 // we need to remember that the constant part is 5 and the RunningGCD should 2337 // be initialized to ExtraGCD = 30. 2338 DEBUG(dbgs() << " ExtraGCD = " << ExtraGCD << '\n'); 2339 2340 bool Improved = false; 2341 Coefficients = Src; 2342 while (const SCEVAddRecExpr *AddRec = 2343 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2344 Coefficients = AddRec->getStart(); 2345 const Loop *CurLoop = AddRec->getLoop(); 2346 RunningGCD = ExtraGCD; 2347 const SCEV *SrcCoeff = AddRec->getStepRecurrence(*SE); 2348 const SCEV *DstCoeff = SE->getMinusSCEV(SrcCoeff, SrcCoeff); 2349 const SCEV *Inner = Src; 2350 while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { 2351 AddRec = cast<SCEVAddRecExpr>(Inner); 2352 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2353 if (CurLoop == AddRec->getLoop()) 2354 ; // SrcCoeff == Coeff 2355 else { 2356 if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff)) 2357 // If the coefficient is the product of a constant and other stuff, 2358 // we can use the constant in the GCD computation. 2359 Constant = getConstantPart(Product); 2360 else 2361 Constant = cast<SCEVConstant>(Coeff); 2362 APInt ConstCoeff = Constant->getValue()->getValue(); 2363 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2364 } 2365 Inner = AddRec->getStart(); 2366 } 2367 Inner = Dst; 2368 while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { 2369 AddRec = cast<SCEVAddRecExpr>(Inner); 2370 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2371 if (CurLoop == AddRec->getLoop()) 2372 DstCoeff = Coeff; 2373 else { 2374 if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff)) 2375 // If the coefficient is the product of a constant and other stuff, 2376 // we can use the constant in the GCD computation. 2377 Constant = getConstantPart(Product); 2378 else 2379 Constant = cast<SCEVConstant>(Coeff); 2380 APInt ConstCoeff = Constant->getValue()->getValue(); 2381 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2382 } 2383 Inner = AddRec->getStart(); 2384 } 2385 Delta = SE->getMinusSCEV(SrcCoeff, DstCoeff); 2386 if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Delta)) 2387 // If the coefficient is the product of a constant and other stuff, 2388 // we can use the constant in the GCD computation. 2389 Constant = getConstantPart(Product); 2390 else if (isa<SCEVConstant>(Delta)) 2391 Constant = cast<SCEVConstant>(Delta); 2392 else { 2393 // The difference of the two coefficients might not be a product 2394 // or constant, in which case we give up on this direction. 2395 continue; 2396 } 2397 APInt ConstCoeff = Constant->getValue()->getValue(); 2398 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2399 DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n"); 2400 if (RunningGCD != 0) { 2401 Remainder = ConstDelta.srem(RunningGCD); 2402 DEBUG(dbgs() << "\tRemainder = " << Remainder << "\n"); 2403 if (Remainder != 0) { 2404 unsigned Level = mapSrcLoop(CurLoop); 2405 Result.DV[Level - 1].Direction &= unsigned(~Dependence::DVEntry::EQ); 2406 Improved = true; 2407 } 2408 } 2409 } 2410 if (Improved) 2411 ++GCDsuccesses; 2412 DEBUG(dbgs() << "all done\n"); 2413 return false; 2414 } 2415 2416 2417 //===----------------------------------------------------------------------===// 2418 // banerjeeMIVtest - 2419 // Use Banerjee's Inequalities to test an MIV subscript pair. 2420 // (Wolfe, in the race-car book, calls this the Extreme Value Test.) 2421 // Generally follows the discussion in Section 2.5.2 of 2422 // 2423 // Optimizing Supercompilers for Supercomputers 2424 // Michael Wolfe 2425 // 2426 // The inequalities given on page 25 are simplified in that loops are 2427 // normalized so that the lower bound is always 0 and the stride is always 1. 2428 // For example, Wolfe gives 2429 // 2430 // LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2431 // 2432 // where A_k is the coefficient of the kth index in the source subscript, 2433 // B_k is the coefficient of the kth index in the destination subscript, 2434 // U_k is the upper bound of the kth index, L_k is the lower bound of the Kth 2435 // index, and N_k is the stride of the kth index. Since all loops are normalized 2436 // by the SCEV package, N_k = 1 and L_k = 0, allowing us to simplify the 2437 // equation to 2438 // 2439 // LB^<_k = (A^-_k - B_k)^- (U_k - 0 - 1) + (A_k - B_k)0 - B_k 1 2440 // = (A^-_k - B_k)^- (U_k - 1) - B_k 2441 // 2442 // Similar simplifications are possible for the other equations. 2443 // 2444 // When we can't determine the number of iterations for a loop, 2445 // we use NULL as an indicator for the worst case, infinity. 2446 // When computing the upper bound, NULL denotes +inf; 2447 // for the lower bound, NULL denotes -inf. 2448 // 2449 // Return true if dependence disproved. 2450 bool DependenceAnalysis::banerjeeMIVtest(const SCEV *Src, 2451 const SCEV *Dst, 2452 const SmallBitVector &Loops, 2453 FullDependence &Result) const { 2454 DEBUG(dbgs() << "starting Banerjee\n"); 2455 ++BanerjeeApplications; 2456 DEBUG(dbgs() << " Src = " << *Src << '\n'); 2457 const SCEV *A0; 2458 CoefficientInfo *A = collectCoeffInfo(Src, true, A0); 2459 DEBUG(dbgs() << " Dst = " << *Dst << '\n'); 2460 const SCEV *B0; 2461 CoefficientInfo *B = collectCoeffInfo(Dst, false, B0); 2462 BoundInfo *Bound = new BoundInfo[MaxLevels + 1]; 2463 const SCEV *Delta = SE->getMinusSCEV(B0, A0); 2464 DEBUG(dbgs() << "\tDelta = " << *Delta << '\n'); 2465 2466 // Compute bounds for all the * directions. 2467 DEBUG(dbgs() << "\tBounds[*]\n"); 2468 for (unsigned K = 1; K <= MaxLevels; ++K) { 2469 Bound[K].Iterations = A[K].Iterations ? A[K].Iterations : B[K].Iterations; 2470 Bound[K].Direction = Dependence::DVEntry::ALL; 2471 Bound[K].DirSet = Dependence::DVEntry::NONE; 2472 findBoundsALL(A, B, Bound, K); 2473 #ifndef NDEBUG 2474 DEBUG(dbgs() << "\t " << K << '\t'); 2475 if (Bound[K].Lower[Dependence::DVEntry::ALL]) 2476 DEBUG(dbgs() << *Bound[K].Lower[Dependence::DVEntry::ALL] << '\t'); 2477 else 2478 DEBUG(dbgs() << "-inf\t"); 2479 if (Bound[K].Upper[Dependence::DVEntry::ALL]) 2480 DEBUG(dbgs() << *Bound[K].Upper[Dependence::DVEntry::ALL] << '\n'); 2481 else 2482 DEBUG(dbgs() << "+inf\n"); 2483 #endif 2484 } 2485 2486 // Test the *, *, *, ... case. 2487 bool Disproved = false; 2488 if (testBounds(Dependence::DVEntry::ALL, 0, Bound, Delta)) { 2489 // Explore the direction vector hierarchy. 2490 unsigned DepthExpanded = 0; 2491 unsigned NewDeps = exploreDirections(1, A, B, Bound, 2492 Loops, DepthExpanded, Delta); 2493 if (NewDeps > 0) { 2494 bool Improved = false; 2495 for (unsigned K = 1; K <= CommonLevels; ++K) { 2496 if (Loops[K]) { 2497 unsigned Old = Result.DV[K - 1].Direction; 2498 Result.DV[K - 1].Direction = Old & Bound[K].DirSet; 2499 Improved |= Old != Result.DV[K - 1].Direction; 2500 if (!Result.DV[K - 1].Direction) { 2501 Improved = false; 2502 Disproved = true; 2503 break; 2504 } 2505 } 2506 } 2507 if (Improved) 2508 ++BanerjeeSuccesses; 2509 } 2510 else { 2511 ++BanerjeeIndependence; 2512 Disproved = true; 2513 } 2514 } 2515 else { 2516 ++BanerjeeIndependence; 2517 Disproved = true; 2518 } 2519 delete [] Bound; 2520 delete [] A; 2521 delete [] B; 2522 return Disproved; 2523 } 2524 2525 2526 // Hierarchically expands the direction vector 2527 // search space, combining the directions of discovered dependences 2528 // in the DirSet field of Bound. Returns the number of distinct 2529 // dependences discovered. If the dependence is disproved, 2530 // it will return 0. 2531 unsigned DependenceAnalysis::exploreDirections(unsigned Level, 2532 CoefficientInfo *A, 2533 CoefficientInfo *B, 2534 BoundInfo *Bound, 2535 const SmallBitVector &Loops, 2536 unsigned &DepthExpanded, 2537 const SCEV *Delta) const { 2538 if (Level > CommonLevels) { 2539 // record result 2540 DEBUG(dbgs() << "\t["); 2541 for (unsigned K = 1; K <= CommonLevels; ++K) { 2542 if (Loops[K]) { 2543 Bound[K].DirSet |= Bound[K].Direction; 2544 #ifndef NDEBUG 2545 switch (Bound[K].Direction) { 2546 case Dependence::DVEntry::LT: 2547 DEBUG(dbgs() << " <"); 2548 break; 2549 case Dependence::DVEntry::EQ: 2550 DEBUG(dbgs() << " ="); 2551 break; 2552 case Dependence::DVEntry::GT: 2553 DEBUG(dbgs() << " >"); 2554 break; 2555 case Dependence::DVEntry::ALL: 2556 DEBUG(dbgs() << " *"); 2557 break; 2558 default: 2559 llvm_unreachable("unexpected Bound[K].Direction"); 2560 } 2561 #endif 2562 } 2563 } 2564 DEBUG(dbgs() << " ]\n"); 2565 return 1; 2566 } 2567 if (Loops[Level]) { 2568 if (Level > DepthExpanded) { 2569 DepthExpanded = Level; 2570 // compute bounds for <, =, > at current level 2571 findBoundsLT(A, B, Bound, Level); 2572 findBoundsGT(A, B, Bound, Level); 2573 findBoundsEQ(A, B, Bound, Level); 2574 #ifndef NDEBUG 2575 DEBUG(dbgs() << "\tBound for level = " << Level << '\n'); 2576 DEBUG(dbgs() << "\t <\t"); 2577 if (Bound[Level].Lower[Dependence::DVEntry::LT]) 2578 DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::LT] << '\t'); 2579 else 2580 DEBUG(dbgs() << "-inf\t"); 2581 if (Bound[Level].Upper[Dependence::DVEntry::LT]) 2582 DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::LT] << '\n'); 2583 else 2584 DEBUG(dbgs() << "+inf\n"); 2585 DEBUG(dbgs() << "\t =\t"); 2586 if (Bound[Level].Lower[Dependence::DVEntry::EQ]) 2587 DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::EQ] << '\t'); 2588 else 2589 DEBUG(dbgs() << "-inf\t"); 2590 if (Bound[Level].Upper[Dependence::DVEntry::EQ]) 2591 DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::EQ] << '\n'); 2592 else 2593 DEBUG(dbgs() << "+inf\n"); 2594 DEBUG(dbgs() << "\t >\t"); 2595 if (Bound[Level].Lower[Dependence::DVEntry::GT]) 2596 DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::GT] << '\t'); 2597 else 2598 DEBUG(dbgs() << "-inf\t"); 2599 if (Bound[Level].Upper[Dependence::DVEntry::GT]) 2600 DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::GT] << '\n'); 2601 else 2602 DEBUG(dbgs() << "+inf\n"); 2603 #endif 2604 } 2605 2606 unsigned NewDeps = 0; 2607 2608 // test bounds for <, *, *, ... 2609 if (testBounds(Dependence::DVEntry::LT, Level, Bound, Delta)) 2610 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2611 Loops, DepthExpanded, Delta); 2612 2613 // Test bounds for =, *, *, ... 2614 if (testBounds(Dependence::DVEntry::EQ, Level, Bound, Delta)) 2615 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2616 Loops, DepthExpanded, Delta); 2617 2618 // test bounds for >, *, *, ... 2619 if (testBounds(Dependence::DVEntry::GT, Level, Bound, Delta)) 2620 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2621 Loops, DepthExpanded, Delta); 2622 2623 Bound[Level].Direction = Dependence::DVEntry::ALL; 2624 return NewDeps; 2625 } 2626 else 2627 return exploreDirections(Level + 1, A, B, Bound, Loops, DepthExpanded, Delta); 2628 } 2629 2630 2631 // Returns true iff the current bounds are plausible. 2632 bool DependenceAnalysis::testBounds(unsigned char DirKind, 2633 unsigned Level, 2634 BoundInfo *Bound, 2635 const SCEV *Delta) const { 2636 Bound[Level].Direction = DirKind; 2637 if (const SCEV *LowerBound = getLowerBound(Bound)) 2638 if (isKnownPredicate(CmpInst::ICMP_SGT, LowerBound, Delta)) 2639 return false; 2640 if (const SCEV *UpperBound = getUpperBound(Bound)) 2641 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, UpperBound)) 2642 return false; 2643 return true; 2644 } 2645 2646 2647 // Computes the upper and lower bounds for level K 2648 // using the * direction. Records them in Bound. 2649 // Wolfe gives the equations 2650 // 2651 // LB^*_k = (A^-_k - B^+_k)(U_k - L_k) + (A_k - B_k)L_k 2652 // UB^*_k = (A^+_k - B^-_k)(U_k - L_k) + (A_k - B_k)L_k 2653 // 2654 // Since we normalize loops, we can simplify these equations to 2655 // 2656 // LB^*_k = (A^-_k - B^+_k)U_k 2657 // UB^*_k = (A^+_k - B^-_k)U_k 2658 // 2659 // We must be careful to handle the case where the upper bound is unknown. 2660 // Note that the lower bound is always <= 0 2661 // and the upper bound is always >= 0. 2662 void DependenceAnalysis::findBoundsALL(CoefficientInfo *A, 2663 CoefficientInfo *B, 2664 BoundInfo *Bound, 2665 unsigned K) const { 2666 Bound[K].Lower[Dependence::DVEntry::ALL] = nullptr; // Default value = -infinity. 2667 Bound[K].Upper[Dependence::DVEntry::ALL] = nullptr; // Default value = +infinity. 2668 if (Bound[K].Iterations) { 2669 Bound[K].Lower[Dependence::DVEntry::ALL] = 2670 SE->getMulExpr(SE->getMinusSCEV(A[K].NegPart, B[K].PosPart), 2671 Bound[K].Iterations); 2672 Bound[K].Upper[Dependence::DVEntry::ALL] = 2673 SE->getMulExpr(SE->getMinusSCEV(A[K].PosPart, B[K].NegPart), 2674 Bound[K].Iterations); 2675 } 2676 else { 2677 // If the difference is 0, we won't need to know the number of iterations. 2678 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].NegPart, B[K].PosPart)) 2679 Bound[K].Lower[Dependence::DVEntry::ALL] = 2680 SE->getConstant(A[K].Coeff->getType(), 0); 2681 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].PosPart, B[K].NegPart)) 2682 Bound[K].Upper[Dependence::DVEntry::ALL] = 2683 SE->getConstant(A[K].Coeff->getType(), 0); 2684 } 2685 } 2686 2687 2688 // Computes the upper and lower bounds for level K 2689 // using the = direction. Records them in Bound. 2690 // Wolfe gives the equations 2691 // 2692 // LB^=_k = (A_k - B_k)^- (U_k - L_k) + (A_k - B_k)L_k 2693 // UB^=_k = (A_k - B_k)^+ (U_k - L_k) + (A_k - B_k)L_k 2694 // 2695 // Since we normalize loops, we can simplify these equations to 2696 // 2697 // LB^=_k = (A_k - B_k)^- U_k 2698 // UB^=_k = (A_k - B_k)^+ U_k 2699 // 2700 // We must be careful to handle the case where the upper bound is unknown. 2701 // Note that the lower bound is always <= 0 2702 // and the upper bound is always >= 0. 2703 void DependenceAnalysis::findBoundsEQ(CoefficientInfo *A, 2704 CoefficientInfo *B, 2705 BoundInfo *Bound, 2706 unsigned K) const { 2707 Bound[K].Lower[Dependence::DVEntry::EQ] = nullptr; // Default value = -infinity. 2708 Bound[K].Upper[Dependence::DVEntry::EQ] = nullptr; // Default value = +infinity. 2709 if (Bound[K].Iterations) { 2710 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); 2711 const SCEV *NegativePart = getNegativePart(Delta); 2712 Bound[K].Lower[Dependence::DVEntry::EQ] = 2713 SE->getMulExpr(NegativePart, Bound[K].Iterations); 2714 const SCEV *PositivePart = getPositivePart(Delta); 2715 Bound[K].Upper[Dependence::DVEntry::EQ] = 2716 SE->getMulExpr(PositivePart, Bound[K].Iterations); 2717 } 2718 else { 2719 // If the positive/negative part of the difference is 0, 2720 // we won't need to know the number of iterations. 2721 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); 2722 const SCEV *NegativePart = getNegativePart(Delta); 2723 if (NegativePart->isZero()) 2724 Bound[K].Lower[Dependence::DVEntry::EQ] = NegativePart; // Zero 2725 const SCEV *PositivePart = getPositivePart(Delta); 2726 if (PositivePart->isZero()) 2727 Bound[K].Upper[Dependence::DVEntry::EQ] = PositivePart; // Zero 2728 } 2729 } 2730 2731 2732 // Computes the upper and lower bounds for level K 2733 // using the < direction. Records them in Bound. 2734 // Wolfe gives the equations 2735 // 2736 // LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2737 // UB^<_k = (A^+_k - B_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2738 // 2739 // Since we normalize loops, we can simplify these equations to 2740 // 2741 // LB^<_k = (A^-_k - B_k)^- (U_k - 1) - B_k 2742 // UB^<_k = (A^+_k - B_k)^+ (U_k - 1) - B_k 2743 // 2744 // We must be careful to handle the case where the upper bound is unknown. 2745 void DependenceAnalysis::findBoundsLT(CoefficientInfo *A, 2746 CoefficientInfo *B, 2747 BoundInfo *Bound, 2748 unsigned K) const { 2749 Bound[K].Lower[Dependence::DVEntry::LT] = nullptr; // Default value = -infinity. 2750 Bound[K].Upper[Dependence::DVEntry::LT] = nullptr; // Default value = +infinity. 2751 if (Bound[K].Iterations) { 2752 const SCEV *Iter_1 = 2753 SE->getMinusSCEV(Bound[K].Iterations, 2754 SE->getConstant(Bound[K].Iterations->getType(), 1)); 2755 const SCEV *NegPart = 2756 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); 2757 Bound[K].Lower[Dependence::DVEntry::LT] = 2758 SE->getMinusSCEV(SE->getMulExpr(NegPart, Iter_1), B[K].Coeff); 2759 const SCEV *PosPart = 2760 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); 2761 Bound[K].Upper[Dependence::DVEntry::LT] = 2762 SE->getMinusSCEV(SE->getMulExpr(PosPart, Iter_1), B[K].Coeff); 2763 } 2764 else { 2765 // If the positive/negative part of the difference is 0, 2766 // we won't need to know the number of iterations. 2767 const SCEV *NegPart = 2768 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); 2769 if (NegPart->isZero()) 2770 Bound[K].Lower[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); 2771 const SCEV *PosPart = 2772 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); 2773 if (PosPart->isZero()) 2774 Bound[K].Upper[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); 2775 } 2776 } 2777 2778 2779 // Computes the upper and lower bounds for level K 2780 // using the > direction. Records them in Bound. 2781 // Wolfe gives the equations 2782 // 2783 // LB^>_k = (A_k - B^+_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k 2784 // UB^>_k = (A_k - B^-_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k 2785 // 2786 // Since we normalize loops, we can simplify these equations to 2787 // 2788 // LB^>_k = (A_k - B^+_k)^- (U_k - 1) + A_k 2789 // UB^>_k = (A_k - B^-_k)^+ (U_k - 1) + A_k 2790 // 2791 // We must be careful to handle the case where the upper bound is unknown. 2792 void DependenceAnalysis::findBoundsGT(CoefficientInfo *A, 2793 CoefficientInfo *B, 2794 BoundInfo *Bound, 2795 unsigned K) const { 2796 Bound[K].Lower[Dependence::DVEntry::GT] = nullptr; // Default value = -infinity. 2797 Bound[K].Upper[Dependence::DVEntry::GT] = nullptr; // Default value = +infinity. 2798 if (Bound[K].Iterations) { 2799 const SCEV *Iter_1 = 2800 SE->getMinusSCEV(Bound[K].Iterations, 2801 SE->getConstant(Bound[K].Iterations->getType(), 1)); 2802 const SCEV *NegPart = 2803 getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); 2804 Bound[K].Lower[Dependence::DVEntry::GT] = 2805 SE->getAddExpr(SE->getMulExpr(NegPart, Iter_1), A[K].Coeff); 2806 const SCEV *PosPart = 2807 getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); 2808 Bound[K].Upper[Dependence::DVEntry::GT] = 2809 SE->getAddExpr(SE->getMulExpr(PosPart, Iter_1), A[K].Coeff); 2810 } 2811 else { 2812 // If the positive/negative part of the difference is 0, 2813 // we won't need to know the number of iterations. 2814 const SCEV *NegPart = getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); 2815 if (NegPart->isZero()) 2816 Bound[K].Lower[Dependence::DVEntry::GT] = A[K].Coeff; 2817 const SCEV *PosPart = getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); 2818 if (PosPart->isZero()) 2819 Bound[K].Upper[Dependence::DVEntry::GT] = A[K].Coeff; 2820 } 2821 } 2822 2823 2824 // X^+ = max(X, 0) 2825 const SCEV *DependenceAnalysis::getPositivePart(const SCEV *X) const { 2826 return SE->getSMaxExpr(X, SE->getConstant(X->getType(), 0)); 2827 } 2828 2829 2830 // X^- = min(X, 0) 2831 const SCEV *DependenceAnalysis::getNegativePart(const SCEV *X) const { 2832 return SE->getSMinExpr(X, SE->getConstant(X->getType(), 0)); 2833 } 2834 2835 2836 // Walks through the subscript, 2837 // collecting each coefficient, the associated loop bounds, 2838 // and recording its positive and negative parts for later use. 2839 DependenceAnalysis::CoefficientInfo * 2840 DependenceAnalysis::collectCoeffInfo(const SCEV *Subscript, 2841 bool SrcFlag, 2842 const SCEV *&Constant) const { 2843 const SCEV *Zero = SE->getConstant(Subscript->getType(), 0); 2844 CoefficientInfo *CI = new CoefficientInfo[MaxLevels + 1]; 2845 for (unsigned K = 1; K <= MaxLevels; ++K) { 2846 CI[K].Coeff = Zero; 2847 CI[K].PosPart = Zero; 2848 CI[K].NegPart = Zero; 2849 CI[K].Iterations = nullptr; 2850 } 2851 while (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Subscript)) { 2852 const Loop *L = AddRec->getLoop(); 2853 unsigned K = SrcFlag ? mapSrcLoop(L) : mapDstLoop(L); 2854 CI[K].Coeff = AddRec->getStepRecurrence(*SE); 2855 CI[K].PosPart = getPositivePart(CI[K].Coeff); 2856 CI[K].NegPart = getNegativePart(CI[K].Coeff); 2857 CI[K].Iterations = collectUpperBound(L, Subscript->getType()); 2858 Subscript = AddRec->getStart(); 2859 } 2860 Constant = Subscript; 2861 #ifndef NDEBUG 2862 DEBUG(dbgs() << "\tCoefficient Info\n"); 2863 for (unsigned K = 1; K <= MaxLevels; ++K) { 2864 DEBUG(dbgs() << "\t " << K << "\t" << *CI[K].Coeff); 2865 DEBUG(dbgs() << "\tPos Part = "); 2866 DEBUG(dbgs() << *CI[K].PosPart); 2867 DEBUG(dbgs() << "\tNeg Part = "); 2868 DEBUG(dbgs() << *CI[K].NegPart); 2869 DEBUG(dbgs() << "\tUpper Bound = "); 2870 if (CI[K].Iterations) 2871 DEBUG(dbgs() << *CI[K].Iterations); 2872 else 2873 DEBUG(dbgs() << "+inf"); 2874 DEBUG(dbgs() << '\n'); 2875 } 2876 DEBUG(dbgs() << "\t Constant = " << *Subscript << '\n'); 2877 #endif 2878 return CI; 2879 } 2880 2881 2882 // Looks through all the bounds info and 2883 // computes the lower bound given the current direction settings 2884 // at each level. If the lower bound for any level is -inf, 2885 // the result is -inf. 2886 const SCEV *DependenceAnalysis::getLowerBound(BoundInfo *Bound) const { 2887 const SCEV *Sum = Bound[1].Lower[Bound[1].Direction]; 2888 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { 2889 if (Bound[K].Lower[Bound[K].Direction]) 2890 Sum = SE->getAddExpr(Sum, Bound[K].Lower[Bound[K].Direction]); 2891 else 2892 Sum = nullptr; 2893 } 2894 return Sum; 2895 } 2896 2897 2898 // Looks through all the bounds info and 2899 // computes the upper bound given the current direction settings 2900 // at each level. If the upper bound at any level is +inf, 2901 // the result is +inf. 2902 const SCEV *DependenceAnalysis::getUpperBound(BoundInfo *Bound) const { 2903 const SCEV *Sum = Bound[1].Upper[Bound[1].Direction]; 2904 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { 2905 if (Bound[K].Upper[Bound[K].Direction]) 2906 Sum = SE->getAddExpr(Sum, Bound[K].Upper[Bound[K].Direction]); 2907 else 2908 Sum = nullptr; 2909 } 2910 return Sum; 2911 } 2912 2913 2914 //===----------------------------------------------------------------------===// 2915 // Constraint manipulation for Delta test. 2916 2917 // Given a linear SCEV, 2918 // return the coefficient (the step) 2919 // corresponding to the specified loop. 2920 // If there isn't one, return 0. 2921 // For example, given a*i + b*j + c*k, zeroing the coefficient 2922 // corresponding to the j loop would yield b. 2923 const SCEV *DependenceAnalysis::findCoefficient(const SCEV *Expr, 2924 const Loop *TargetLoop) const { 2925 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 2926 if (!AddRec) 2927 return SE->getConstant(Expr->getType(), 0); 2928 if (AddRec->getLoop() == TargetLoop) 2929 return AddRec->getStepRecurrence(*SE); 2930 return findCoefficient(AddRec->getStart(), TargetLoop); 2931 } 2932 2933 2934 // Given a linear SCEV, 2935 // return the SCEV given by zeroing out the coefficient 2936 // corresponding to the specified loop. 2937 // For example, given a*i + b*j + c*k, zeroing the coefficient 2938 // corresponding to the j loop would yield a*i + c*k. 2939 const SCEV *DependenceAnalysis::zeroCoefficient(const SCEV *Expr, 2940 const Loop *TargetLoop) const { 2941 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 2942 if (!AddRec) 2943 return Expr; // ignore 2944 if (AddRec->getLoop() == TargetLoop) 2945 return AddRec->getStart(); 2946 return SE->getAddRecExpr(zeroCoefficient(AddRec->getStart(), TargetLoop), 2947 AddRec->getStepRecurrence(*SE), 2948 AddRec->getLoop(), 2949 AddRec->getNoWrapFlags()); 2950 } 2951 2952 2953 // Given a linear SCEV Expr, 2954 // return the SCEV given by adding some Value to the 2955 // coefficient corresponding to the specified TargetLoop. 2956 // For example, given a*i + b*j + c*k, adding 1 to the coefficient 2957 // corresponding to the j loop would yield a*i + (b+1)*j + c*k. 2958 const SCEV *DependenceAnalysis::addToCoefficient(const SCEV *Expr, 2959 const Loop *TargetLoop, 2960 const SCEV *Value) const { 2961 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 2962 if (!AddRec) // create a new addRec 2963 return SE->getAddRecExpr(Expr, 2964 Value, 2965 TargetLoop, 2966 SCEV::FlagAnyWrap); // Worst case, with no info. 2967 if (AddRec->getLoop() == TargetLoop) { 2968 const SCEV *Sum = SE->getAddExpr(AddRec->getStepRecurrence(*SE), Value); 2969 if (Sum->isZero()) 2970 return AddRec->getStart(); 2971 return SE->getAddRecExpr(AddRec->getStart(), 2972 Sum, 2973 AddRec->getLoop(), 2974 AddRec->getNoWrapFlags()); 2975 } 2976 if (SE->isLoopInvariant(AddRec, TargetLoop)) 2977 return SE->getAddRecExpr(AddRec, Value, TargetLoop, SCEV::FlagAnyWrap); 2978 return SE->getAddRecExpr( 2979 addToCoefficient(AddRec->getStart(), TargetLoop, Value), 2980 AddRec->getStepRecurrence(*SE), AddRec->getLoop(), 2981 AddRec->getNoWrapFlags()); 2982 } 2983 2984 2985 // Review the constraints, looking for opportunities 2986 // to simplify a subscript pair (Src and Dst). 2987 // Return true if some simplification occurs. 2988 // If the simplification isn't exact (that is, if it is conservative 2989 // in terms of dependence), set consistent to false. 2990 // Corresponds to Figure 5 from the paper 2991 // 2992 // Practical Dependence Testing 2993 // Goff, Kennedy, Tseng 2994 // PLDI 1991 2995 bool DependenceAnalysis::propagate(const SCEV *&Src, 2996 const SCEV *&Dst, 2997 SmallBitVector &Loops, 2998 SmallVectorImpl<Constraint> &Constraints, 2999 bool &Consistent) { 3000 bool Result = false; 3001 for (int LI = Loops.find_first(); LI >= 0; LI = Loops.find_next(LI)) { 3002 DEBUG(dbgs() << "\t Constraint[" << LI << "] is"); 3003 DEBUG(Constraints[LI].dump(dbgs())); 3004 if (Constraints[LI].isDistance()) 3005 Result |= propagateDistance(Src, Dst, Constraints[LI], Consistent); 3006 else if (Constraints[LI].isLine()) 3007 Result |= propagateLine(Src, Dst, Constraints[LI], Consistent); 3008 else if (Constraints[LI].isPoint()) 3009 Result |= propagatePoint(Src, Dst, Constraints[LI]); 3010 } 3011 return Result; 3012 } 3013 3014 3015 // Attempt to propagate a distance 3016 // constraint into a subscript pair (Src and Dst). 3017 // Return true if some simplification occurs. 3018 // If the simplification isn't exact (that is, if it is conservative 3019 // in terms of dependence), set consistent to false. 3020 bool DependenceAnalysis::propagateDistance(const SCEV *&Src, 3021 const SCEV *&Dst, 3022 Constraint &CurConstraint, 3023 bool &Consistent) { 3024 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3025 DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); 3026 const SCEV *A_K = findCoefficient(Src, CurLoop); 3027 if (A_K->isZero()) 3028 return false; 3029 const SCEV *DA_K = SE->getMulExpr(A_K, CurConstraint.getD()); 3030 Src = SE->getMinusSCEV(Src, DA_K); 3031 Src = zeroCoefficient(Src, CurLoop); 3032 DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); 3033 DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); 3034 Dst = addToCoefficient(Dst, CurLoop, SE->getNegativeSCEV(A_K)); 3035 DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); 3036 if (!findCoefficient(Dst, CurLoop)->isZero()) 3037 Consistent = false; 3038 return true; 3039 } 3040 3041 3042 // Attempt to propagate a line 3043 // constraint into a subscript pair (Src and Dst). 3044 // Return true if some simplification occurs. 3045 // If the simplification isn't exact (that is, if it is conservative 3046 // in terms of dependence), set consistent to false. 3047 bool DependenceAnalysis::propagateLine(const SCEV *&Src, 3048 const SCEV *&Dst, 3049 Constraint &CurConstraint, 3050 bool &Consistent) { 3051 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3052 const SCEV *A = CurConstraint.getA(); 3053 const SCEV *B = CurConstraint.getB(); 3054 const SCEV *C = CurConstraint.getC(); 3055 DEBUG(dbgs() << "\t\tA = " << *A << ", B = " << *B << ", C = " << *C << "\n"); 3056 DEBUG(dbgs() << "\t\tSrc = " << *Src << "\n"); 3057 DEBUG(dbgs() << "\t\tDst = " << *Dst << "\n"); 3058 if (A->isZero()) { 3059 const SCEVConstant *Bconst = dyn_cast<SCEVConstant>(B); 3060 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3061 if (!Bconst || !Cconst) return false; 3062 APInt Beta = Bconst->getValue()->getValue(); 3063 APInt Charlie = Cconst->getValue()->getValue(); 3064 APInt CdivB = Charlie.sdiv(Beta); 3065 assert(Charlie.srem(Beta) == 0 && "C should be evenly divisible by B"); 3066 const SCEV *AP_K = findCoefficient(Dst, CurLoop); 3067 // Src = SE->getAddExpr(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); 3068 Src = SE->getMinusSCEV(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); 3069 Dst = zeroCoefficient(Dst, CurLoop); 3070 if (!findCoefficient(Src, CurLoop)->isZero()) 3071 Consistent = false; 3072 } 3073 else if (B->isZero()) { 3074 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); 3075 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3076 if (!Aconst || !Cconst) return false; 3077 APInt Alpha = Aconst->getValue()->getValue(); 3078 APInt Charlie = Cconst->getValue()->getValue(); 3079 APInt CdivA = Charlie.sdiv(Alpha); 3080 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); 3081 const SCEV *A_K = findCoefficient(Src, CurLoop); 3082 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); 3083 Src = zeroCoefficient(Src, CurLoop); 3084 if (!findCoefficient(Dst, CurLoop)->isZero()) 3085 Consistent = false; 3086 } 3087 else if (isKnownPredicate(CmpInst::ICMP_EQ, A, B)) { 3088 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); 3089 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3090 if (!Aconst || !Cconst) return false; 3091 APInt Alpha = Aconst->getValue()->getValue(); 3092 APInt Charlie = Cconst->getValue()->getValue(); 3093 APInt CdivA = Charlie.sdiv(Alpha); 3094 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); 3095 const SCEV *A_K = findCoefficient(Src, CurLoop); 3096 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); 3097 Src = zeroCoefficient(Src, CurLoop); 3098 Dst = addToCoefficient(Dst, CurLoop, A_K); 3099 if (!findCoefficient(Dst, CurLoop)->isZero()) 3100 Consistent = false; 3101 } 3102 else { 3103 // paper is incorrect here, or perhaps just misleading 3104 const SCEV *A_K = findCoefficient(Src, CurLoop); 3105 Src = SE->getMulExpr(Src, A); 3106 Dst = SE->getMulExpr(Dst, A); 3107 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, C)); 3108 Src = zeroCoefficient(Src, CurLoop); 3109 Dst = addToCoefficient(Dst, CurLoop, SE->getMulExpr(A_K, B)); 3110 if (!findCoefficient(Dst, CurLoop)->isZero()) 3111 Consistent = false; 3112 } 3113 DEBUG(dbgs() << "\t\tnew Src = " << *Src << "\n"); 3114 DEBUG(dbgs() << "\t\tnew Dst = " << *Dst << "\n"); 3115 return true; 3116 } 3117 3118 3119 // Attempt to propagate a point 3120 // constraint into a subscript pair (Src and Dst). 3121 // Return true if some simplification occurs. 3122 bool DependenceAnalysis::propagatePoint(const SCEV *&Src, 3123 const SCEV *&Dst, 3124 Constraint &CurConstraint) { 3125 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3126 const SCEV *A_K = findCoefficient(Src, CurLoop); 3127 const SCEV *AP_K = findCoefficient(Dst, CurLoop); 3128 const SCEV *XA_K = SE->getMulExpr(A_K, CurConstraint.getX()); 3129 const SCEV *YAP_K = SE->getMulExpr(AP_K, CurConstraint.getY()); 3130 DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); 3131 Src = SE->getAddExpr(Src, SE->getMinusSCEV(XA_K, YAP_K)); 3132 Src = zeroCoefficient(Src, CurLoop); 3133 DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); 3134 DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); 3135 Dst = zeroCoefficient(Dst, CurLoop); 3136 DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); 3137 return true; 3138 } 3139 3140 3141 // Update direction vector entry based on the current constraint. 3142 void DependenceAnalysis::updateDirection(Dependence::DVEntry &Level, 3143 const Constraint &CurConstraint 3144 ) const { 3145 DEBUG(dbgs() << "\tUpdate direction, constraint ="); 3146 DEBUG(CurConstraint.dump(dbgs())); 3147 if (CurConstraint.isAny()) 3148 ; // use defaults 3149 else if (CurConstraint.isDistance()) { 3150 // this one is consistent, the others aren't 3151 Level.Scalar = false; 3152 Level.Distance = CurConstraint.getD(); 3153 unsigned NewDirection = Dependence::DVEntry::NONE; 3154 if (!SE->isKnownNonZero(Level.Distance)) // if may be zero 3155 NewDirection = Dependence::DVEntry::EQ; 3156 if (!SE->isKnownNonPositive(Level.Distance)) // if may be positive 3157 NewDirection |= Dependence::DVEntry::LT; 3158 if (!SE->isKnownNonNegative(Level.Distance)) // if may be negative 3159 NewDirection |= Dependence::DVEntry::GT; 3160 Level.Direction &= NewDirection; 3161 } 3162 else if (CurConstraint.isLine()) { 3163 Level.Scalar = false; 3164 Level.Distance = nullptr; 3165 // direction should be accurate 3166 } 3167 else if (CurConstraint.isPoint()) { 3168 Level.Scalar = false; 3169 Level.Distance = nullptr; 3170 unsigned NewDirection = Dependence::DVEntry::NONE; 3171 if (!isKnownPredicate(CmpInst::ICMP_NE, 3172 CurConstraint.getY(), 3173 CurConstraint.getX())) 3174 // if X may be = Y 3175 NewDirection |= Dependence::DVEntry::EQ; 3176 if (!isKnownPredicate(CmpInst::ICMP_SLE, 3177 CurConstraint.getY(), 3178 CurConstraint.getX())) 3179 // if Y may be > X 3180 NewDirection |= Dependence::DVEntry::LT; 3181 if (!isKnownPredicate(CmpInst::ICMP_SGE, 3182 CurConstraint.getY(), 3183 CurConstraint.getX())) 3184 // if Y may be < X 3185 NewDirection |= Dependence::DVEntry::GT; 3186 Level.Direction &= NewDirection; 3187 } 3188 else 3189 llvm_unreachable("constraint has unexpected kind"); 3190 } 3191 3192 /// Check if we can delinearize the subscripts. If the SCEVs representing the 3193 /// source and destination array references are recurrences on a nested loop, 3194 /// this function flattens the nested recurrences into separate recurrences 3195 /// for each loop level. 3196 bool DependenceAnalysis::tryDelinearize(const SCEV *SrcSCEV, 3197 const SCEV *DstSCEV, 3198 SmallVectorImpl<Subscript> &Pair, 3199 const SCEV *ElementSize) { 3200 const SCEVUnknown *SrcBase = 3201 dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcSCEV)); 3202 const SCEVUnknown *DstBase = 3203 dyn_cast<SCEVUnknown>(SE->getPointerBase(DstSCEV)); 3204 3205 if (!SrcBase || !DstBase || SrcBase != DstBase) 3206 return false; 3207 3208 SrcSCEV = SE->getMinusSCEV(SrcSCEV, SrcBase); 3209 DstSCEV = SE->getMinusSCEV(DstSCEV, DstBase); 3210 3211 const SCEVAddRecExpr *SrcAR = dyn_cast<SCEVAddRecExpr>(SrcSCEV); 3212 const SCEVAddRecExpr *DstAR = dyn_cast<SCEVAddRecExpr>(DstSCEV); 3213 if (!SrcAR || !DstAR || !SrcAR->isAffine() || !DstAR->isAffine()) 3214 return false; 3215 3216 // First step: collect parametric terms in both array references. 3217 SmallVector<const SCEV *, 4> Terms; 3218 SrcAR->collectParametricTerms(*SE, Terms); 3219 DstAR->collectParametricTerms(*SE, Terms); 3220 3221 // Second step: find subscript sizes. 3222 SmallVector<const SCEV *, 4> Sizes; 3223 SE->findArrayDimensions(Terms, Sizes, ElementSize); 3224 3225 // Third step: compute the access functions for each subscript. 3226 SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts; 3227 SrcAR->computeAccessFunctions(*SE, SrcSubscripts, Sizes); 3228 DstAR->computeAccessFunctions(*SE, DstSubscripts, Sizes); 3229 3230 // Fail when there is only a subscript: that's a linearized access function. 3231 if (SrcSubscripts.size() < 2 || DstSubscripts.size() < 2 || 3232 SrcSubscripts.size() != DstSubscripts.size()) 3233 return false; 3234 3235 int size = SrcSubscripts.size(); 3236 3237 DEBUG({ 3238 dbgs() << "\nSrcSubscripts: "; 3239 for (int i = 0; i < size; i++) 3240 dbgs() << *SrcSubscripts[i]; 3241 dbgs() << "\nDstSubscripts: "; 3242 for (int i = 0; i < size; i++) 3243 dbgs() << *DstSubscripts[i]; 3244 }); 3245 3246 // The delinearization transforms a single-subscript MIV dependence test into 3247 // a multi-subscript SIV dependence test that is easier to compute. So we 3248 // resize Pair to contain as many pairs of subscripts as the delinearization 3249 // has found, and then initialize the pairs following the delinearization. 3250 Pair.resize(size); 3251 for (int i = 0; i < size; ++i) { 3252 Pair[i].Src = SrcSubscripts[i]; 3253 Pair[i].Dst = DstSubscripts[i]; 3254 unifySubscriptType(&Pair[i]); 3255 3256 // FIXME: we should record the bounds SrcSizes[i] and DstSizes[i] that the 3257 // delinearization has found, and add these constraints to the dependence 3258 // check to avoid memory accesses overflow from one dimension into another. 3259 // This is related to the problem of determining the existence of data 3260 // dependences in array accesses using a different number of subscripts: in 3261 // C one can access an array A[100][100]; as A[0][9999], *A[9999], etc. 3262 } 3263 3264 return true; 3265 } 3266 3267 //===----------------------------------------------------------------------===// 3268 3269 #ifndef NDEBUG 3270 // For debugging purposes, dump a small bit vector to dbgs(). 3271 static void dumpSmallBitVector(SmallBitVector &BV) { 3272 dbgs() << "{"; 3273 for (int VI = BV.find_first(); VI >= 0; VI = BV.find_next(VI)) { 3274 dbgs() << VI; 3275 if (BV.find_next(VI) >= 0) 3276 dbgs() << ' '; 3277 } 3278 dbgs() << "}\n"; 3279 } 3280 #endif 3281 3282 3283 // depends - 3284 // Returns NULL if there is no dependence. 3285 // Otherwise, return a Dependence with as many details as possible. 3286 // Corresponds to Section 3.1 in the paper 3287 // 3288 // Practical Dependence Testing 3289 // Goff, Kennedy, Tseng 3290 // PLDI 1991 3291 // 3292 // Care is required to keep the routine below, getSplitIteration(), 3293 // up to date with respect to this routine. 3294 std::unique_ptr<Dependence> 3295 DependenceAnalysis::depends(Instruction *Src, Instruction *Dst, 3296 bool PossiblyLoopIndependent) { 3297 if (Src == Dst) 3298 PossiblyLoopIndependent = false; 3299 3300 if ((!Src->mayReadFromMemory() && !Src->mayWriteToMemory()) || 3301 (!Dst->mayReadFromMemory() && !Dst->mayWriteToMemory())) 3302 // if both instructions don't reference memory, there's no dependence 3303 return nullptr; 3304 3305 if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) { 3306 // can only analyze simple loads and stores, i.e., no calls, invokes, etc. 3307 DEBUG(dbgs() << "can only handle simple loads and stores\n"); 3308 return make_unique<Dependence>(Src, Dst); 3309 } 3310 3311 Value *SrcPtr = getPointerOperand(Src); 3312 Value *DstPtr = getPointerOperand(Dst); 3313 3314 switch (underlyingObjectsAlias(AA, DstPtr, SrcPtr)) { 3315 case AliasAnalysis::MayAlias: 3316 case AliasAnalysis::PartialAlias: 3317 // cannot analyse objects if we don't understand their aliasing. 3318 DEBUG(dbgs() << "can't analyze may or partial alias\n"); 3319 return make_unique<Dependence>(Src, Dst); 3320 case AliasAnalysis::NoAlias: 3321 // If the objects noalias, they are distinct, accesses are independent. 3322 DEBUG(dbgs() << "no alias\n"); 3323 return nullptr; 3324 case AliasAnalysis::MustAlias: 3325 break; // The underlying objects alias; test accesses for dependence. 3326 } 3327 3328 // establish loop nesting levels 3329 establishNestingLevels(Src, Dst); 3330 DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n"); 3331 DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n"); 3332 3333 auto Result = llvm::make_unique<FullDependence>( 3334 Src, Dst, PossiblyLoopIndependent, CommonLevels); 3335 ++TotalArrayPairs; 3336 3337 // See if there are GEPs we can use. 3338 bool UsefulGEP = false; 3339 GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr); 3340 GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr); 3341 if (SrcGEP && DstGEP && 3342 SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()) { 3343 const SCEV *SrcPtrSCEV = SE->getSCEV(SrcGEP->getPointerOperand()); 3344 const SCEV *DstPtrSCEV = SE->getSCEV(DstGEP->getPointerOperand()); 3345 DEBUG(dbgs() << " SrcPtrSCEV = " << *SrcPtrSCEV << "\n"); 3346 DEBUG(dbgs() << " DstPtrSCEV = " << *DstPtrSCEV << "\n"); 3347 3348 UsefulGEP = 3349 isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) && 3350 isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent())); 3351 } 3352 unsigned Pairs = UsefulGEP ? SrcGEP->idx_end() - SrcGEP->idx_begin() : 1; 3353 SmallVector<Subscript, 4> Pair(Pairs); 3354 if (UsefulGEP) { 3355 DEBUG(dbgs() << " using GEPs\n"); 3356 unsigned P = 0; 3357 for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(), 3358 SrcEnd = SrcGEP->idx_end(), 3359 DstIdx = DstGEP->idx_begin(); 3360 SrcIdx != SrcEnd; 3361 ++SrcIdx, ++DstIdx, ++P) { 3362 Pair[P].Src = SE->getSCEV(*SrcIdx); 3363 Pair[P].Dst = SE->getSCEV(*DstIdx); 3364 unifySubscriptType(&Pair[P]); 3365 } 3366 } 3367 else { 3368 DEBUG(dbgs() << " ignoring GEPs\n"); 3369 const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); 3370 const SCEV *DstSCEV = SE->getSCEV(DstPtr); 3371 DEBUG(dbgs() << " SrcSCEV = " << *SrcSCEV << "\n"); 3372 DEBUG(dbgs() << " DstSCEV = " << *DstSCEV << "\n"); 3373 Pair[0].Src = SrcSCEV; 3374 Pair[0].Dst = DstSCEV; 3375 } 3376 3377 if (Delinearize && Pairs == 1 && CommonLevels > 1 && 3378 tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair, SE->getElementSize(Src))) { 3379 DEBUG(dbgs() << " delinerized GEP\n"); 3380 Pairs = Pair.size(); 3381 } 3382 3383 for (unsigned P = 0; P < Pairs; ++P) { 3384 Pair[P].Loops.resize(MaxLevels + 1); 3385 Pair[P].GroupLoops.resize(MaxLevels + 1); 3386 Pair[P].Group.resize(Pairs); 3387 removeMatchingExtensions(&Pair[P]); 3388 Pair[P].Classification = 3389 classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), 3390 Pair[P].Dst, LI->getLoopFor(Dst->getParent()), 3391 Pair[P].Loops); 3392 Pair[P].GroupLoops = Pair[P].Loops; 3393 Pair[P].Group.set(P); 3394 DEBUG(dbgs() << " subscript " << P << "\n"); 3395 DEBUG(dbgs() << "\tsrc = " << *Pair[P].Src << "\n"); 3396 DEBUG(dbgs() << "\tdst = " << *Pair[P].Dst << "\n"); 3397 DEBUG(dbgs() << "\tclass = " << Pair[P].Classification << "\n"); 3398 DEBUG(dbgs() << "\tloops = "); 3399 DEBUG(dumpSmallBitVector(Pair[P].Loops)); 3400 } 3401 3402 SmallBitVector Separable(Pairs); 3403 SmallBitVector Coupled(Pairs); 3404 3405 // Partition subscripts into separable and minimally-coupled groups 3406 // Algorithm in paper is algorithmically better; 3407 // this may be faster in practice. Check someday. 3408 // 3409 // Here's an example of how it works. Consider this code: 3410 // 3411 // for (i = ...) { 3412 // for (j = ...) { 3413 // for (k = ...) { 3414 // for (l = ...) { 3415 // for (m = ...) { 3416 // A[i][j][k][m] = ...; 3417 // ... = A[0][j][l][i + j]; 3418 // } 3419 // } 3420 // } 3421 // } 3422 // } 3423 // 3424 // There are 4 subscripts here: 3425 // 0 [i] and [0] 3426 // 1 [j] and [j] 3427 // 2 [k] and [l] 3428 // 3 [m] and [i + j] 3429 // 3430 // We've already classified each subscript pair as ZIV, SIV, etc., 3431 // and collected all the loops mentioned by pair P in Pair[P].Loops. 3432 // In addition, we've initialized Pair[P].GroupLoops to Pair[P].Loops 3433 // and set Pair[P].Group = {P}. 3434 // 3435 // Src Dst Classification Loops GroupLoops Group 3436 // 0 [i] [0] SIV {1} {1} {0} 3437 // 1 [j] [j] SIV {2} {2} {1} 3438 // 2 [k] [l] RDIV {3,4} {3,4} {2} 3439 // 3 [m] [i + j] MIV {1,2,5} {1,2,5} {3} 3440 // 3441 // For each subscript SI 0 .. 3, we consider each remaining subscript, SJ. 3442 // So, 0 is compared against 1, 2, and 3; 1 is compared against 2 and 3, etc. 3443 // 3444 // We begin by comparing 0 and 1. The intersection of the GroupLoops is empty. 3445 // Next, 0 and 2. Again, the intersection of their GroupLoops is empty. 3446 // Next 0 and 3. The intersection of their GroupLoop = {1}, not empty, 3447 // so Pair[3].Group = {0,3} and Done = false (that is, 0 will not be added 3448 // to either Separable or Coupled). 3449 // 3450 // Next, we consider 1 and 2. The intersection of the GroupLoops is empty. 3451 // Next, 1 and 3. The intersectionof their GroupLoops = {2}, not empty, 3452 // so Pair[3].Group = {0, 1, 3} and Done = false. 3453 // 3454 // Next, we compare 2 against 3. The intersection of the GroupLoops is empty. 3455 // Since Done remains true, we add 2 to the set of Separable pairs. 3456 // 3457 // Finally, we consider 3. There's nothing to compare it with, 3458 // so Done remains true and we add it to the Coupled set. 3459 // Pair[3].Group = {0, 1, 3} and GroupLoops = {1, 2, 5}. 3460 // 3461 // In the end, we've got 1 separable subscript and 1 coupled group. 3462 for (unsigned SI = 0; SI < Pairs; ++SI) { 3463 if (Pair[SI].Classification == Subscript::NonLinear) { 3464 // ignore these, but collect loops for later 3465 ++NonlinearSubscriptPairs; 3466 collectCommonLoops(Pair[SI].Src, 3467 LI->getLoopFor(Src->getParent()), 3468 Pair[SI].Loops); 3469 collectCommonLoops(Pair[SI].Dst, 3470 LI->getLoopFor(Dst->getParent()), 3471 Pair[SI].Loops); 3472 Result->Consistent = false; 3473 } 3474 else if (Pair[SI].Classification == Subscript::ZIV) { 3475 // always separable 3476 Separable.set(SI); 3477 } 3478 else { 3479 // SIV, RDIV, or MIV, so check for coupled group 3480 bool Done = true; 3481 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { 3482 SmallBitVector Intersection = Pair[SI].GroupLoops; 3483 Intersection &= Pair[SJ].GroupLoops; 3484 if (Intersection.any()) { 3485 // accumulate set of all the loops in group 3486 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; 3487 // accumulate set of all subscripts in group 3488 Pair[SJ].Group |= Pair[SI].Group; 3489 Done = false; 3490 } 3491 } 3492 if (Done) { 3493 if (Pair[SI].Group.count() == 1) { 3494 Separable.set(SI); 3495 ++SeparableSubscriptPairs; 3496 } 3497 else { 3498 Coupled.set(SI); 3499 ++CoupledSubscriptPairs; 3500 } 3501 } 3502 } 3503 } 3504 3505 DEBUG(dbgs() << " Separable = "); 3506 DEBUG(dumpSmallBitVector(Separable)); 3507 DEBUG(dbgs() << " Coupled = "); 3508 DEBUG(dumpSmallBitVector(Coupled)); 3509 3510 Constraint NewConstraint; 3511 NewConstraint.setAny(SE); 3512 3513 // test separable subscripts 3514 for (int SI = Separable.find_first(); SI >= 0; SI = Separable.find_next(SI)) { 3515 DEBUG(dbgs() << "testing subscript " << SI); 3516 switch (Pair[SI].Classification) { 3517 case Subscript::ZIV: 3518 DEBUG(dbgs() << ", ZIV\n"); 3519 if (testZIV(Pair[SI].Src, Pair[SI].Dst, *Result)) 3520 return nullptr; 3521 break; 3522 case Subscript::SIV: { 3523 DEBUG(dbgs() << ", SIV\n"); 3524 unsigned Level; 3525 const SCEV *SplitIter = nullptr; 3526 if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level, *Result, NewConstraint, 3527 SplitIter)) 3528 return nullptr; 3529 break; 3530 } 3531 case Subscript::RDIV: 3532 DEBUG(dbgs() << ", RDIV\n"); 3533 if (testRDIV(Pair[SI].Src, Pair[SI].Dst, *Result)) 3534 return nullptr; 3535 break; 3536 case Subscript::MIV: 3537 DEBUG(dbgs() << ", MIV\n"); 3538 if (testMIV(Pair[SI].Src, Pair[SI].Dst, Pair[SI].Loops, *Result)) 3539 return nullptr; 3540 break; 3541 default: 3542 llvm_unreachable("subscript has unexpected classification"); 3543 } 3544 } 3545 3546 if (Coupled.count()) { 3547 // test coupled subscript groups 3548 DEBUG(dbgs() << "starting on coupled subscripts\n"); 3549 DEBUG(dbgs() << "MaxLevels + 1 = " << MaxLevels + 1 << "\n"); 3550 SmallVector<Constraint, 4> Constraints(MaxLevels + 1); 3551 for (unsigned II = 0; II <= MaxLevels; ++II) 3552 Constraints[II].setAny(SE); 3553 for (int SI = Coupled.find_first(); SI >= 0; SI = Coupled.find_next(SI)) { 3554 DEBUG(dbgs() << "testing subscript group " << SI << " { "); 3555 SmallBitVector Group(Pair[SI].Group); 3556 SmallBitVector Sivs(Pairs); 3557 SmallBitVector Mivs(Pairs); 3558 SmallBitVector ConstrainedLevels(MaxLevels + 1); 3559 for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) { 3560 DEBUG(dbgs() << SJ << " "); 3561 if (Pair[SJ].Classification == Subscript::SIV) 3562 Sivs.set(SJ); 3563 else 3564 Mivs.set(SJ); 3565 } 3566 DEBUG(dbgs() << "}\n"); 3567 while (Sivs.any()) { 3568 bool Changed = false; 3569 for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) { 3570 DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n"); 3571 // SJ is an SIV subscript that's part of the current coupled group 3572 unsigned Level; 3573 const SCEV *SplitIter = nullptr; 3574 DEBUG(dbgs() << "SIV\n"); 3575 if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, *Result, NewConstraint, 3576 SplitIter)) 3577 return nullptr; 3578 ConstrainedLevels.set(Level); 3579 if (intersectConstraints(&Constraints[Level], &NewConstraint)) { 3580 if (Constraints[Level].isEmpty()) { 3581 ++DeltaIndependence; 3582 return nullptr; 3583 } 3584 Changed = true; 3585 } 3586 Sivs.reset(SJ); 3587 } 3588 if (Changed) { 3589 // propagate, possibly creating new SIVs and ZIVs 3590 DEBUG(dbgs() << " propagating\n"); 3591 DEBUG(dbgs() << "\tMivs = "); 3592 DEBUG(dumpSmallBitVector(Mivs)); 3593 for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { 3594 // SJ is an MIV subscript that's part of the current coupled group 3595 DEBUG(dbgs() << "\tSJ = " << SJ << "\n"); 3596 if (propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, 3597 Constraints, Result->Consistent)) { 3598 DEBUG(dbgs() << "\t Changed\n"); 3599 ++DeltaPropagations; 3600 Pair[SJ].Classification = 3601 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), 3602 Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), 3603 Pair[SJ].Loops); 3604 switch (Pair[SJ].Classification) { 3605 case Subscript::ZIV: 3606 DEBUG(dbgs() << "ZIV\n"); 3607 if (testZIV(Pair[SJ].Src, Pair[SJ].Dst, *Result)) 3608 return nullptr; 3609 Mivs.reset(SJ); 3610 break; 3611 case Subscript::SIV: 3612 Sivs.set(SJ); 3613 Mivs.reset(SJ); 3614 break; 3615 case Subscript::RDIV: 3616 case Subscript::MIV: 3617 break; 3618 default: 3619 llvm_unreachable("bad subscript classification"); 3620 } 3621 } 3622 } 3623 } 3624 } 3625 3626 // test & propagate remaining RDIVs 3627 for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { 3628 if (Pair[SJ].Classification == Subscript::RDIV) { 3629 DEBUG(dbgs() << "RDIV test\n"); 3630 if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, *Result)) 3631 return nullptr; 3632 // I don't yet understand how to propagate RDIV results 3633 Mivs.reset(SJ); 3634 } 3635 } 3636 3637 // test remaining MIVs 3638 // This code is temporary. 3639 // Better to somehow test all remaining subscripts simultaneously. 3640 for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { 3641 if (Pair[SJ].Classification == Subscript::MIV) { 3642 DEBUG(dbgs() << "MIV test\n"); 3643 if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, *Result)) 3644 return nullptr; 3645 } 3646 else 3647 llvm_unreachable("expected only MIV subscripts at this point"); 3648 } 3649 3650 // update Result->DV from constraint vector 3651 DEBUG(dbgs() << " updating\n"); 3652 for (int SJ = ConstrainedLevels.find_first(); 3653 SJ >= 0; SJ = ConstrainedLevels.find_next(SJ)) { 3654 updateDirection(Result->DV[SJ - 1], Constraints[SJ]); 3655 if (Result->DV[SJ - 1].Direction == Dependence::DVEntry::NONE) 3656 return nullptr; 3657 } 3658 } 3659 } 3660 3661 // Make sure the Scalar flags are set correctly. 3662 SmallBitVector CompleteLoops(MaxLevels + 1); 3663 for (unsigned SI = 0; SI < Pairs; ++SI) 3664 CompleteLoops |= Pair[SI].Loops; 3665 for (unsigned II = 1; II <= CommonLevels; ++II) 3666 if (CompleteLoops[II]) 3667 Result->DV[II - 1].Scalar = false; 3668 3669 if (PossiblyLoopIndependent) { 3670 // Make sure the LoopIndependent flag is set correctly. 3671 // All directions must include equal, otherwise no 3672 // loop-independent dependence is possible. 3673 for (unsigned II = 1; II <= CommonLevels; ++II) { 3674 if (!(Result->getDirection(II) & Dependence::DVEntry::EQ)) { 3675 Result->LoopIndependent = false; 3676 break; 3677 } 3678 } 3679 } 3680 else { 3681 // On the other hand, if all directions are equal and there's no 3682 // loop-independent dependence possible, then no dependence exists. 3683 bool AllEqual = true; 3684 for (unsigned II = 1; II <= CommonLevels; ++II) { 3685 if (Result->getDirection(II) != Dependence::DVEntry::EQ) { 3686 AllEqual = false; 3687 break; 3688 } 3689 } 3690 if (AllEqual) 3691 return nullptr; 3692 } 3693 3694 return std::move(Result); 3695 } 3696 3697 3698 3699 //===----------------------------------------------------------------------===// 3700 // getSplitIteration - 3701 // Rather than spend rarely-used space recording the splitting iteration 3702 // during the Weak-Crossing SIV test, we re-compute it on demand. 3703 // The re-computation is basically a repeat of the entire dependence test, 3704 // though simplified since we know that the dependence exists. 3705 // It's tedious, since we must go through all propagations, etc. 3706 // 3707 // Care is required to keep this code up to date with respect to the routine 3708 // above, depends(). 3709 // 3710 // Generally, the dependence analyzer will be used to build 3711 // a dependence graph for a function (basically a map from instructions 3712 // to dependences). Looking for cycles in the graph shows us loops 3713 // that cannot be trivially vectorized/parallelized. 3714 // 3715 // We can try to improve the situation by examining all the dependences 3716 // that make up the cycle, looking for ones we can break. 3717 // Sometimes, peeling the first or last iteration of a loop will break 3718 // dependences, and we've got flags for those possibilities. 3719 // Sometimes, splitting a loop at some other iteration will do the trick, 3720 // and we've got a flag for that case. Rather than waste the space to 3721 // record the exact iteration (since we rarely know), we provide 3722 // a method that calculates the iteration. It's a drag that it must work 3723 // from scratch, but wonderful in that it's possible. 3724 // 3725 // Here's an example: 3726 // 3727 // for (i = 0; i < 10; i++) 3728 // A[i] = ... 3729 // ... = A[11 - i] 3730 // 3731 // There's a loop-carried flow dependence from the store to the load, 3732 // found by the weak-crossing SIV test. The dependence will have a flag, 3733 // indicating that the dependence can be broken by splitting the loop. 3734 // Calling getSplitIteration will return 5. 3735 // Splitting the loop breaks the dependence, like so: 3736 // 3737 // for (i = 0; i <= 5; i++) 3738 // A[i] = ... 3739 // ... = A[11 - i] 3740 // for (i = 6; i < 10; i++) 3741 // A[i] = ... 3742 // ... = A[11 - i] 3743 // 3744 // breaks the dependence and allows us to vectorize/parallelize 3745 // both loops. 3746 const SCEV *DependenceAnalysis::getSplitIteration(const Dependence &Dep, 3747 unsigned SplitLevel) { 3748 assert(Dep.isSplitable(SplitLevel) && 3749 "Dep should be splitable at SplitLevel"); 3750 Instruction *Src = Dep.getSrc(); 3751 Instruction *Dst = Dep.getDst(); 3752 assert(Src->mayReadFromMemory() || Src->mayWriteToMemory()); 3753 assert(Dst->mayReadFromMemory() || Dst->mayWriteToMemory()); 3754 assert(isLoadOrStore(Src)); 3755 assert(isLoadOrStore(Dst)); 3756 Value *SrcPtr = getPointerOperand(Src); 3757 Value *DstPtr = getPointerOperand(Dst); 3758 assert(underlyingObjectsAlias(AA, DstPtr, SrcPtr) == 3759 AliasAnalysis::MustAlias); 3760 3761 // establish loop nesting levels 3762 establishNestingLevels(Src, Dst); 3763 3764 FullDependence Result(Src, Dst, false, CommonLevels); 3765 3766 // See if there are GEPs we can use. 3767 bool UsefulGEP = false; 3768 GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr); 3769 GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr); 3770 if (SrcGEP && DstGEP && 3771 SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()) { 3772 const SCEV *SrcPtrSCEV = SE->getSCEV(SrcGEP->getPointerOperand()); 3773 const SCEV *DstPtrSCEV = SE->getSCEV(DstGEP->getPointerOperand()); 3774 UsefulGEP = 3775 isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) && 3776 isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent())); 3777 } 3778 unsigned Pairs = UsefulGEP ? SrcGEP->idx_end() - SrcGEP->idx_begin() : 1; 3779 SmallVector<Subscript, 4> Pair(Pairs); 3780 if (UsefulGEP) { 3781 unsigned P = 0; 3782 for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(), 3783 SrcEnd = SrcGEP->idx_end(), 3784 DstIdx = DstGEP->idx_begin(); 3785 SrcIdx != SrcEnd; 3786 ++SrcIdx, ++DstIdx, ++P) { 3787 Pair[P].Src = SE->getSCEV(*SrcIdx); 3788 Pair[P].Dst = SE->getSCEV(*DstIdx); 3789 } 3790 } 3791 else { 3792 const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); 3793 const SCEV *DstSCEV = SE->getSCEV(DstPtr); 3794 Pair[0].Src = SrcSCEV; 3795 Pair[0].Dst = DstSCEV; 3796 } 3797 3798 if (Delinearize && Pairs == 1 && CommonLevels > 1 && 3799 tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair, SE->getElementSize(Src))) { 3800 DEBUG(dbgs() << " delinerized GEP\n"); 3801 Pairs = Pair.size(); 3802 } 3803 3804 for (unsigned P = 0; P < Pairs; ++P) { 3805 Pair[P].Loops.resize(MaxLevels + 1); 3806 Pair[P].GroupLoops.resize(MaxLevels + 1); 3807 Pair[P].Group.resize(Pairs); 3808 removeMatchingExtensions(&Pair[P]); 3809 Pair[P].Classification = 3810 classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), 3811 Pair[P].Dst, LI->getLoopFor(Dst->getParent()), 3812 Pair[P].Loops); 3813 Pair[P].GroupLoops = Pair[P].Loops; 3814 Pair[P].Group.set(P); 3815 } 3816 3817 SmallBitVector Separable(Pairs); 3818 SmallBitVector Coupled(Pairs); 3819 3820 // partition subscripts into separable and minimally-coupled groups 3821 for (unsigned SI = 0; SI < Pairs; ++SI) { 3822 if (Pair[SI].Classification == Subscript::NonLinear) { 3823 // ignore these, but collect loops for later 3824 collectCommonLoops(Pair[SI].Src, 3825 LI->getLoopFor(Src->getParent()), 3826 Pair[SI].Loops); 3827 collectCommonLoops(Pair[SI].Dst, 3828 LI->getLoopFor(Dst->getParent()), 3829 Pair[SI].Loops); 3830 Result.Consistent = false; 3831 } 3832 else if (Pair[SI].Classification == Subscript::ZIV) 3833 Separable.set(SI); 3834 else { 3835 // SIV, RDIV, or MIV, so check for coupled group 3836 bool Done = true; 3837 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { 3838 SmallBitVector Intersection = Pair[SI].GroupLoops; 3839 Intersection &= Pair[SJ].GroupLoops; 3840 if (Intersection.any()) { 3841 // accumulate set of all the loops in group 3842 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; 3843 // accumulate set of all subscripts in group 3844 Pair[SJ].Group |= Pair[SI].Group; 3845 Done = false; 3846 } 3847 } 3848 if (Done) { 3849 if (Pair[SI].Group.count() == 1) 3850 Separable.set(SI); 3851 else 3852 Coupled.set(SI); 3853 } 3854 } 3855 } 3856 3857 Constraint NewConstraint; 3858 NewConstraint.setAny(SE); 3859 3860 // test separable subscripts 3861 for (int SI = Separable.find_first(); SI >= 0; SI = Separable.find_next(SI)) { 3862 switch (Pair[SI].Classification) { 3863 case Subscript::SIV: { 3864 unsigned Level; 3865 const SCEV *SplitIter = nullptr; 3866 (void) testSIV(Pair[SI].Src, Pair[SI].Dst, Level, 3867 Result, NewConstraint, SplitIter); 3868 if (Level == SplitLevel) { 3869 assert(SplitIter != nullptr); 3870 return SplitIter; 3871 } 3872 break; 3873 } 3874 case Subscript::ZIV: 3875 case Subscript::RDIV: 3876 case Subscript::MIV: 3877 break; 3878 default: 3879 llvm_unreachable("subscript has unexpected classification"); 3880 } 3881 } 3882 3883 if (Coupled.count()) { 3884 // test coupled subscript groups 3885 SmallVector<Constraint, 4> Constraints(MaxLevels + 1); 3886 for (unsigned II = 0; II <= MaxLevels; ++II) 3887 Constraints[II].setAny(SE); 3888 for (int SI = Coupled.find_first(); SI >= 0; SI = Coupled.find_next(SI)) { 3889 SmallBitVector Group(Pair[SI].Group); 3890 SmallBitVector Sivs(Pairs); 3891 SmallBitVector Mivs(Pairs); 3892 SmallBitVector ConstrainedLevels(MaxLevels + 1); 3893 for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) { 3894 if (Pair[SJ].Classification == Subscript::SIV) 3895 Sivs.set(SJ); 3896 else 3897 Mivs.set(SJ); 3898 } 3899 while (Sivs.any()) { 3900 bool Changed = false; 3901 for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) { 3902 // SJ is an SIV subscript that's part of the current coupled group 3903 unsigned Level; 3904 const SCEV *SplitIter = nullptr; 3905 (void) testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, 3906 Result, NewConstraint, SplitIter); 3907 if (Level == SplitLevel && SplitIter) 3908 return SplitIter; 3909 ConstrainedLevels.set(Level); 3910 if (intersectConstraints(&Constraints[Level], &NewConstraint)) 3911 Changed = true; 3912 Sivs.reset(SJ); 3913 } 3914 if (Changed) { 3915 // propagate, possibly creating new SIVs and ZIVs 3916 for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { 3917 // SJ is an MIV subscript that's part of the current coupled group 3918 if (propagate(Pair[SJ].Src, Pair[SJ].Dst, 3919 Pair[SJ].Loops, Constraints, Result.Consistent)) { 3920 Pair[SJ].Classification = 3921 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), 3922 Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), 3923 Pair[SJ].Loops); 3924 switch (Pair[SJ].Classification) { 3925 case Subscript::ZIV: 3926 Mivs.reset(SJ); 3927 break; 3928 case Subscript::SIV: 3929 Sivs.set(SJ); 3930 Mivs.reset(SJ); 3931 break; 3932 case Subscript::RDIV: 3933 case Subscript::MIV: 3934 break; 3935 default: 3936 llvm_unreachable("bad subscript classification"); 3937 } 3938 } 3939 } 3940 } 3941 } 3942 } 3943 } 3944 llvm_unreachable("somehow reached end of routine"); 3945 return nullptr; 3946 } 3947