1 //===-- DependenceAnalysis.cpp - DA Implementation --------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // DependenceAnalysis is an LLVM pass that analyses dependences between memory 11 // accesses. Currently, it is an (incomplete) implementation of the approach 12 // described in 13 // 14 // Practical Dependence Testing 15 // Goff, Kennedy, Tseng 16 // PLDI 1991 17 // 18 // There's a single entry point that analyzes the dependence between a pair 19 // of memory references in a function, returning either NULL, for no dependence, 20 // or a more-or-less detailed description of the dependence between them. 21 // 22 // Currently, the implementation cannot propagate constraints between 23 // coupled RDIV subscripts and lacks a multi-subscript MIV test. 24 // Both of these are conservative weaknesses; 25 // that is, not a source of correctness problems. 26 // 27 // The implementation depends on the GEP instruction to differentiate 28 // subscripts. Since Clang linearizes some array subscripts, the dependence 29 // analysis is using SCEV->delinearize to recover the representation of multiple 30 // subscripts, and thus avoid the more expensive and less precise MIV tests. The 31 // delinearization is controlled by the flag -da-delinearize. 32 // 33 // We should pay some careful attention to the possibility of integer overflow 34 // in the implementation of the various tests. This could happen with Add, 35 // Subtract, or Multiply, with both APInt's and SCEV's. 36 // 37 // Some non-linear subscript pairs can be handled by the GCD test 38 // (and perhaps other tests). 39 // Should explore how often these things occur. 40 // 41 // Finally, it seems like certain test cases expose weaknesses in the SCEV 42 // simplification, especially in the handling of sign and zero extensions. 43 // It could be useful to spend time exploring these. 44 // 45 // Please note that this is work in progress and the interface is subject to 46 // change. 47 // 48 //===----------------------------------------------------------------------===// 49 // // 50 // In memory of Ken Kennedy, 1945 - 2007 // 51 // // 52 //===----------------------------------------------------------------------===// 53 54 #include "llvm/Analysis/DependenceAnalysis.h" 55 #include "llvm/ADT/Statistic.h" 56 #include "llvm/Analysis/AliasAnalysis.h" 57 #include "llvm/Analysis/LoopInfo.h" 58 #include "llvm/Analysis/ScalarEvolution.h" 59 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 60 #include "llvm/Analysis/ValueTracking.h" 61 #include "llvm/IR/InstIterator.h" 62 #include "llvm/IR/Operator.h" 63 #include "llvm/Support/CommandLine.h" 64 #include "llvm/Support/Debug.h" 65 #include "llvm/Support/ErrorHandling.h" 66 #include "llvm/Support/raw_ostream.h" 67 68 using namespace llvm; 69 70 #define DEBUG_TYPE "da" 71 72 //===----------------------------------------------------------------------===// 73 // statistics 74 75 STATISTIC(TotalArrayPairs, "Array pairs tested"); 76 STATISTIC(SeparableSubscriptPairs, "Separable subscript pairs"); 77 STATISTIC(CoupledSubscriptPairs, "Coupled subscript pairs"); 78 STATISTIC(NonlinearSubscriptPairs, "Nonlinear subscript pairs"); 79 STATISTIC(ZIVapplications, "ZIV applications"); 80 STATISTIC(ZIVindependence, "ZIV independence"); 81 STATISTIC(StrongSIVapplications, "Strong SIV applications"); 82 STATISTIC(StrongSIVsuccesses, "Strong SIV successes"); 83 STATISTIC(StrongSIVindependence, "Strong SIV independence"); 84 STATISTIC(WeakCrossingSIVapplications, "Weak-Crossing SIV applications"); 85 STATISTIC(WeakCrossingSIVsuccesses, "Weak-Crossing SIV successes"); 86 STATISTIC(WeakCrossingSIVindependence, "Weak-Crossing SIV independence"); 87 STATISTIC(ExactSIVapplications, "Exact SIV applications"); 88 STATISTIC(ExactSIVsuccesses, "Exact SIV successes"); 89 STATISTIC(ExactSIVindependence, "Exact SIV independence"); 90 STATISTIC(WeakZeroSIVapplications, "Weak-Zero SIV applications"); 91 STATISTIC(WeakZeroSIVsuccesses, "Weak-Zero SIV successes"); 92 STATISTIC(WeakZeroSIVindependence, "Weak-Zero SIV independence"); 93 STATISTIC(ExactRDIVapplications, "Exact RDIV applications"); 94 STATISTIC(ExactRDIVindependence, "Exact RDIV independence"); 95 STATISTIC(SymbolicRDIVapplications, "Symbolic RDIV applications"); 96 STATISTIC(SymbolicRDIVindependence, "Symbolic RDIV independence"); 97 STATISTIC(DeltaApplications, "Delta applications"); 98 STATISTIC(DeltaSuccesses, "Delta successes"); 99 STATISTIC(DeltaIndependence, "Delta independence"); 100 STATISTIC(DeltaPropagations, "Delta propagations"); 101 STATISTIC(GCDapplications, "GCD applications"); 102 STATISTIC(GCDsuccesses, "GCD successes"); 103 STATISTIC(GCDindependence, "GCD independence"); 104 STATISTIC(BanerjeeApplications, "Banerjee applications"); 105 STATISTIC(BanerjeeIndependence, "Banerjee independence"); 106 STATISTIC(BanerjeeSuccesses, "Banerjee successes"); 107 108 static cl::opt<bool> 109 Delinearize("da-delinearize", cl::init(false), cl::Hidden, cl::ZeroOrMore, 110 cl::desc("Try to delinearize array references.")); 111 112 //===----------------------------------------------------------------------===// 113 // basics 114 115 INITIALIZE_PASS_BEGIN(DependenceAnalysis, "da", 116 "Dependence Analysis", true, true) 117 INITIALIZE_PASS_DEPENDENCY(LoopInfo) 118 INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) 119 INITIALIZE_AG_DEPENDENCY(AliasAnalysis) 120 INITIALIZE_PASS_END(DependenceAnalysis, "da", 121 "Dependence Analysis", true, true) 122 123 char DependenceAnalysis::ID = 0; 124 125 126 FunctionPass *llvm::createDependenceAnalysisPass() { 127 return new DependenceAnalysis(); 128 } 129 130 131 bool DependenceAnalysis::runOnFunction(Function &F) { 132 this->F = &F; 133 AA = &getAnalysis<AliasAnalysis>(); 134 SE = &getAnalysis<ScalarEvolution>(); 135 LI = &getAnalysis<LoopInfo>(); 136 return false; 137 } 138 139 140 void DependenceAnalysis::releaseMemory() { 141 } 142 143 144 void DependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { 145 AU.setPreservesAll(); 146 AU.addRequiredTransitive<AliasAnalysis>(); 147 AU.addRequiredTransitive<ScalarEvolution>(); 148 AU.addRequiredTransitive<LoopInfo>(); 149 } 150 151 152 // Used to test the dependence analyzer. 153 // Looks through the function, noting loads and stores. 154 // Calls depends() on every possible pair and prints out the result. 155 // Ignores all other instructions. 156 static 157 void dumpExampleDependence(raw_ostream &OS, Function *F, 158 DependenceAnalysis *DA) { 159 for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F); 160 SrcI != SrcE; ++SrcI) { 161 if (isa<StoreInst>(*SrcI) || isa<LoadInst>(*SrcI)) { 162 for (inst_iterator DstI = SrcI, DstE = inst_end(F); 163 DstI != DstE; ++DstI) { 164 if (isa<StoreInst>(*DstI) || isa<LoadInst>(*DstI)) { 165 OS << "da analyze - "; 166 if (auto D = DA->depends(&*SrcI, &*DstI, true)) { 167 D->dump(OS); 168 for (unsigned Level = 1; Level <= D->getLevels(); Level++) { 169 if (D->isSplitable(Level)) { 170 OS << "da analyze - split level = " << Level; 171 OS << ", iteration = " << *DA->getSplitIteration(*D, Level); 172 OS << "!\n"; 173 } 174 } 175 } 176 else 177 OS << "none!\n"; 178 } 179 } 180 } 181 } 182 } 183 184 185 void DependenceAnalysis::print(raw_ostream &OS, const Module*) const { 186 dumpExampleDependence(OS, F, const_cast<DependenceAnalysis *>(this)); 187 } 188 189 //===----------------------------------------------------------------------===// 190 // Dependence methods 191 192 // Returns true if this is an input dependence. 193 bool Dependence::isInput() const { 194 return Src->mayReadFromMemory() && Dst->mayReadFromMemory(); 195 } 196 197 198 // Returns true if this is an output dependence. 199 bool Dependence::isOutput() const { 200 return Src->mayWriteToMemory() && Dst->mayWriteToMemory(); 201 } 202 203 204 // Returns true if this is an flow (aka true) dependence. 205 bool Dependence::isFlow() const { 206 return Src->mayWriteToMemory() && Dst->mayReadFromMemory(); 207 } 208 209 210 // Returns true if this is an anti dependence. 211 bool Dependence::isAnti() const { 212 return Src->mayReadFromMemory() && Dst->mayWriteToMemory(); 213 } 214 215 216 // Returns true if a particular level is scalar; that is, 217 // if no subscript in the source or destination mention the induction 218 // variable associated with the loop at this level. 219 // Leave this out of line, so it will serve as a virtual method anchor 220 bool Dependence::isScalar(unsigned level) const { 221 return false; 222 } 223 224 225 //===----------------------------------------------------------------------===// 226 // FullDependence methods 227 228 FullDependence::FullDependence(Instruction *Source, 229 Instruction *Destination, 230 bool PossiblyLoopIndependent, 231 unsigned CommonLevels) : 232 Dependence(Source, Destination), 233 Levels(CommonLevels), 234 LoopIndependent(PossiblyLoopIndependent) { 235 Consistent = true; 236 DV = CommonLevels ? new DVEntry[CommonLevels] : nullptr; 237 } 238 239 // The rest are simple getters that hide the implementation. 240 241 // getDirection - Returns the direction associated with a particular level. 242 unsigned FullDependence::getDirection(unsigned Level) const { 243 assert(0 < Level && Level <= Levels && "Level out of range"); 244 return DV[Level - 1].Direction; 245 } 246 247 248 // Returns the distance (or NULL) associated with a particular level. 249 const SCEV *FullDependence::getDistance(unsigned Level) const { 250 assert(0 < Level && Level <= Levels && "Level out of range"); 251 return DV[Level - 1].Distance; 252 } 253 254 255 // Returns true if a particular level is scalar; that is, 256 // if no subscript in the source or destination mention the induction 257 // variable associated with the loop at this level. 258 bool FullDependence::isScalar(unsigned Level) const { 259 assert(0 < Level && Level <= Levels && "Level out of range"); 260 return DV[Level - 1].Scalar; 261 } 262 263 264 // Returns true if peeling the first iteration from this loop 265 // will break this dependence. 266 bool FullDependence::isPeelFirst(unsigned Level) const { 267 assert(0 < Level && Level <= Levels && "Level out of range"); 268 return DV[Level - 1].PeelFirst; 269 } 270 271 272 // Returns true if peeling the last iteration from this loop 273 // will break this dependence. 274 bool FullDependence::isPeelLast(unsigned Level) const { 275 assert(0 < Level && Level <= Levels && "Level out of range"); 276 return DV[Level - 1].PeelLast; 277 } 278 279 280 // Returns true if splitting this loop will break the dependence. 281 bool FullDependence::isSplitable(unsigned Level) const { 282 assert(0 < Level && Level <= Levels && "Level out of range"); 283 return DV[Level - 1].Splitable; 284 } 285 286 287 //===----------------------------------------------------------------------===// 288 // DependenceAnalysis::Constraint methods 289 290 // If constraint is a point <X, Y>, returns X. 291 // Otherwise assert. 292 const SCEV *DependenceAnalysis::Constraint::getX() const { 293 assert(Kind == Point && "Kind should be Point"); 294 return A; 295 } 296 297 298 // If constraint is a point <X, Y>, returns Y. 299 // Otherwise assert. 300 const SCEV *DependenceAnalysis::Constraint::getY() const { 301 assert(Kind == Point && "Kind should be Point"); 302 return B; 303 } 304 305 306 // If constraint is a line AX + BY = C, returns A. 307 // Otherwise assert. 308 const SCEV *DependenceAnalysis::Constraint::getA() const { 309 assert((Kind == Line || Kind == Distance) && 310 "Kind should be Line (or Distance)"); 311 return A; 312 } 313 314 315 // If constraint is a line AX + BY = C, returns B. 316 // Otherwise assert. 317 const SCEV *DependenceAnalysis::Constraint::getB() const { 318 assert((Kind == Line || Kind == Distance) && 319 "Kind should be Line (or Distance)"); 320 return B; 321 } 322 323 324 // If constraint is a line AX + BY = C, returns C. 325 // Otherwise assert. 326 const SCEV *DependenceAnalysis::Constraint::getC() const { 327 assert((Kind == Line || Kind == Distance) && 328 "Kind should be Line (or Distance)"); 329 return C; 330 } 331 332 333 // If constraint is a distance, returns D. 334 // Otherwise assert. 335 const SCEV *DependenceAnalysis::Constraint::getD() const { 336 assert(Kind == Distance && "Kind should be Distance"); 337 return SE->getNegativeSCEV(C); 338 } 339 340 341 // Returns the loop associated with this constraint. 342 const Loop *DependenceAnalysis::Constraint::getAssociatedLoop() const { 343 assert((Kind == Distance || Kind == Line || Kind == Point) && 344 "Kind should be Distance, Line, or Point"); 345 return AssociatedLoop; 346 } 347 348 349 void DependenceAnalysis::Constraint::setPoint(const SCEV *X, 350 const SCEV *Y, 351 const Loop *CurLoop) { 352 Kind = Point; 353 A = X; 354 B = Y; 355 AssociatedLoop = CurLoop; 356 } 357 358 359 void DependenceAnalysis::Constraint::setLine(const SCEV *AA, 360 const SCEV *BB, 361 const SCEV *CC, 362 const Loop *CurLoop) { 363 Kind = Line; 364 A = AA; 365 B = BB; 366 C = CC; 367 AssociatedLoop = CurLoop; 368 } 369 370 371 void DependenceAnalysis::Constraint::setDistance(const SCEV *D, 372 const Loop *CurLoop) { 373 Kind = Distance; 374 A = SE->getConstant(D->getType(), 1); 375 B = SE->getNegativeSCEV(A); 376 C = SE->getNegativeSCEV(D); 377 AssociatedLoop = CurLoop; 378 } 379 380 381 void DependenceAnalysis::Constraint::setEmpty() { 382 Kind = Empty; 383 } 384 385 386 void DependenceAnalysis::Constraint::setAny(ScalarEvolution *NewSE) { 387 SE = NewSE; 388 Kind = Any; 389 } 390 391 392 // For debugging purposes. Dumps the constraint out to OS. 393 void DependenceAnalysis::Constraint::dump(raw_ostream &OS) const { 394 if (isEmpty()) 395 OS << " Empty\n"; 396 else if (isAny()) 397 OS << " Any\n"; 398 else if (isPoint()) 399 OS << " Point is <" << *getX() << ", " << *getY() << ">\n"; 400 else if (isDistance()) 401 OS << " Distance is " << *getD() << 402 " (" << *getA() << "*X + " << *getB() << "*Y = " << *getC() << ")\n"; 403 else if (isLine()) 404 OS << " Line is " << *getA() << "*X + " << 405 *getB() << "*Y = " << *getC() << "\n"; 406 else 407 llvm_unreachable("unknown constraint type in Constraint::dump"); 408 } 409 410 411 // Updates X with the intersection 412 // of the Constraints X and Y. Returns true if X has changed. 413 // Corresponds to Figure 4 from the paper 414 // 415 // Practical Dependence Testing 416 // Goff, Kennedy, Tseng 417 // PLDI 1991 418 bool DependenceAnalysis::intersectConstraints(Constraint *X, 419 const Constraint *Y) { 420 ++DeltaApplications; 421 DEBUG(dbgs() << "\tintersect constraints\n"); 422 DEBUG(dbgs() << "\t X ="; X->dump(dbgs())); 423 DEBUG(dbgs() << "\t Y ="; Y->dump(dbgs())); 424 assert(!Y->isPoint() && "Y must not be a Point"); 425 if (X->isAny()) { 426 if (Y->isAny()) 427 return false; 428 *X = *Y; 429 return true; 430 } 431 if (X->isEmpty()) 432 return false; 433 if (Y->isEmpty()) { 434 X->setEmpty(); 435 return true; 436 } 437 438 if (X->isDistance() && Y->isDistance()) { 439 DEBUG(dbgs() << "\t intersect 2 distances\n"); 440 if (isKnownPredicate(CmpInst::ICMP_EQ, X->getD(), Y->getD())) 441 return false; 442 if (isKnownPredicate(CmpInst::ICMP_NE, X->getD(), Y->getD())) { 443 X->setEmpty(); 444 ++DeltaSuccesses; 445 return true; 446 } 447 // Hmmm, interesting situation. 448 // I guess if either is constant, keep it and ignore the other. 449 if (isa<SCEVConstant>(Y->getD())) { 450 *X = *Y; 451 return true; 452 } 453 return false; 454 } 455 456 // At this point, the pseudo-code in Figure 4 of the paper 457 // checks if (X->isPoint() && Y->isPoint()). 458 // This case can't occur in our implementation, 459 // since a Point can only arise as the result of intersecting 460 // two Line constraints, and the right-hand value, Y, is never 461 // the result of an intersection. 462 assert(!(X->isPoint() && Y->isPoint()) && 463 "We shouldn't ever see X->isPoint() && Y->isPoint()"); 464 465 if (X->isLine() && Y->isLine()) { 466 DEBUG(dbgs() << "\t intersect 2 lines\n"); 467 const SCEV *Prod1 = SE->getMulExpr(X->getA(), Y->getB()); 468 const SCEV *Prod2 = SE->getMulExpr(X->getB(), Y->getA()); 469 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) { 470 // slopes are equal, so lines are parallel 471 DEBUG(dbgs() << "\t\tsame slope\n"); 472 Prod1 = SE->getMulExpr(X->getC(), Y->getB()); 473 Prod2 = SE->getMulExpr(X->getB(), Y->getC()); 474 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) 475 return false; 476 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { 477 X->setEmpty(); 478 ++DeltaSuccesses; 479 return true; 480 } 481 return false; 482 } 483 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { 484 // slopes differ, so lines intersect 485 DEBUG(dbgs() << "\t\tdifferent slopes\n"); 486 const SCEV *C1B2 = SE->getMulExpr(X->getC(), Y->getB()); 487 const SCEV *C1A2 = SE->getMulExpr(X->getC(), Y->getA()); 488 const SCEV *C2B1 = SE->getMulExpr(Y->getC(), X->getB()); 489 const SCEV *C2A1 = SE->getMulExpr(Y->getC(), X->getA()); 490 const SCEV *A1B2 = SE->getMulExpr(X->getA(), Y->getB()); 491 const SCEV *A2B1 = SE->getMulExpr(Y->getA(), X->getB()); 492 const SCEVConstant *C1A2_C2A1 = 493 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1A2, C2A1)); 494 const SCEVConstant *C1B2_C2B1 = 495 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1B2, C2B1)); 496 const SCEVConstant *A1B2_A2B1 = 497 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A1B2, A2B1)); 498 const SCEVConstant *A2B1_A1B2 = 499 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A2B1, A1B2)); 500 if (!C1B2_C2B1 || !C1A2_C2A1 || 501 !A1B2_A2B1 || !A2B1_A1B2) 502 return false; 503 APInt Xtop = C1B2_C2B1->getValue()->getValue(); 504 APInt Xbot = A1B2_A2B1->getValue()->getValue(); 505 APInt Ytop = C1A2_C2A1->getValue()->getValue(); 506 APInt Ybot = A2B1_A1B2->getValue()->getValue(); 507 DEBUG(dbgs() << "\t\tXtop = " << Xtop << "\n"); 508 DEBUG(dbgs() << "\t\tXbot = " << Xbot << "\n"); 509 DEBUG(dbgs() << "\t\tYtop = " << Ytop << "\n"); 510 DEBUG(dbgs() << "\t\tYbot = " << Ybot << "\n"); 511 APInt Xq = Xtop; // these need to be initialized, even 512 APInt Xr = Xtop; // though they're just going to be overwritten 513 APInt::sdivrem(Xtop, Xbot, Xq, Xr); 514 APInt Yq = Ytop; 515 APInt Yr = Ytop; 516 APInt::sdivrem(Ytop, Ybot, Yq, Yr); 517 if (Xr != 0 || Yr != 0) { 518 X->setEmpty(); 519 ++DeltaSuccesses; 520 return true; 521 } 522 DEBUG(dbgs() << "\t\tX = " << Xq << ", Y = " << Yq << "\n"); 523 if (Xq.slt(0) || Yq.slt(0)) { 524 X->setEmpty(); 525 ++DeltaSuccesses; 526 return true; 527 } 528 if (const SCEVConstant *CUB = 529 collectConstantUpperBound(X->getAssociatedLoop(), Prod1->getType())) { 530 APInt UpperBound = CUB->getValue()->getValue(); 531 DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n"); 532 if (Xq.sgt(UpperBound) || Yq.sgt(UpperBound)) { 533 X->setEmpty(); 534 ++DeltaSuccesses; 535 return true; 536 } 537 } 538 X->setPoint(SE->getConstant(Xq), 539 SE->getConstant(Yq), 540 X->getAssociatedLoop()); 541 ++DeltaSuccesses; 542 return true; 543 } 544 return false; 545 } 546 547 // if (X->isLine() && Y->isPoint()) This case can't occur. 548 assert(!(X->isLine() && Y->isPoint()) && "This case should never occur"); 549 550 if (X->isPoint() && Y->isLine()) { 551 DEBUG(dbgs() << "\t intersect Point and Line\n"); 552 const SCEV *A1X1 = SE->getMulExpr(Y->getA(), X->getX()); 553 const SCEV *B1Y1 = SE->getMulExpr(Y->getB(), X->getY()); 554 const SCEV *Sum = SE->getAddExpr(A1X1, B1Y1); 555 if (isKnownPredicate(CmpInst::ICMP_EQ, Sum, Y->getC())) 556 return false; 557 if (isKnownPredicate(CmpInst::ICMP_NE, Sum, Y->getC())) { 558 X->setEmpty(); 559 ++DeltaSuccesses; 560 return true; 561 } 562 return false; 563 } 564 565 llvm_unreachable("shouldn't reach the end of Constraint intersection"); 566 return false; 567 } 568 569 570 //===----------------------------------------------------------------------===// 571 // DependenceAnalysis methods 572 573 // For debugging purposes. Dumps a dependence to OS. 574 void Dependence::dump(raw_ostream &OS) const { 575 bool Splitable = false; 576 if (isConfused()) 577 OS << "confused"; 578 else { 579 if (isConsistent()) 580 OS << "consistent "; 581 if (isFlow()) 582 OS << "flow"; 583 else if (isOutput()) 584 OS << "output"; 585 else if (isAnti()) 586 OS << "anti"; 587 else if (isInput()) 588 OS << "input"; 589 unsigned Levels = getLevels(); 590 OS << " ["; 591 for (unsigned II = 1; II <= Levels; ++II) { 592 if (isSplitable(II)) 593 Splitable = true; 594 if (isPeelFirst(II)) 595 OS << 'p'; 596 const SCEV *Distance = getDistance(II); 597 if (Distance) 598 OS << *Distance; 599 else if (isScalar(II)) 600 OS << "S"; 601 else { 602 unsigned Direction = getDirection(II); 603 if (Direction == DVEntry::ALL) 604 OS << "*"; 605 else { 606 if (Direction & DVEntry::LT) 607 OS << "<"; 608 if (Direction & DVEntry::EQ) 609 OS << "="; 610 if (Direction & DVEntry::GT) 611 OS << ">"; 612 } 613 } 614 if (isPeelLast(II)) 615 OS << 'p'; 616 if (II < Levels) 617 OS << " "; 618 } 619 if (isLoopIndependent()) 620 OS << "|<"; 621 OS << "]"; 622 if (Splitable) 623 OS << " splitable"; 624 } 625 OS << "!\n"; 626 } 627 628 629 630 static 631 AliasAnalysis::AliasResult underlyingObjectsAlias(AliasAnalysis *AA, 632 const Value *A, 633 const Value *B) { 634 const Value *AObj = GetUnderlyingObject(A); 635 const Value *BObj = GetUnderlyingObject(B); 636 return AA->alias(AObj, AA->getTypeStoreSize(AObj->getType()), 637 BObj, AA->getTypeStoreSize(BObj->getType())); 638 } 639 640 641 // Returns true if the load or store can be analyzed. Atomic and volatile 642 // operations have properties which this analysis does not understand. 643 static 644 bool isLoadOrStore(const Instruction *I) { 645 if (const LoadInst *LI = dyn_cast<LoadInst>(I)) 646 return LI->isUnordered(); 647 else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) 648 return SI->isUnordered(); 649 return false; 650 } 651 652 653 static 654 Value *getPointerOperand(Instruction *I) { 655 if (LoadInst *LI = dyn_cast<LoadInst>(I)) 656 return LI->getPointerOperand(); 657 if (StoreInst *SI = dyn_cast<StoreInst>(I)) 658 return SI->getPointerOperand(); 659 llvm_unreachable("Value is not load or store instruction"); 660 return nullptr; 661 } 662 663 664 // Examines the loop nesting of the Src and Dst 665 // instructions and establishes their shared loops. Sets the variables 666 // CommonLevels, SrcLevels, and MaxLevels. 667 // The source and destination instructions needn't be contained in the same 668 // loop. The routine establishNestingLevels finds the level of most deeply 669 // nested loop that contains them both, CommonLevels. An instruction that's 670 // not contained in a loop is at level = 0. MaxLevels is equal to the level 671 // of the source plus the level of the destination, minus CommonLevels. 672 // This lets us allocate vectors MaxLevels in length, with room for every 673 // distinct loop referenced in both the source and destination subscripts. 674 // The variable SrcLevels is the nesting depth of the source instruction. 675 // It's used to help calculate distinct loops referenced by the destination. 676 // Here's the map from loops to levels: 677 // 0 - unused 678 // 1 - outermost common loop 679 // ... - other common loops 680 // CommonLevels - innermost common loop 681 // ... - loops containing Src but not Dst 682 // SrcLevels - innermost loop containing Src but not Dst 683 // ... - loops containing Dst but not Src 684 // MaxLevels - innermost loops containing Dst but not Src 685 // Consider the follow code fragment: 686 // for (a = ...) { 687 // for (b = ...) { 688 // for (c = ...) { 689 // for (d = ...) { 690 // A[] = ...; 691 // } 692 // } 693 // for (e = ...) { 694 // for (f = ...) { 695 // for (g = ...) { 696 // ... = A[]; 697 // } 698 // } 699 // } 700 // } 701 // } 702 // If we're looking at the possibility of a dependence between the store 703 // to A (the Src) and the load from A (the Dst), we'll note that they 704 // have 2 loops in common, so CommonLevels will equal 2 and the direction 705 // vector for Result will have 2 entries. SrcLevels = 4 and MaxLevels = 7. 706 // A map from loop names to loop numbers would look like 707 // a - 1 708 // b - 2 = CommonLevels 709 // c - 3 710 // d - 4 = SrcLevels 711 // e - 5 712 // f - 6 713 // g - 7 = MaxLevels 714 void DependenceAnalysis::establishNestingLevels(const Instruction *Src, 715 const Instruction *Dst) { 716 const BasicBlock *SrcBlock = Src->getParent(); 717 const BasicBlock *DstBlock = Dst->getParent(); 718 unsigned SrcLevel = LI->getLoopDepth(SrcBlock); 719 unsigned DstLevel = LI->getLoopDepth(DstBlock); 720 const Loop *SrcLoop = LI->getLoopFor(SrcBlock); 721 const Loop *DstLoop = LI->getLoopFor(DstBlock); 722 SrcLevels = SrcLevel; 723 MaxLevels = SrcLevel + DstLevel; 724 while (SrcLevel > DstLevel) { 725 SrcLoop = SrcLoop->getParentLoop(); 726 SrcLevel--; 727 } 728 while (DstLevel > SrcLevel) { 729 DstLoop = DstLoop->getParentLoop(); 730 DstLevel--; 731 } 732 while (SrcLoop != DstLoop) { 733 SrcLoop = SrcLoop->getParentLoop(); 734 DstLoop = DstLoop->getParentLoop(); 735 SrcLevel--; 736 } 737 CommonLevels = SrcLevel; 738 MaxLevels -= CommonLevels; 739 } 740 741 742 // Given one of the loops containing the source, return 743 // its level index in our numbering scheme. 744 unsigned DependenceAnalysis::mapSrcLoop(const Loop *SrcLoop) const { 745 return SrcLoop->getLoopDepth(); 746 } 747 748 749 // Given one of the loops containing the destination, 750 // return its level index in our numbering scheme. 751 unsigned DependenceAnalysis::mapDstLoop(const Loop *DstLoop) const { 752 unsigned D = DstLoop->getLoopDepth(); 753 if (D > CommonLevels) 754 return D - CommonLevels + SrcLevels; 755 else 756 return D; 757 } 758 759 760 // Returns true if Expression is loop invariant in LoopNest. 761 bool DependenceAnalysis::isLoopInvariant(const SCEV *Expression, 762 const Loop *LoopNest) const { 763 if (!LoopNest) 764 return true; 765 return SE->isLoopInvariant(Expression, LoopNest) && 766 isLoopInvariant(Expression, LoopNest->getParentLoop()); 767 } 768 769 770 771 // Finds the set of loops from the LoopNest that 772 // have a level <= CommonLevels and are referred to by the SCEV Expression. 773 void DependenceAnalysis::collectCommonLoops(const SCEV *Expression, 774 const Loop *LoopNest, 775 SmallBitVector &Loops) const { 776 while (LoopNest) { 777 unsigned Level = LoopNest->getLoopDepth(); 778 if (Level <= CommonLevels && !SE->isLoopInvariant(Expression, LoopNest)) 779 Loops.set(Level); 780 LoopNest = LoopNest->getParentLoop(); 781 } 782 } 783 784 void DependenceAnalysis::unifySubscriptType(Subscript *Pair) { 785 const SCEV *Src = Pair->Src; 786 const SCEV *Dst = Pair->Dst; 787 IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType()); 788 IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType()); 789 if (SrcTy == nullptr || DstTy == nullptr) { 790 assert(SrcTy == DstTy && "This function only unify integer types and " 791 "expect Src and Dst share the same type " 792 "otherwise."); 793 return; 794 } 795 if (SrcTy->getBitWidth() > DstTy->getBitWidth()) { 796 // Sign-extend Dst to typeof(Src) if typeof(Src) is wider than typeof(Dst). 797 Pair->Dst = SE->getSignExtendExpr(Dst, SrcTy); 798 } else if (SrcTy->getBitWidth() < DstTy->getBitWidth()) { 799 // Sign-extend Src to typeof(Dst) if typeof(Dst) is wider than typeof(Src). 800 Pair->Src = SE->getSignExtendExpr(Src, DstTy); 801 } 802 } 803 804 // removeMatchingExtensions - Examines a subscript pair. 805 // If the source and destination are identically sign (or zero) 806 // extended, it strips off the extension in an effect to simplify 807 // the actual analysis. 808 void DependenceAnalysis::removeMatchingExtensions(Subscript *Pair) { 809 const SCEV *Src = Pair->Src; 810 const SCEV *Dst = Pair->Dst; 811 if ((isa<SCEVZeroExtendExpr>(Src) && isa<SCEVZeroExtendExpr>(Dst)) || 812 (isa<SCEVSignExtendExpr>(Src) && isa<SCEVSignExtendExpr>(Dst))) { 813 const SCEVCastExpr *SrcCast = cast<SCEVCastExpr>(Src); 814 const SCEVCastExpr *DstCast = cast<SCEVCastExpr>(Dst); 815 const SCEV *SrcCastOp = SrcCast->getOperand(); 816 const SCEV *DstCastOp = DstCast->getOperand(); 817 if (SrcCastOp->getType() == DstCastOp->getType()) { 818 Pair->Src = SrcCastOp; 819 Pair->Dst = DstCastOp; 820 } 821 } 822 } 823 824 825 // Examine the scev and return true iff it's linear. 826 // Collect any loops mentioned in the set of "Loops". 827 bool DependenceAnalysis::checkSrcSubscript(const SCEV *Src, 828 const Loop *LoopNest, 829 SmallBitVector &Loops) { 830 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Src); 831 if (!AddRec) 832 return isLoopInvariant(Src, LoopNest); 833 const SCEV *Start = AddRec->getStart(); 834 const SCEV *Step = AddRec->getStepRecurrence(*SE); 835 if (!isLoopInvariant(Step, LoopNest)) 836 return false; 837 Loops.set(mapSrcLoop(AddRec->getLoop())); 838 return checkSrcSubscript(Start, LoopNest, Loops); 839 } 840 841 842 843 // Examine the scev and return true iff it's linear. 844 // Collect any loops mentioned in the set of "Loops". 845 bool DependenceAnalysis::checkDstSubscript(const SCEV *Dst, 846 const Loop *LoopNest, 847 SmallBitVector &Loops) { 848 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Dst); 849 if (!AddRec) 850 return isLoopInvariant(Dst, LoopNest); 851 const SCEV *Start = AddRec->getStart(); 852 const SCEV *Step = AddRec->getStepRecurrence(*SE); 853 if (!isLoopInvariant(Step, LoopNest)) 854 return false; 855 Loops.set(mapDstLoop(AddRec->getLoop())); 856 return checkDstSubscript(Start, LoopNest, Loops); 857 } 858 859 860 // Examines the subscript pair (the Src and Dst SCEVs) 861 // and classifies it as either ZIV, SIV, RDIV, MIV, or Nonlinear. 862 // Collects the associated loops in a set. 863 DependenceAnalysis::Subscript::ClassificationKind 864 DependenceAnalysis::classifyPair(const SCEV *Src, const Loop *SrcLoopNest, 865 const SCEV *Dst, const Loop *DstLoopNest, 866 SmallBitVector &Loops) { 867 SmallBitVector SrcLoops(MaxLevels + 1); 868 SmallBitVector DstLoops(MaxLevels + 1); 869 if (!checkSrcSubscript(Src, SrcLoopNest, SrcLoops)) 870 return Subscript::NonLinear; 871 if (!checkDstSubscript(Dst, DstLoopNest, DstLoops)) 872 return Subscript::NonLinear; 873 Loops = SrcLoops; 874 Loops |= DstLoops; 875 unsigned N = Loops.count(); 876 if (N == 0) 877 return Subscript::ZIV; 878 if (N == 1) 879 return Subscript::SIV; 880 if (N == 2 && (SrcLoops.count() == 0 || 881 DstLoops.count() == 0 || 882 (SrcLoops.count() == 1 && DstLoops.count() == 1))) 883 return Subscript::RDIV; 884 return Subscript::MIV; 885 } 886 887 888 // A wrapper around SCEV::isKnownPredicate. 889 // Looks for cases where we're interested in comparing for equality. 890 // If both X and Y have been identically sign or zero extended, 891 // it strips off the (confusing) extensions before invoking 892 // SCEV::isKnownPredicate. Perhaps, someday, the ScalarEvolution package 893 // will be similarly updated. 894 // 895 // If SCEV::isKnownPredicate can't prove the predicate, 896 // we try simple subtraction, which seems to help in some cases 897 // involving symbolics. 898 bool DependenceAnalysis::isKnownPredicate(ICmpInst::Predicate Pred, 899 const SCEV *X, 900 const SCEV *Y) const { 901 if (Pred == CmpInst::ICMP_EQ || 902 Pred == CmpInst::ICMP_NE) { 903 if ((isa<SCEVSignExtendExpr>(X) && 904 isa<SCEVSignExtendExpr>(Y)) || 905 (isa<SCEVZeroExtendExpr>(X) && 906 isa<SCEVZeroExtendExpr>(Y))) { 907 const SCEVCastExpr *CX = cast<SCEVCastExpr>(X); 908 const SCEVCastExpr *CY = cast<SCEVCastExpr>(Y); 909 const SCEV *Xop = CX->getOperand(); 910 const SCEV *Yop = CY->getOperand(); 911 if (Xop->getType() == Yop->getType()) { 912 X = Xop; 913 Y = Yop; 914 } 915 } 916 } 917 if (SE->isKnownPredicate(Pred, X, Y)) 918 return true; 919 // If SE->isKnownPredicate can't prove the condition, 920 // we try the brute-force approach of subtracting 921 // and testing the difference. 922 // By testing with SE->isKnownPredicate first, we avoid 923 // the possibility of overflow when the arguments are constants. 924 const SCEV *Delta = SE->getMinusSCEV(X, Y); 925 switch (Pred) { 926 case CmpInst::ICMP_EQ: 927 return Delta->isZero(); 928 case CmpInst::ICMP_NE: 929 return SE->isKnownNonZero(Delta); 930 case CmpInst::ICMP_SGE: 931 return SE->isKnownNonNegative(Delta); 932 case CmpInst::ICMP_SLE: 933 return SE->isKnownNonPositive(Delta); 934 case CmpInst::ICMP_SGT: 935 return SE->isKnownPositive(Delta); 936 case CmpInst::ICMP_SLT: 937 return SE->isKnownNegative(Delta); 938 default: 939 llvm_unreachable("unexpected predicate in isKnownPredicate"); 940 } 941 } 942 943 944 // All subscripts are all the same type. 945 // Loop bound may be smaller (e.g., a char). 946 // Should zero extend loop bound, since it's always >= 0. 947 // This routine collects upper bound and extends if needed. 948 // Return null if no bound available. 949 const SCEV *DependenceAnalysis::collectUpperBound(const Loop *L, 950 Type *T) const { 951 if (SE->hasLoopInvariantBackedgeTakenCount(L)) { 952 const SCEV *UB = SE->getBackedgeTakenCount(L); 953 return SE->getNoopOrZeroExtend(UB, T); 954 } 955 return nullptr; 956 } 957 958 959 // Calls collectUpperBound(), then attempts to cast it to SCEVConstant. 960 // If the cast fails, returns NULL. 961 const SCEVConstant *DependenceAnalysis::collectConstantUpperBound(const Loop *L, 962 Type *T 963 ) const { 964 if (const SCEV *UB = collectUpperBound(L, T)) 965 return dyn_cast<SCEVConstant>(UB); 966 return nullptr; 967 } 968 969 970 // testZIV - 971 // When we have a pair of subscripts of the form [c1] and [c2], 972 // where c1 and c2 are both loop invariant, we attack it using 973 // the ZIV test. Basically, we test by comparing the two values, 974 // but there are actually three possible results: 975 // 1) the values are equal, so there's a dependence 976 // 2) the values are different, so there's no dependence 977 // 3) the values might be equal, so we have to assume a dependence. 978 // 979 // Return true if dependence disproved. 980 bool DependenceAnalysis::testZIV(const SCEV *Src, 981 const SCEV *Dst, 982 FullDependence &Result) const { 983 DEBUG(dbgs() << " src = " << *Src << "\n"); 984 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 985 ++ZIVapplications; 986 if (isKnownPredicate(CmpInst::ICMP_EQ, Src, Dst)) { 987 DEBUG(dbgs() << " provably dependent\n"); 988 return false; // provably dependent 989 } 990 if (isKnownPredicate(CmpInst::ICMP_NE, Src, Dst)) { 991 DEBUG(dbgs() << " provably independent\n"); 992 ++ZIVindependence; 993 return true; // provably independent 994 } 995 DEBUG(dbgs() << " possibly dependent\n"); 996 Result.Consistent = false; 997 return false; // possibly dependent 998 } 999 1000 1001 // strongSIVtest - 1002 // From the paper, Practical Dependence Testing, Section 4.2.1 1003 // 1004 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 + a*i], 1005 // where i is an induction variable, c1 and c2 are loop invariant, 1006 // and a is a constant, we can solve it exactly using the Strong SIV test. 1007 // 1008 // Can prove independence. Failing that, can compute distance (and direction). 1009 // In the presence of symbolic terms, we can sometimes make progress. 1010 // 1011 // If there's a dependence, 1012 // 1013 // c1 + a*i = c2 + a*i' 1014 // 1015 // The dependence distance is 1016 // 1017 // d = i' - i = (c1 - c2)/a 1018 // 1019 // A dependence only exists if d is an integer and abs(d) <= U, where U is the 1020 // loop's upper bound. If a dependence exists, the dependence direction is 1021 // defined as 1022 // 1023 // { < if d > 0 1024 // direction = { = if d = 0 1025 // { > if d < 0 1026 // 1027 // Return true if dependence disproved. 1028 bool DependenceAnalysis::strongSIVtest(const SCEV *Coeff, 1029 const SCEV *SrcConst, 1030 const SCEV *DstConst, 1031 const Loop *CurLoop, 1032 unsigned Level, 1033 FullDependence &Result, 1034 Constraint &NewConstraint) const { 1035 DEBUG(dbgs() << "\tStrong SIV test\n"); 1036 DEBUG(dbgs() << "\t Coeff = " << *Coeff); 1037 DEBUG(dbgs() << ", " << *Coeff->getType() << "\n"); 1038 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst); 1039 DEBUG(dbgs() << ", " << *SrcConst->getType() << "\n"); 1040 DEBUG(dbgs() << "\t DstConst = " << *DstConst); 1041 DEBUG(dbgs() << ", " << *DstConst->getType() << "\n"); 1042 ++StrongSIVapplications; 1043 assert(0 < Level && Level <= CommonLevels && "level out of range"); 1044 Level--; 1045 1046 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); 1047 DEBUG(dbgs() << "\t Delta = " << *Delta); 1048 DEBUG(dbgs() << ", " << *Delta->getType() << "\n"); 1049 1050 // check that |Delta| < iteration count 1051 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1052 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound); 1053 DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n"); 1054 const SCEV *AbsDelta = 1055 SE->isKnownNonNegative(Delta) ? Delta : SE->getNegativeSCEV(Delta); 1056 const SCEV *AbsCoeff = 1057 SE->isKnownNonNegative(Coeff) ? Coeff : SE->getNegativeSCEV(Coeff); 1058 const SCEV *Product = SE->getMulExpr(UpperBound, AbsCoeff); 1059 if (isKnownPredicate(CmpInst::ICMP_SGT, AbsDelta, Product)) { 1060 // Distance greater than trip count - no dependence 1061 ++StrongSIVindependence; 1062 ++StrongSIVsuccesses; 1063 return true; 1064 } 1065 } 1066 1067 // Can we compute distance? 1068 if (isa<SCEVConstant>(Delta) && isa<SCEVConstant>(Coeff)) { 1069 APInt ConstDelta = cast<SCEVConstant>(Delta)->getValue()->getValue(); 1070 APInt ConstCoeff = cast<SCEVConstant>(Coeff)->getValue()->getValue(); 1071 APInt Distance = ConstDelta; // these need to be initialized 1072 APInt Remainder = ConstDelta; 1073 APInt::sdivrem(ConstDelta, ConstCoeff, Distance, Remainder); 1074 DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); 1075 DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1076 // Make sure Coeff divides Delta exactly 1077 if (Remainder != 0) { 1078 // Coeff doesn't divide Distance, no dependence 1079 ++StrongSIVindependence; 1080 ++StrongSIVsuccesses; 1081 return true; 1082 } 1083 Result.DV[Level].Distance = SE->getConstant(Distance); 1084 NewConstraint.setDistance(SE->getConstant(Distance), CurLoop); 1085 if (Distance.sgt(0)) 1086 Result.DV[Level].Direction &= Dependence::DVEntry::LT; 1087 else if (Distance.slt(0)) 1088 Result.DV[Level].Direction &= Dependence::DVEntry::GT; 1089 else 1090 Result.DV[Level].Direction &= Dependence::DVEntry::EQ; 1091 ++StrongSIVsuccesses; 1092 } 1093 else if (Delta->isZero()) { 1094 // since 0/X == 0 1095 Result.DV[Level].Distance = Delta; 1096 NewConstraint.setDistance(Delta, CurLoop); 1097 Result.DV[Level].Direction &= Dependence::DVEntry::EQ; 1098 ++StrongSIVsuccesses; 1099 } 1100 else { 1101 if (Coeff->isOne()) { 1102 DEBUG(dbgs() << "\t Distance = " << *Delta << "\n"); 1103 Result.DV[Level].Distance = Delta; // since X/1 == X 1104 NewConstraint.setDistance(Delta, CurLoop); 1105 } 1106 else { 1107 Result.Consistent = false; 1108 NewConstraint.setLine(Coeff, 1109 SE->getNegativeSCEV(Coeff), 1110 SE->getNegativeSCEV(Delta), CurLoop); 1111 } 1112 1113 // maybe we can get a useful direction 1114 bool DeltaMaybeZero = !SE->isKnownNonZero(Delta); 1115 bool DeltaMaybePositive = !SE->isKnownNonPositive(Delta); 1116 bool DeltaMaybeNegative = !SE->isKnownNonNegative(Delta); 1117 bool CoeffMaybePositive = !SE->isKnownNonPositive(Coeff); 1118 bool CoeffMaybeNegative = !SE->isKnownNonNegative(Coeff); 1119 // The double negatives above are confusing. 1120 // It helps to read !SE->isKnownNonZero(Delta) 1121 // as "Delta might be Zero" 1122 unsigned NewDirection = Dependence::DVEntry::NONE; 1123 if ((DeltaMaybePositive && CoeffMaybePositive) || 1124 (DeltaMaybeNegative && CoeffMaybeNegative)) 1125 NewDirection = Dependence::DVEntry::LT; 1126 if (DeltaMaybeZero) 1127 NewDirection |= Dependence::DVEntry::EQ; 1128 if ((DeltaMaybeNegative && CoeffMaybePositive) || 1129 (DeltaMaybePositive && CoeffMaybeNegative)) 1130 NewDirection |= Dependence::DVEntry::GT; 1131 if (NewDirection < Result.DV[Level].Direction) 1132 ++StrongSIVsuccesses; 1133 Result.DV[Level].Direction &= NewDirection; 1134 } 1135 return false; 1136 } 1137 1138 1139 // weakCrossingSIVtest - 1140 // From the paper, Practical Dependence Testing, Section 4.2.2 1141 // 1142 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 - a*i], 1143 // where i is an induction variable, c1 and c2 are loop invariant, 1144 // and a is a constant, we can solve it exactly using the 1145 // Weak-Crossing SIV test. 1146 // 1147 // Given c1 + a*i = c2 - a*i', we can look for the intersection of 1148 // the two lines, where i = i', yielding 1149 // 1150 // c1 + a*i = c2 - a*i 1151 // 2a*i = c2 - c1 1152 // i = (c2 - c1)/2a 1153 // 1154 // If i < 0, there is no dependence. 1155 // If i > upperbound, there is no dependence. 1156 // If i = 0 (i.e., if c1 = c2), there's a dependence with distance = 0. 1157 // If i = upperbound, there's a dependence with distance = 0. 1158 // If i is integral, there's a dependence (all directions). 1159 // If the non-integer part = 1/2, there's a dependence (<> directions). 1160 // Otherwise, there's no dependence. 1161 // 1162 // Can prove independence. Failing that, 1163 // can sometimes refine the directions. 1164 // Can determine iteration for splitting. 1165 // 1166 // Return true if dependence disproved. 1167 bool DependenceAnalysis::weakCrossingSIVtest(const SCEV *Coeff, 1168 const SCEV *SrcConst, 1169 const SCEV *DstConst, 1170 const Loop *CurLoop, 1171 unsigned Level, 1172 FullDependence &Result, 1173 Constraint &NewConstraint, 1174 const SCEV *&SplitIter) const { 1175 DEBUG(dbgs() << "\tWeak-Crossing SIV test\n"); 1176 DEBUG(dbgs() << "\t Coeff = " << *Coeff << "\n"); 1177 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1178 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1179 ++WeakCrossingSIVapplications; 1180 assert(0 < Level && Level <= CommonLevels && "Level out of range"); 1181 Level--; 1182 Result.Consistent = false; 1183 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1184 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1185 NewConstraint.setLine(Coeff, Coeff, Delta, CurLoop); 1186 if (Delta->isZero()) { 1187 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT); 1188 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT); 1189 ++WeakCrossingSIVsuccesses; 1190 if (!Result.DV[Level].Direction) { 1191 ++WeakCrossingSIVindependence; 1192 return true; 1193 } 1194 Result.DV[Level].Distance = Delta; // = 0 1195 return false; 1196 } 1197 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Coeff); 1198 if (!ConstCoeff) 1199 return false; 1200 1201 Result.DV[Level].Splitable = true; 1202 if (SE->isKnownNegative(ConstCoeff)) { 1203 ConstCoeff = dyn_cast<SCEVConstant>(SE->getNegativeSCEV(ConstCoeff)); 1204 assert(ConstCoeff && 1205 "dynamic cast of negative of ConstCoeff should yield constant"); 1206 Delta = SE->getNegativeSCEV(Delta); 1207 } 1208 assert(SE->isKnownPositive(ConstCoeff) && "ConstCoeff should be positive"); 1209 1210 // compute SplitIter for use by DependenceAnalysis::getSplitIteration() 1211 SplitIter = 1212 SE->getUDivExpr(SE->getSMaxExpr(SE->getConstant(Delta->getType(), 0), 1213 Delta), 1214 SE->getMulExpr(SE->getConstant(Delta->getType(), 2), 1215 ConstCoeff)); 1216 DEBUG(dbgs() << "\t Split iter = " << *SplitIter << "\n"); 1217 1218 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1219 if (!ConstDelta) 1220 return false; 1221 1222 // We're certain that ConstCoeff > 0; therefore, 1223 // if Delta < 0, then no dependence. 1224 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1225 DEBUG(dbgs() << "\t ConstCoeff = " << *ConstCoeff << "\n"); 1226 if (SE->isKnownNegative(Delta)) { 1227 // No dependence, Delta < 0 1228 ++WeakCrossingSIVindependence; 1229 ++WeakCrossingSIVsuccesses; 1230 return true; 1231 } 1232 1233 // We're certain that Delta > 0 and ConstCoeff > 0. 1234 // Check Delta/(2*ConstCoeff) against upper loop bound 1235 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1236 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1237 const SCEV *ConstantTwo = SE->getConstant(UpperBound->getType(), 2); 1238 const SCEV *ML = SE->getMulExpr(SE->getMulExpr(ConstCoeff, UpperBound), 1239 ConstantTwo); 1240 DEBUG(dbgs() << "\t ML = " << *ML << "\n"); 1241 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, ML)) { 1242 // Delta too big, no dependence 1243 ++WeakCrossingSIVindependence; 1244 ++WeakCrossingSIVsuccesses; 1245 return true; 1246 } 1247 if (isKnownPredicate(CmpInst::ICMP_EQ, Delta, ML)) { 1248 // i = i' = UB 1249 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT); 1250 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT); 1251 ++WeakCrossingSIVsuccesses; 1252 if (!Result.DV[Level].Direction) { 1253 ++WeakCrossingSIVindependence; 1254 return true; 1255 } 1256 Result.DV[Level].Splitable = false; 1257 Result.DV[Level].Distance = SE->getConstant(Delta->getType(), 0); 1258 return false; 1259 } 1260 } 1261 1262 // check that Coeff divides Delta 1263 APInt APDelta = ConstDelta->getValue()->getValue(); 1264 APInt APCoeff = ConstCoeff->getValue()->getValue(); 1265 APInt Distance = APDelta; // these need to be initialzed 1266 APInt Remainder = APDelta; 1267 APInt::sdivrem(APDelta, APCoeff, Distance, Remainder); 1268 DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1269 if (Remainder != 0) { 1270 // Coeff doesn't divide Delta, no dependence 1271 ++WeakCrossingSIVindependence; 1272 ++WeakCrossingSIVsuccesses; 1273 return true; 1274 } 1275 DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); 1276 1277 // if 2*Coeff doesn't divide Delta, then the equal direction isn't possible 1278 APInt Two = APInt(Distance.getBitWidth(), 2, true); 1279 Remainder = Distance.srem(Two); 1280 DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1281 if (Remainder != 0) { 1282 // Equal direction isn't possible 1283 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::EQ); 1284 ++WeakCrossingSIVsuccesses; 1285 } 1286 return false; 1287 } 1288 1289 1290 // Kirch's algorithm, from 1291 // 1292 // Optimizing Supercompilers for Supercomputers 1293 // Michael Wolfe 1294 // MIT Press, 1989 1295 // 1296 // Program 2.1, page 29. 1297 // Computes the GCD of AM and BM. 1298 // Also finds a solution to the equation ax - by = gcd(a, b). 1299 // Returns true if dependence disproved; i.e., gcd does not divide Delta. 1300 static 1301 bool findGCD(unsigned Bits, APInt AM, APInt BM, APInt Delta, 1302 APInt &G, APInt &X, APInt &Y) { 1303 APInt A0(Bits, 1, true), A1(Bits, 0, true); 1304 APInt B0(Bits, 0, true), B1(Bits, 1, true); 1305 APInt G0 = AM.abs(); 1306 APInt G1 = BM.abs(); 1307 APInt Q = G0; // these need to be initialized 1308 APInt R = G0; 1309 APInt::sdivrem(G0, G1, Q, R); 1310 while (R != 0) { 1311 APInt A2 = A0 - Q*A1; A0 = A1; A1 = A2; 1312 APInt B2 = B0 - Q*B1; B0 = B1; B1 = B2; 1313 G0 = G1; G1 = R; 1314 APInt::sdivrem(G0, G1, Q, R); 1315 } 1316 G = G1; 1317 DEBUG(dbgs() << "\t GCD = " << G << "\n"); 1318 X = AM.slt(0) ? -A1 : A1; 1319 Y = BM.slt(0) ? B1 : -B1; 1320 1321 // make sure gcd divides Delta 1322 R = Delta.srem(G); 1323 if (R != 0) 1324 return true; // gcd doesn't divide Delta, no dependence 1325 Q = Delta.sdiv(G); 1326 X *= Q; 1327 Y *= Q; 1328 return false; 1329 } 1330 1331 1332 static 1333 APInt floorOfQuotient(APInt A, APInt B) { 1334 APInt Q = A; // these need to be initialized 1335 APInt R = A; 1336 APInt::sdivrem(A, B, Q, R); 1337 if (R == 0) 1338 return Q; 1339 if ((A.sgt(0) && B.sgt(0)) || 1340 (A.slt(0) && B.slt(0))) 1341 return Q; 1342 else 1343 return Q - 1; 1344 } 1345 1346 1347 static 1348 APInt ceilingOfQuotient(APInt A, APInt B) { 1349 APInt Q = A; // these need to be initialized 1350 APInt R = A; 1351 APInt::sdivrem(A, B, Q, R); 1352 if (R == 0) 1353 return Q; 1354 if ((A.sgt(0) && B.sgt(0)) || 1355 (A.slt(0) && B.slt(0))) 1356 return Q + 1; 1357 else 1358 return Q; 1359 } 1360 1361 1362 static 1363 APInt maxAPInt(APInt A, APInt B) { 1364 return A.sgt(B) ? A : B; 1365 } 1366 1367 1368 static 1369 APInt minAPInt(APInt A, APInt B) { 1370 return A.slt(B) ? A : B; 1371 } 1372 1373 1374 // exactSIVtest - 1375 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i], 1376 // where i is an induction variable, c1 and c2 are loop invariant, and a1 1377 // and a2 are constant, we can solve it exactly using an algorithm developed 1378 // by Banerjee and Wolfe. See Section 2.5.3 in 1379 // 1380 // Optimizing Supercompilers for Supercomputers 1381 // Michael Wolfe 1382 // MIT Press, 1989 1383 // 1384 // It's slower than the specialized tests (strong SIV, weak-zero SIV, etc), 1385 // so use them if possible. They're also a bit better with symbolics and, 1386 // in the case of the strong SIV test, can compute Distances. 1387 // 1388 // Return true if dependence disproved. 1389 bool DependenceAnalysis::exactSIVtest(const SCEV *SrcCoeff, 1390 const SCEV *DstCoeff, 1391 const SCEV *SrcConst, 1392 const SCEV *DstConst, 1393 const Loop *CurLoop, 1394 unsigned Level, 1395 FullDependence &Result, 1396 Constraint &NewConstraint) const { 1397 DEBUG(dbgs() << "\tExact SIV test\n"); 1398 DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); 1399 DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); 1400 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1401 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1402 ++ExactSIVapplications; 1403 assert(0 < Level && Level <= CommonLevels && "Level out of range"); 1404 Level--; 1405 Result.Consistent = false; 1406 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1407 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1408 NewConstraint.setLine(SrcCoeff, SE->getNegativeSCEV(DstCoeff), 1409 Delta, CurLoop); 1410 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1411 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1412 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1413 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) 1414 return false; 1415 1416 // find gcd 1417 APInt G, X, Y; 1418 APInt AM = ConstSrcCoeff->getValue()->getValue(); 1419 APInt BM = ConstDstCoeff->getValue()->getValue(); 1420 unsigned Bits = AM.getBitWidth(); 1421 if (findGCD(Bits, AM, BM, ConstDelta->getValue()->getValue(), G, X, Y)) { 1422 // gcd doesn't divide Delta, no dependence 1423 ++ExactSIVindependence; 1424 ++ExactSIVsuccesses; 1425 return true; 1426 } 1427 1428 DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); 1429 1430 // since SCEV construction normalizes, LM = 0 1431 APInt UM(Bits, 1, true); 1432 bool UMvalid = false; 1433 // UM is perhaps unavailable, let's check 1434 if (const SCEVConstant *CUB = 1435 collectConstantUpperBound(CurLoop, Delta->getType())) { 1436 UM = CUB->getValue()->getValue(); 1437 DEBUG(dbgs() << "\t UM = " << UM << "\n"); 1438 UMvalid = true; 1439 } 1440 1441 APInt TU(APInt::getSignedMaxValue(Bits)); 1442 APInt TL(APInt::getSignedMinValue(Bits)); 1443 1444 // test(BM/G, LM-X) and test(-BM/G, X-UM) 1445 APInt TMUL = BM.sdiv(G); 1446 if (TMUL.sgt(0)) { 1447 TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL)); 1448 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1449 if (UMvalid) { 1450 TU = minAPInt(TU, floorOfQuotient(UM - X, TMUL)); 1451 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1452 } 1453 } 1454 else { 1455 TU = minAPInt(TU, floorOfQuotient(-X, TMUL)); 1456 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1457 if (UMvalid) { 1458 TL = maxAPInt(TL, ceilingOfQuotient(UM - X, TMUL)); 1459 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1460 } 1461 } 1462 1463 // test(AM/G, LM-Y) and test(-AM/G, Y-UM) 1464 TMUL = AM.sdiv(G); 1465 if (TMUL.sgt(0)) { 1466 TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL)); 1467 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1468 if (UMvalid) { 1469 TU = minAPInt(TU, floorOfQuotient(UM - Y, TMUL)); 1470 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1471 } 1472 } 1473 else { 1474 TU = minAPInt(TU, floorOfQuotient(-Y, TMUL)); 1475 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1476 if (UMvalid) { 1477 TL = maxAPInt(TL, ceilingOfQuotient(UM - Y, TMUL)); 1478 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1479 } 1480 } 1481 if (TL.sgt(TU)) { 1482 ++ExactSIVindependence; 1483 ++ExactSIVsuccesses; 1484 return true; 1485 } 1486 1487 // explore directions 1488 unsigned NewDirection = Dependence::DVEntry::NONE; 1489 1490 // less than 1491 APInt SaveTU(TU); // save these 1492 APInt SaveTL(TL); 1493 DEBUG(dbgs() << "\t exploring LT direction\n"); 1494 TMUL = AM - BM; 1495 if (TMUL.sgt(0)) { 1496 TL = maxAPInt(TL, ceilingOfQuotient(X - Y + 1, TMUL)); 1497 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1498 } 1499 else { 1500 TU = minAPInt(TU, floorOfQuotient(X - Y + 1, TMUL)); 1501 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1502 } 1503 if (TL.sle(TU)) { 1504 NewDirection |= Dependence::DVEntry::LT; 1505 ++ExactSIVsuccesses; 1506 } 1507 1508 // equal 1509 TU = SaveTU; // restore 1510 TL = SaveTL; 1511 DEBUG(dbgs() << "\t exploring EQ direction\n"); 1512 if (TMUL.sgt(0)) { 1513 TL = maxAPInt(TL, ceilingOfQuotient(X - Y, TMUL)); 1514 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1515 } 1516 else { 1517 TU = minAPInt(TU, floorOfQuotient(X - Y, TMUL)); 1518 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1519 } 1520 TMUL = BM - AM; 1521 if (TMUL.sgt(0)) { 1522 TL = maxAPInt(TL, ceilingOfQuotient(Y - X, TMUL)); 1523 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1524 } 1525 else { 1526 TU = minAPInt(TU, floorOfQuotient(Y - X, TMUL)); 1527 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1528 } 1529 if (TL.sle(TU)) { 1530 NewDirection |= Dependence::DVEntry::EQ; 1531 ++ExactSIVsuccesses; 1532 } 1533 1534 // greater than 1535 TU = SaveTU; // restore 1536 TL = SaveTL; 1537 DEBUG(dbgs() << "\t exploring GT direction\n"); 1538 if (TMUL.sgt(0)) { 1539 TL = maxAPInt(TL, ceilingOfQuotient(Y - X + 1, TMUL)); 1540 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1541 } 1542 else { 1543 TU = minAPInt(TU, floorOfQuotient(Y - X + 1, TMUL)); 1544 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1545 } 1546 if (TL.sle(TU)) { 1547 NewDirection |= Dependence::DVEntry::GT; 1548 ++ExactSIVsuccesses; 1549 } 1550 1551 // finished 1552 Result.DV[Level].Direction &= NewDirection; 1553 if (Result.DV[Level].Direction == Dependence::DVEntry::NONE) 1554 ++ExactSIVindependence; 1555 return Result.DV[Level].Direction == Dependence::DVEntry::NONE; 1556 } 1557 1558 1559 1560 // Return true if the divisor evenly divides the dividend. 1561 static 1562 bool isRemainderZero(const SCEVConstant *Dividend, 1563 const SCEVConstant *Divisor) { 1564 APInt ConstDividend = Dividend->getValue()->getValue(); 1565 APInt ConstDivisor = Divisor->getValue()->getValue(); 1566 return ConstDividend.srem(ConstDivisor) == 0; 1567 } 1568 1569 1570 // weakZeroSrcSIVtest - 1571 // From the paper, Practical Dependence Testing, Section 4.2.2 1572 // 1573 // When we have a pair of subscripts of the form [c1] and [c2 + a*i], 1574 // where i is an induction variable, c1 and c2 are loop invariant, 1575 // and a is a constant, we can solve it exactly using the 1576 // Weak-Zero SIV test. 1577 // 1578 // Given 1579 // 1580 // c1 = c2 + a*i 1581 // 1582 // we get 1583 // 1584 // (c1 - c2)/a = i 1585 // 1586 // If i is not an integer, there's no dependence. 1587 // If i < 0 or > UB, there's no dependence. 1588 // If i = 0, the direction is <= and peeling the 1589 // 1st iteration will break the dependence. 1590 // If i = UB, the direction is >= and peeling the 1591 // last iteration will break the dependence. 1592 // Otherwise, the direction is *. 1593 // 1594 // Can prove independence. Failing that, we can sometimes refine 1595 // the directions. Can sometimes show that first or last 1596 // iteration carries all the dependences (so worth peeling). 1597 // 1598 // (see also weakZeroDstSIVtest) 1599 // 1600 // Return true if dependence disproved. 1601 bool DependenceAnalysis::weakZeroSrcSIVtest(const SCEV *DstCoeff, 1602 const SCEV *SrcConst, 1603 const SCEV *DstConst, 1604 const Loop *CurLoop, 1605 unsigned Level, 1606 FullDependence &Result, 1607 Constraint &NewConstraint) const { 1608 // For the WeakSIV test, it's possible the loop isn't common to 1609 // the Src and Dst loops. If it isn't, then there's no need to 1610 // record a direction. 1611 DEBUG(dbgs() << "\tWeak-Zero (src) SIV test\n"); 1612 DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << "\n"); 1613 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1614 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1615 ++WeakZeroSIVapplications; 1616 assert(0 < Level && Level <= MaxLevels && "Level out of range"); 1617 Level--; 1618 Result.Consistent = false; 1619 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); 1620 NewConstraint.setLine(SE->getConstant(Delta->getType(), 0), 1621 DstCoeff, Delta, CurLoop); 1622 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1623 if (isKnownPredicate(CmpInst::ICMP_EQ, SrcConst, DstConst)) { 1624 if (Level < CommonLevels) { 1625 Result.DV[Level].Direction &= Dependence::DVEntry::LE; 1626 Result.DV[Level].PeelFirst = true; 1627 ++WeakZeroSIVsuccesses; 1628 } 1629 return false; // dependences caused by first iteration 1630 } 1631 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1632 if (!ConstCoeff) 1633 return false; 1634 const SCEV *AbsCoeff = 1635 SE->isKnownNegative(ConstCoeff) ? 1636 SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; 1637 const SCEV *NewDelta = 1638 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta; 1639 1640 // check that Delta/SrcCoeff < iteration count 1641 // really check NewDelta < count*AbsCoeff 1642 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1643 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1644 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); 1645 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { 1646 ++WeakZeroSIVindependence; 1647 ++WeakZeroSIVsuccesses; 1648 return true; 1649 } 1650 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { 1651 // dependences caused by last iteration 1652 if (Level < CommonLevels) { 1653 Result.DV[Level].Direction &= Dependence::DVEntry::GE; 1654 Result.DV[Level].PeelLast = true; 1655 ++WeakZeroSIVsuccesses; 1656 } 1657 return false; 1658 } 1659 } 1660 1661 // check that Delta/SrcCoeff >= 0 1662 // really check that NewDelta >= 0 1663 if (SE->isKnownNegative(NewDelta)) { 1664 // No dependence, newDelta < 0 1665 ++WeakZeroSIVindependence; 1666 ++WeakZeroSIVsuccesses; 1667 return true; 1668 } 1669 1670 // if SrcCoeff doesn't divide Delta, then no dependence 1671 if (isa<SCEVConstant>(Delta) && 1672 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { 1673 ++WeakZeroSIVindependence; 1674 ++WeakZeroSIVsuccesses; 1675 return true; 1676 } 1677 return false; 1678 } 1679 1680 1681 // weakZeroDstSIVtest - 1682 // From the paper, Practical Dependence Testing, Section 4.2.2 1683 // 1684 // When we have a pair of subscripts of the form [c1 + a*i] and [c2], 1685 // where i is an induction variable, c1 and c2 are loop invariant, 1686 // and a is a constant, we can solve it exactly using the 1687 // Weak-Zero SIV test. 1688 // 1689 // Given 1690 // 1691 // c1 + a*i = c2 1692 // 1693 // we get 1694 // 1695 // i = (c2 - c1)/a 1696 // 1697 // If i is not an integer, there's no dependence. 1698 // If i < 0 or > UB, there's no dependence. 1699 // If i = 0, the direction is <= and peeling the 1700 // 1st iteration will break the dependence. 1701 // If i = UB, the direction is >= and peeling the 1702 // last iteration will break the dependence. 1703 // Otherwise, the direction is *. 1704 // 1705 // Can prove independence. Failing that, we can sometimes refine 1706 // the directions. Can sometimes show that first or last 1707 // iteration carries all the dependences (so worth peeling). 1708 // 1709 // (see also weakZeroSrcSIVtest) 1710 // 1711 // Return true if dependence disproved. 1712 bool DependenceAnalysis::weakZeroDstSIVtest(const SCEV *SrcCoeff, 1713 const SCEV *SrcConst, 1714 const SCEV *DstConst, 1715 const Loop *CurLoop, 1716 unsigned Level, 1717 FullDependence &Result, 1718 Constraint &NewConstraint) const { 1719 // For the WeakSIV test, it's possible the loop isn't common to the 1720 // Src and Dst loops. If it isn't, then there's no need to record a direction. 1721 DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n"); 1722 DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << "\n"); 1723 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1724 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1725 ++WeakZeroSIVapplications; 1726 assert(0 < Level && Level <= SrcLevels && "Level out of range"); 1727 Level--; 1728 Result.Consistent = false; 1729 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1730 NewConstraint.setLine(SrcCoeff, SE->getConstant(Delta->getType(), 0), 1731 Delta, CurLoop); 1732 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1733 if (isKnownPredicate(CmpInst::ICMP_EQ, DstConst, SrcConst)) { 1734 if (Level < CommonLevels) { 1735 Result.DV[Level].Direction &= Dependence::DVEntry::LE; 1736 Result.DV[Level].PeelFirst = true; 1737 ++WeakZeroSIVsuccesses; 1738 } 1739 return false; // dependences caused by first iteration 1740 } 1741 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1742 if (!ConstCoeff) 1743 return false; 1744 const SCEV *AbsCoeff = 1745 SE->isKnownNegative(ConstCoeff) ? 1746 SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; 1747 const SCEV *NewDelta = 1748 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta; 1749 1750 // check that Delta/SrcCoeff < iteration count 1751 // really check NewDelta < count*AbsCoeff 1752 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1753 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1754 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); 1755 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { 1756 ++WeakZeroSIVindependence; 1757 ++WeakZeroSIVsuccesses; 1758 return true; 1759 } 1760 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { 1761 // dependences caused by last iteration 1762 if (Level < CommonLevels) { 1763 Result.DV[Level].Direction &= Dependence::DVEntry::GE; 1764 Result.DV[Level].PeelLast = true; 1765 ++WeakZeroSIVsuccesses; 1766 } 1767 return false; 1768 } 1769 } 1770 1771 // check that Delta/SrcCoeff >= 0 1772 // really check that NewDelta >= 0 1773 if (SE->isKnownNegative(NewDelta)) { 1774 // No dependence, newDelta < 0 1775 ++WeakZeroSIVindependence; 1776 ++WeakZeroSIVsuccesses; 1777 return true; 1778 } 1779 1780 // if SrcCoeff doesn't divide Delta, then no dependence 1781 if (isa<SCEVConstant>(Delta) && 1782 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { 1783 ++WeakZeroSIVindependence; 1784 ++WeakZeroSIVsuccesses; 1785 return true; 1786 } 1787 return false; 1788 } 1789 1790 1791 // exactRDIVtest - Tests the RDIV subscript pair for dependence. 1792 // Things of the form [c1 + a*i] and [c2 + b*j], 1793 // where i and j are induction variable, c1 and c2 are loop invariant, 1794 // and a and b are constants. 1795 // Returns true if any possible dependence is disproved. 1796 // Marks the result as inconsistent. 1797 // Works in some cases that symbolicRDIVtest doesn't, and vice versa. 1798 bool DependenceAnalysis::exactRDIVtest(const SCEV *SrcCoeff, 1799 const SCEV *DstCoeff, 1800 const SCEV *SrcConst, 1801 const SCEV *DstConst, 1802 const Loop *SrcLoop, 1803 const Loop *DstLoop, 1804 FullDependence &Result) const { 1805 DEBUG(dbgs() << "\tExact RDIV test\n"); 1806 DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); 1807 DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); 1808 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1809 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1810 ++ExactRDIVapplications; 1811 Result.Consistent = false; 1812 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1813 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1814 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1815 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1816 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1817 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) 1818 return false; 1819 1820 // find gcd 1821 APInt G, X, Y; 1822 APInt AM = ConstSrcCoeff->getValue()->getValue(); 1823 APInt BM = ConstDstCoeff->getValue()->getValue(); 1824 unsigned Bits = AM.getBitWidth(); 1825 if (findGCD(Bits, AM, BM, ConstDelta->getValue()->getValue(), G, X, Y)) { 1826 // gcd doesn't divide Delta, no dependence 1827 ++ExactRDIVindependence; 1828 return true; 1829 } 1830 1831 DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); 1832 1833 // since SCEV construction seems to normalize, LM = 0 1834 APInt SrcUM(Bits, 1, true); 1835 bool SrcUMvalid = false; 1836 // SrcUM is perhaps unavailable, let's check 1837 if (const SCEVConstant *UpperBound = 1838 collectConstantUpperBound(SrcLoop, Delta->getType())) { 1839 SrcUM = UpperBound->getValue()->getValue(); 1840 DEBUG(dbgs() << "\t SrcUM = " << SrcUM << "\n"); 1841 SrcUMvalid = true; 1842 } 1843 1844 APInt DstUM(Bits, 1, true); 1845 bool DstUMvalid = false; 1846 // UM is perhaps unavailable, let's check 1847 if (const SCEVConstant *UpperBound = 1848 collectConstantUpperBound(DstLoop, Delta->getType())) { 1849 DstUM = UpperBound->getValue()->getValue(); 1850 DEBUG(dbgs() << "\t DstUM = " << DstUM << "\n"); 1851 DstUMvalid = true; 1852 } 1853 1854 APInt TU(APInt::getSignedMaxValue(Bits)); 1855 APInt TL(APInt::getSignedMinValue(Bits)); 1856 1857 // test(BM/G, LM-X) and test(-BM/G, X-UM) 1858 APInt TMUL = BM.sdiv(G); 1859 if (TMUL.sgt(0)) { 1860 TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL)); 1861 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1862 if (SrcUMvalid) { 1863 TU = minAPInt(TU, floorOfQuotient(SrcUM - X, TMUL)); 1864 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1865 } 1866 } 1867 else { 1868 TU = minAPInt(TU, floorOfQuotient(-X, TMUL)); 1869 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1870 if (SrcUMvalid) { 1871 TL = maxAPInt(TL, ceilingOfQuotient(SrcUM - X, TMUL)); 1872 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1873 } 1874 } 1875 1876 // test(AM/G, LM-Y) and test(-AM/G, Y-UM) 1877 TMUL = AM.sdiv(G); 1878 if (TMUL.sgt(0)) { 1879 TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL)); 1880 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1881 if (DstUMvalid) { 1882 TU = minAPInt(TU, floorOfQuotient(DstUM - Y, TMUL)); 1883 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1884 } 1885 } 1886 else { 1887 TU = minAPInt(TU, floorOfQuotient(-Y, TMUL)); 1888 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1889 if (DstUMvalid) { 1890 TL = maxAPInt(TL, ceilingOfQuotient(DstUM - Y, TMUL)); 1891 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1892 } 1893 } 1894 if (TL.sgt(TU)) 1895 ++ExactRDIVindependence; 1896 return TL.sgt(TU); 1897 } 1898 1899 1900 // symbolicRDIVtest - 1901 // In Section 4.5 of the Practical Dependence Testing paper,the authors 1902 // introduce a special case of Banerjee's Inequalities (also called the 1903 // Extreme-Value Test) that can handle some of the SIV and RDIV cases, 1904 // particularly cases with symbolics. Since it's only able to disprove 1905 // dependence (not compute distances or directions), we'll use it as a 1906 // fall back for the other tests. 1907 // 1908 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] 1909 // where i and j are induction variables and c1 and c2 are loop invariants, 1910 // we can use the symbolic tests to disprove some dependences, serving as a 1911 // backup for the RDIV test. Note that i and j can be the same variable, 1912 // letting this test serve as a backup for the various SIV tests. 1913 // 1914 // For a dependence to exist, c1 + a1*i must equal c2 + a2*j for some 1915 // 0 <= i <= N1 and some 0 <= j <= N2, where N1 and N2 are the (normalized) 1916 // loop bounds for the i and j loops, respectively. So, ... 1917 // 1918 // c1 + a1*i = c2 + a2*j 1919 // a1*i - a2*j = c2 - c1 1920 // 1921 // To test for a dependence, we compute c2 - c1 and make sure it's in the 1922 // range of the maximum and minimum possible values of a1*i - a2*j. 1923 // Considering the signs of a1 and a2, we have 4 possible cases: 1924 // 1925 // 1) If a1 >= 0 and a2 >= 0, then 1926 // a1*0 - a2*N2 <= c2 - c1 <= a1*N1 - a2*0 1927 // -a2*N2 <= c2 - c1 <= a1*N1 1928 // 1929 // 2) If a1 >= 0 and a2 <= 0, then 1930 // a1*0 - a2*0 <= c2 - c1 <= a1*N1 - a2*N2 1931 // 0 <= c2 - c1 <= a1*N1 - a2*N2 1932 // 1933 // 3) If a1 <= 0 and a2 >= 0, then 1934 // a1*N1 - a2*N2 <= c2 - c1 <= a1*0 - a2*0 1935 // a1*N1 - a2*N2 <= c2 - c1 <= 0 1936 // 1937 // 4) If a1 <= 0 and a2 <= 0, then 1938 // a1*N1 - a2*0 <= c2 - c1 <= a1*0 - a2*N2 1939 // a1*N1 <= c2 - c1 <= -a2*N2 1940 // 1941 // return true if dependence disproved 1942 bool DependenceAnalysis::symbolicRDIVtest(const SCEV *A1, 1943 const SCEV *A2, 1944 const SCEV *C1, 1945 const SCEV *C2, 1946 const Loop *Loop1, 1947 const Loop *Loop2) const { 1948 ++SymbolicRDIVapplications; 1949 DEBUG(dbgs() << "\ttry symbolic RDIV test\n"); 1950 DEBUG(dbgs() << "\t A1 = " << *A1); 1951 DEBUG(dbgs() << ", type = " << *A1->getType() << "\n"); 1952 DEBUG(dbgs() << "\t A2 = " << *A2 << "\n"); 1953 DEBUG(dbgs() << "\t C1 = " << *C1 << "\n"); 1954 DEBUG(dbgs() << "\t C2 = " << *C2 << "\n"); 1955 const SCEV *N1 = collectUpperBound(Loop1, A1->getType()); 1956 const SCEV *N2 = collectUpperBound(Loop2, A1->getType()); 1957 DEBUG(if (N1) dbgs() << "\t N1 = " << *N1 << "\n"); 1958 DEBUG(if (N2) dbgs() << "\t N2 = " << *N2 << "\n"); 1959 const SCEV *C2_C1 = SE->getMinusSCEV(C2, C1); 1960 const SCEV *C1_C2 = SE->getMinusSCEV(C1, C2); 1961 DEBUG(dbgs() << "\t C2 - C1 = " << *C2_C1 << "\n"); 1962 DEBUG(dbgs() << "\t C1 - C2 = " << *C1_C2 << "\n"); 1963 if (SE->isKnownNonNegative(A1)) { 1964 if (SE->isKnownNonNegative(A2)) { 1965 // A1 >= 0 && A2 >= 0 1966 if (N1) { 1967 // make sure that c2 - c1 <= a1*N1 1968 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 1969 DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); 1970 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1)) { 1971 ++SymbolicRDIVindependence; 1972 return true; 1973 } 1974 } 1975 if (N2) { 1976 // make sure that -a2*N2 <= c2 - c1, or a2*N2 >= c1 - c2 1977 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 1978 DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); 1979 if (isKnownPredicate(CmpInst::ICMP_SLT, A2N2, C1_C2)) { 1980 ++SymbolicRDIVindependence; 1981 return true; 1982 } 1983 } 1984 } 1985 else if (SE->isKnownNonPositive(A2)) { 1986 // a1 >= 0 && a2 <= 0 1987 if (N1 && N2) { 1988 // make sure that c2 - c1 <= a1*N1 - a2*N2 1989 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 1990 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 1991 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); 1992 DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); 1993 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1_A2N2)) { 1994 ++SymbolicRDIVindependence; 1995 return true; 1996 } 1997 } 1998 // make sure that 0 <= c2 - c1 1999 if (SE->isKnownNegative(C2_C1)) { 2000 ++SymbolicRDIVindependence; 2001 return true; 2002 } 2003 } 2004 } 2005 else if (SE->isKnownNonPositive(A1)) { 2006 if (SE->isKnownNonNegative(A2)) { 2007 // a1 <= 0 && a2 >= 0 2008 if (N1 && N2) { 2009 // make sure that a1*N1 - a2*N2 <= c2 - c1 2010 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2011 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2012 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); 2013 DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); 2014 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1_A2N2, C2_C1)) { 2015 ++SymbolicRDIVindependence; 2016 return true; 2017 } 2018 } 2019 // make sure that c2 - c1 <= 0 2020 if (SE->isKnownPositive(C2_C1)) { 2021 ++SymbolicRDIVindependence; 2022 return true; 2023 } 2024 } 2025 else if (SE->isKnownNonPositive(A2)) { 2026 // a1 <= 0 && a2 <= 0 2027 if (N1) { 2028 // make sure that a1*N1 <= c2 - c1 2029 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2030 DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); 2031 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1, C2_C1)) { 2032 ++SymbolicRDIVindependence; 2033 return true; 2034 } 2035 } 2036 if (N2) { 2037 // make sure that c2 - c1 <= -a2*N2, or c1 - c2 >= a2*N2 2038 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2039 DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); 2040 if (isKnownPredicate(CmpInst::ICMP_SLT, C1_C2, A2N2)) { 2041 ++SymbolicRDIVindependence; 2042 return true; 2043 } 2044 } 2045 } 2046 } 2047 return false; 2048 } 2049 2050 2051 // testSIV - 2052 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 - a2*i] 2053 // where i is an induction variable, c1 and c2 are loop invariant, and a1 and 2054 // a2 are constant, we attack it with an SIV test. While they can all be 2055 // solved with the Exact SIV test, it's worthwhile to use simpler tests when 2056 // they apply; they're cheaper and sometimes more precise. 2057 // 2058 // Return true if dependence disproved. 2059 bool DependenceAnalysis::testSIV(const SCEV *Src, 2060 const SCEV *Dst, 2061 unsigned &Level, 2062 FullDependence &Result, 2063 Constraint &NewConstraint, 2064 const SCEV *&SplitIter) const { 2065 DEBUG(dbgs() << " src = " << *Src << "\n"); 2066 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2067 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); 2068 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); 2069 if (SrcAddRec && DstAddRec) { 2070 const SCEV *SrcConst = SrcAddRec->getStart(); 2071 const SCEV *DstConst = DstAddRec->getStart(); 2072 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2073 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); 2074 const Loop *CurLoop = SrcAddRec->getLoop(); 2075 assert(CurLoop == DstAddRec->getLoop() && 2076 "both loops in SIV should be same"); 2077 Level = mapSrcLoop(CurLoop); 2078 bool disproven; 2079 if (SrcCoeff == DstCoeff) 2080 disproven = strongSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2081 Level, Result, NewConstraint); 2082 else if (SrcCoeff == SE->getNegativeSCEV(DstCoeff)) 2083 disproven = weakCrossingSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2084 Level, Result, NewConstraint, SplitIter); 2085 else 2086 disproven = exactSIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, 2087 Level, Result, NewConstraint); 2088 return disproven || 2089 gcdMIVtest(Src, Dst, Result) || 2090 symbolicRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, CurLoop); 2091 } 2092 if (SrcAddRec) { 2093 const SCEV *SrcConst = SrcAddRec->getStart(); 2094 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2095 const SCEV *DstConst = Dst; 2096 const Loop *CurLoop = SrcAddRec->getLoop(); 2097 Level = mapSrcLoop(CurLoop); 2098 return weakZeroDstSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2099 Level, Result, NewConstraint) || 2100 gcdMIVtest(Src, Dst, Result); 2101 } 2102 if (DstAddRec) { 2103 const SCEV *DstConst = DstAddRec->getStart(); 2104 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); 2105 const SCEV *SrcConst = Src; 2106 const Loop *CurLoop = DstAddRec->getLoop(); 2107 Level = mapDstLoop(CurLoop); 2108 return weakZeroSrcSIVtest(DstCoeff, SrcConst, DstConst, 2109 CurLoop, Level, Result, NewConstraint) || 2110 gcdMIVtest(Src, Dst, Result); 2111 } 2112 llvm_unreachable("SIV test expected at least one AddRec"); 2113 return false; 2114 } 2115 2116 2117 // testRDIV - 2118 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] 2119 // where i and j are induction variables, c1 and c2 are loop invariant, 2120 // and a1 and a2 are constant, we can solve it exactly with an easy adaptation 2121 // of the Exact SIV test, the Restricted Double Index Variable (RDIV) test. 2122 // It doesn't make sense to talk about distance or direction in this case, 2123 // so there's no point in making special versions of the Strong SIV test or 2124 // the Weak-crossing SIV test. 2125 // 2126 // With minor algebra, this test can also be used for things like 2127 // [c1 + a1*i + a2*j][c2]. 2128 // 2129 // Return true if dependence disproved. 2130 bool DependenceAnalysis::testRDIV(const SCEV *Src, 2131 const SCEV *Dst, 2132 FullDependence &Result) const { 2133 // we have 3 possible situations here: 2134 // 1) [a*i + b] and [c*j + d] 2135 // 2) [a*i + c*j + b] and [d] 2136 // 3) [b] and [a*i + c*j + d] 2137 // We need to find what we've got and get organized 2138 2139 const SCEV *SrcConst, *DstConst; 2140 const SCEV *SrcCoeff, *DstCoeff; 2141 const Loop *SrcLoop, *DstLoop; 2142 2143 DEBUG(dbgs() << " src = " << *Src << "\n"); 2144 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2145 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); 2146 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); 2147 if (SrcAddRec && DstAddRec) { 2148 SrcConst = SrcAddRec->getStart(); 2149 SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2150 SrcLoop = SrcAddRec->getLoop(); 2151 DstConst = DstAddRec->getStart(); 2152 DstCoeff = DstAddRec->getStepRecurrence(*SE); 2153 DstLoop = DstAddRec->getLoop(); 2154 } 2155 else if (SrcAddRec) { 2156 if (const SCEVAddRecExpr *tmpAddRec = 2157 dyn_cast<SCEVAddRecExpr>(SrcAddRec->getStart())) { 2158 SrcConst = tmpAddRec->getStart(); 2159 SrcCoeff = tmpAddRec->getStepRecurrence(*SE); 2160 SrcLoop = tmpAddRec->getLoop(); 2161 DstConst = Dst; 2162 DstCoeff = SE->getNegativeSCEV(SrcAddRec->getStepRecurrence(*SE)); 2163 DstLoop = SrcAddRec->getLoop(); 2164 } 2165 else 2166 llvm_unreachable("RDIV reached by surprising SCEVs"); 2167 } 2168 else if (DstAddRec) { 2169 if (const SCEVAddRecExpr *tmpAddRec = 2170 dyn_cast<SCEVAddRecExpr>(DstAddRec->getStart())) { 2171 DstConst = tmpAddRec->getStart(); 2172 DstCoeff = tmpAddRec->getStepRecurrence(*SE); 2173 DstLoop = tmpAddRec->getLoop(); 2174 SrcConst = Src; 2175 SrcCoeff = SE->getNegativeSCEV(DstAddRec->getStepRecurrence(*SE)); 2176 SrcLoop = DstAddRec->getLoop(); 2177 } 2178 else 2179 llvm_unreachable("RDIV reached by surprising SCEVs"); 2180 } 2181 else 2182 llvm_unreachable("RDIV expected at least one AddRec"); 2183 return exactRDIVtest(SrcCoeff, DstCoeff, 2184 SrcConst, DstConst, 2185 SrcLoop, DstLoop, 2186 Result) || 2187 gcdMIVtest(Src, Dst, Result) || 2188 symbolicRDIVtest(SrcCoeff, DstCoeff, 2189 SrcConst, DstConst, 2190 SrcLoop, DstLoop); 2191 } 2192 2193 2194 // Tests the single-subscript MIV pair (Src and Dst) for dependence. 2195 // Return true if dependence disproved. 2196 // Can sometimes refine direction vectors. 2197 bool DependenceAnalysis::testMIV(const SCEV *Src, 2198 const SCEV *Dst, 2199 const SmallBitVector &Loops, 2200 FullDependence &Result) const { 2201 DEBUG(dbgs() << " src = " << *Src << "\n"); 2202 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2203 Result.Consistent = false; 2204 return gcdMIVtest(Src, Dst, Result) || 2205 banerjeeMIVtest(Src, Dst, Loops, Result); 2206 } 2207 2208 2209 // Given a product, e.g., 10*X*Y, returns the first constant operand, 2210 // in this case 10. If there is no constant part, returns NULL. 2211 static 2212 const SCEVConstant *getConstantPart(const SCEVMulExpr *Product) { 2213 for (unsigned Op = 0, Ops = Product->getNumOperands(); Op < Ops; Op++) { 2214 if (const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Product->getOperand(Op))) 2215 return Constant; 2216 } 2217 return nullptr; 2218 } 2219 2220 2221 //===----------------------------------------------------------------------===// 2222 // gcdMIVtest - 2223 // Tests an MIV subscript pair for dependence. 2224 // Returns true if any possible dependence is disproved. 2225 // Marks the result as inconsistent. 2226 // Can sometimes disprove the equal direction for 1 or more loops, 2227 // as discussed in Michael Wolfe's book, 2228 // High Performance Compilers for Parallel Computing, page 235. 2229 // 2230 // We spend some effort (code!) to handle cases like 2231 // [10*i + 5*N*j + 15*M + 6], where i and j are induction variables, 2232 // but M and N are just loop-invariant variables. 2233 // This should help us handle linearized subscripts; 2234 // also makes this test a useful backup to the various SIV tests. 2235 // 2236 // It occurs to me that the presence of loop-invariant variables 2237 // changes the nature of the test from "greatest common divisor" 2238 // to "a common divisor". 2239 bool DependenceAnalysis::gcdMIVtest(const SCEV *Src, 2240 const SCEV *Dst, 2241 FullDependence &Result) const { 2242 DEBUG(dbgs() << "starting gcd\n"); 2243 ++GCDapplications; 2244 unsigned BitWidth = SE->getTypeSizeInBits(Src->getType()); 2245 APInt RunningGCD = APInt::getNullValue(BitWidth); 2246 2247 // Examine Src coefficients. 2248 // Compute running GCD and record source constant. 2249 // Because we're looking for the constant at the end of the chain, 2250 // we can't quit the loop just because the GCD == 1. 2251 const SCEV *Coefficients = Src; 2252 while (const SCEVAddRecExpr *AddRec = 2253 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2254 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2255 const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Coeff); 2256 if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff)) 2257 // If the coefficient is the product of a constant and other stuff, 2258 // we can use the constant in the GCD computation. 2259 Constant = getConstantPart(Product); 2260 if (!Constant) 2261 return false; 2262 APInt ConstCoeff = Constant->getValue()->getValue(); 2263 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2264 Coefficients = AddRec->getStart(); 2265 } 2266 const SCEV *SrcConst = Coefficients; 2267 2268 // Examine Dst coefficients. 2269 // Compute running GCD and record destination constant. 2270 // Because we're looking for the constant at the end of the chain, 2271 // we can't quit the loop just because the GCD == 1. 2272 Coefficients = Dst; 2273 while (const SCEVAddRecExpr *AddRec = 2274 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2275 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2276 const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Coeff); 2277 if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff)) 2278 // If the coefficient is the product of a constant and other stuff, 2279 // we can use the constant in the GCD computation. 2280 Constant = getConstantPart(Product); 2281 if (!Constant) 2282 return false; 2283 APInt ConstCoeff = Constant->getValue()->getValue(); 2284 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2285 Coefficients = AddRec->getStart(); 2286 } 2287 const SCEV *DstConst = Coefficients; 2288 2289 APInt ExtraGCD = APInt::getNullValue(BitWidth); 2290 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 2291 DEBUG(dbgs() << " Delta = " << *Delta << "\n"); 2292 const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Delta); 2293 if (const SCEVAddExpr *Sum = dyn_cast<SCEVAddExpr>(Delta)) { 2294 // If Delta is a sum of products, we may be able to make further progress. 2295 for (unsigned Op = 0, Ops = Sum->getNumOperands(); Op < Ops; Op++) { 2296 const SCEV *Operand = Sum->getOperand(Op); 2297 if (isa<SCEVConstant>(Operand)) { 2298 assert(!Constant && "Surprised to find multiple constants"); 2299 Constant = cast<SCEVConstant>(Operand); 2300 } 2301 else if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Operand)) { 2302 // Search for constant operand to participate in GCD; 2303 // If none found; return false. 2304 const SCEVConstant *ConstOp = getConstantPart(Product); 2305 if (!ConstOp) 2306 return false; 2307 APInt ConstOpValue = ConstOp->getValue()->getValue(); 2308 ExtraGCD = APIntOps::GreatestCommonDivisor(ExtraGCD, 2309 ConstOpValue.abs()); 2310 } 2311 else 2312 return false; 2313 } 2314 } 2315 if (!Constant) 2316 return false; 2317 APInt ConstDelta = cast<SCEVConstant>(Constant)->getValue()->getValue(); 2318 DEBUG(dbgs() << " ConstDelta = " << ConstDelta << "\n"); 2319 if (ConstDelta == 0) 2320 return false; 2321 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ExtraGCD); 2322 DEBUG(dbgs() << " RunningGCD = " << RunningGCD << "\n"); 2323 APInt Remainder = ConstDelta.srem(RunningGCD); 2324 if (Remainder != 0) { 2325 ++GCDindependence; 2326 return true; 2327 } 2328 2329 // Try to disprove equal directions. 2330 // For example, given a subscript pair [3*i + 2*j] and [i' + 2*j' - 1], 2331 // the code above can't disprove the dependence because the GCD = 1. 2332 // So we consider what happen if i = i' and what happens if j = j'. 2333 // If i = i', we can simplify the subscript to [2*i + 2*j] and [2*j' - 1], 2334 // which is infeasible, so we can disallow the = direction for the i level. 2335 // Setting j = j' doesn't help matters, so we end up with a direction vector 2336 // of [<>, *] 2337 // 2338 // Given A[5*i + 10*j*M + 9*M*N] and A[15*i + 20*j*M - 21*N*M + 5], 2339 // we need to remember that the constant part is 5 and the RunningGCD should 2340 // be initialized to ExtraGCD = 30. 2341 DEBUG(dbgs() << " ExtraGCD = " << ExtraGCD << '\n'); 2342 2343 bool Improved = false; 2344 Coefficients = Src; 2345 while (const SCEVAddRecExpr *AddRec = 2346 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2347 Coefficients = AddRec->getStart(); 2348 const Loop *CurLoop = AddRec->getLoop(); 2349 RunningGCD = ExtraGCD; 2350 const SCEV *SrcCoeff = AddRec->getStepRecurrence(*SE); 2351 const SCEV *DstCoeff = SE->getMinusSCEV(SrcCoeff, SrcCoeff); 2352 const SCEV *Inner = Src; 2353 while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { 2354 AddRec = cast<SCEVAddRecExpr>(Inner); 2355 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2356 if (CurLoop == AddRec->getLoop()) 2357 ; // SrcCoeff == Coeff 2358 else { 2359 if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff)) 2360 // If the coefficient is the product of a constant and other stuff, 2361 // we can use the constant in the GCD computation. 2362 Constant = getConstantPart(Product); 2363 else 2364 Constant = cast<SCEVConstant>(Coeff); 2365 APInt ConstCoeff = Constant->getValue()->getValue(); 2366 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2367 } 2368 Inner = AddRec->getStart(); 2369 } 2370 Inner = Dst; 2371 while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { 2372 AddRec = cast<SCEVAddRecExpr>(Inner); 2373 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2374 if (CurLoop == AddRec->getLoop()) 2375 DstCoeff = Coeff; 2376 else { 2377 if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff)) 2378 // If the coefficient is the product of a constant and other stuff, 2379 // we can use the constant in the GCD computation. 2380 Constant = getConstantPart(Product); 2381 else 2382 Constant = cast<SCEVConstant>(Coeff); 2383 APInt ConstCoeff = Constant->getValue()->getValue(); 2384 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2385 } 2386 Inner = AddRec->getStart(); 2387 } 2388 Delta = SE->getMinusSCEV(SrcCoeff, DstCoeff); 2389 if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Delta)) 2390 // If the coefficient is the product of a constant and other stuff, 2391 // we can use the constant in the GCD computation. 2392 Constant = getConstantPart(Product); 2393 else if (isa<SCEVConstant>(Delta)) 2394 Constant = cast<SCEVConstant>(Delta); 2395 else { 2396 // The difference of the two coefficients might not be a product 2397 // or constant, in which case we give up on this direction. 2398 continue; 2399 } 2400 APInt ConstCoeff = Constant->getValue()->getValue(); 2401 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2402 DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n"); 2403 if (RunningGCD != 0) { 2404 Remainder = ConstDelta.srem(RunningGCD); 2405 DEBUG(dbgs() << "\tRemainder = " << Remainder << "\n"); 2406 if (Remainder != 0) { 2407 unsigned Level = mapSrcLoop(CurLoop); 2408 Result.DV[Level - 1].Direction &= unsigned(~Dependence::DVEntry::EQ); 2409 Improved = true; 2410 } 2411 } 2412 } 2413 if (Improved) 2414 ++GCDsuccesses; 2415 DEBUG(dbgs() << "all done\n"); 2416 return false; 2417 } 2418 2419 2420 //===----------------------------------------------------------------------===// 2421 // banerjeeMIVtest - 2422 // Use Banerjee's Inequalities to test an MIV subscript pair. 2423 // (Wolfe, in the race-car book, calls this the Extreme Value Test.) 2424 // Generally follows the discussion in Section 2.5.2 of 2425 // 2426 // Optimizing Supercompilers for Supercomputers 2427 // Michael Wolfe 2428 // 2429 // The inequalities given on page 25 are simplified in that loops are 2430 // normalized so that the lower bound is always 0 and the stride is always 1. 2431 // For example, Wolfe gives 2432 // 2433 // LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2434 // 2435 // where A_k is the coefficient of the kth index in the source subscript, 2436 // B_k is the coefficient of the kth index in the destination subscript, 2437 // U_k is the upper bound of the kth index, L_k is the lower bound of the Kth 2438 // index, and N_k is the stride of the kth index. Since all loops are normalized 2439 // by the SCEV package, N_k = 1 and L_k = 0, allowing us to simplify the 2440 // equation to 2441 // 2442 // LB^<_k = (A^-_k - B_k)^- (U_k - 0 - 1) + (A_k - B_k)0 - B_k 1 2443 // = (A^-_k - B_k)^- (U_k - 1) - B_k 2444 // 2445 // Similar simplifications are possible for the other equations. 2446 // 2447 // When we can't determine the number of iterations for a loop, 2448 // we use NULL as an indicator for the worst case, infinity. 2449 // When computing the upper bound, NULL denotes +inf; 2450 // for the lower bound, NULL denotes -inf. 2451 // 2452 // Return true if dependence disproved. 2453 bool DependenceAnalysis::banerjeeMIVtest(const SCEV *Src, 2454 const SCEV *Dst, 2455 const SmallBitVector &Loops, 2456 FullDependence &Result) const { 2457 DEBUG(dbgs() << "starting Banerjee\n"); 2458 ++BanerjeeApplications; 2459 DEBUG(dbgs() << " Src = " << *Src << '\n'); 2460 const SCEV *A0; 2461 CoefficientInfo *A = collectCoeffInfo(Src, true, A0); 2462 DEBUG(dbgs() << " Dst = " << *Dst << '\n'); 2463 const SCEV *B0; 2464 CoefficientInfo *B = collectCoeffInfo(Dst, false, B0); 2465 BoundInfo *Bound = new BoundInfo[MaxLevels + 1]; 2466 const SCEV *Delta = SE->getMinusSCEV(B0, A0); 2467 DEBUG(dbgs() << "\tDelta = " << *Delta << '\n'); 2468 2469 // Compute bounds for all the * directions. 2470 DEBUG(dbgs() << "\tBounds[*]\n"); 2471 for (unsigned K = 1; K <= MaxLevels; ++K) { 2472 Bound[K].Iterations = A[K].Iterations ? A[K].Iterations : B[K].Iterations; 2473 Bound[K].Direction = Dependence::DVEntry::ALL; 2474 Bound[K].DirSet = Dependence::DVEntry::NONE; 2475 findBoundsALL(A, B, Bound, K); 2476 #ifndef NDEBUG 2477 DEBUG(dbgs() << "\t " << K << '\t'); 2478 if (Bound[K].Lower[Dependence::DVEntry::ALL]) 2479 DEBUG(dbgs() << *Bound[K].Lower[Dependence::DVEntry::ALL] << '\t'); 2480 else 2481 DEBUG(dbgs() << "-inf\t"); 2482 if (Bound[K].Upper[Dependence::DVEntry::ALL]) 2483 DEBUG(dbgs() << *Bound[K].Upper[Dependence::DVEntry::ALL] << '\n'); 2484 else 2485 DEBUG(dbgs() << "+inf\n"); 2486 #endif 2487 } 2488 2489 // Test the *, *, *, ... case. 2490 bool Disproved = false; 2491 if (testBounds(Dependence::DVEntry::ALL, 0, Bound, Delta)) { 2492 // Explore the direction vector hierarchy. 2493 unsigned DepthExpanded = 0; 2494 unsigned NewDeps = exploreDirections(1, A, B, Bound, 2495 Loops, DepthExpanded, Delta); 2496 if (NewDeps > 0) { 2497 bool Improved = false; 2498 for (unsigned K = 1; K <= CommonLevels; ++K) { 2499 if (Loops[K]) { 2500 unsigned Old = Result.DV[K - 1].Direction; 2501 Result.DV[K - 1].Direction = Old & Bound[K].DirSet; 2502 Improved |= Old != Result.DV[K - 1].Direction; 2503 if (!Result.DV[K - 1].Direction) { 2504 Improved = false; 2505 Disproved = true; 2506 break; 2507 } 2508 } 2509 } 2510 if (Improved) 2511 ++BanerjeeSuccesses; 2512 } 2513 else { 2514 ++BanerjeeIndependence; 2515 Disproved = true; 2516 } 2517 } 2518 else { 2519 ++BanerjeeIndependence; 2520 Disproved = true; 2521 } 2522 delete [] Bound; 2523 delete [] A; 2524 delete [] B; 2525 return Disproved; 2526 } 2527 2528 2529 // Hierarchically expands the direction vector 2530 // search space, combining the directions of discovered dependences 2531 // in the DirSet field of Bound. Returns the number of distinct 2532 // dependences discovered. If the dependence is disproved, 2533 // it will return 0. 2534 unsigned DependenceAnalysis::exploreDirections(unsigned Level, 2535 CoefficientInfo *A, 2536 CoefficientInfo *B, 2537 BoundInfo *Bound, 2538 const SmallBitVector &Loops, 2539 unsigned &DepthExpanded, 2540 const SCEV *Delta) const { 2541 if (Level > CommonLevels) { 2542 // record result 2543 DEBUG(dbgs() << "\t["); 2544 for (unsigned K = 1; K <= CommonLevels; ++K) { 2545 if (Loops[K]) { 2546 Bound[K].DirSet |= Bound[K].Direction; 2547 #ifndef NDEBUG 2548 switch (Bound[K].Direction) { 2549 case Dependence::DVEntry::LT: 2550 DEBUG(dbgs() << " <"); 2551 break; 2552 case Dependence::DVEntry::EQ: 2553 DEBUG(dbgs() << " ="); 2554 break; 2555 case Dependence::DVEntry::GT: 2556 DEBUG(dbgs() << " >"); 2557 break; 2558 case Dependence::DVEntry::ALL: 2559 DEBUG(dbgs() << " *"); 2560 break; 2561 default: 2562 llvm_unreachable("unexpected Bound[K].Direction"); 2563 } 2564 #endif 2565 } 2566 } 2567 DEBUG(dbgs() << " ]\n"); 2568 return 1; 2569 } 2570 if (Loops[Level]) { 2571 if (Level > DepthExpanded) { 2572 DepthExpanded = Level; 2573 // compute bounds for <, =, > at current level 2574 findBoundsLT(A, B, Bound, Level); 2575 findBoundsGT(A, B, Bound, Level); 2576 findBoundsEQ(A, B, Bound, Level); 2577 #ifndef NDEBUG 2578 DEBUG(dbgs() << "\tBound for level = " << Level << '\n'); 2579 DEBUG(dbgs() << "\t <\t"); 2580 if (Bound[Level].Lower[Dependence::DVEntry::LT]) 2581 DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::LT] << '\t'); 2582 else 2583 DEBUG(dbgs() << "-inf\t"); 2584 if (Bound[Level].Upper[Dependence::DVEntry::LT]) 2585 DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::LT] << '\n'); 2586 else 2587 DEBUG(dbgs() << "+inf\n"); 2588 DEBUG(dbgs() << "\t =\t"); 2589 if (Bound[Level].Lower[Dependence::DVEntry::EQ]) 2590 DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::EQ] << '\t'); 2591 else 2592 DEBUG(dbgs() << "-inf\t"); 2593 if (Bound[Level].Upper[Dependence::DVEntry::EQ]) 2594 DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::EQ] << '\n'); 2595 else 2596 DEBUG(dbgs() << "+inf\n"); 2597 DEBUG(dbgs() << "\t >\t"); 2598 if (Bound[Level].Lower[Dependence::DVEntry::GT]) 2599 DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::GT] << '\t'); 2600 else 2601 DEBUG(dbgs() << "-inf\t"); 2602 if (Bound[Level].Upper[Dependence::DVEntry::GT]) 2603 DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::GT] << '\n'); 2604 else 2605 DEBUG(dbgs() << "+inf\n"); 2606 #endif 2607 } 2608 2609 unsigned NewDeps = 0; 2610 2611 // test bounds for <, *, *, ... 2612 if (testBounds(Dependence::DVEntry::LT, Level, Bound, Delta)) 2613 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2614 Loops, DepthExpanded, Delta); 2615 2616 // Test bounds for =, *, *, ... 2617 if (testBounds(Dependence::DVEntry::EQ, Level, Bound, Delta)) 2618 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2619 Loops, DepthExpanded, Delta); 2620 2621 // test bounds for >, *, *, ... 2622 if (testBounds(Dependence::DVEntry::GT, Level, Bound, Delta)) 2623 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2624 Loops, DepthExpanded, Delta); 2625 2626 Bound[Level].Direction = Dependence::DVEntry::ALL; 2627 return NewDeps; 2628 } 2629 else 2630 return exploreDirections(Level + 1, A, B, Bound, Loops, DepthExpanded, Delta); 2631 } 2632 2633 2634 // Returns true iff the current bounds are plausible. 2635 bool DependenceAnalysis::testBounds(unsigned char DirKind, 2636 unsigned Level, 2637 BoundInfo *Bound, 2638 const SCEV *Delta) const { 2639 Bound[Level].Direction = DirKind; 2640 if (const SCEV *LowerBound = getLowerBound(Bound)) 2641 if (isKnownPredicate(CmpInst::ICMP_SGT, LowerBound, Delta)) 2642 return false; 2643 if (const SCEV *UpperBound = getUpperBound(Bound)) 2644 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, UpperBound)) 2645 return false; 2646 return true; 2647 } 2648 2649 2650 // Computes the upper and lower bounds for level K 2651 // using the * direction. Records them in Bound. 2652 // Wolfe gives the equations 2653 // 2654 // LB^*_k = (A^-_k - B^+_k)(U_k - L_k) + (A_k - B_k)L_k 2655 // UB^*_k = (A^+_k - B^-_k)(U_k - L_k) + (A_k - B_k)L_k 2656 // 2657 // Since we normalize loops, we can simplify these equations to 2658 // 2659 // LB^*_k = (A^-_k - B^+_k)U_k 2660 // UB^*_k = (A^+_k - B^-_k)U_k 2661 // 2662 // We must be careful to handle the case where the upper bound is unknown. 2663 // Note that the lower bound is always <= 0 2664 // and the upper bound is always >= 0. 2665 void DependenceAnalysis::findBoundsALL(CoefficientInfo *A, 2666 CoefficientInfo *B, 2667 BoundInfo *Bound, 2668 unsigned K) const { 2669 Bound[K].Lower[Dependence::DVEntry::ALL] = nullptr; // Default value = -infinity. 2670 Bound[K].Upper[Dependence::DVEntry::ALL] = nullptr; // Default value = +infinity. 2671 if (Bound[K].Iterations) { 2672 Bound[K].Lower[Dependence::DVEntry::ALL] = 2673 SE->getMulExpr(SE->getMinusSCEV(A[K].NegPart, B[K].PosPart), 2674 Bound[K].Iterations); 2675 Bound[K].Upper[Dependence::DVEntry::ALL] = 2676 SE->getMulExpr(SE->getMinusSCEV(A[K].PosPart, B[K].NegPart), 2677 Bound[K].Iterations); 2678 } 2679 else { 2680 // If the difference is 0, we won't need to know the number of iterations. 2681 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].NegPart, B[K].PosPart)) 2682 Bound[K].Lower[Dependence::DVEntry::ALL] = 2683 SE->getConstant(A[K].Coeff->getType(), 0); 2684 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].PosPart, B[K].NegPart)) 2685 Bound[K].Upper[Dependence::DVEntry::ALL] = 2686 SE->getConstant(A[K].Coeff->getType(), 0); 2687 } 2688 } 2689 2690 2691 // Computes the upper and lower bounds for level K 2692 // using the = direction. Records them in Bound. 2693 // Wolfe gives the equations 2694 // 2695 // LB^=_k = (A_k - B_k)^- (U_k - L_k) + (A_k - B_k)L_k 2696 // UB^=_k = (A_k - B_k)^+ (U_k - L_k) + (A_k - B_k)L_k 2697 // 2698 // Since we normalize loops, we can simplify these equations to 2699 // 2700 // LB^=_k = (A_k - B_k)^- U_k 2701 // UB^=_k = (A_k - B_k)^+ U_k 2702 // 2703 // We must be careful to handle the case where the upper bound is unknown. 2704 // Note that the lower bound is always <= 0 2705 // and the upper bound is always >= 0. 2706 void DependenceAnalysis::findBoundsEQ(CoefficientInfo *A, 2707 CoefficientInfo *B, 2708 BoundInfo *Bound, 2709 unsigned K) const { 2710 Bound[K].Lower[Dependence::DVEntry::EQ] = nullptr; // Default value = -infinity. 2711 Bound[K].Upper[Dependence::DVEntry::EQ] = nullptr; // Default value = +infinity. 2712 if (Bound[K].Iterations) { 2713 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); 2714 const SCEV *NegativePart = getNegativePart(Delta); 2715 Bound[K].Lower[Dependence::DVEntry::EQ] = 2716 SE->getMulExpr(NegativePart, Bound[K].Iterations); 2717 const SCEV *PositivePart = getPositivePart(Delta); 2718 Bound[K].Upper[Dependence::DVEntry::EQ] = 2719 SE->getMulExpr(PositivePart, Bound[K].Iterations); 2720 } 2721 else { 2722 // If the positive/negative part of the difference is 0, 2723 // we won't need to know the number of iterations. 2724 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); 2725 const SCEV *NegativePart = getNegativePart(Delta); 2726 if (NegativePart->isZero()) 2727 Bound[K].Lower[Dependence::DVEntry::EQ] = NegativePart; // Zero 2728 const SCEV *PositivePart = getPositivePart(Delta); 2729 if (PositivePart->isZero()) 2730 Bound[K].Upper[Dependence::DVEntry::EQ] = PositivePart; // Zero 2731 } 2732 } 2733 2734 2735 // Computes the upper and lower bounds for level K 2736 // using the < direction. Records them in Bound. 2737 // Wolfe gives the equations 2738 // 2739 // LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2740 // UB^<_k = (A^+_k - B_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2741 // 2742 // Since we normalize loops, we can simplify these equations to 2743 // 2744 // LB^<_k = (A^-_k - B_k)^- (U_k - 1) - B_k 2745 // UB^<_k = (A^+_k - B_k)^+ (U_k - 1) - B_k 2746 // 2747 // We must be careful to handle the case where the upper bound is unknown. 2748 void DependenceAnalysis::findBoundsLT(CoefficientInfo *A, 2749 CoefficientInfo *B, 2750 BoundInfo *Bound, 2751 unsigned K) const { 2752 Bound[K].Lower[Dependence::DVEntry::LT] = nullptr; // Default value = -infinity. 2753 Bound[K].Upper[Dependence::DVEntry::LT] = nullptr; // Default value = +infinity. 2754 if (Bound[K].Iterations) { 2755 const SCEV *Iter_1 = 2756 SE->getMinusSCEV(Bound[K].Iterations, 2757 SE->getConstant(Bound[K].Iterations->getType(), 1)); 2758 const SCEV *NegPart = 2759 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); 2760 Bound[K].Lower[Dependence::DVEntry::LT] = 2761 SE->getMinusSCEV(SE->getMulExpr(NegPart, Iter_1), B[K].Coeff); 2762 const SCEV *PosPart = 2763 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); 2764 Bound[K].Upper[Dependence::DVEntry::LT] = 2765 SE->getMinusSCEV(SE->getMulExpr(PosPart, Iter_1), B[K].Coeff); 2766 } 2767 else { 2768 // If the positive/negative part of the difference is 0, 2769 // we won't need to know the number of iterations. 2770 const SCEV *NegPart = 2771 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); 2772 if (NegPart->isZero()) 2773 Bound[K].Lower[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); 2774 const SCEV *PosPart = 2775 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); 2776 if (PosPart->isZero()) 2777 Bound[K].Upper[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); 2778 } 2779 } 2780 2781 2782 // Computes the upper and lower bounds for level K 2783 // using the > direction. Records them in Bound. 2784 // Wolfe gives the equations 2785 // 2786 // LB^>_k = (A_k - B^+_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k 2787 // UB^>_k = (A_k - B^-_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k 2788 // 2789 // Since we normalize loops, we can simplify these equations to 2790 // 2791 // LB^>_k = (A_k - B^+_k)^- (U_k - 1) + A_k 2792 // UB^>_k = (A_k - B^-_k)^+ (U_k - 1) + A_k 2793 // 2794 // We must be careful to handle the case where the upper bound is unknown. 2795 void DependenceAnalysis::findBoundsGT(CoefficientInfo *A, 2796 CoefficientInfo *B, 2797 BoundInfo *Bound, 2798 unsigned K) const { 2799 Bound[K].Lower[Dependence::DVEntry::GT] = nullptr; // Default value = -infinity. 2800 Bound[K].Upper[Dependence::DVEntry::GT] = nullptr; // Default value = +infinity. 2801 if (Bound[K].Iterations) { 2802 const SCEV *Iter_1 = 2803 SE->getMinusSCEV(Bound[K].Iterations, 2804 SE->getConstant(Bound[K].Iterations->getType(), 1)); 2805 const SCEV *NegPart = 2806 getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); 2807 Bound[K].Lower[Dependence::DVEntry::GT] = 2808 SE->getAddExpr(SE->getMulExpr(NegPart, Iter_1), A[K].Coeff); 2809 const SCEV *PosPart = 2810 getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); 2811 Bound[K].Upper[Dependence::DVEntry::GT] = 2812 SE->getAddExpr(SE->getMulExpr(PosPart, Iter_1), A[K].Coeff); 2813 } 2814 else { 2815 // If the positive/negative part of the difference is 0, 2816 // we won't need to know the number of iterations. 2817 const SCEV *NegPart = getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); 2818 if (NegPart->isZero()) 2819 Bound[K].Lower[Dependence::DVEntry::GT] = A[K].Coeff; 2820 const SCEV *PosPart = getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); 2821 if (PosPart->isZero()) 2822 Bound[K].Upper[Dependence::DVEntry::GT] = A[K].Coeff; 2823 } 2824 } 2825 2826 2827 // X^+ = max(X, 0) 2828 const SCEV *DependenceAnalysis::getPositivePart(const SCEV *X) const { 2829 return SE->getSMaxExpr(X, SE->getConstant(X->getType(), 0)); 2830 } 2831 2832 2833 // X^- = min(X, 0) 2834 const SCEV *DependenceAnalysis::getNegativePart(const SCEV *X) const { 2835 return SE->getSMinExpr(X, SE->getConstant(X->getType(), 0)); 2836 } 2837 2838 2839 // Walks through the subscript, 2840 // collecting each coefficient, the associated loop bounds, 2841 // and recording its positive and negative parts for later use. 2842 DependenceAnalysis::CoefficientInfo * 2843 DependenceAnalysis::collectCoeffInfo(const SCEV *Subscript, 2844 bool SrcFlag, 2845 const SCEV *&Constant) const { 2846 const SCEV *Zero = SE->getConstant(Subscript->getType(), 0); 2847 CoefficientInfo *CI = new CoefficientInfo[MaxLevels + 1]; 2848 for (unsigned K = 1; K <= MaxLevels; ++K) { 2849 CI[K].Coeff = Zero; 2850 CI[K].PosPart = Zero; 2851 CI[K].NegPart = Zero; 2852 CI[K].Iterations = nullptr; 2853 } 2854 while (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Subscript)) { 2855 const Loop *L = AddRec->getLoop(); 2856 unsigned K = SrcFlag ? mapSrcLoop(L) : mapDstLoop(L); 2857 CI[K].Coeff = AddRec->getStepRecurrence(*SE); 2858 CI[K].PosPart = getPositivePart(CI[K].Coeff); 2859 CI[K].NegPart = getNegativePart(CI[K].Coeff); 2860 CI[K].Iterations = collectUpperBound(L, Subscript->getType()); 2861 Subscript = AddRec->getStart(); 2862 } 2863 Constant = Subscript; 2864 #ifndef NDEBUG 2865 DEBUG(dbgs() << "\tCoefficient Info\n"); 2866 for (unsigned K = 1; K <= MaxLevels; ++K) { 2867 DEBUG(dbgs() << "\t " << K << "\t" << *CI[K].Coeff); 2868 DEBUG(dbgs() << "\tPos Part = "); 2869 DEBUG(dbgs() << *CI[K].PosPart); 2870 DEBUG(dbgs() << "\tNeg Part = "); 2871 DEBUG(dbgs() << *CI[K].NegPart); 2872 DEBUG(dbgs() << "\tUpper Bound = "); 2873 if (CI[K].Iterations) 2874 DEBUG(dbgs() << *CI[K].Iterations); 2875 else 2876 DEBUG(dbgs() << "+inf"); 2877 DEBUG(dbgs() << '\n'); 2878 } 2879 DEBUG(dbgs() << "\t Constant = " << *Subscript << '\n'); 2880 #endif 2881 return CI; 2882 } 2883 2884 2885 // Looks through all the bounds info and 2886 // computes the lower bound given the current direction settings 2887 // at each level. If the lower bound for any level is -inf, 2888 // the result is -inf. 2889 const SCEV *DependenceAnalysis::getLowerBound(BoundInfo *Bound) const { 2890 const SCEV *Sum = Bound[1].Lower[Bound[1].Direction]; 2891 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { 2892 if (Bound[K].Lower[Bound[K].Direction]) 2893 Sum = SE->getAddExpr(Sum, Bound[K].Lower[Bound[K].Direction]); 2894 else 2895 Sum = nullptr; 2896 } 2897 return Sum; 2898 } 2899 2900 2901 // Looks through all the bounds info and 2902 // computes the upper bound given the current direction settings 2903 // at each level. If the upper bound at any level is +inf, 2904 // the result is +inf. 2905 const SCEV *DependenceAnalysis::getUpperBound(BoundInfo *Bound) const { 2906 const SCEV *Sum = Bound[1].Upper[Bound[1].Direction]; 2907 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { 2908 if (Bound[K].Upper[Bound[K].Direction]) 2909 Sum = SE->getAddExpr(Sum, Bound[K].Upper[Bound[K].Direction]); 2910 else 2911 Sum = nullptr; 2912 } 2913 return Sum; 2914 } 2915 2916 2917 //===----------------------------------------------------------------------===// 2918 // Constraint manipulation for Delta test. 2919 2920 // Given a linear SCEV, 2921 // return the coefficient (the step) 2922 // corresponding to the specified loop. 2923 // If there isn't one, return 0. 2924 // For example, given a*i + b*j + c*k, zeroing the coefficient 2925 // corresponding to the j loop would yield b. 2926 const SCEV *DependenceAnalysis::findCoefficient(const SCEV *Expr, 2927 const Loop *TargetLoop) const { 2928 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 2929 if (!AddRec) 2930 return SE->getConstant(Expr->getType(), 0); 2931 if (AddRec->getLoop() == TargetLoop) 2932 return AddRec->getStepRecurrence(*SE); 2933 return findCoefficient(AddRec->getStart(), TargetLoop); 2934 } 2935 2936 2937 // Given a linear SCEV, 2938 // return the SCEV given by zeroing out the coefficient 2939 // corresponding to the specified loop. 2940 // For example, given a*i + b*j + c*k, zeroing the coefficient 2941 // corresponding to the j loop would yield a*i + c*k. 2942 const SCEV *DependenceAnalysis::zeroCoefficient(const SCEV *Expr, 2943 const Loop *TargetLoop) const { 2944 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 2945 if (!AddRec) 2946 return Expr; // ignore 2947 if (AddRec->getLoop() == TargetLoop) 2948 return AddRec->getStart(); 2949 return SE->getAddRecExpr(zeroCoefficient(AddRec->getStart(), TargetLoop), 2950 AddRec->getStepRecurrence(*SE), 2951 AddRec->getLoop(), 2952 AddRec->getNoWrapFlags()); 2953 } 2954 2955 2956 // Given a linear SCEV Expr, 2957 // return the SCEV given by adding some Value to the 2958 // coefficient corresponding to the specified TargetLoop. 2959 // For example, given a*i + b*j + c*k, adding 1 to the coefficient 2960 // corresponding to the j loop would yield a*i + (b+1)*j + c*k. 2961 const SCEV *DependenceAnalysis::addToCoefficient(const SCEV *Expr, 2962 const Loop *TargetLoop, 2963 const SCEV *Value) const { 2964 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 2965 if (!AddRec) // create a new addRec 2966 return SE->getAddRecExpr(Expr, 2967 Value, 2968 TargetLoop, 2969 SCEV::FlagAnyWrap); // Worst case, with no info. 2970 if (AddRec->getLoop() == TargetLoop) { 2971 const SCEV *Sum = SE->getAddExpr(AddRec->getStepRecurrence(*SE), Value); 2972 if (Sum->isZero()) 2973 return AddRec->getStart(); 2974 return SE->getAddRecExpr(AddRec->getStart(), 2975 Sum, 2976 AddRec->getLoop(), 2977 AddRec->getNoWrapFlags()); 2978 } 2979 if (SE->isLoopInvariant(AddRec, TargetLoop)) 2980 return SE->getAddRecExpr(AddRec, Value, TargetLoop, SCEV::FlagAnyWrap); 2981 return SE->getAddRecExpr( 2982 addToCoefficient(AddRec->getStart(), TargetLoop, Value), 2983 AddRec->getStepRecurrence(*SE), AddRec->getLoop(), 2984 AddRec->getNoWrapFlags()); 2985 } 2986 2987 2988 // Review the constraints, looking for opportunities 2989 // to simplify a subscript pair (Src and Dst). 2990 // Return true if some simplification occurs. 2991 // If the simplification isn't exact (that is, if it is conservative 2992 // in terms of dependence), set consistent to false. 2993 // Corresponds to Figure 5 from the paper 2994 // 2995 // Practical Dependence Testing 2996 // Goff, Kennedy, Tseng 2997 // PLDI 1991 2998 bool DependenceAnalysis::propagate(const SCEV *&Src, 2999 const SCEV *&Dst, 3000 SmallBitVector &Loops, 3001 SmallVectorImpl<Constraint> &Constraints, 3002 bool &Consistent) { 3003 bool Result = false; 3004 for (int LI = Loops.find_first(); LI >= 0; LI = Loops.find_next(LI)) { 3005 DEBUG(dbgs() << "\t Constraint[" << LI << "] is"); 3006 DEBUG(Constraints[LI].dump(dbgs())); 3007 if (Constraints[LI].isDistance()) 3008 Result |= propagateDistance(Src, Dst, Constraints[LI], Consistent); 3009 else if (Constraints[LI].isLine()) 3010 Result |= propagateLine(Src, Dst, Constraints[LI], Consistent); 3011 else if (Constraints[LI].isPoint()) 3012 Result |= propagatePoint(Src, Dst, Constraints[LI]); 3013 } 3014 return Result; 3015 } 3016 3017 3018 // Attempt to propagate a distance 3019 // constraint into a subscript pair (Src and Dst). 3020 // Return true if some simplification occurs. 3021 // If the simplification isn't exact (that is, if it is conservative 3022 // in terms of dependence), set consistent to false. 3023 bool DependenceAnalysis::propagateDistance(const SCEV *&Src, 3024 const SCEV *&Dst, 3025 Constraint &CurConstraint, 3026 bool &Consistent) { 3027 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3028 DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); 3029 const SCEV *A_K = findCoefficient(Src, CurLoop); 3030 if (A_K->isZero()) 3031 return false; 3032 const SCEV *DA_K = SE->getMulExpr(A_K, CurConstraint.getD()); 3033 Src = SE->getMinusSCEV(Src, DA_K); 3034 Src = zeroCoefficient(Src, CurLoop); 3035 DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); 3036 DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); 3037 Dst = addToCoefficient(Dst, CurLoop, SE->getNegativeSCEV(A_K)); 3038 DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); 3039 if (!findCoefficient(Dst, CurLoop)->isZero()) 3040 Consistent = false; 3041 return true; 3042 } 3043 3044 3045 // Attempt to propagate a line 3046 // constraint into a subscript pair (Src and Dst). 3047 // Return true if some simplification occurs. 3048 // If the simplification isn't exact (that is, if it is conservative 3049 // in terms of dependence), set consistent to false. 3050 bool DependenceAnalysis::propagateLine(const SCEV *&Src, 3051 const SCEV *&Dst, 3052 Constraint &CurConstraint, 3053 bool &Consistent) { 3054 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3055 const SCEV *A = CurConstraint.getA(); 3056 const SCEV *B = CurConstraint.getB(); 3057 const SCEV *C = CurConstraint.getC(); 3058 DEBUG(dbgs() << "\t\tA = " << *A << ", B = " << *B << ", C = " << *C << "\n"); 3059 DEBUG(dbgs() << "\t\tSrc = " << *Src << "\n"); 3060 DEBUG(dbgs() << "\t\tDst = " << *Dst << "\n"); 3061 if (A->isZero()) { 3062 const SCEVConstant *Bconst = dyn_cast<SCEVConstant>(B); 3063 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3064 if (!Bconst || !Cconst) return false; 3065 APInt Beta = Bconst->getValue()->getValue(); 3066 APInt Charlie = Cconst->getValue()->getValue(); 3067 APInt CdivB = Charlie.sdiv(Beta); 3068 assert(Charlie.srem(Beta) == 0 && "C should be evenly divisible by B"); 3069 const SCEV *AP_K = findCoefficient(Dst, CurLoop); 3070 // Src = SE->getAddExpr(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); 3071 Src = SE->getMinusSCEV(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); 3072 Dst = zeroCoefficient(Dst, CurLoop); 3073 if (!findCoefficient(Src, CurLoop)->isZero()) 3074 Consistent = false; 3075 } 3076 else if (B->isZero()) { 3077 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); 3078 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3079 if (!Aconst || !Cconst) return false; 3080 APInt Alpha = Aconst->getValue()->getValue(); 3081 APInt Charlie = Cconst->getValue()->getValue(); 3082 APInt CdivA = Charlie.sdiv(Alpha); 3083 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); 3084 const SCEV *A_K = findCoefficient(Src, CurLoop); 3085 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); 3086 Src = zeroCoefficient(Src, CurLoop); 3087 if (!findCoefficient(Dst, CurLoop)->isZero()) 3088 Consistent = false; 3089 } 3090 else if (isKnownPredicate(CmpInst::ICMP_EQ, A, B)) { 3091 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); 3092 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3093 if (!Aconst || !Cconst) return false; 3094 APInt Alpha = Aconst->getValue()->getValue(); 3095 APInt Charlie = Cconst->getValue()->getValue(); 3096 APInt CdivA = Charlie.sdiv(Alpha); 3097 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); 3098 const SCEV *A_K = findCoefficient(Src, CurLoop); 3099 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); 3100 Src = zeroCoefficient(Src, CurLoop); 3101 Dst = addToCoefficient(Dst, CurLoop, A_K); 3102 if (!findCoefficient(Dst, CurLoop)->isZero()) 3103 Consistent = false; 3104 } 3105 else { 3106 // paper is incorrect here, or perhaps just misleading 3107 const SCEV *A_K = findCoefficient(Src, CurLoop); 3108 Src = SE->getMulExpr(Src, A); 3109 Dst = SE->getMulExpr(Dst, A); 3110 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, C)); 3111 Src = zeroCoefficient(Src, CurLoop); 3112 Dst = addToCoefficient(Dst, CurLoop, SE->getMulExpr(A_K, B)); 3113 if (!findCoefficient(Dst, CurLoop)->isZero()) 3114 Consistent = false; 3115 } 3116 DEBUG(dbgs() << "\t\tnew Src = " << *Src << "\n"); 3117 DEBUG(dbgs() << "\t\tnew Dst = " << *Dst << "\n"); 3118 return true; 3119 } 3120 3121 3122 // Attempt to propagate a point 3123 // constraint into a subscript pair (Src and Dst). 3124 // Return true if some simplification occurs. 3125 bool DependenceAnalysis::propagatePoint(const SCEV *&Src, 3126 const SCEV *&Dst, 3127 Constraint &CurConstraint) { 3128 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3129 const SCEV *A_K = findCoefficient(Src, CurLoop); 3130 const SCEV *AP_K = findCoefficient(Dst, CurLoop); 3131 const SCEV *XA_K = SE->getMulExpr(A_K, CurConstraint.getX()); 3132 const SCEV *YAP_K = SE->getMulExpr(AP_K, CurConstraint.getY()); 3133 DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); 3134 Src = SE->getAddExpr(Src, SE->getMinusSCEV(XA_K, YAP_K)); 3135 Src = zeroCoefficient(Src, CurLoop); 3136 DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); 3137 DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); 3138 Dst = zeroCoefficient(Dst, CurLoop); 3139 DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); 3140 return true; 3141 } 3142 3143 3144 // Update direction vector entry based on the current constraint. 3145 void DependenceAnalysis::updateDirection(Dependence::DVEntry &Level, 3146 const Constraint &CurConstraint 3147 ) const { 3148 DEBUG(dbgs() << "\tUpdate direction, constraint ="); 3149 DEBUG(CurConstraint.dump(dbgs())); 3150 if (CurConstraint.isAny()) 3151 ; // use defaults 3152 else if (CurConstraint.isDistance()) { 3153 // this one is consistent, the others aren't 3154 Level.Scalar = false; 3155 Level.Distance = CurConstraint.getD(); 3156 unsigned NewDirection = Dependence::DVEntry::NONE; 3157 if (!SE->isKnownNonZero(Level.Distance)) // if may be zero 3158 NewDirection = Dependence::DVEntry::EQ; 3159 if (!SE->isKnownNonPositive(Level.Distance)) // if may be positive 3160 NewDirection |= Dependence::DVEntry::LT; 3161 if (!SE->isKnownNonNegative(Level.Distance)) // if may be negative 3162 NewDirection |= Dependence::DVEntry::GT; 3163 Level.Direction &= NewDirection; 3164 } 3165 else if (CurConstraint.isLine()) { 3166 Level.Scalar = false; 3167 Level.Distance = nullptr; 3168 // direction should be accurate 3169 } 3170 else if (CurConstraint.isPoint()) { 3171 Level.Scalar = false; 3172 Level.Distance = nullptr; 3173 unsigned NewDirection = Dependence::DVEntry::NONE; 3174 if (!isKnownPredicate(CmpInst::ICMP_NE, 3175 CurConstraint.getY(), 3176 CurConstraint.getX())) 3177 // if X may be = Y 3178 NewDirection |= Dependence::DVEntry::EQ; 3179 if (!isKnownPredicate(CmpInst::ICMP_SLE, 3180 CurConstraint.getY(), 3181 CurConstraint.getX())) 3182 // if Y may be > X 3183 NewDirection |= Dependence::DVEntry::LT; 3184 if (!isKnownPredicate(CmpInst::ICMP_SGE, 3185 CurConstraint.getY(), 3186 CurConstraint.getX())) 3187 // if Y may be < X 3188 NewDirection |= Dependence::DVEntry::GT; 3189 Level.Direction &= NewDirection; 3190 } 3191 else 3192 llvm_unreachable("constraint has unexpected kind"); 3193 } 3194 3195 /// Check if we can delinearize the subscripts. If the SCEVs representing the 3196 /// source and destination array references are recurrences on a nested loop, 3197 /// this function flattens the nested recurrences into separate recurrences 3198 /// for each loop level. 3199 bool DependenceAnalysis::tryDelinearize(const SCEV *SrcSCEV, 3200 const SCEV *DstSCEV, 3201 SmallVectorImpl<Subscript> &Pair, 3202 const SCEV *ElementSize) { 3203 const SCEVUnknown *SrcBase = 3204 dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcSCEV)); 3205 const SCEVUnknown *DstBase = 3206 dyn_cast<SCEVUnknown>(SE->getPointerBase(DstSCEV)); 3207 3208 if (!SrcBase || !DstBase || SrcBase != DstBase) 3209 return false; 3210 3211 SrcSCEV = SE->getMinusSCEV(SrcSCEV, SrcBase); 3212 DstSCEV = SE->getMinusSCEV(DstSCEV, DstBase); 3213 3214 const SCEVAddRecExpr *SrcAR = dyn_cast<SCEVAddRecExpr>(SrcSCEV); 3215 const SCEVAddRecExpr *DstAR = dyn_cast<SCEVAddRecExpr>(DstSCEV); 3216 if (!SrcAR || !DstAR || !SrcAR->isAffine() || !DstAR->isAffine()) 3217 return false; 3218 3219 // First step: collect parametric terms in both array references. 3220 SmallVector<const SCEV *, 4> Terms; 3221 SrcAR->collectParametricTerms(*SE, Terms); 3222 DstAR->collectParametricTerms(*SE, Terms); 3223 3224 // Second step: find subscript sizes. 3225 SmallVector<const SCEV *, 4> Sizes; 3226 SE->findArrayDimensions(Terms, Sizes, ElementSize); 3227 3228 // Third step: compute the access functions for each subscript. 3229 SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts; 3230 SrcAR->computeAccessFunctions(*SE, SrcSubscripts, Sizes); 3231 DstAR->computeAccessFunctions(*SE, DstSubscripts, Sizes); 3232 3233 // Fail when there is only a subscript: that's a linearized access function. 3234 if (SrcSubscripts.size() < 2 || DstSubscripts.size() < 2 || 3235 SrcSubscripts.size() != DstSubscripts.size()) 3236 return false; 3237 3238 int size = SrcSubscripts.size(); 3239 3240 DEBUG({ 3241 dbgs() << "\nSrcSubscripts: "; 3242 for (int i = 0; i < size; i++) 3243 dbgs() << *SrcSubscripts[i]; 3244 dbgs() << "\nDstSubscripts: "; 3245 for (int i = 0; i < size; i++) 3246 dbgs() << *DstSubscripts[i]; 3247 }); 3248 3249 // The delinearization transforms a single-subscript MIV dependence test into 3250 // a multi-subscript SIV dependence test that is easier to compute. So we 3251 // resize Pair to contain as many pairs of subscripts as the delinearization 3252 // has found, and then initialize the pairs following the delinearization. 3253 Pair.resize(size); 3254 for (int i = 0; i < size; ++i) { 3255 Pair[i].Src = SrcSubscripts[i]; 3256 Pair[i].Dst = DstSubscripts[i]; 3257 unifySubscriptType(&Pair[i]); 3258 3259 // FIXME: we should record the bounds SrcSizes[i] and DstSizes[i] that the 3260 // delinearization has found, and add these constraints to the dependence 3261 // check to avoid memory accesses overflow from one dimension into another. 3262 // This is related to the problem of determining the existence of data 3263 // dependences in array accesses using a different number of subscripts: in 3264 // C one can access an array A[100][100]; as A[0][9999], *A[9999], etc. 3265 } 3266 3267 return true; 3268 } 3269 3270 //===----------------------------------------------------------------------===// 3271 3272 #ifndef NDEBUG 3273 // For debugging purposes, dump a small bit vector to dbgs(). 3274 static void dumpSmallBitVector(SmallBitVector &BV) { 3275 dbgs() << "{"; 3276 for (int VI = BV.find_first(); VI >= 0; VI = BV.find_next(VI)) { 3277 dbgs() << VI; 3278 if (BV.find_next(VI) >= 0) 3279 dbgs() << ' '; 3280 } 3281 dbgs() << "}\n"; 3282 } 3283 #endif 3284 3285 3286 // depends - 3287 // Returns NULL if there is no dependence. 3288 // Otherwise, return a Dependence with as many details as possible. 3289 // Corresponds to Section 3.1 in the paper 3290 // 3291 // Practical Dependence Testing 3292 // Goff, Kennedy, Tseng 3293 // PLDI 1991 3294 // 3295 // Care is required to keep the routine below, getSplitIteration(), 3296 // up to date with respect to this routine. 3297 std::unique_ptr<Dependence> 3298 DependenceAnalysis::depends(Instruction *Src, Instruction *Dst, 3299 bool PossiblyLoopIndependent) { 3300 if (Src == Dst) 3301 PossiblyLoopIndependent = false; 3302 3303 if ((!Src->mayReadFromMemory() && !Src->mayWriteToMemory()) || 3304 (!Dst->mayReadFromMemory() && !Dst->mayWriteToMemory())) 3305 // if both instructions don't reference memory, there's no dependence 3306 return nullptr; 3307 3308 if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) { 3309 // can only analyze simple loads and stores, i.e., no calls, invokes, etc. 3310 DEBUG(dbgs() << "can only handle simple loads and stores\n"); 3311 return make_unique<Dependence>(Src, Dst); 3312 } 3313 3314 Value *SrcPtr = getPointerOperand(Src); 3315 Value *DstPtr = getPointerOperand(Dst); 3316 3317 switch (underlyingObjectsAlias(AA, DstPtr, SrcPtr)) { 3318 case AliasAnalysis::MayAlias: 3319 case AliasAnalysis::PartialAlias: 3320 // cannot analyse objects if we don't understand their aliasing. 3321 DEBUG(dbgs() << "can't analyze may or partial alias\n"); 3322 return make_unique<Dependence>(Src, Dst); 3323 case AliasAnalysis::NoAlias: 3324 // If the objects noalias, they are distinct, accesses are independent. 3325 DEBUG(dbgs() << "no alias\n"); 3326 return nullptr; 3327 case AliasAnalysis::MustAlias: 3328 break; // The underlying objects alias; test accesses for dependence. 3329 } 3330 3331 // establish loop nesting levels 3332 establishNestingLevels(Src, Dst); 3333 DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n"); 3334 DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n"); 3335 3336 FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels); 3337 ++TotalArrayPairs; 3338 3339 // See if there are GEPs we can use. 3340 bool UsefulGEP = false; 3341 GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr); 3342 GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr); 3343 if (SrcGEP && DstGEP && 3344 SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()) { 3345 const SCEV *SrcPtrSCEV = SE->getSCEV(SrcGEP->getPointerOperand()); 3346 const SCEV *DstPtrSCEV = SE->getSCEV(DstGEP->getPointerOperand()); 3347 DEBUG(dbgs() << " SrcPtrSCEV = " << *SrcPtrSCEV << "\n"); 3348 DEBUG(dbgs() << " DstPtrSCEV = " << *DstPtrSCEV << "\n"); 3349 3350 UsefulGEP = 3351 isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) && 3352 isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent())); 3353 } 3354 unsigned Pairs = UsefulGEP ? SrcGEP->idx_end() - SrcGEP->idx_begin() : 1; 3355 SmallVector<Subscript, 4> Pair(Pairs); 3356 if (UsefulGEP) { 3357 DEBUG(dbgs() << " using GEPs\n"); 3358 unsigned P = 0; 3359 for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(), 3360 SrcEnd = SrcGEP->idx_end(), 3361 DstIdx = DstGEP->idx_begin(); 3362 SrcIdx != SrcEnd; 3363 ++SrcIdx, ++DstIdx, ++P) { 3364 Pair[P].Src = SE->getSCEV(*SrcIdx); 3365 Pair[P].Dst = SE->getSCEV(*DstIdx); 3366 unifySubscriptType(&Pair[P]); 3367 } 3368 } 3369 else { 3370 DEBUG(dbgs() << " ignoring GEPs\n"); 3371 const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); 3372 const SCEV *DstSCEV = SE->getSCEV(DstPtr); 3373 DEBUG(dbgs() << " SrcSCEV = " << *SrcSCEV << "\n"); 3374 DEBUG(dbgs() << " DstSCEV = " << *DstSCEV << "\n"); 3375 Pair[0].Src = SrcSCEV; 3376 Pair[0].Dst = DstSCEV; 3377 } 3378 3379 if (Delinearize && Pairs == 1 && CommonLevels > 1 && 3380 tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair, SE->getElementSize(Src))) { 3381 DEBUG(dbgs() << " delinerized GEP\n"); 3382 Pairs = Pair.size(); 3383 } 3384 3385 for (unsigned P = 0; P < Pairs; ++P) { 3386 Pair[P].Loops.resize(MaxLevels + 1); 3387 Pair[P].GroupLoops.resize(MaxLevels + 1); 3388 Pair[P].Group.resize(Pairs); 3389 removeMatchingExtensions(&Pair[P]); 3390 Pair[P].Classification = 3391 classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), 3392 Pair[P].Dst, LI->getLoopFor(Dst->getParent()), 3393 Pair[P].Loops); 3394 Pair[P].GroupLoops = Pair[P].Loops; 3395 Pair[P].Group.set(P); 3396 DEBUG(dbgs() << " subscript " << P << "\n"); 3397 DEBUG(dbgs() << "\tsrc = " << *Pair[P].Src << "\n"); 3398 DEBUG(dbgs() << "\tdst = " << *Pair[P].Dst << "\n"); 3399 DEBUG(dbgs() << "\tclass = " << Pair[P].Classification << "\n"); 3400 DEBUG(dbgs() << "\tloops = "); 3401 DEBUG(dumpSmallBitVector(Pair[P].Loops)); 3402 } 3403 3404 SmallBitVector Separable(Pairs); 3405 SmallBitVector Coupled(Pairs); 3406 3407 // Partition subscripts into separable and minimally-coupled groups 3408 // Algorithm in paper is algorithmically better; 3409 // this may be faster in practice. Check someday. 3410 // 3411 // Here's an example of how it works. Consider this code: 3412 // 3413 // for (i = ...) { 3414 // for (j = ...) { 3415 // for (k = ...) { 3416 // for (l = ...) { 3417 // for (m = ...) { 3418 // A[i][j][k][m] = ...; 3419 // ... = A[0][j][l][i + j]; 3420 // } 3421 // } 3422 // } 3423 // } 3424 // } 3425 // 3426 // There are 4 subscripts here: 3427 // 0 [i] and [0] 3428 // 1 [j] and [j] 3429 // 2 [k] and [l] 3430 // 3 [m] and [i + j] 3431 // 3432 // We've already classified each subscript pair as ZIV, SIV, etc., 3433 // and collected all the loops mentioned by pair P in Pair[P].Loops. 3434 // In addition, we've initialized Pair[P].GroupLoops to Pair[P].Loops 3435 // and set Pair[P].Group = {P}. 3436 // 3437 // Src Dst Classification Loops GroupLoops Group 3438 // 0 [i] [0] SIV {1} {1} {0} 3439 // 1 [j] [j] SIV {2} {2} {1} 3440 // 2 [k] [l] RDIV {3,4} {3,4} {2} 3441 // 3 [m] [i + j] MIV {1,2,5} {1,2,5} {3} 3442 // 3443 // For each subscript SI 0 .. 3, we consider each remaining subscript, SJ. 3444 // So, 0 is compared against 1, 2, and 3; 1 is compared against 2 and 3, etc. 3445 // 3446 // We begin by comparing 0 and 1. The intersection of the GroupLoops is empty. 3447 // Next, 0 and 2. Again, the intersection of their GroupLoops is empty. 3448 // Next 0 and 3. The intersection of their GroupLoop = {1}, not empty, 3449 // so Pair[3].Group = {0,3} and Done = false (that is, 0 will not be added 3450 // to either Separable or Coupled). 3451 // 3452 // Next, we consider 1 and 2. The intersection of the GroupLoops is empty. 3453 // Next, 1 and 3. The intersectionof their GroupLoops = {2}, not empty, 3454 // so Pair[3].Group = {0, 1, 3} and Done = false. 3455 // 3456 // Next, we compare 2 against 3. The intersection of the GroupLoops is empty. 3457 // Since Done remains true, we add 2 to the set of Separable pairs. 3458 // 3459 // Finally, we consider 3. There's nothing to compare it with, 3460 // so Done remains true and we add it to the Coupled set. 3461 // Pair[3].Group = {0, 1, 3} and GroupLoops = {1, 2, 5}. 3462 // 3463 // In the end, we've got 1 separable subscript and 1 coupled group. 3464 for (unsigned SI = 0; SI < Pairs; ++SI) { 3465 if (Pair[SI].Classification == Subscript::NonLinear) { 3466 // ignore these, but collect loops for later 3467 ++NonlinearSubscriptPairs; 3468 collectCommonLoops(Pair[SI].Src, 3469 LI->getLoopFor(Src->getParent()), 3470 Pair[SI].Loops); 3471 collectCommonLoops(Pair[SI].Dst, 3472 LI->getLoopFor(Dst->getParent()), 3473 Pair[SI].Loops); 3474 Result.Consistent = false; 3475 } 3476 else if (Pair[SI].Classification == Subscript::ZIV) { 3477 // always separable 3478 Separable.set(SI); 3479 } 3480 else { 3481 // SIV, RDIV, or MIV, so check for coupled group 3482 bool Done = true; 3483 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { 3484 SmallBitVector Intersection = Pair[SI].GroupLoops; 3485 Intersection &= Pair[SJ].GroupLoops; 3486 if (Intersection.any()) { 3487 // accumulate set of all the loops in group 3488 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; 3489 // accumulate set of all subscripts in group 3490 Pair[SJ].Group |= Pair[SI].Group; 3491 Done = false; 3492 } 3493 } 3494 if (Done) { 3495 if (Pair[SI].Group.count() == 1) { 3496 Separable.set(SI); 3497 ++SeparableSubscriptPairs; 3498 } 3499 else { 3500 Coupled.set(SI); 3501 ++CoupledSubscriptPairs; 3502 } 3503 } 3504 } 3505 } 3506 3507 DEBUG(dbgs() << " Separable = "); 3508 DEBUG(dumpSmallBitVector(Separable)); 3509 DEBUG(dbgs() << " Coupled = "); 3510 DEBUG(dumpSmallBitVector(Coupled)); 3511 3512 Constraint NewConstraint; 3513 NewConstraint.setAny(SE); 3514 3515 // test separable subscripts 3516 for (int SI = Separable.find_first(); SI >= 0; SI = Separable.find_next(SI)) { 3517 DEBUG(dbgs() << "testing subscript " << SI); 3518 switch (Pair[SI].Classification) { 3519 case Subscript::ZIV: 3520 DEBUG(dbgs() << ", ZIV\n"); 3521 if (testZIV(Pair[SI].Src, Pair[SI].Dst, Result)) 3522 return nullptr; 3523 break; 3524 case Subscript::SIV: { 3525 DEBUG(dbgs() << ", SIV\n"); 3526 unsigned Level; 3527 const SCEV *SplitIter = nullptr; 3528 if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level, 3529 Result, NewConstraint, SplitIter)) 3530 return nullptr; 3531 break; 3532 } 3533 case Subscript::RDIV: 3534 DEBUG(dbgs() << ", RDIV\n"); 3535 if (testRDIV(Pair[SI].Src, Pair[SI].Dst, Result)) 3536 return nullptr; 3537 break; 3538 case Subscript::MIV: 3539 DEBUG(dbgs() << ", MIV\n"); 3540 if (testMIV(Pair[SI].Src, Pair[SI].Dst, Pair[SI].Loops, Result)) 3541 return nullptr; 3542 break; 3543 default: 3544 llvm_unreachable("subscript has unexpected classification"); 3545 } 3546 } 3547 3548 if (Coupled.count()) { 3549 // test coupled subscript groups 3550 DEBUG(dbgs() << "starting on coupled subscripts\n"); 3551 DEBUG(dbgs() << "MaxLevels + 1 = " << MaxLevels + 1 << "\n"); 3552 SmallVector<Constraint, 4> Constraints(MaxLevels + 1); 3553 for (unsigned II = 0; II <= MaxLevels; ++II) 3554 Constraints[II].setAny(SE); 3555 for (int SI = Coupled.find_first(); SI >= 0; SI = Coupled.find_next(SI)) { 3556 DEBUG(dbgs() << "testing subscript group " << SI << " { "); 3557 SmallBitVector Group(Pair[SI].Group); 3558 SmallBitVector Sivs(Pairs); 3559 SmallBitVector Mivs(Pairs); 3560 SmallBitVector ConstrainedLevels(MaxLevels + 1); 3561 for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) { 3562 DEBUG(dbgs() << SJ << " "); 3563 if (Pair[SJ].Classification == Subscript::SIV) 3564 Sivs.set(SJ); 3565 else 3566 Mivs.set(SJ); 3567 } 3568 DEBUG(dbgs() << "}\n"); 3569 while (Sivs.any()) { 3570 bool Changed = false; 3571 for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) { 3572 DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n"); 3573 // SJ is an SIV subscript that's part of the current coupled group 3574 unsigned Level; 3575 const SCEV *SplitIter = nullptr; 3576 DEBUG(dbgs() << "SIV\n"); 3577 if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, 3578 Result, NewConstraint, SplitIter)) 3579 return nullptr; 3580 ConstrainedLevels.set(Level); 3581 if (intersectConstraints(&Constraints[Level], &NewConstraint)) { 3582 if (Constraints[Level].isEmpty()) { 3583 ++DeltaIndependence; 3584 return nullptr; 3585 } 3586 Changed = true; 3587 } 3588 Sivs.reset(SJ); 3589 } 3590 if (Changed) { 3591 // propagate, possibly creating new SIVs and ZIVs 3592 DEBUG(dbgs() << " propagating\n"); 3593 DEBUG(dbgs() << "\tMivs = "); 3594 DEBUG(dumpSmallBitVector(Mivs)); 3595 for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { 3596 // SJ is an MIV subscript that's part of the current coupled group 3597 DEBUG(dbgs() << "\tSJ = " << SJ << "\n"); 3598 if (propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, 3599 Constraints, Result.Consistent)) { 3600 DEBUG(dbgs() << "\t Changed\n"); 3601 ++DeltaPropagations; 3602 Pair[SJ].Classification = 3603 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), 3604 Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), 3605 Pair[SJ].Loops); 3606 switch (Pair[SJ].Classification) { 3607 case Subscript::ZIV: 3608 DEBUG(dbgs() << "ZIV\n"); 3609 if (testZIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) 3610 return nullptr; 3611 Mivs.reset(SJ); 3612 break; 3613 case Subscript::SIV: 3614 Sivs.set(SJ); 3615 Mivs.reset(SJ); 3616 break; 3617 case Subscript::RDIV: 3618 case Subscript::MIV: 3619 break; 3620 default: 3621 llvm_unreachable("bad subscript classification"); 3622 } 3623 } 3624 } 3625 } 3626 } 3627 3628 // test & propagate remaining RDIVs 3629 for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { 3630 if (Pair[SJ].Classification == Subscript::RDIV) { 3631 DEBUG(dbgs() << "RDIV test\n"); 3632 if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) 3633 return nullptr; 3634 // I don't yet understand how to propagate RDIV results 3635 Mivs.reset(SJ); 3636 } 3637 } 3638 3639 // test remaining MIVs 3640 // This code is temporary. 3641 // Better to somehow test all remaining subscripts simultaneously. 3642 for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { 3643 if (Pair[SJ].Classification == Subscript::MIV) { 3644 DEBUG(dbgs() << "MIV test\n"); 3645 if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Result)) 3646 return nullptr; 3647 } 3648 else 3649 llvm_unreachable("expected only MIV subscripts at this point"); 3650 } 3651 3652 // update Result.DV from constraint vector 3653 DEBUG(dbgs() << " updating\n"); 3654 for (int SJ = ConstrainedLevels.find_first(); 3655 SJ >= 0; SJ = ConstrainedLevels.find_next(SJ)) { 3656 updateDirection(Result.DV[SJ - 1], Constraints[SJ]); 3657 if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE) 3658 return nullptr; 3659 } 3660 } 3661 } 3662 3663 // Make sure the Scalar flags are set correctly. 3664 SmallBitVector CompleteLoops(MaxLevels + 1); 3665 for (unsigned SI = 0; SI < Pairs; ++SI) 3666 CompleteLoops |= Pair[SI].Loops; 3667 for (unsigned II = 1; II <= CommonLevels; ++II) 3668 if (CompleteLoops[II]) 3669 Result.DV[II - 1].Scalar = false; 3670 3671 if (PossiblyLoopIndependent) { 3672 // Make sure the LoopIndependent flag is set correctly. 3673 // All directions must include equal, otherwise no 3674 // loop-independent dependence is possible. 3675 for (unsigned II = 1; II <= CommonLevels; ++II) { 3676 if (!(Result.getDirection(II) & Dependence::DVEntry::EQ)) { 3677 Result.LoopIndependent = false; 3678 break; 3679 } 3680 } 3681 } 3682 else { 3683 // On the other hand, if all directions are equal and there's no 3684 // loop-independent dependence possible, then no dependence exists. 3685 bool AllEqual = true; 3686 for (unsigned II = 1; II <= CommonLevels; ++II) { 3687 if (Result.getDirection(II) != Dependence::DVEntry::EQ) { 3688 AllEqual = false; 3689 break; 3690 } 3691 } 3692 if (AllEqual) 3693 return nullptr; 3694 } 3695 3696 auto Final = make_unique<FullDependence>(Result); 3697 Result.DV = nullptr; 3698 return std::move(Final); 3699 } 3700 3701 3702 3703 //===----------------------------------------------------------------------===// 3704 // getSplitIteration - 3705 // Rather than spend rarely-used space recording the splitting iteration 3706 // during the Weak-Crossing SIV test, we re-compute it on demand. 3707 // The re-computation is basically a repeat of the entire dependence test, 3708 // though simplified since we know that the dependence exists. 3709 // It's tedious, since we must go through all propagations, etc. 3710 // 3711 // Care is required to keep this code up to date with respect to the routine 3712 // above, depends(). 3713 // 3714 // Generally, the dependence analyzer will be used to build 3715 // a dependence graph for a function (basically a map from instructions 3716 // to dependences). Looking for cycles in the graph shows us loops 3717 // that cannot be trivially vectorized/parallelized. 3718 // 3719 // We can try to improve the situation by examining all the dependences 3720 // that make up the cycle, looking for ones we can break. 3721 // Sometimes, peeling the first or last iteration of a loop will break 3722 // dependences, and we've got flags for those possibilities. 3723 // Sometimes, splitting a loop at some other iteration will do the trick, 3724 // and we've got a flag for that case. Rather than waste the space to 3725 // record the exact iteration (since we rarely know), we provide 3726 // a method that calculates the iteration. It's a drag that it must work 3727 // from scratch, but wonderful in that it's possible. 3728 // 3729 // Here's an example: 3730 // 3731 // for (i = 0; i < 10; i++) 3732 // A[i] = ... 3733 // ... = A[11 - i] 3734 // 3735 // There's a loop-carried flow dependence from the store to the load, 3736 // found by the weak-crossing SIV test. The dependence will have a flag, 3737 // indicating that the dependence can be broken by splitting the loop. 3738 // Calling getSplitIteration will return 5. 3739 // Splitting the loop breaks the dependence, like so: 3740 // 3741 // for (i = 0; i <= 5; i++) 3742 // A[i] = ... 3743 // ... = A[11 - i] 3744 // for (i = 6; i < 10; i++) 3745 // A[i] = ... 3746 // ... = A[11 - i] 3747 // 3748 // breaks the dependence and allows us to vectorize/parallelize 3749 // both loops. 3750 const SCEV *DependenceAnalysis::getSplitIteration(const Dependence &Dep, 3751 unsigned SplitLevel) { 3752 assert(Dep.isSplitable(SplitLevel) && 3753 "Dep should be splitable at SplitLevel"); 3754 Instruction *Src = Dep.getSrc(); 3755 Instruction *Dst = Dep.getDst(); 3756 assert(Src->mayReadFromMemory() || Src->mayWriteToMemory()); 3757 assert(Dst->mayReadFromMemory() || Dst->mayWriteToMemory()); 3758 assert(isLoadOrStore(Src)); 3759 assert(isLoadOrStore(Dst)); 3760 Value *SrcPtr = getPointerOperand(Src); 3761 Value *DstPtr = getPointerOperand(Dst); 3762 assert(underlyingObjectsAlias(AA, DstPtr, SrcPtr) == 3763 AliasAnalysis::MustAlias); 3764 3765 // establish loop nesting levels 3766 establishNestingLevels(Src, Dst); 3767 3768 FullDependence Result(Src, Dst, false, CommonLevels); 3769 3770 // See if there are GEPs we can use. 3771 bool UsefulGEP = false; 3772 GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr); 3773 GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr); 3774 if (SrcGEP && DstGEP && 3775 SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()) { 3776 const SCEV *SrcPtrSCEV = SE->getSCEV(SrcGEP->getPointerOperand()); 3777 const SCEV *DstPtrSCEV = SE->getSCEV(DstGEP->getPointerOperand()); 3778 UsefulGEP = 3779 isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) && 3780 isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent())); 3781 } 3782 unsigned Pairs = UsefulGEP ? SrcGEP->idx_end() - SrcGEP->idx_begin() : 1; 3783 SmallVector<Subscript, 4> Pair(Pairs); 3784 if (UsefulGEP) { 3785 unsigned P = 0; 3786 for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(), 3787 SrcEnd = SrcGEP->idx_end(), 3788 DstIdx = DstGEP->idx_begin(); 3789 SrcIdx != SrcEnd; 3790 ++SrcIdx, ++DstIdx, ++P) { 3791 Pair[P].Src = SE->getSCEV(*SrcIdx); 3792 Pair[P].Dst = SE->getSCEV(*DstIdx); 3793 } 3794 } 3795 else { 3796 const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); 3797 const SCEV *DstSCEV = SE->getSCEV(DstPtr); 3798 Pair[0].Src = SrcSCEV; 3799 Pair[0].Dst = DstSCEV; 3800 } 3801 3802 if (Delinearize && Pairs == 1 && CommonLevels > 1 && 3803 tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair, SE->getElementSize(Src))) { 3804 DEBUG(dbgs() << " delinerized GEP\n"); 3805 Pairs = Pair.size(); 3806 } 3807 3808 for (unsigned P = 0; P < Pairs; ++P) { 3809 Pair[P].Loops.resize(MaxLevels + 1); 3810 Pair[P].GroupLoops.resize(MaxLevels + 1); 3811 Pair[P].Group.resize(Pairs); 3812 removeMatchingExtensions(&Pair[P]); 3813 Pair[P].Classification = 3814 classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), 3815 Pair[P].Dst, LI->getLoopFor(Dst->getParent()), 3816 Pair[P].Loops); 3817 Pair[P].GroupLoops = Pair[P].Loops; 3818 Pair[P].Group.set(P); 3819 } 3820 3821 SmallBitVector Separable(Pairs); 3822 SmallBitVector Coupled(Pairs); 3823 3824 // partition subscripts into separable and minimally-coupled groups 3825 for (unsigned SI = 0; SI < Pairs; ++SI) { 3826 if (Pair[SI].Classification == Subscript::NonLinear) { 3827 // ignore these, but collect loops for later 3828 collectCommonLoops(Pair[SI].Src, 3829 LI->getLoopFor(Src->getParent()), 3830 Pair[SI].Loops); 3831 collectCommonLoops(Pair[SI].Dst, 3832 LI->getLoopFor(Dst->getParent()), 3833 Pair[SI].Loops); 3834 Result.Consistent = false; 3835 } 3836 else if (Pair[SI].Classification == Subscript::ZIV) 3837 Separable.set(SI); 3838 else { 3839 // SIV, RDIV, or MIV, so check for coupled group 3840 bool Done = true; 3841 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { 3842 SmallBitVector Intersection = Pair[SI].GroupLoops; 3843 Intersection &= Pair[SJ].GroupLoops; 3844 if (Intersection.any()) { 3845 // accumulate set of all the loops in group 3846 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; 3847 // accumulate set of all subscripts in group 3848 Pair[SJ].Group |= Pair[SI].Group; 3849 Done = false; 3850 } 3851 } 3852 if (Done) { 3853 if (Pair[SI].Group.count() == 1) 3854 Separable.set(SI); 3855 else 3856 Coupled.set(SI); 3857 } 3858 } 3859 } 3860 3861 Constraint NewConstraint; 3862 NewConstraint.setAny(SE); 3863 3864 // test separable subscripts 3865 for (int SI = Separable.find_first(); SI >= 0; SI = Separable.find_next(SI)) { 3866 switch (Pair[SI].Classification) { 3867 case Subscript::SIV: { 3868 unsigned Level; 3869 const SCEV *SplitIter = nullptr; 3870 (void) testSIV(Pair[SI].Src, Pair[SI].Dst, Level, 3871 Result, NewConstraint, SplitIter); 3872 if (Level == SplitLevel) { 3873 assert(SplitIter != nullptr); 3874 return SplitIter; 3875 } 3876 break; 3877 } 3878 case Subscript::ZIV: 3879 case Subscript::RDIV: 3880 case Subscript::MIV: 3881 break; 3882 default: 3883 llvm_unreachable("subscript has unexpected classification"); 3884 } 3885 } 3886 3887 if (Coupled.count()) { 3888 // test coupled subscript groups 3889 SmallVector<Constraint, 4> Constraints(MaxLevels + 1); 3890 for (unsigned II = 0; II <= MaxLevels; ++II) 3891 Constraints[II].setAny(SE); 3892 for (int SI = Coupled.find_first(); SI >= 0; SI = Coupled.find_next(SI)) { 3893 SmallBitVector Group(Pair[SI].Group); 3894 SmallBitVector Sivs(Pairs); 3895 SmallBitVector Mivs(Pairs); 3896 SmallBitVector ConstrainedLevels(MaxLevels + 1); 3897 for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) { 3898 if (Pair[SJ].Classification == Subscript::SIV) 3899 Sivs.set(SJ); 3900 else 3901 Mivs.set(SJ); 3902 } 3903 while (Sivs.any()) { 3904 bool Changed = false; 3905 for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) { 3906 // SJ is an SIV subscript that's part of the current coupled group 3907 unsigned Level; 3908 const SCEV *SplitIter = nullptr; 3909 (void) testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, 3910 Result, NewConstraint, SplitIter); 3911 if (Level == SplitLevel && SplitIter) 3912 return SplitIter; 3913 ConstrainedLevels.set(Level); 3914 if (intersectConstraints(&Constraints[Level], &NewConstraint)) 3915 Changed = true; 3916 Sivs.reset(SJ); 3917 } 3918 if (Changed) { 3919 // propagate, possibly creating new SIVs and ZIVs 3920 for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { 3921 // SJ is an MIV subscript that's part of the current coupled group 3922 if (propagate(Pair[SJ].Src, Pair[SJ].Dst, 3923 Pair[SJ].Loops, Constraints, Result.Consistent)) { 3924 Pair[SJ].Classification = 3925 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), 3926 Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), 3927 Pair[SJ].Loops); 3928 switch (Pair[SJ].Classification) { 3929 case Subscript::ZIV: 3930 Mivs.reset(SJ); 3931 break; 3932 case Subscript::SIV: 3933 Sivs.set(SJ); 3934 Mivs.reset(SJ); 3935 break; 3936 case Subscript::RDIV: 3937 case Subscript::MIV: 3938 break; 3939 default: 3940 llvm_unreachable("bad subscript classification"); 3941 } 3942 } 3943 } 3944 } 3945 } 3946 } 3947 } 3948 llvm_unreachable("somehow reached end of routine"); 3949 return nullptr; 3950 } 3951