1 //===-- DependenceAnalysis.cpp - DA Implementation --------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // DependenceAnalysis is an LLVM pass that analyses dependences between memory 11 // accesses. Currently, it is an (incomplete) implementation of the approach 12 // described in 13 // 14 // Practical Dependence Testing 15 // Goff, Kennedy, Tseng 16 // PLDI 1991 17 // 18 // There's a single entry point that analyzes the dependence between a pair 19 // of memory references in a function, returning either NULL, for no dependence, 20 // or a more-or-less detailed description of the dependence between them. 21 // 22 // Currently, the implementation cannot propagate constraints between 23 // coupled RDIV subscripts and lacks a multi-subscript MIV test. 24 // Both of these are conservative weaknesses; 25 // that is, not a source of correctness problems. 26 // 27 // Since Clang linearizes some array subscripts, the dependence 28 // analysis is using SCEV->delinearize to recover the representation of multiple 29 // subscripts, and thus avoid the more expensive and less precise MIV tests. The 30 // delinearization is controlled by the flag -da-delinearize. 31 // 32 // We should pay some careful attention to the possibility of integer overflow 33 // in the implementation of the various tests. This could happen with Add, 34 // Subtract, or Multiply, with both APInt's and SCEV's. 35 // 36 // Some non-linear subscript pairs can be handled by the GCD test 37 // (and perhaps other tests). 38 // Should explore how often these things occur. 39 // 40 // Finally, it seems like certain test cases expose weaknesses in the SCEV 41 // simplification, especially in the handling of sign and zero extensions. 42 // It could be useful to spend time exploring these. 43 // 44 // Please note that this is work in progress and the interface is subject to 45 // change. 46 // 47 //===----------------------------------------------------------------------===// 48 // // 49 // In memory of Ken Kennedy, 1945 - 2007 // 50 // // 51 //===----------------------------------------------------------------------===// 52 53 #include "llvm/Analysis/DependenceAnalysis.h" 54 #include "llvm/ADT/STLExtras.h" 55 #include "llvm/ADT/Statistic.h" 56 #include "llvm/Analysis/AliasAnalysis.h" 57 #include "llvm/Analysis/LoopInfo.h" 58 #include "llvm/Analysis/ScalarEvolution.h" 59 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 60 #include "llvm/Analysis/ValueTracking.h" 61 #include "llvm/IR/InstIterator.h" 62 #include "llvm/IR/Module.h" 63 #include "llvm/IR/Operator.h" 64 #include "llvm/Support/CommandLine.h" 65 #include "llvm/Support/Debug.h" 66 #include "llvm/Support/ErrorHandling.h" 67 #include "llvm/Support/raw_ostream.h" 68 69 using namespace llvm; 70 71 #define DEBUG_TYPE "da" 72 73 //===----------------------------------------------------------------------===// 74 // statistics 75 76 STATISTIC(TotalArrayPairs, "Array pairs tested"); 77 STATISTIC(SeparableSubscriptPairs, "Separable subscript pairs"); 78 STATISTIC(CoupledSubscriptPairs, "Coupled subscript pairs"); 79 STATISTIC(NonlinearSubscriptPairs, "Nonlinear subscript pairs"); 80 STATISTIC(ZIVapplications, "ZIV applications"); 81 STATISTIC(ZIVindependence, "ZIV independence"); 82 STATISTIC(StrongSIVapplications, "Strong SIV applications"); 83 STATISTIC(StrongSIVsuccesses, "Strong SIV successes"); 84 STATISTIC(StrongSIVindependence, "Strong SIV independence"); 85 STATISTIC(WeakCrossingSIVapplications, "Weak-Crossing SIV applications"); 86 STATISTIC(WeakCrossingSIVsuccesses, "Weak-Crossing SIV successes"); 87 STATISTIC(WeakCrossingSIVindependence, "Weak-Crossing SIV independence"); 88 STATISTIC(ExactSIVapplications, "Exact SIV applications"); 89 STATISTIC(ExactSIVsuccesses, "Exact SIV successes"); 90 STATISTIC(ExactSIVindependence, "Exact SIV independence"); 91 STATISTIC(WeakZeroSIVapplications, "Weak-Zero SIV applications"); 92 STATISTIC(WeakZeroSIVsuccesses, "Weak-Zero SIV successes"); 93 STATISTIC(WeakZeroSIVindependence, "Weak-Zero SIV independence"); 94 STATISTIC(ExactRDIVapplications, "Exact RDIV applications"); 95 STATISTIC(ExactRDIVindependence, "Exact RDIV independence"); 96 STATISTIC(SymbolicRDIVapplications, "Symbolic RDIV applications"); 97 STATISTIC(SymbolicRDIVindependence, "Symbolic RDIV independence"); 98 STATISTIC(DeltaApplications, "Delta applications"); 99 STATISTIC(DeltaSuccesses, "Delta successes"); 100 STATISTIC(DeltaIndependence, "Delta independence"); 101 STATISTIC(DeltaPropagations, "Delta propagations"); 102 STATISTIC(GCDapplications, "GCD applications"); 103 STATISTIC(GCDsuccesses, "GCD successes"); 104 STATISTIC(GCDindependence, "GCD independence"); 105 STATISTIC(BanerjeeApplications, "Banerjee applications"); 106 STATISTIC(BanerjeeIndependence, "Banerjee independence"); 107 STATISTIC(BanerjeeSuccesses, "Banerjee successes"); 108 109 static cl::opt<bool> 110 Delinearize("da-delinearize", cl::init(false), cl::Hidden, cl::ZeroOrMore, 111 cl::desc("Try to delinearize array references.")); 112 113 //===----------------------------------------------------------------------===// 114 // basics 115 116 DependenceAnalysis::Result 117 DependenceAnalysis::run(Function &F, FunctionAnalysisManager &FAM) { 118 auto &AA = FAM.getResult<AAManager>(F); 119 auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F); 120 auto &LI = FAM.getResult<LoopAnalysis>(F); 121 return DependenceInfo(&F, &AA, &SE, &LI); 122 } 123 124 AnalysisKey DependenceAnalysis::Key; 125 126 INITIALIZE_PASS_BEGIN(DependenceAnalysisWrapperPass, "da", 127 "Dependence Analysis", true, true) 128 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 129 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) 130 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 131 INITIALIZE_PASS_END(DependenceAnalysisWrapperPass, "da", "Dependence Analysis", 132 true, true) 133 134 char DependenceAnalysisWrapperPass::ID = 0; 135 136 FunctionPass *llvm::createDependenceAnalysisWrapperPass() { 137 return new DependenceAnalysisWrapperPass(); 138 } 139 140 bool DependenceAnalysisWrapperPass::runOnFunction(Function &F) { 141 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); 142 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 143 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 144 info.reset(new DependenceInfo(&F, &AA, &SE, &LI)); 145 return false; 146 } 147 148 DependenceInfo &DependenceAnalysisWrapperPass::getDI() const { return *info; } 149 150 void DependenceAnalysisWrapperPass::releaseMemory() { info.reset(); } 151 152 void DependenceAnalysisWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { 153 AU.setPreservesAll(); 154 AU.addRequiredTransitive<AAResultsWrapperPass>(); 155 AU.addRequiredTransitive<ScalarEvolutionWrapperPass>(); 156 AU.addRequiredTransitive<LoopInfoWrapperPass>(); 157 } 158 159 160 // Used to test the dependence analyzer. 161 // Looks through the function, noting loads and stores. 162 // Calls depends() on every possible pair and prints out the result. 163 // Ignores all other instructions. 164 static void dumpExampleDependence(raw_ostream &OS, DependenceInfo *DA) { 165 auto *F = DA->getFunction(); 166 for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F); SrcI != SrcE; 167 ++SrcI) { 168 if (isa<StoreInst>(*SrcI) || isa<LoadInst>(*SrcI)) { 169 for (inst_iterator DstI = SrcI, DstE = inst_end(F); 170 DstI != DstE; ++DstI) { 171 if (isa<StoreInst>(*DstI) || isa<LoadInst>(*DstI)) { 172 OS << "da analyze - "; 173 if (auto D = DA->depends(&*SrcI, &*DstI, true)) { 174 D->dump(OS); 175 for (unsigned Level = 1; Level <= D->getLevels(); Level++) { 176 if (D->isSplitable(Level)) { 177 OS << "da analyze - split level = " << Level; 178 OS << ", iteration = " << *DA->getSplitIteration(*D, Level); 179 OS << "!\n"; 180 } 181 } 182 } 183 else 184 OS << "none!\n"; 185 } 186 } 187 } 188 } 189 } 190 191 void DependenceAnalysisWrapperPass::print(raw_ostream &OS, 192 const Module *) const { 193 dumpExampleDependence(OS, info.get()); 194 } 195 196 //===----------------------------------------------------------------------===// 197 // Dependence methods 198 199 // Returns true if this is an input dependence. 200 bool Dependence::isInput() const { 201 return Src->mayReadFromMemory() && Dst->mayReadFromMemory(); 202 } 203 204 205 // Returns true if this is an output dependence. 206 bool Dependence::isOutput() const { 207 return Src->mayWriteToMemory() && Dst->mayWriteToMemory(); 208 } 209 210 211 // Returns true if this is an flow (aka true) dependence. 212 bool Dependence::isFlow() const { 213 return Src->mayWriteToMemory() && Dst->mayReadFromMemory(); 214 } 215 216 217 // Returns true if this is an anti dependence. 218 bool Dependence::isAnti() const { 219 return Src->mayReadFromMemory() && Dst->mayWriteToMemory(); 220 } 221 222 223 // Returns true if a particular level is scalar; that is, 224 // if no subscript in the source or destination mention the induction 225 // variable associated with the loop at this level. 226 // Leave this out of line, so it will serve as a virtual method anchor 227 bool Dependence::isScalar(unsigned level) const { 228 return false; 229 } 230 231 232 //===----------------------------------------------------------------------===// 233 // FullDependence methods 234 235 FullDependence::FullDependence(Instruction *Source, Instruction *Destination, 236 bool PossiblyLoopIndependent, 237 unsigned CommonLevels) 238 : Dependence(Source, Destination), Levels(CommonLevels), 239 LoopIndependent(PossiblyLoopIndependent) { 240 Consistent = true; 241 if (CommonLevels) 242 DV = make_unique<DVEntry[]>(CommonLevels); 243 } 244 245 // The rest are simple getters that hide the implementation. 246 247 // getDirection - Returns the direction associated with a particular level. 248 unsigned FullDependence::getDirection(unsigned Level) const { 249 assert(0 < Level && Level <= Levels && "Level out of range"); 250 return DV[Level - 1].Direction; 251 } 252 253 254 // Returns the distance (or NULL) associated with a particular level. 255 const SCEV *FullDependence::getDistance(unsigned Level) const { 256 assert(0 < Level && Level <= Levels && "Level out of range"); 257 return DV[Level - 1].Distance; 258 } 259 260 261 // Returns true if a particular level is scalar; that is, 262 // if no subscript in the source or destination mention the induction 263 // variable associated with the loop at this level. 264 bool FullDependence::isScalar(unsigned Level) const { 265 assert(0 < Level && Level <= Levels && "Level out of range"); 266 return DV[Level - 1].Scalar; 267 } 268 269 270 // Returns true if peeling the first iteration from this loop 271 // will break this dependence. 272 bool FullDependence::isPeelFirst(unsigned Level) const { 273 assert(0 < Level && Level <= Levels && "Level out of range"); 274 return DV[Level - 1].PeelFirst; 275 } 276 277 278 // Returns true if peeling the last iteration from this loop 279 // will break this dependence. 280 bool FullDependence::isPeelLast(unsigned Level) const { 281 assert(0 < Level && Level <= Levels && "Level out of range"); 282 return DV[Level - 1].PeelLast; 283 } 284 285 286 // Returns true if splitting this loop will break the dependence. 287 bool FullDependence::isSplitable(unsigned Level) const { 288 assert(0 < Level && Level <= Levels && "Level out of range"); 289 return DV[Level - 1].Splitable; 290 } 291 292 293 //===----------------------------------------------------------------------===// 294 // DependenceInfo::Constraint methods 295 296 // If constraint is a point <X, Y>, returns X. 297 // Otherwise assert. 298 const SCEV *DependenceInfo::Constraint::getX() const { 299 assert(Kind == Point && "Kind should be Point"); 300 return A; 301 } 302 303 304 // If constraint is a point <X, Y>, returns Y. 305 // Otherwise assert. 306 const SCEV *DependenceInfo::Constraint::getY() const { 307 assert(Kind == Point && "Kind should be Point"); 308 return B; 309 } 310 311 312 // If constraint is a line AX + BY = C, returns A. 313 // Otherwise assert. 314 const SCEV *DependenceInfo::Constraint::getA() const { 315 assert((Kind == Line || Kind == Distance) && 316 "Kind should be Line (or Distance)"); 317 return A; 318 } 319 320 321 // If constraint is a line AX + BY = C, returns B. 322 // Otherwise assert. 323 const SCEV *DependenceInfo::Constraint::getB() const { 324 assert((Kind == Line || Kind == Distance) && 325 "Kind should be Line (or Distance)"); 326 return B; 327 } 328 329 330 // If constraint is a line AX + BY = C, returns C. 331 // Otherwise assert. 332 const SCEV *DependenceInfo::Constraint::getC() const { 333 assert((Kind == Line || Kind == Distance) && 334 "Kind should be Line (or Distance)"); 335 return C; 336 } 337 338 339 // If constraint is a distance, returns D. 340 // Otherwise assert. 341 const SCEV *DependenceInfo::Constraint::getD() const { 342 assert(Kind == Distance && "Kind should be Distance"); 343 return SE->getNegativeSCEV(C); 344 } 345 346 347 // Returns the loop associated with this constraint. 348 const Loop *DependenceInfo::Constraint::getAssociatedLoop() const { 349 assert((Kind == Distance || Kind == Line || Kind == Point) && 350 "Kind should be Distance, Line, or Point"); 351 return AssociatedLoop; 352 } 353 354 void DependenceInfo::Constraint::setPoint(const SCEV *X, const SCEV *Y, 355 const Loop *CurLoop) { 356 Kind = Point; 357 A = X; 358 B = Y; 359 AssociatedLoop = CurLoop; 360 } 361 362 void DependenceInfo::Constraint::setLine(const SCEV *AA, const SCEV *BB, 363 const SCEV *CC, const Loop *CurLoop) { 364 Kind = Line; 365 A = AA; 366 B = BB; 367 C = CC; 368 AssociatedLoop = CurLoop; 369 } 370 371 void DependenceInfo::Constraint::setDistance(const SCEV *D, 372 const Loop *CurLoop) { 373 Kind = Distance; 374 A = SE->getOne(D->getType()); 375 B = SE->getNegativeSCEV(A); 376 C = SE->getNegativeSCEV(D); 377 AssociatedLoop = CurLoop; 378 } 379 380 void DependenceInfo::Constraint::setEmpty() { Kind = Empty; } 381 382 void DependenceInfo::Constraint::setAny(ScalarEvolution *NewSE) { 383 SE = NewSE; 384 Kind = Any; 385 } 386 387 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 388 // For debugging purposes. Dumps the constraint out to OS. 389 LLVM_DUMP_METHOD void DependenceInfo::Constraint::dump(raw_ostream &OS) const { 390 if (isEmpty()) 391 OS << " Empty\n"; 392 else if (isAny()) 393 OS << " Any\n"; 394 else if (isPoint()) 395 OS << " Point is <" << *getX() << ", " << *getY() << ">\n"; 396 else if (isDistance()) 397 OS << " Distance is " << *getD() << 398 " (" << *getA() << "*X + " << *getB() << "*Y = " << *getC() << ")\n"; 399 else if (isLine()) 400 OS << " Line is " << *getA() << "*X + " << 401 *getB() << "*Y = " << *getC() << "\n"; 402 else 403 llvm_unreachable("unknown constraint type in Constraint::dump"); 404 } 405 #endif 406 407 408 // Updates X with the intersection 409 // of the Constraints X and Y. Returns true if X has changed. 410 // Corresponds to Figure 4 from the paper 411 // 412 // Practical Dependence Testing 413 // Goff, Kennedy, Tseng 414 // PLDI 1991 415 bool DependenceInfo::intersectConstraints(Constraint *X, const Constraint *Y) { 416 ++DeltaApplications; 417 DEBUG(dbgs() << "\tintersect constraints\n"); 418 DEBUG(dbgs() << "\t X ="; X->dump(dbgs())); 419 DEBUG(dbgs() << "\t Y ="; Y->dump(dbgs())); 420 assert(!Y->isPoint() && "Y must not be a Point"); 421 if (X->isAny()) { 422 if (Y->isAny()) 423 return false; 424 *X = *Y; 425 return true; 426 } 427 if (X->isEmpty()) 428 return false; 429 if (Y->isEmpty()) { 430 X->setEmpty(); 431 return true; 432 } 433 434 if (X->isDistance() && Y->isDistance()) { 435 DEBUG(dbgs() << "\t intersect 2 distances\n"); 436 if (isKnownPredicate(CmpInst::ICMP_EQ, X->getD(), Y->getD())) 437 return false; 438 if (isKnownPredicate(CmpInst::ICMP_NE, X->getD(), Y->getD())) { 439 X->setEmpty(); 440 ++DeltaSuccesses; 441 return true; 442 } 443 // Hmmm, interesting situation. 444 // I guess if either is constant, keep it and ignore the other. 445 if (isa<SCEVConstant>(Y->getD())) { 446 *X = *Y; 447 return true; 448 } 449 return false; 450 } 451 452 // At this point, the pseudo-code in Figure 4 of the paper 453 // checks if (X->isPoint() && Y->isPoint()). 454 // This case can't occur in our implementation, 455 // since a Point can only arise as the result of intersecting 456 // two Line constraints, and the right-hand value, Y, is never 457 // the result of an intersection. 458 assert(!(X->isPoint() && Y->isPoint()) && 459 "We shouldn't ever see X->isPoint() && Y->isPoint()"); 460 461 if (X->isLine() && Y->isLine()) { 462 DEBUG(dbgs() << "\t intersect 2 lines\n"); 463 const SCEV *Prod1 = SE->getMulExpr(X->getA(), Y->getB()); 464 const SCEV *Prod2 = SE->getMulExpr(X->getB(), Y->getA()); 465 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) { 466 // slopes are equal, so lines are parallel 467 DEBUG(dbgs() << "\t\tsame slope\n"); 468 Prod1 = SE->getMulExpr(X->getC(), Y->getB()); 469 Prod2 = SE->getMulExpr(X->getB(), Y->getC()); 470 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) 471 return false; 472 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { 473 X->setEmpty(); 474 ++DeltaSuccesses; 475 return true; 476 } 477 return false; 478 } 479 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { 480 // slopes differ, so lines intersect 481 DEBUG(dbgs() << "\t\tdifferent slopes\n"); 482 const SCEV *C1B2 = SE->getMulExpr(X->getC(), Y->getB()); 483 const SCEV *C1A2 = SE->getMulExpr(X->getC(), Y->getA()); 484 const SCEV *C2B1 = SE->getMulExpr(Y->getC(), X->getB()); 485 const SCEV *C2A1 = SE->getMulExpr(Y->getC(), X->getA()); 486 const SCEV *A1B2 = SE->getMulExpr(X->getA(), Y->getB()); 487 const SCEV *A2B1 = SE->getMulExpr(Y->getA(), X->getB()); 488 const SCEVConstant *C1A2_C2A1 = 489 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1A2, C2A1)); 490 const SCEVConstant *C1B2_C2B1 = 491 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1B2, C2B1)); 492 const SCEVConstant *A1B2_A2B1 = 493 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A1B2, A2B1)); 494 const SCEVConstant *A2B1_A1B2 = 495 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A2B1, A1B2)); 496 if (!C1B2_C2B1 || !C1A2_C2A1 || 497 !A1B2_A2B1 || !A2B1_A1B2) 498 return false; 499 APInt Xtop = C1B2_C2B1->getAPInt(); 500 APInt Xbot = A1B2_A2B1->getAPInt(); 501 APInt Ytop = C1A2_C2A1->getAPInt(); 502 APInt Ybot = A2B1_A1B2->getAPInt(); 503 DEBUG(dbgs() << "\t\tXtop = " << Xtop << "\n"); 504 DEBUG(dbgs() << "\t\tXbot = " << Xbot << "\n"); 505 DEBUG(dbgs() << "\t\tYtop = " << Ytop << "\n"); 506 DEBUG(dbgs() << "\t\tYbot = " << Ybot << "\n"); 507 APInt Xq = Xtop; // these need to be initialized, even 508 APInt Xr = Xtop; // though they're just going to be overwritten 509 APInt::sdivrem(Xtop, Xbot, Xq, Xr); 510 APInt Yq = Ytop; 511 APInt Yr = Ytop; 512 APInt::sdivrem(Ytop, Ybot, Yq, Yr); 513 if (Xr != 0 || Yr != 0) { 514 X->setEmpty(); 515 ++DeltaSuccesses; 516 return true; 517 } 518 DEBUG(dbgs() << "\t\tX = " << Xq << ", Y = " << Yq << "\n"); 519 if (Xq.slt(0) || Yq.slt(0)) { 520 X->setEmpty(); 521 ++DeltaSuccesses; 522 return true; 523 } 524 if (const SCEVConstant *CUB = 525 collectConstantUpperBound(X->getAssociatedLoop(), Prod1->getType())) { 526 const APInt &UpperBound = CUB->getAPInt(); 527 DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n"); 528 if (Xq.sgt(UpperBound) || Yq.sgt(UpperBound)) { 529 X->setEmpty(); 530 ++DeltaSuccesses; 531 return true; 532 } 533 } 534 X->setPoint(SE->getConstant(Xq), 535 SE->getConstant(Yq), 536 X->getAssociatedLoop()); 537 ++DeltaSuccesses; 538 return true; 539 } 540 return false; 541 } 542 543 // if (X->isLine() && Y->isPoint()) This case can't occur. 544 assert(!(X->isLine() && Y->isPoint()) && "This case should never occur"); 545 546 if (X->isPoint() && Y->isLine()) { 547 DEBUG(dbgs() << "\t intersect Point and Line\n"); 548 const SCEV *A1X1 = SE->getMulExpr(Y->getA(), X->getX()); 549 const SCEV *B1Y1 = SE->getMulExpr(Y->getB(), X->getY()); 550 const SCEV *Sum = SE->getAddExpr(A1X1, B1Y1); 551 if (isKnownPredicate(CmpInst::ICMP_EQ, Sum, Y->getC())) 552 return false; 553 if (isKnownPredicate(CmpInst::ICMP_NE, Sum, Y->getC())) { 554 X->setEmpty(); 555 ++DeltaSuccesses; 556 return true; 557 } 558 return false; 559 } 560 561 llvm_unreachable("shouldn't reach the end of Constraint intersection"); 562 return false; 563 } 564 565 566 //===----------------------------------------------------------------------===// 567 // DependenceInfo methods 568 569 // For debugging purposes. Dumps a dependence to OS. 570 void Dependence::dump(raw_ostream &OS) const { 571 bool Splitable = false; 572 if (isConfused()) 573 OS << "confused"; 574 else { 575 if (isConsistent()) 576 OS << "consistent "; 577 if (isFlow()) 578 OS << "flow"; 579 else if (isOutput()) 580 OS << "output"; 581 else if (isAnti()) 582 OS << "anti"; 583 else if (isInput()) 584 OS << "input"; 585 unsigned Levels = getLevels(); 586 OS << " ["; 587 for (unsigned II = 1; II <= Levels; ++II) { 588 if (isSplitable(II)) 589 Splitable = true; 590 if (isPeelFirst(II)) 591 OS << 'p'; 592 const SCEV *Distance = getDistance(II); 593 if (Distance) 594 OS << *Distance; 595 else if (isScalar(II)) 596 OS << "S"; 597 else { 598 unsigned Direction = getDirection(II); 599 if (Direction == DVEntry::ALL) 600 OS << "*"; 601 else { 602 if (Direction & DVEntry::LT) 603 OS << "<"; 604 if (Direction & DVEntry::EQ) 605 OS << "="; 606 if (Direction & DVEntry::GT) 607 OS << ">"; 608 } 609 } 610 if (isPeelLast(II)) 611 OS << 'p'; 612 if (II < Levels) 613 OS << " "; 614 } 615 if (isLoopIndependent()) 616 OS << "|<"; 617 OS << "]"; 618 if (Splitable) 619 OS << " splitable"; 620 } 621 OS << "!\n"; 622 } 623 624 // Returns NoAlias/MayAliass/MustAlias for two memory locations based upon their 625 // underlaying objects. If LocA and LocB are known to not alias (for any reason: 626 // tbaa, non-overlapping regions etc), then it is known there is no dependecy. 627 // Otherwise the underlying objects are checked to see if they point to 628 // different identifiable objects. 629 static AliasResult underlyingObjectsAlias(AliasAnalysis *AA, 630 const DataLayout &DL, 631 const MemoryLocation &LocA, 632 const MemoryLocation &LocB) { 633 // Check the original locations (minus size) for noalias, which can happen for 634 // tbaa, incompatible underlying object locations, etc. 635 MemoryLocation LocAS(LocA.Ptr, MemoryLocation::UnknownSize, LocA.AATags); 636 MemoryLocation LocBS(LocB.Ptr, MemoryLocation::UnknownSize, LocB.AATags); 637 if (AA->alias(LocAS, LocBS) == NoAlias) 638 return NoAlias; 639 640 // Check the underlying objects are the same 641 const Value *AObj = GetUnderlyingObject(LocA.Ptr, DL); 642 const Value *BObj = GetUnderlyingObject(LocB.Ptr, DL); 643 644 // If the underlying objects are the same, they must alias 645 if (AObj == BObj) 646 return MustAlias; 647 648 // We may have hit the recursion limit for underlying objects, or have 649 // underlying objects where we don't know they will alias. 650 if (!isIdentifiedObject(AObj) || !isIdentifiedObject(BObj)) 651 return MayAlias; 652 653 // Otherwise we know the objects are different and both identified objects so 654 // must not alias. 655 return NoAlias; 656 } 657 658 659 // Returns true if the load or store can be analyzed. Atomic and volatile 660 // operations have properties which this analysis does not understand. 661 static 662 bool isLoadOrStore(const Instruction *I) { 663 if (const LoadInst *LI = dyn_cast<LoadInst>(I)) 664 return LI->isUnordered(); 665 else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) 666 return SI->isUnordered(); 667 return false; 668 } 669 670 671 // Examines the loop nesting of the Src and Dst 672 // instructions and establishes their shared loops. Sets the variables 673 // CommonLevels, SrcLevels, and MaxLevels. 674 // The source and destination instructions needn't be contained in the same 675 // loop. The routine establishNestingLevels finds the level of most deeply 676 // nested loop that contains them both, CommonLevels. An instruction that's 677 // not contained in a loop is at level = 0. MaxLevels is equal to the level 678 // of the source plus the level of the destination, minus CommonLevels. 679 // This lets us allocate vectors MaxLevels in length, with room for every 680 // distinct loop referenced in both the source and destination subscripts. 681 // The variable SrcLevels is the nesting depth of the source instruction. 682 // It's used to help calculate distinct loops referenced by the destination. 683 // Here's the map from loops to levels: 684 // 0 - unused 685 // 1 - outermost common loop 686 // ... - other common loops 687 // CommonLevels - innermost common loop 688 // ... - loops containing Src but not Dst 689 // SrcLevels - innermost loop containing Src but not Dst 690 // ... - loops containing Dst but not Src 691 // MaxLevels - innermost loops containing Dst but not Src 692 // Consider the follow code fragment: 693 // for (a = ...) { 694 // for (b = ...) { 695 // for (c = ...) { 696 // for (d = ...) { 697 // A[] = ...; 698 // } 699 // } 700 // for (e = ...) { 701 // for (f = ...) { 702 // for (g = ...) { 703 // ... = A[]; 704 // } 705 // } 706 // } 707 // } 708 // } 709 // If we're looking at the possibility of a dependence between the store 710 // to A (the Src) and the load from A (the Dst), we'll note that they 711 // have 2 loops in common, so CommonLevels will equal 2 and the direction 712 // vector for Result will have 2 entries. SrcLevels = 4 and MaxLevels = 7. 713 // A map from loop names to loop numbers would look like 714 // a - 1 715 // b - 2 = CommonLevels 716 // c - 3 717 // d - 4 = SrcLevels 718 // e - 5 719 // f - 6 720 // g - 7 = MaxLevels 721 void DependenceInfo::establishNestingLevels(const Instruction *Src, 722 const Instruction *Dst) { 723 const BasicBlock *SrcBlock = Src->getParent(); 724 const BasicBlock *DstBlock = Dst->getParent(); 725 unsigned SrcLevel = LI->getLoopDepth(SrcBlock); 726 unsigned DstLevel = LI->getLoopDepth(DstBlock); 727 const Loop *SrcLoop = LI->getLoopFor(SrcBlock); 728 const Loop *DstLoop = LI->getLoopFor(DstBlock); 729 SrcLevels = SrcLevel; 730 MaxLevels = SrcLevel + DstLevel; 731 while (SrcLevel > DstLevel) { 732 SrcLoop = SrcLoop->getParentLoop(); 733 SrcLevel--; 734 } 735 while (DstLevel > SrcLevel) { 736 DstLoop = DstLoop->getParentLoop(); 737 DstLevel--; 738 } 739 while (SrcLoop != DstLoop) { 740 SrcLoop = SrcLoop->getParentLoop(); 741 DstLoop = DstLoop->getParentLoop(); 742 SrcLevel--; 743 } 744 CommonLevels = SrcLevel; 745 MaxLevels -= CommonLevels; 746 } 747 748 749 // Given one of the loops containing the source, return 750 // its level index in our numbering scheme. 751 unsigned DependenceInfo::mapSrcLoop(const Loop *SrcLoop) const { 752 return SrcLoop->getLoopDepth(); 753 } 754 755 756 // Given one of the loops containing the destination, 757 // return its level index in our numbering scheme. 758 unsigned DependenceInfo::mapDstLoop(const Loop *DstLoop) const { 759 unsigned D = DstLoop->getLoopDepth(); 760 if (D > CommonLevels) 761 return D - CommonLevels + SrcLevels; 762 else 763 return D; 764 } 765 766 767 // Returns true if Expression is loop invariant in LoopNest. 768 bool DependenceInfo::isLoopInvariant(const SCEV *Expression, 769 const Loop *LoopNest) const { 770 if (!LoopNest) 771 return true; 772 return SE->isLoopInvariant(Expression, LoopNest) && 773 isLoopInvariant(Expression, LoopNest->getParentLoop()); 774 } 775 776 777 778 // Finds the set of loops from the LoopNest that 779 // have a level <= CommonLevels and are referred to by the SCEV Expression. 780 void DependenceInfo::collectCommonLoops(const SCEV *Expression, 781 const Loop *LoopNest, 782 SmallBitVector &Loops) const { 783 while (LoopNest) { 784 unsigned Level = LoopNest->getLoopDepth(); 785 if (Level <= CommonLevels && !SE->isLoopInvariant(Expression, LoopNest)) 786 Loops.set(Level); 787 LoopNest = LoopNest->getParentLoop(); 788 } 789 } 790 791 void DependenceInfo::unifySubscriptType(ArrayRef<Subscript *> Pairs) { 792 793 unsigned widestWidthSeen = 0; 794 Type *widestType; 795 796 // Go through each pair and find the widest bit to which we need 797 // to extend all of them. 798 for (Subscript *Pair : Pairs) { 799 const SCEV *Src = Pair->Src; 800 const SCEV *Dst = Pair->Dst; 801 IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType()); 802 IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType()); 803 if (SrcTy == nullptr || DstTy == nullptr) { 804 assert(SrcTy == DstTy && "This function only unify integer types and " 805 "expect Src and Dst share the same type " 806 "otherwise."); 807 continue; 808 } 809 if (SrcTy->getBitWidth() > widestWidthSeen) { 810 widestWidthSeen = SrcTy->getBitWidth(); 811 widestType = SrcTy; 812 } 813 if (DstTy->getBitWidth() > widestWidthSeen) { 814 widestWidthSeen = DstTy->getBitWidth(); 815 widestType = DstTy; 816 } 817 } 818 819 820 assert(widestWidthSeen > 0); 821 822 // Now extend each pair to the widest seen. 823 for (Subscript *Pair : Pairs) { 824 const SCEV *Src = Pair->Src; 825 const SCEV *Dst = Pair->Dst; 826 IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType()); 827 IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType()); 828 if (SrcTy == nullptr || DstTy == nullptr) { 829 assert(SrcTy == DstTy && "This function only unify integer types and " 830 "expect Src and Dst share the same type " 831 "otherwise."); 832 continue; 833 } 834 if (SrcTy->getBitWidth() < widestWidthSeen) 835 // Sign-extend Src to widestType 836 Pair->Src = SE->getSignExtendExpr(Src, widestType); 837 if (DstTy->getBitWidth() < widestWidthSeen) { 838 // Sign-extend Dst to widestType 839 Pair->Dst = SE->getSignExtendExpr(Dst, widestType); 840 } 841 } 842 } 843 844 // removeMatchingExtensions - Examines a subscript pair. 845 // If the source and destination are identically sign (or zero) 846 // extended, it strips off the extension in an effect to simplify 847 // the actual analysis. 848 void DependenceInfo::removeMatchingExtensions(Subscript *Pair) { 849 const SCEV *Src = Pair->Src; 850 const SCEV *Dst = Pair->Dst; 851 if ((isa<SCEVZeroExtendExpr>(Src) && isa<SCEVZeroExtendExpr>(Dst)) || 852 (isa<SCEVSignExtendExpr>(Src) && isa<SCEVSignExtendExpr>(Dst))) { 853 const SCEVCastExpr *SrcCast = cast<SCEVCastExpr>(Src); 854 const SCEVCastExpr *DstCast = cast<SCEVCastExpr>(Dst); 855 const SCEV *SrcCastOp = SrcCast->getOperand(); 856 const SCEV *DstCastOp = DstCast->getOperand(); 857 if (SrcCastOp->getType() == DstCastOp->getType()) { 858 Pair->Src = SrcCastOp; 859 Pair->Dst = DstCastOp; 860 } 861 } 862 } 863 864 865 // Examine the scev and return true iff it's linear. 866 // Collect any loops mentioned in the set of "Loops". 867 bool DependenceInfo::checkSrcSubscript(const SCEV *Src, const Loop *LoopNest, 868 SmallBitVector &Loops) { 869 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Src); 870 if (!AddRec) 871 return isLoopInvariant(Src, LoopNest); 872 const SCEV *Start = AddRec->getStart(); 873 const SCEV *Step = AddRec->getStepRecurrence(*SE); 874 const SCEV *UB = SE->getBackedgeTakenCount(AddRec->getLoop()); 875 if (!isa<SCEVCouldNotCompute>(UB)) { 876 if (SE->getTypeSizeInBits(Start->getType()) < 877 SE->getTypeSizeInBits(UB->getType())) { 878 if (!AddRec->getNoWrapFlags()) 879 return false; 880 } 881 } 882 if (!isLoopInvariant(Step, LoopNest)) 883 return false; 884 Loops.set(mapSrcLoop(AddRec->getLoop())); 885 return checkSrcSubscript(Start, LoopNest, Loops); 886 } 887 888 889 890 // Examine the scev and return true iff it's linear. 891 // Collect any loops mentioned in the set of "Loops". 892 bool DependenceInfo::checkDstSubscript(const SCEV *Dst, const Loop *LoopNest, 893 SmallBitVector &Loops) { 894 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Dst); 895 if (!AddRec) 896 return isLoopInvariant(Dst, LoopNest); 897 const SCEV *Start = AddRec->getStart(); 898 const SCEV *Step = AddRec->getStepRecurrence(*SE); 899 const SCEV *UB = SE->getBackedgeTakenCount(AddRec->getLoop()); 900 if (!isa<SCEVCouldNotCompute>(UB)) { 901 if (SE->getTypeSizeInBits(Start->getType()) < 902 SE->getTypeSizeInBits(UB->getType())) { 903 if (!AddRec->getNoWrapFlags()) 904 return false; 905 } 906 } 907 if (!isLoopInvariant(Step, LoopNest)) 908 return false; 909 Loops.set(mapDstLoop(AddRec->getLoop())); 910 return checkDstSubscript(Start, LoopNest, Loops); 911 } 912 913 914 // Examines the subscript pair (the Src and Dst SCEVs) 915 // and classifies it as either ZIV, SIV, RDIV, MIV, or Nonlinear. 916 // Collects the associated loops in a set. 917 DependenceInfo::Subscript::ClassificationKind 918 DependenceInfo::classifyPair(const SCEV *Src, const Loop *SrcLoopNest, 919 const SCEV *Dst, const Loop *DstLoopNest, 920 SmallBitVector &Loops) { 921 SmallBitVector SrcLoops(MaxLevels + 1); 922 SmallBitVector DstLoops(MaxLevels + 1); 923 if (!checkSrcSubscript(Src, SrcLoopNest, SrcLoops)) 924 return Subscript::NonLinear; 925 if (!checkDstSubscript(Dst, DstLoopNest, DstLoops)) 926 return Subscript::NonLinear; 927 Loops = SrcLoops; 928 Loops |= DstLoops; 929 unsigned N = Loops.count(); 930 if (N == 0) 931 return Subscript::ZIV; 932 if (N == 1) 933 return Subscript::SIV; 934 if (N == 2 && (SrcLoops.count() == 0 || 935 DstLoops.count() == 0 || 936 (SrcLoops.count() == 1 && DstLoops.count() == 1))) 937 return Subscript::RDIV; 938 return Subscript::MIV; 939 } 940 941 942 // A wrapper around SCEV::isKnownPredicate. 943 // Looks for cases where we're interested in comparing for equality. 944 // If both X and Y have been identically sign or zero extended, 945 // it strips off the (confusing) extensions before invoking 946 // SCEV::isKnownPredicate. Perhaps, someday, the ScalarEvolution package 947 // will be similarly updated. 948 // 949 // If SCEV::isKnownPredicate can't prove the predicate, 950 // we try simple subtraction, which seems to help in some cases 951 // involving symbolics. 952 bool DependenceInfo::isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *X, 953 const SCEV *Y) const { 954 if (Pred == CmpInst::ICMP_EQ || 955 Pred == CmpInst::ICMP_NE) { 956 if ((isa<SCEVSignExtendExpr>(X) && 957 isa<SCEVSignExtendExpr>(Y)) || 958 (isa<SCEVZeroExtendExpr>(X) && 959 isa<SCEVZeroExtendExpr>(Y))) { 960 const SCEVCastExpr *CX = cast<SCEVCastExpr>(X); 961 const SCEVCastExpr *CY = cast<SCEVCastExpr>(Y); 962 const SCEV *Xop = CX->getOperand(); 963 const SCEV *Yop = CY->getOperand(); 964 if (Xop->getType() == Yop->getType()) { 965 X = Xop; 966 Y = Yop; 967 } 968 } 969 } 970 if (SE->isKnownPredicate(Pred, X, Y)) 971 return true; 972 // If SE->isKnownPredicate can't prove the condition, 973 // we try the brute-force approach of subtracting 974 // and testing the difference. 975 // By testing with SE->isKnownPredicate first, we avoid 976 // the possibility of overflow when the arguments are constants. 977 const SCEV *Delta = SE->getMinusSCEV(X, Y); 978 switch (Pred) { 979 case CmpInst::ICMP_EQ: 980 return Delta->isZero(); 981 case CmpInst::ICMP_NE: 982 return SE->isKnownNonZero(Delta); 983 case CmpInst::ICMP_SGE: 984 return SE->isKnownNonNegative(Delta); 985 case CmpInst::ICMP_SLE: 986 return SE->isKnownNonPositive(Delta); 987 case CmpInst::ICMP_SGT: 988 return SE->isKnownPositive(Delta); 989 case CmpInst::ICMP_SLT: 990 return SE->isKnownNegative(Delta); 991 default: 992 llvm_unreachable("unexpected predicate in isKnownPredicate"); 993 } 994 } 995 996 997 // All subscripts are all the same type. 998 // Loop bound may be smaller (e.g., a char). 999 // Should zero extend loop bound, since it's always >= 0. 1000 // This routine collects upper bound and extends or truncates if needed. 1001 // Truncating is safe when subscripts are known not to wrap. Cases without 1002 // nowrap flags should have been rejected earlier. 1003 // Return null if no bound available. 1004 const SCEV *DependenceInfo::collectUpperBound(const Loop *L, Type *T) const { 1005 if (SE->hasLoopInvariantBackedgeTakenCount(L)) { 1006 const SCEV *UB = SE->getBackedgeTakenCount(L); 1007 return SE->getTruncateOrZeroExtend(UB, T); 1008 } 1009 return nullptr; 1010 } 1011 1012 1013 // Calls collectUpperBound(), then attempts to cast it to SCEVConstant. 1014 // If the cast fails, returns NULL. 1015 const SCEVConstant *DependenceInfo::collectConstantUpperBound(const Loop *L, 1016 Type *T) const { 1017 if (const SCEV *UB = collectUpperBound(L, T)) 1018 return dyn_cast<SCEVConstant>(UB); 1019 return nullptr; 1020 } 1021 1022 1023 // testZIV - 1024 // When we have a pair of subscripts of the form [c1] and [c2], 1025 // where c1 and c2 are both loop invariant, we attack it using 1026 // the ZIV test. Basically, we test by comparing the two values, 1027 // but there are actually three possible results: 1028 // 1) the values are equal, so there's a dependence 1029 // 2) the values are different, so there's no dependence 1030 // 3) the values might be equal, so we have to assume a dependence. 1031 // 1032 // Return true if dependence disproved. 1033 bool DependenceInfo::testZIV(const SCEV *Src, const SCEV *Dst, 1034 FullDependence &Result) const { 1035 DEBUG(dbgs() << " src = " << *Src << "\n"); 1036 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 1037 ++ZIVapplications; 1038 if (isKnownPredicate(CmpInst::ICMP_EQ, Src, Dst)) { 1039 DEBUG(dbgs() << " provably dependent\n"); 1040 return false; // provably dependent 1041 } 1042 if (isKnownPredicate(CmpInst::ICMP_NE, Src, Dst)) { 1043 DEBUG(dbgs() << " provably independent\n"); 1044 ++ZIVindependence; 1045 return true; // provably independent 1046 } 1047 DEBUG(dbgs() << " possibly dependent\n"); 1048 Result.Consistent = false; 1049 return false; // possibly dependent 1050 } 1051 1052 1053 // strongSIVtest - 1054 // From the paper, Practical Dependence Testing, Section 4.2.1 1055 // 1056 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 + a*i], 1057 // where i is an induction variable, c1 and c2 are loop invariant, 1058 // and a is a constant, we can solve it exactly using the Strong SIV test. 1059 // 1060 // Can prove independence. Failing that, can compute distance (and direction). 1061 // In the presence of symbolic terms, we can sometimes make progress. 1062 // 1063 // If there's a dependence, 1064 // 1065 // c1 + a*i = c2 + a*i' 1066 // 1067 // The dependence distance is 1068 // 1069 // d = i' - i = (c1 - c2)/a 1070 // 1071 // A dependence only exists if d is an integer and abs(d) <= U, where U is the 1072 // loop's upper bound. If a dependence exists, the dependence direction is 1073 // defined as 1074 // 1075 // { < if d > 0 1076 // direction = { = if d = 0 1077 // { > if d < 0 1078 // 1079 // Return true if dependence disproved. 1080 bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst, 1081 const SCEV *DstConst, const Loop *CurLoop, 1082 unsigned Level, FullDependence &Result, 1083 Constraint &NewConstraint) const { 1084 DEBUG(dbgs() << "\tStrong SIV test\n"); 1085 DEBUG(dbgs() << "\t Coeff = " << *Coeff); 1086 DEBUG(dbgs() << ", " << *Coeff->getType() << "\n"); 1087 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst); 1088 DEBUG(dbgs() << ", " << *SrcConst->getType() << "\n"); 1089 DEBUG(dbgs() << "\t DstConst = " << *DstConst); 1090 DEBUG(dbgs() << ", " << *DstConst->getType() << "\n"); 1091 ++StrongSIVapplications; 1092 assert(0 < Level && Level <= CommonLevels && "level out of range"); 1093 Level--; 1094 1095 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); 1096 DEBUG(dbgs() << "\t Delta = " << *Delta); 1097 DEBUG(dbgs() << ", " << *Delta->getType() << "\n"); 1098 1099 // check that |Delta| < iteration count 1100 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1101 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound); 1102 DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n"); 1103 const SCEV *AbsDelta = 1104 SE->isKnownNonNegative(Delta) ? Delta : SE->getNegativeSCEV(Delta); 1105 const SCEV *AbsCoeff = 1106 SE->isKnownNonNegative(Coeff) ? Coeff : SE->getNegativeSCEV(Coeff); 1107 const SCEV *Product = SE->getMulExpr(UpperBound, AbsCoeff); 1108 if (isKnownPredicate(CmpInst::ICMP_SGT, AbsDelta, Product)) { 1109 // Distance greater than trip count - no dependence 1110 ++StrongSIVindependence; 1111 ++StrongSIVsuccesses; 1112 return true; 1113 } 1114 } 1115 1116 // Can we compute distance? 1117 if (isa<SCEVConstant>(Delta) && isa<SCEVConstant>(Coeff)) { 1118 APInt ConstDelta = cast<SCEVConstant>(Delta)->getAPInt(); 1119 APInt ConstCoeff = cast<SCEVConstant>(Coeff)->getAPInt(); 1120 APInt Distance = ConstDelta; // these need to be initialized 1121 APInt Remainder = ConstDelta; 1122 APInt::sdivrem(ConstDelta, ConstCoeff, Distance, Remainder); 1123 DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); 1124 DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1125 // Make sure Coeff divides Delta exactly 1126 if (Remainder != 0) { 1127 // Coeff doesn't divide Distance, no dependence 1128 ++StrongSIVindependence; 1129 ++StrongSIVsuccesses; 1130 return true; 1131 } 1132 Result.DV[Level].Distance = SE->getConstant(Distance); 1133 NewConstraint.setDistance(SE->getConstant(Distance), CurLoop); 1134 if (Distance.sgt(0)) 1135 Result.DV[Level].Direction &= Dependence::DVEntry::LT; 1136 else if (Distance.slt(0)) 1137 Result.DV[Level].Direction &= Dependence::DVEntry::GT; 1138 else 1139 Result.DV[Level].Direction &= Dependence::DVEntry::EQ; 1140 ++StrongSIVsuccesses; 1141 } 1142 else if (Delta->isZero()) { 1143 // since 0/X == 0 1144 Result.DV[Level].Distance = Delta; 1145 NewConstraint.setDistance(Delta, CurLoop); 1146 Result.DV[Level].Direction &= Dependence::DVEntry::EQ; 1147 ++StrongSIVsuccesses; 1148 } 1149 else { 1150 if (Coeff->isOne()) { 1151 DEBUG(dbgs() << "\t Distance = " << *Delta << "\n"); 1152 Result.DV[Level].Distance = Delta; // since X/1 == X 1153 NewConstraint.setDistance(Delta, CurLoop); 1154 } 1155 else { 1156 Result.Consistent = false; 1157 NewConstraint.setLine(Coeff, 1158 SE->getNegativeSCEV(Coeff), 1159 SE->getNegativeSCEV(Delta), CurLoop); 1160 } 1161 1162 // maybe we can get a useful direction 1163 bool DeltaMaybeZero = !SE->isKnownNonZero(Delta); 1164 bool DeltaMaybePositive = !SE->isKnownNonPositive(Delta); 1165 bool DeltaMaybeNegative = !SE->isKnownNonNegative(Delta); 1166 bool CoeffMaybePositive = !SE->isKnownNonPositive(Coeff); 1167 bool CoeffMaybeNegative = !SE->isKnownNonNegative(Coeff); 1168 // The double negatives above are confusing. 1169 // It helps to read !SE->isKnownNonZero(Delta) 1170 // as "Delta might be Zero" 1171 unsigned NewDirection = Dependence::DVEntry::NONE; 1172 if ((DeltaMaybePositive && CoeffMaybePositive) || 1173 (DeltaMaybeNegative && CoeffMaybeNegative)) 1174 NewDirection = Dependence::DVEntry::LT; 1175 if (DeltaMaybeZero) 1176 NewDirection |= Dependence::DVEntry::EQ; 1177 if ((DeltaMaybeNegative && CoeffMaybePositive) || 1178 (DeltaMaybePositive && CoeffMaybeNegative)) 1179 NewDirection |= Dependence::DVEntry::GT; 1180 if (NewDirection < Result.DV[Level].Direction) 1181 ++StrongSIVsuccesses; 1182 Result.DV[Level].Direction &= NewDirection; 1183 } 1184 return false; 1185 } 1186 1187 1188 // weakCrossingSIVtest - 1189 // From the paper, Practical Dependence Testing, Section 4.2.2 1190 // 1191 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 - a*i], 1192 // where i is an induction variable, c1 and c2 are loop invariant, 1193 // and a is a constant, we can solve it exactly using the 1194 // Weak-Crossing SIV test. 1195 // 1196 // Given c1 + a*i = c2 - a*i', we can look for the intersection of 1197 // the two lines, where i = i', yielding 1198 // 1199 // c1 + a*i = c2 - a*i 1200 // 2a*i = c2 - c1 1201 // i = (c2 - c1)/2a 1202 // 1203 // If i < 0, there is no dependence. 1204 // If i > upperbound, there is no dependence. 1205 // If i = 0 (i.e., if c1 = c2), there's a dependence with distance = 0. 1206 // If i = upperbound, there's a dependence with distance = 0. 1207 // If i is integral, there's a dependence (all directions). 1208 // If the non-integer part = 1/2, there's a dependence (<> directions). 1209 // Otherwise, there's no dependence. 1210 // 1211 // Can prove independence. Failing that, 1212 // can sometimes refine the directions. 1213 // Can determine iteration for splitting. 1214 // 1215 // Return true if dependence disproved. 1216 bool DependenceInfo::weakCrossingSIVtest( 1217 const SCEV *Coeff, const SCEV *SrcConst, const SCEV *DstConst, 1218 const Loop *CurLoop, unsigned Level, FullDependence &Result, 1219 Constraint &NewConstraint, const SCEV *&SplitIter) const { 1220 DEBUG(dbgs() << "\tWeak-Crossing SIV test\n"); 1221 DEBUG(dbgs() << "\t Coeff = " << *Coeff << "\n"); 1222 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1223 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1224 ++WeakCrossingSIVapplications; 1225 assert(0 < Level && Level <= CommonLevels && "Level out of range"); 1226 Level--; 1227 Result.Consistent = false; 1228 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1229 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1230 NewConstraint.setLine(Coeff, Coeff, Delta, CurLoop); 1231 if (Delta->isZero()) { 1232 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT); 1233 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT); 1234 ++WeakCrossingSIVsuccesses; 1235 if (!Result.DV[Level].Direction) { 1236 ++WeakCrossingSIVindependence; 1237 return true; 1238 } 1239 Result.DV[Level].Distance = Delta; // = 0 1240 return false; 1241 } 1242 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Coeff); 1243 if (!ConstCoeff) 1244 return false; 1245 1246 Result.DV[Level].Splitable = true; 1247 if (SE->isKnownNegative(ConstCoeff)) { 1248 ConstCoeff = dyn_cast<SCEVConstant>(SE->getNegativeSCEV(ConstCoeff)); 1249 assert(ConstCoeff && 1250 "dynamic cast of negative of ConstCoeff should yield constant"); 1251 Delta = SE->getNegativeSCEV(Delta); 1252 } 1253 assert(SE->isKnownPositive(ConstCoeff) && "ConstCoeff should be positive"); 1254 1255 // compute SplitIter for use by DependenceInfo::getSplitIteration() 1256 SplitIter = SE->getUDivExpr( 1257 SE->getSMaxExpr(SE->getZero(Delta->getType()), Delta), 1258 SE->getMulExpr(SE->getConstant(Delta->getType(), 2), ConstCoeff)); 1259 DEBUG(dbgs() << "\t Split iter = " << *SplitIter << "\n"); 1260 1261 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1262 if (!ConstDelta) 1263 return false; 1264 1265 // We're certain that ConstCoeff > 0; therefore, 1266 // if Delta < 0, then no dependence. 1267 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1268 DEBUG(dbgs() << "\t ConstCoeff = " << *ConstCoeff << "\n"); 1269 if (SE->isKnownNegative(Delta)) { 1270 // No dependence, Delta < 0 1271 ++WeakCrossingSIVindependence; 1272 ++WeakCrossingSIVsuccesses; 1273 return true; 1274 } 1275 1276 // We're certain that Delta > 0 and ConstCoeff > 0. 1277 // Check Delta/(2*ConstCoeff) against upper loop bound 1278 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1279 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1280 const SCEV *ConstantTwo = SE->getConstant(UpperBound->getType(), 2); 1281 const SCEV *ML = SE->getMulExpr(SE->getMulExpr(ConstCoeff, UpperBound), 1282 ConstantTwo); 1283 DEBUG(dbgs() << "\t ML = " << *ML << "\n"); 1284 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, ML)) { 1285 // Delta too big, no dependence 1286 ++WeakCrossingSIVindependence; 1287 ++WeakCrossingSIVsuccesses; 1288 return true; 1289 } 1290 if (isKnownPredicate(CmpInst::ICMP_EQ, Delta, ML)) { 1291 // i = i' = UB 1292 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT); 1293 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT); 1294 ++WeakCrossingSIVsuccesses; 1295 if (!Result.DV[Level].Direction) { 1296 ++WeakCrossingSIVindependence; 1297 return true; 1298 } 1299 Result.DV[Level].Splitable = false; 1300 Result.DV[Level].Distance = SE->getZero(Delta->getType()); 1301 return false; 1302 } 1303 } 1304 1305 // check that Coeff divides Delta 1306 APInt APDelta = ConstDelta->getAPInt(); 1307 APInt APCoeff = ConstCoeff->getAPInt(); 1308 APInt Distance = APDelta; // these need to be initialzed 1309 APInt Remainder = APDelta; 1310 APInt::sdivrem(APDelta, APCoeff, Distance, Remainder); 1311 DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1312 if (Remainder != 0) { 1313 // Coeff doesn't divide Delta, no dependence 1314 ++WeakCrossingSIVindependence; 1315 ++WeakCrossingSIVsuccesses; 1316 return true; 1317 } 1318 DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); 1319 1320 // if 2*Coeff doesn't divide Delta, then the equal direction isn't possible 1321 APInt Two = APInt(Distance.getBitWidth(), 2, true); 1322 Remainder = Distance.srem(Two); 1323 DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1324 if (Remainder != 0) { 1325 // Equal direction isn't possible 1326 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::EQ); 1327 ++WeakCrossingSIVsuccesses; 1328 } 1329 return false; 1330 } 1331 1332 1333 // Kirch's algorithm, from 1334 // 1335 // Optimizing Supercompilers for Supercomputers 1336 // Michael Wolfe 1337 // MIT Press, 1989 1338 // 1339 // Program 2.1, page 29. 1340 // Computes the GCD of AM and BM. 1341 // Also finds a solution to the equation ax - by = gcd(a, b). 1342 // Returns true if dependence disproved; i.e., gcd does not divide Delta. 1343 static bool findGCD(unsigned Bits, const APInt &AM, const APInt &BM, 1344 const APInt &Delta, APInt &G, APInt &X, APInt &Y) { 1345 APInt A0(Bits, 1, true), A1(Bits, 0, true); 1346 APInt B0(Bits, 0, true), B1(Bits, 1, true); 1347 APInt G0 = AM.abs(); 1348 APInt G1 = BM.abs(); 1349 APInt Q = G0; // these need to be initialized 1350 APInt R = G0; 1351 APInt::sdivrem(G0, G1, Q, R); 1352 while (R != 0) { 1353 APInt A2 = A0 - Q*A1; A0 = A1; A1 = A2; 1354 APInt B2 = B0 - Q*B1; B0 = B1; B1 = B2; 1355 G0 = G1; G1 = R; 1356 APInt::sdivrem(G0, G1, Q, R); 1357 } 1358 G = G1; 1359 DEBUG(dbgs() << "\t GCD = " << G << "\n"); 1360 X = AM.slt(0) ? -A1 : A1; 1361 Y = BM.slt(0) ? B1 : -B1; 1362 1363 // make sure gcd divides Delta 1364 R = Delta.srem(G); 1365 if (R != 0) 1366 return true; // gcd doesn't divide Delta, no dependence 1367 Q = Delta.sdiv(G); 1368 X *= Q; 1369 Y *= Q; 1370 return false; 1371 } 1372 1373 static APInt floorOfQuotient(const APInt &A, const APInt &B) { 1374 APInt Q = A; // these need to be initialized 1375 APInt R = A; 1376 APInt::sdivrem(A, B, Q, R); 1377 if (R == 0) 1378 return Q; 1379 if ((A.sgt(0) && B.sgt(0)) || 1380 (A.slt(0) && B.slt(0))) 1381 return Q; 1382 else 1383 return Q - 1; 1384 } 1385 1386 static APInt ceilingOfQuotient(const APInt &A, const APInt &B) { 1387 APInt Q = A; // these need to be initialized 1388 APInt R = A; 1389 APInt::sdivrem(A, B, Q, R); 1390 if (R == 0) 1391 return Q; 1392 if ((A.sgt(0) && B.sgt(0)) || 1393 (A.slt(0) && B.slt(0))) 1394 return Q + 1; 1395 else 1396 return Q; 1397 } 1398 1399 1400 static 1401 APInt maxAPInt(APInt A, APInt B) { 1402 return A.sgt(B) ? A : B; 1403 } 1404 1405 1406 static 1407 APInt minAPInt(APInt A, APInt B) { 1408 return A.slt(B) ? A : B; 1409 } 1410 1411 1412 // exactSIVtest - 1413 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i], 1414 // where i is an induction variable, c1 and c2 are loop invariant, and a1 1415 // and a2 are constant, we can solve it exactly using an algorithm developed 1416 // by Banerjee and Wolfe. See Section 2.5.3 in 1417 // 1418 // Optimizing Supercompilers for Supercomputers 1419 // Michael Wolfe 1420 // MIT Press, 1989 1421 // 1422 // It's slower than the specialized tests (strong SIV, weak-zero SIV, etc), 1423 // so use them if possible. They're also a bit better with symbolics and, 1424 // in the case of the strong SIV test, can compute Distances. 1425 // 1426 // Return true if dependence disproved. 1427 bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, 1428 const SCEV *SrcConst, const SCEV *DstConst, 1429 const Loop *CurLoop, unsigned Level, 1430 FullDependence &Result, 1431 Constraint &NewConstraint) const { 1432 DEBUG(dbgs() << "\tExact SIV test\n"); 1433 DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); 1434 DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); 1435 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1436 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1437 ++ExactSIVapplications; 1438 assert(0 < Level && Level <= CommonLevels && "Level out of range"); 1439 Level--; 1440 Result.Consistent = false; 1441 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1442 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1443 NewConstraint.setLine(SrcCoeff, SE->getNegativeSCEV(DstCoeff), 1444 Delta, CurLoop); 1445 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1446 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1447 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1448 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) 1449 return false; 1450 1451 // find gcd 1452 APInt G, X, Y; 1453 APInt AM = ConstSrcCoeff->getAPInt(); 1454 APInt BM = ConstDstCoeff->getAPInt(); 1455 unsigned Bits = AM.getBitWidth(); 1456 if (findGCD(Bits, AM, BM, ConstDelta->getAPInt(), G, X, Y)) { 1457 // gcd doesn't divide Delta, no dependence 1458 ++ExactSIVindependence; 1459 ++ExactSIVsuccesses; 1460 return true; 1461 } 1462 1463 DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); 1464 1465 // since SCEV construction normalizes, LM = 0 1466 APInt UM(Bits, 1, true); 1467 bool UMvalid = false; 1468 // UM is perhaps unavailable, let's check 1469 if (const SCEVConstant *CUB = 1470 collectConstantUpperBound(CurLoop, Delta->getType())) { 1471 UM = CUB->getAPInt(); 1472 DEBUG(dbgs() << "\t UM = " << UM << "\n"); 1473 UMvalid = true; 1474 } 1475 1476 APInt TU(APInt::getSignedMaxValue(Bits)); 1477 APInt TL(APInt::getSignedMinValue(Bits)); 1478 1479 // test(BM/G, LM-X) and test(-BM/G, X-UM) 1480 APInt TMUL = BM.sdiv(G); 1481 if (TMUL.sgt(0)) { 1482 TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL)); 1483 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1484 if (UMvalid) { 1485 TU = minAPInt(TU, floorOfQuotient(UM - X, TMUL)); 1486 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1487 } 1488 } 1489 else { 1490 TU = minAPInt(TU, floorOfQuotient(-X, TMUL)); 1491 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1492 if (UMvalid) { 1493 TL = maxAPInt(TL, ceilingOfQuotient(UM - X, TMUL)); 1494 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1495 } 1496 } 1497 1498 // test(AM/G, LM-Y) and test(-AM/G, Y-UM) 1499 TMUL = AM.sdiv(G); 1500 if (TMUL.sgt(0)) { 1501 TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL)); 1502 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1503 if (UMvalid) { 1504 TU = minAPInt(TU, floorOfQuotient(UM - Y, TMUL)); 1505 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1506 } 1507 } 1508 else { 1509 TU = minAPInt(TU, floorOfQuotient(-Y, TMUL)); 1510 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1511 if (UMvalid) { 1512 TL = maxAPInt(TL, ceilingOfQuotient(UM - Y, TMUL)); 1513 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1514 } 1515 } 1516 if (TL.sgt(TU)) { 1517 ++ExactSIVindependence; 1518 ++ExactSIVsuccesses; 1519 return true; 1520 } 1521 1522 // explore directions 1523 unsigned NewDirection = Dependence::DVEntry::NONE; 1524 1525 // less than 1526 APInt SaveTU(TU); // save these 1527 APInt SaveTL(TL); 1528 DEBUG(dbgs() << "\t exploring LT direction\n"); 1529 TMUL = AM - BM; 1530 if (TMUL.sgt(0)) { 1531 TL = maxAPInt(TL, ceilingOfQuotient(X - Y + 1, TMUL)); 1532 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1533 } 1534 else { 1535 TU = minAPInt(TU, floorOfQuotient(X - Y + 1, TMUL)); 1536 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1537 } 1538 if (TL.sle(TU)) { 1539 NewDirection |= Dependence::DVEntry::LT; 1540 ++ExactSIVsuccesses; 1541 } 1542 1543 // equal 1544 TU = SaveTU; // restore 1545 TL = SaveTL; 1546 DEBUG(dbgs() << "\t exploring EQ direction\n"); 1547 if (TMUL.sgt(0)) { 1548 TL = maxAPInt(TL, ceilingOfQuotient(X - Y, TMUL)); 1549 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1550 } 1551 else { 1552 TU = minAPInt(TU, floorOfQuotient(X - Y, TMUL)); 1553 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1554 } 1555 TMUL = BM - AM; 1556 if (TMUL.sgt(0)) { 1557 TL = maxAPInt(TL, ceilingOfQuotient(Y - X, TMUL)); 1558 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1559 } 1560 else { 1561 TU = minAPInt(TU, floorOfQuotient(Y - X, TMUL)); 1562 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1563 } 1564 if (TL.sle(TU)) { 1565 NewDirection |= Dependence::DVEntry::EQ; 1566 ++ExactSIVsuccesses; 1567 } 1568 1569 // greater than 1570 TU = SaveTU; // restore 1571 TL = SaveTL; 1572 DEBUG(dbgs() << "\t exploring GT direction\n"); 1573 if (TMUL.sgt(0)) { 1574 TL = maxAPInt(TL, ceilingOfQuotient(Y - X + 1, TMUL)); 1575 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1576 } 1577 else { 1578 TU = minAPInt(TU, floorOfQuotient(Y - X + 1, TMUL)); 1579 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1580 } 1581 if (TL.sle(TU)) { 1582 NewDirection |= Dependence::DVEntry::GT; 1583 ++ExactSIVsuccesses; 1584 } 1585 1586 // finished 1587 Result.DV[Level].Direction &= NewDirection; 1588 if (Result.DV[Level].Direction == Dependence::DVEntry::NONE) 1589 ++ExactSIVindependence; 1590 return Result.DV[Level].Direction == Dependence::DVEntry::NONE; 1591 } 1592 1593 1594 1595 // Return true if the divisor evenly divides the dividend. 1596 static 1597 bool isRemainderZero(const SCEVConstant *Dividend, 1598 const SCEVConstant *Divisor) { 1599 const APInt &ConstDividend = Dividend->getAPInt(); 1600 const APInt &ConstDivisor = Divisor->getAPInt(); 1601 return ConstDividend.srem(ConstDivisor) == 0; 1602 } 1603 1604 1605 // weakZeroSrcSIVtest - 1606 // From the paper, Practical Dependence Testing, Section 4.2.2 1607 // 1608 // When we have a pair of subscripts of the form [c1] and [c2 + a*i], 1609 // where i is an induction variable, c1 and c2 are loop invariant, 1610 // and a is a constant, we can solve it exactly using the 1611 // Weak-Zero SIV test. 1612 // 1613 // Given 1614 // 1615 // c1 = c2 + a*i 1616 // 1617 // we get 1618 // 1619 // (c1 - c2)/a = i 1620 // 1621 // If i is not an integer, there's no dependence. 1622 // If i < 0 or > UB, there's no dependence. 1623 // If i = 0, the direction is <= and peeling the 1624 // 1st iteration will break the dependence. 1625 // If i = UB, the direction is >= and peeling the 1626 // last iteration will break the dependence. 1627 // Otherwise, the direction is *. 1628 // 1629 // Can prove independence. Failing that, we can sometimes refine 1630 // the directions. Can sometimes show that first or last 1631 // iteration carries all the dependences (so worth peeling). 1632 // 1633 // (see also weakZeroDstSIVtest) 1634 // 1635 // Return true if dependence disproved. 1636 bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *DstCoeff, 1637 const SCEV *SrcConst, 1638 const SCEV *DstConst, 1639 const Loop *CurLoop, unsigned Level, 1640 FullDependence &Result, 1641 Constraint &NewConstraint) const { 1642 // For the WeakSIV test, it's possible the loop isn't common to 1643 // the Src and Dst loops. If it isn't, then there's no need to 1644 // record a direction. 1645 DEBUG(dbgs() << "\tWeak-Zero (src) SIV test\n"); 1646 DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << "\n"); 1647 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1648 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1649 ++WeakZeroSIVapplications; 1650 assert(0 < Level && Level <= MaxLevels && "Level out of range"); 1651 Level--; 1652 Result.Consistent = false; 1653 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); 1654 NewConstraint.setLine(SE->getZero(Delta->getType()), DstCoeff, Delta, 1655 CurLoop); 1656 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1657 if (isKnownPredicate(CmpInst::ICMP_EQ, SrcConst, DstConst)) { 1658 if (Level < CommonLevels) { 1659 Result.DV[Level].Direction &= Dependence::DVEntry::LE; 1660 Result.DV[Level].PeelFirst = true; 1661 ++WeakZeroSIVsuccesses; 1662 } 1663 return false; // dependences caused by first iteration 1664 } 1665 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1666 if (!ConstCoeff) 1667 return false; 1668 const SCEV *AbsCoeff = 1669 SE->isKnownNegative(ConstCoeff) ? 1670 SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; 1671 const SCEV *NewDelta = 1672 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta; 1673 1674 // check that Delta/SrcCoeff < iteration count 1675 // really check NewDelta < count*AbsCoeff 1676 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1677 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1678 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); 1679 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { 1680 ++WeakZeroSIVindependence; 1681 ++WeakZeroSIVsuccesses; 1682 return true; 1683 } 1684 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { 1685 // dependences caused by last iteration 1686 if (Level < CommonLevels) { 1687 Result.DV[Level].Direction &= Dependence::DVEntry::GE; 1688 Result.DV[Level].PeelLast = true; 1689 ++WeakZeroSIVsuccesses; 1690 } 1691 return false; 1692 } 1693 } 1694 1695 // check that Delta/SrcCoeff >= 0 1696 // really check that NewDelta >= 0 1697 if (SE->isKnownNegative(NewDelta)) { 1698 // No dependence, newDelta < 0 1699 ++WeakZeroSIVindependence; 1700 ++WeakZeroSIVsuccesses; 1701 return true; 1702 } 1703 1704 // if SrcCoeff doesn't divide Delta, then no dependence 1705 if (isa<SCEVConstant>(Delta) && 1706 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { 1707 ++WeakZeroSIVindependence; 1708 ++WeakZeroSIVsuccesses; 1709 return true; 1710 } 1711 return false; 1712 } 1713 1714 1715 // weakZeroDstSIVtest - 1716 // From the paper, Practical Dependence Testing, Section 4.2.2 1717 // 1718 // When we have a pair of subscripts of the form [c1 + a*i] and [c2], 1719 // where i is an induction variable, c1 and c2 are loop invariant, 1720 // and a is a constant, we can solve it exactly using the 1721 // Weak-Zero SIV test. 1722 // 1723 // Given 1724 // 1725 // c1 + a*i = c2 1726 // 1727 // we get 1728 // 1729 // i = (c2 - c1)/a 1730 // 1731 // If i is not an integer, there's no dependence. 1732 // If i < 0 or > UB, there's no dependence. 1733 // If i = 0, the direction is <= and peeling the 1734 // 1st iteration will break the dependence. 1735 // If i = UB, the direction is >= and peeling the 1736 // last iteration will break the dependence. 1737 // Otherwise, the direction is *. 1738 // 1739 // Can prove independence. Failing that, we can sometimes refine 1740 // the directions. Can sometimes show that first or last 1741 // iteration carries all the dependences (so worth peeling). 1742 // 1743 // (see also weakZeroSrcSIVtest) 1744 // 1745 // Return true if dependence disproved. 1746 bool DependenceInfo::weakZeroDstSIVtest(const SCEV *SrcCoeff, 1747 const SCEV *SrcConst, 1748 const SCEV *DstConst, 1749 const Loop *CurLoop, unsigned Level, 1750 FullDependence &Result, 1751 Constraint &NewConstraint) const { 1752 // For the WeakSIV test, it's possible the loop isn't common to the 1753 // Src and Dst loops. If it isn't, then there's no need to record a direction. 1754 DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n"); 1755 DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << "\n"); 1756 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1757 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1758 ++WeakZeroSIVapplications; 1759 assert(0 < Level && Level <= SrcLevels && "Level out of range"); 1760 Level--; 1761 Result.Consistent = false; 1762 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1763 NewConstraint.setLine(SrcCoeff, SE->getZero(Delta->getType()), Delta, 1764 CurLoop); 1765 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1766 if (isKnownPredicate(CmpInst::ICMP_EQ, DstConst, SrcConst)) { 1767 if (Level < CommonLevels) { 1768 Result.DV[Level].Direction &= Dependence::DVEntry::LE; 1769 Result.DV[Level].PeelFirst = true; 1770 ++WeakZeroSIVsuccesses; 1771 } 1772 return false; // dependences caused by first iteration 1773 } 1774 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1775 if (!ConstCoeff) 1776 return false; 1777 const SCEV *AbsCoeff = 1778 SE->isKnownNegative(ConstCoeff) ? 1779 SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; 1780 const SCEV *NewDelta = 1781 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta; 1782 1783 // check that Delta/SrcCoeff < iteration count 1784 // really check NewDelta < count*AbsCoeff 1785 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1786 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1787 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); 1788 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { 1789 ++WeakZeroSIVindependence; 1790 ++WeakZeroSIVsuccesses; 1791 return true; 1792 } 1793 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { 1794 // dependences caused by last iteration 1795 if (Level < CommonLevels) { 1796 Result.DV[Level].Direction &= Dependence::DVEntry::GE; 1797 Result.DV[Level].PeelLast = true; 1798 ++WeakZeroSIVsuccesses; 1799 } 1800 return false; 1801 } 1802 } 1803 1804 // check that Delta/SrcCoeff >= 0 1805 // really check that NewDelta >= 0 1806 if (SE->isKnownNegative(NewDelta)) { 1807 // No dependence, newDelta < 0 1808 ++WeakZeroSIVindependence; 1809 ++WeakZeroSIVsuccesses; 1810 return true; 1811 } 1812 1813 // if SrcCoeff doesn't divide Delta, then no dependence 1814 if (isa<SCEVConstant>(Delta) && 1815 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { 1816 ++WeakZeroSIVindependence; 1817 ++WeakZeroSIVsuccesses; 1818 return true; 1819 } 1820 return false; 1821 } 1822 1823 1824 // exactRDIVtest - Tests the RDIV subscript pair for dependence. 1825 // Things of the form [c1 + a*i] and [c2 + b*j], 1826 // where i and j are induction variable, c1 and c2 are loop invariant, 1827 // and a and b are constants. 1828 // Returns true if any possible dependence is disproved. 1829 // Marks the result as inconsistent. 1830 // Works in some cases that symbolicRDIVtest doesn't, and vice versa. 1831 bool DependenceInfo::exactRDIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, 1832 const SCEV *SrcConst, const SCEV *DstConst, 1833 const Loop *SrcLoop, const Loop *DstLoop, 1834 FullDependence &Result) const { 1835 DEBUG(dbgs() << "\tExact RDIV test\n"); 1836 DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); 1837 DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); 1838 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1839 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1840 ++ExactRDIVapplications; 1841 Result.Consistent = false; 1842 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1843 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1844 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1845 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1846 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1847 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) 1848 return false; 1849 1850 // find gcd 1851 APInt G, X, Y; 1852 APInt AM = ConstSrcCoeff->getAPInt(); 1853 APInt BM = ConstDstCoeff->getAPInt(); 1854 unsigned Bits = AM.getBitWidth(); 1855 if (findGCD(Bits, AM, BM, ConstDelta->getAPInt(), G, X, Y)) { 1856 // gcd doesn't divide Delta, no dependence 1857 ++ExactRDIVindependence; 1858 return true; 1859 } 1860 1861 DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); 1862 1863 // since SCEV construction seems to normalize, LM = 0 1864 APInt SrcUM(Bits, 1, true); 1865 bool SrcUMvalid = false; 1866 // SrcUM is perhaps unavailable, let's check 1867 if (const SCEVConstant *UpperBound = 1868 collectConstantUpperBound(SrcLoop, Delta->getType())) { 1869 SrcUM = UpperBound->getAPInt(); 1870 DEBUG(dbgs() << "\t SrcUM = " << SrcUM << "\n"); 1871 SrcUMvalid = true; 1872 } 1873 1874 APInt DstUM(Bits, 1, true); 1875 bool DstUMvalid = false; 1876 // UM is perhaps unavailable, let's check 1877 if (const SCEVConstant *UpperBound = 1878 collectConstantUpperBound(DstLoop, Delta->getType())) { 1879 DstUM = UpperBound->getAPInt(); 1880 DEBUG(dbgs() << "\t DstUM = " << DstUM << "\n"); 1881 DstUMvalid = true; 1882 } 1883 1884 APInt TU(APInt::getSignedMaxValue(Bits)); 1885 APInt TL(APInt::getSignedMinValue(Bits)); 1886 1887 // test(BM/G, LM-X) and test(-BM/G, X-UM) 1888 APInt TMUL = BM.sdiv(G); 1889 if (TMUL.sgt(0)) { 1890 TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL)); 1891 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1892 if (SrcUMvalid) { 1893 TU = minAPInt(TU, floorOfQuotient(SrcUM - X, TMUL)); 1894 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1895 } 1896 } 1897 else { 1898 TU = minAPInt(TU, floorOfQuotient(-X, TMUL)); 1899 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1900 if (SrcUMvalid) { 1901 TL = maxAPInt(TL, ceilingOfQuotient(SrcUM - X, TMUL)); 1902 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1903 } 1904 } 1905 1906 // test(AM/G, LM-Y) and test(-AM/G, Y-UM) 1907 TMUL = AM.sdiv(G); 1908 if (TMUL.sgt(0)) { 1909 TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL)); 1910 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1911 if (DstUMvalid) { 1912 TU = minAPInt(TU, floorOfQuotient(DstUM - Y, TMUL)); 1913 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1914 } 1915 } 1916 else { 1917 TU = minAPInt(TU, floorOfQuotient(-Y, TMUL)); 1918 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1919 if (DstUMvalid) { 1920 TL = maxAPInt(TL, ceilingOfQuotient(DstUM - Y, TMUL)); 1921 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1922 } 1923 } 1924 if (TL.sgt(TU)) 1925 ++ExactRDIVindependence; 1926 return TL.sgt(TU); 1927 } 1928 1929 1930 // symbolicRDIVtest - 1931 // In Section 4.5 of the Practical Dependence Testing paper,the authors 1932 // introduce a special case of Banerjee's Inequalities (also called the 1933 // Extreme-Value Test) that can handle some of the SIV and RDIV cases, 1934 // particularly cases with symbolics. Since it's only able to disprove 1935 // dependence (not compute distances or directions), we'll use it as a 1936 // fall back for the other tests. 1937 // 1938 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] 1939 // where i and j are induction variables and c1 and c2 are loop invariants, 1940 // we can use the symbolic tests to disprove some dependences, serving as a 1941 // backup for the RDIV test. Note that i and j can be the same variable, 1942 // letting this test serve as a backup for the various SIV tests. 1943 // 1944 // For a dependence to exist, c1 + a1*i must equal c2 + a2*j for some 1945 // 0 <= i <= N1 and some 0 <= j <= N2, where N1 and N2 are the (normalized) 1946 // loop bounds for the i and j loops, respectively. So, ... 1947 // 1948 // c1 + a1*i = c2 + a2*j 1949 // a1*i - a2*j = c2 - c1 1950 // 1951 // To test for a dependence, we compute c2 - c1 and make sure it's in the 1952 // range of the maximum and minimum possible values of a1*i - a2*j. 1953 // Considering the signs of a1 and a2, we have 4 possible cases: 1954 // 1955 // 1) If a1 >= 0 and a2 >= 0, then 1956 // a1*0 - a2*N2 <= c2 - c1 <= a1*N1 - a2*0 1957 // -a2*N2 <= c2 - c1 <= a1*N1 1958 // 1959 // 2) If a1 >= 0 and a2 <= 0, then 1960 // a1*0 - a2*0 <= c2 - c1 <= a1*N1 - a2*N2 1961 // 0 <= c2 - c1 <= a1*N1 - a2*N2 1962 // 1963 // 3) If a1 <= 0 and a2 >= 0, then 1964 // a1*N1 - a2*N2 <= c2 - c1 <= a1*0 - a2*0 1965 // a1*N1 - a2*N2 <= c2 - c1 <= 0 1966 // 1967 // 4) If a1 <= 0 and a2 <= 0, then 1968 // a1*N1 - a2*0 <= c2 - c1 <= a1*0 - a2*N2 1969 // a1*N1 <= c2 - c1 <= -a2*N2 1970 // 1971 // return true if dependence disproved 1972 bool DependenceInfo::symbolicRDIVtest(const SCEV *A1, const SCEV *A2, 1973 const SCEV *C1, const SCEV *C2, 1974 const Loop *Loop1, 1975 const Loop *Loop2) const { 1976 ++SymbolicRDIVapplications; 1977 DEBUG(dbgs() << "\ttry symbolic RDIV test\n"); 1978 DEBUG(dbgs() << "\t A1 = " << *A1); 1979 DEBUG(dbgs() << ", type = " << *A1->getType() << "\n"); 1980 DEBUG(dbgs() << "\t A2 = " << *A2 << "\n"); 1981 DEBUG(dbgs() << "\t C1 = " << *C1 << "\n"); 1982 DEBUG(dbgs() << "\t C2 = " << *C2 << "\n"); 1983 const SCEV *N1 = collectUpperBound(Loop1, A1->getType()); 1984 const SCEV *N2 = collectUpperBound(Loop2, A1->getType()); 1985 DEBUG(if (N1) dbgs() << "\t N1 = " << *N1 << "\n"); 1986 DEBUG(if (N2) dbgs() << "\t N2 = " << *N2 << "\n"); 1987 const SCEV *C2_C1 = SE->getMinusSCEV(C2, C1); 1988 const SCEV *C1_C2 = SE->getMinusSCEV(C1, C2); 1989 DEBUG(dbgs() << "\t C2 - C1 = " << *C2_C1 << "\n"); 1990 DEBUG(dbgs() << "\t C1 - C2 = " << *C1_C2 << "\n"); 1991 if (SE->isKnownNonNegative(A1)) { 1992 if (SE->isKnownNonNegative(A2)) { 1993 // A1 >= 0 && A2 >= 0 1994 if (N1) { 1995 // make sure that c2 - c1 <= a1*N1 1996 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 1997 DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); 1998 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1)) { 1999 ++SymbolicRDIVindependence; 2000 return true; 2001 } 2002 } 2003 if (N2) { 2004 // make sure that -a2*N2 <= c2 - c1, or a2*N2 >= c1 - c2 2005 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2006 DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); 2007 if (isKnownPredicate(CmpInst::ICMP_SLT, A2N2, C1_C2)) { 2008 ++SymbolicRDIVindependence; 2009 return true; 2010 } 2011 } 2012 } 2013 else if (SE->isKnownNonPositive(A2)) { 2014 // a1 >= 0 && a2 <= 0 2015 if (N1 && N2) { 2016 // make sure that c2 - c1 <= a1*N1 - a2*N2 2017 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2018 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2019 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); 2020 DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); 2021 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1_A2N2)) { 2022 ++SymbolicRDIVindependence; 2023 return true; 2024 } 2025 } 2026 // make sure that 0 <= c2 - c1 2027 if (SE->isKnownNegative(C2_C1)) { 2028 ++SymbolicRDIVindependence; 2029 return true; 2030 } 2031 } 2032 } 2033 else if (SE->isKnownNonPositive(A1)) { 2034 if (SE->isKnownNonNegative(A2)) { 2035 // a1 <= 0 && a2 >= 0 2036 if (N1 && N2) { 2037 // make sure that a1*N1 - a2*N2 <= c2 - c1 2038 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2039 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2040 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); 2041 DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); 2042 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1_A2N2, C2_C1)) { 2043 ++SymbolicRDIVindependence; 2044 return true; 2045 } 2046 } 2047 // make sure that c2 - c1 <= 0 2048 if (SE->isKnownPositive(C2_C1)) { 2049 ++SymbolicRDIVindependence; 2050 return true; 2051 } 2052 } 2053 else if (SE->isKnownNonPositive(A2)) { 2054 // a1 <= 0 && a2 <= 0 2055 if (N1) { 2056 // make sure that a1*N1 <= c2 - c1 2057 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2058 DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); 2059 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1, C2_C1)) { 2060 ++SymbolicRDIVindependence; 2061 return true; 2062 } 2063 } 2064 if (N2) { 2065 // make sure that c2 - c1 <= -a2*N2, or c1 - c2 >= a2*N2 2066 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2067 DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); 2068 if (isKnownPredicate(CmpInst::ICMP_SLT, C1_C2, A2N2)) { 2069 ++SymbolicRDIVindependence; 2070 return true; 2071 } 2072 } 2073 } 2074 } 2075 return false; 2076 } 2077 2078 2079 // testSIV - 2080 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 - a2*i] 2081 // where i is an induction variable, c1 and c2 are loop invariant, and a1 and 2082 // a2 are constant, we attack it with an SIV test. While they can all be 2083 // solved with the Exact SIV test, it's worthwhile to use simpler tests when 2084 // they apply; they're cheaper and sometimes more precise. 2085 // 2086 // Return true if dependence disproved. 2087 bool DependenceInfo::testSIV(const SCEV *Src, const SCEV *Dst, unsigned &Level, 2088 FullDependence &Result, Constraint &NewConstraint, 2089 const SCEV *&SplitIter) const { 2090 DEBUG(dbgs() << " src = " << *Src << "\n"); 2091 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2092 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); 2093 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); 2094 if (SrcAddRec && DstAddRec) { 2095 const SCEV *SrcConst = SrcAddRec->getStart(); 2096 const SCEV *DstConst = DstAddRec->getStart(); 2097 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2098 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); 2099 const Loop *CurLoop = SrcAddRec->getLoop(); 2100 assert(CurLoop == DstAddRec->getLoop() && 2101 "both loops in SIV should be same"); 2102 Level = mapSrcLoop(CurLoop); 2103 bool disproven; 2104 if (SrcCoeff == DstCoeff) 2105 disproven = strongSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2106 Level, Result, NewConstraint); 2107 else if (SrcCoeff == SE->getNegativeSCEV(DstCoeff)) 2108 disproven = weakCrossingSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2109 Level, Result, NewConstraint, SplitIter); 2110 else 2111 disproven = exactSIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, 2112 Level, Result, NewConstraint); 2113 return disproven || 2114 gcdMIVtest(Src, Dst, Result) || 2115 symbolicRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, CurLoop); 2116 } 2117 if (SrcAddRec) { 2118 const SCEV *SrcConst = SrcAddRec->getStart(); 2119 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2120 const SCEV *DstConst = Dst; 2121 const Loop *CurLoop = SrcAddRec->getLoop(); 2122 Level = mapSrcLoop(CurLoop); 2123 return weakZeroDstSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2124 Level, Result, NewConstraint) || 2125 gcdMIVtest(Src, Dst, Result); 2126 } 2127 if (DstAddRec) { 2128 const SCEV *DstConst = DstAddRec->getStart(); 2129 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); 2130 const SCEV *SrcConst = Src; 2131 const Loop *CurLoop = DstAddRec->getLoop(); 2132 Level = mapDstLoop(CurLoop); 2133 return weakZeroSrcSIVtest(DstCoeff, SrcConst, DstConst, 2134 CurLoop, Level, Result, NewConstraint) || 2135 gcdMIVtest(Src, Dst, Result); 2136 } 2137 llvm_unreachable("SIV test expected at least one AddRec"); 2138 return false; 2139 } 2140 2141 2142 // testRDIV - 2143 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] 2144 // where i and j are induction variables, c1 and c2 are loop invariant, 2145 // and a1 and a2 are constant, we can solve it exactly with an easy adaptation 2146 // of the Exact SIV test, the Restricted Double Index Variable (RDIV) test. 2147 // It doesn't make sense to talk about distance or direction in this case, 2148 // so there's no point in making special versions of the Strong SIV test or 2149 // the Weak-crossing SIV test. 2150 // 2151 // With minor algebra, this test can also be used for things like 2152 // [c1 + a1*i + a2*j][c2]. 2153 // 2154 // Return true if dependence disproved. 2155 bool DependenceInfo::testRDIV(const SCEV *Src, const SCEV *Dst, 2156 FullDependence &Result) const { 2157 // we have 3 possible situations here: 2158 // 1) [a*i + b] and [c*j + d] 2159 // 2) [a*i + c*j + b] and [d] 2160 // 3) [b] and [a*i + c*j + d] 2161 // We need to find what we've got and get organized 2162 2163 const SCEV *SrcConst, *DstConst; 2164 const SCEV *SrcCoeff, *DstCoeff; 2165 const Loop *SrcLoop, *DstLoop; 2166 2167 DEBUG(dbgs() << " src = " << *Src << "\n"); 2168 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2169 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); 2170 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); 2171 if (SrcAddRec && DstAddRec) { 2172 SrcConst = SrcAddRec->getStart(); 2173 SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2174 SrcLoop = SrcAddRec->getLoop(); 2175 DstConst = DstAddRec->getStart(); 2176 DstCoeff = DstAddRec->getStepRecurrence(*SE); 2177 DstLoop = DstAddRec->getLoop(); 2178 } 2179 else if (SrcAddRec) { 2180 if (const SCEVAddRecExpr *tmpAddRec = 2181 dyn_cast<SCEVAddRecExpr>(SrcAddRec->getStart())) { 2182 SrcConst = tmpAddRec->getStart(); 2183 SrcCoeff = tmpAddRec->getStepRecurrence(*SE); 2184 SrcLoop = tmpAddRec->getLoop(); 2185 DstConst = Dst; 2186 DstCoeff = SE->getNegativeSCEV(SrcAddRec->getStepRecurrence(*SE)); 2187 DstLoop = SrcAddRec->getLoop(); 2188 } 2189 else 2190 llvm_unreachable("RDIV reached by surprising SCEVs"); 2191 } 2192 else if (DstAddRec) { 2193 if (const SCEVAddRecExpr *tmpAddRec = 2194 dyn_cast<SCEVAddRecExpr>(DstAddRec->getStart())) { 2195 DstConst = tmpAddRec->getStart(); 2196 DstCoeff = tmpAddRec->getStepRecurrence(*SE); 2197 DstLoop = tmpAddRec->getLoop(); 2198 SrcConst = Src; 2199 SrcCoeff = SE->getNegativeSCEV(DstAddRec->getStepRecurrence(*SE)); 2200 SrcLoop = DstAddRec->getLoop(); 2201 } 2202 else 2203 llvm_unreachable("RDIV reached by surprising SCEVs"); 2204 } 2205 else 2206 llvm_unreachable("RDIV expected at least one AddRec"); 2207 return exactRDIVtest(SrcCoeff, DstCoeff, 2208 SrcConst, DstConst, 2209 SrcLoop, DstLoop, 2210 Result) || 2211 gcdMIVtest(Src, Dst, Result) || 2212 symbolicRDIVtest(SrcCoeff, DstCoeff, 2213 SrcConst, DstConst, 2214 SrcLoop, DstLoop); 2215 } 2216 2217 2218 // Tests the single-subscript MIV pair (Src and Dst) for dependence. 2219 // Return true if dependence disproved. 2220 // Can sometimes refine direction vectors. 2221 bool DependenceInfo::testMIV(const SCEV *Src, const SCEV *Dst, 2222 const SmallBitVector &Loops, 2223 FullDependence &Result) const { 2224 DEBUG(dbgs() << " src = " << *Src << "\n"); 2225 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2226 Result.Consistent = false; 2227 return gcdMIVtest(Src, Dst, Result) || 2228 banerjeeMIVtest(Src, Dst, Loops, Result); 2229 } 2230 2231 2232 // Given a product, e.g., 10*X*Y, returns the first constant operand, 2233 // in this case 10. If there is no constant part, returns NULL. 2234 static 2235 const SCEVConstant *getConstantPart(const SCEV *Expr) { 2236 if (const auto *Constant = dyn_cast<SCEVConstant>(Expr)) 2237 return Constant; 2238 else if (const auto *Product = dyn_cast<SCEVMulExpr>(Expr)) 2239 if (const auto *Constant = dyn_cast<SCEVConstant>(Product->getOperand(0))) 2240 return Constant; 2241 return nullptr; 2242 } 2243 2244 2245 //===----------------------------------------------------------------------===// 2246 // gcdMIVtest - 2247 // Tests an MIV subscript pair for dependence. 2248 // Returns true if any possible dependence is disproved. 2249 // Marks the result as inconsistent. 2250 // Can sometimes disprove the equal direction for 1 or more loops, 2251 // as discussed in Michael Wolfe's book, 2252 // High Performance Compilers for Parallel Computing, page 235. 2253 // 2254 // We spend some effort (code!) to handle cases like 2255 // [10*i + 5*N*j + 15*M + 6], where i and j are induction variables, 2256 // but M and N are just loop-invariant variables. 2257 // This should help us handle linearized subscripts; 2258 // also makes this test a useful backup to the various SIV tests. 2259 // 2260 // It occurs to me that the presence of loop-invariant variables 2261 // changes the nature of the test from "greatest common divisor" 2262 // to "a common divisor". 2263 bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst, 2264 FullDependence &Result) const { 2265 DEBUG(dbgs() << "starting gcd\n"); 2266 ++GCDapplications; 2267 unsigned BitWidth = SE->getTypeSizeInBits(Src->getType()); 2268 APInt RunningGCD = APInt::getNullValue(BitWidth); 2269 2270 // Examine Src coefficients. 2271 // Compute running GCD and record source constant. 2272 // Because we're looking for the constant at the end of the chain, 2273 // we can't quit the loop just because the GCD == 1. 2274 const SCEV *Coefficients = Src; 2275 while (const SCEVAddRecExpr *AddRec = 2276 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2277 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2278 // If the coefficient is the product of a constant and other stuff, 2279 // we can use the constant in the GCD computation. 2280 const auto *Constant = getConstantPart(Coeff); 2281 if (!Constant) 2282 return false; 2283 APInt ConstCoeff = Constant->getAPInt(); 2284 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2285 Coefficients = AddRec->getStart(); 2286 } 2287 const SCEV *SrcConst = Coefficients; 2288 2289 // Examine Dst coefficients. 2290 // Compute running GCD and record destination constant. 2291 // Because we're looking for the constant at the end of the chain, 2292 // we can't quit the loop just because the GCD == 1. 2293 Coefficients = Dst; 2294 while (const SCEVAddRecExpr *AddRec = 2295 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2296 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2297 // If the coefficient is the product of a constant and other stuff, 2298 // we can use the constant in the GCD computation. 2299 const auto *Constant = getConstantPart(Coeff); 2300 if (!Constant) 2301 return false; 2302 APInt ConstCoeff = Constant->getAPInt(); 2303 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2304 Coefficients = AddRec->getStart(); 2305 } 2306 const SCEV *DstConst = Coefficients; 2307 2308 APInt ExtraGCD = APInt::getNullValue(BitWidth); 2309 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 2310 DEBUG(dbgs() << " Delta = " << *Delta << "\n"); 2311 const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Delta); 2312 if (const SCEVAddExpr *Sum = dyn_cast<SCEVAddExpr>(Delta)) { 2313 // If Delta is a sum of products, we may be able to make further progress. 2314 for (unsigned Op = 0, Ops = Sum->getNumOperands(); Op < Ops; Op++) { 2315 const SCEV *Operand = Sum->getOperand(Op); 2316 if (isa<SCEVConstant>(Operand)) { 2317 assert(!Constant && "Surprised to find multiple constants"); 2318 Constant = cast<SCEVConstant>(Operand); 2319 } 2320 else if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Operand)) { 2321 // Search for constant operand to participate in GCD; 2322 // If none found; return false. 2323 const SCEVConstant *ConstOp = getConstantPart(Product); 2324 if (!ConstOp) 2325 return false; 2326 APInt ConstOpValue = ConstOp->getAPInt(); 2327 ExtraGCD = APIntOps::GreatestCommonDivisor(ExtraGCD, 2328 ConstOpValue.abs()); 2329 } 2330 else 2331 return false; 2332 } 2333 } 2334 if (!Constant) 2335 return false; 2336 APInt ConstDelta = cast<SCEVConstant>(Constant)->getAPInt(); 2337 DEBUG(dbgs() << " ConstDelta = " << ConstDelta << "\n"); 2338 if (ConstDelta == 0) 2339 return false; 2340 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ExtraGCD); 2341 DEBUG(dbgs() << " RunningGCD = " << RunningGCD << "\n"); 2342 APInt Remainder = ConstDelta.srem(RunningGCD); 2343 if (Remainder != 0) { 2344 ++GCDindependence; 2345 return true; 2346 } 2347 2348 // Try to disprove equal directions. 2349 // For example, given a subscript pair [3*i + 2*j] and [i' + 2*j' - 1], 2350 // the code above can't disprove the dependence because the GCD = 1. 2351 // So we consider what happen if i = i' and what happens if j = j'. 2352 // If i = i', we can simplify the subscript to [2*i + 2*j] and [2*j' - 1], 2353 // which is infeasible, so we can disallow the = direction for the i level. 2354 // Setting j = j' doesn't help matters, so we end up with a direction vector 2355 // of [<>, *] 2356 // 2357 // Given A[5*i + 10*j*M + 9*M*N] and A[15*i + 20*j*M - 21*N*M + 5], 2358 // we need to remember that the constant part is 5 and the RunningGCD should 2359 // be initialized to ExtraGCD = 30. 2360 DEBUG(dbgs() << " ExtraGCD = " << ExtraGCD << '\n'); 2361 2362 bool Improved = false; 2363 Coefficients = Src; 2364 while (const SCEVAddRecExpr *AddRec = 2365 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2366 Coefficients = AddRec->getStart(); 2367 const Loop *CurLoop = AddRec->getLoop(); 2368 RunningGCD = ExtraGCD; 2369 const SCEV *SrcCoeff = AddRec->getStepRecurrence(*SE); 2370 const SCEV *DstCoeff = SE->getMinusSCEV(SrcCoeff, SrcCoeff); 2371 const SCEV *Inner = Src; 2372 while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { 2373 AddRec = cast<SCEVAddRecExpr>(Inner); 2374 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2375 if (CurLoop == AddRec->getLoop()) 2376 ; // SrcCoeff == Coeff 2377 else { 2378 // If the coefficient is the product of a constant and other stuff, 2379 // we can use the constant in the GCD computation. 2380 Constant = getConstantPart(Coeff); 2381 if (!Constant) 2382 return false; 2383 APInt ConstCoeff = Constant->getAPInt(); 2384 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2385 } 2386 Inner = AddRec->getStart(); 2387 } 2388 Inner = Dst; 2389 while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { 2390 AddRec = cast<SCEVAddRecExpr>(Inner); 2391 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2392 if (CurLoop == AddRec->getLoop()) 2393 DstCoeff = Coeff; 2394 else { 2395 // If the coefficient is the product of a constant and other stuff, 2396 // we can use the constant in the GCD computation. 2397 Constant = getConstantPart(Coeff); 2398 if (!Constant) 2399 return false; 2400 APInt ConstCoeff = Constant->getAPInt(); 2401 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2402 } 2403 Inner = AddRec->getStart(); 2404 } 2405 Delta = SE->getMinusSCEV(SrcCoeff, DstCoeff); 2406 // If the coefficient is the product of a constant and other stuff, 2407 // we can use the constant in the GCD computation. 2408 Constant = getConstantPart(Delta); 2409 if (!Constant) 2410 // The difference of the two coefficients might not be a product 2411 // or constant, in which case we give up on this direction. 2412 continue; 2413 APInt ConstCoeff = Constant->getAPInt(); 2414 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2415 DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n"); 2416 if (RunningGCD != 0) { 2417 Remainder = ConstDelta.srem(RunningGCD); 2418 DEBUG(dbgs() << "\tRemainder = " << Remainder << "\n"); 2419 if (Remainder != 0) { 2420 unsigned Level = mapSrcLoop(CurLoop); 2421 Result.DV[Level - 1].Direction &= unsigned(~Dependence::DVEntry::EQ); 2422 Improved = true; 2423 } 2424 } 2425 } 2426 if (Improved) 2427 ++GCDsuccesses; 2428 DEBUG(dbgs() << "all done\n"); 2429 return false; 2430 } 2431 2432 2433 //===----------------------------------------------------------------------===// 2434 // banerjeeMIVtest - 2435 // Use Banerjee's Inequalities to test an MIV subscript pair. 2436 // (Wolfe, in the race-car book, calls this the Extreme Value Test.) 2437 // Generally follows the discussion in Section 2.5.2 of 2438 // 2439 // Optimizing Supercompilers for Supercomputers 2440 // Michael Wolfe 2441 // 2442 // The inequalities given on page 25 are simplified in that loops are 2443 // normalized so that the lower bound is always 0 and the stride is always 1. 2444 // For example, Wolfe gives 2445 // 2446 // LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2447 // 2448 // where A_k is the coefficient of the kth index in the source subscript, 2449 // B_k is the coefficient of the kth index in the destination subscript, 2450 // U_k is the upper bound of the kth index, L_k is the lower bound of the Kth 2451 // index, and N_k is the stride of the kth index. Since all loops are normalized 2452 // by the SCEV package, N_k = 1 and L_k = 0, allowing us to simplify the 2453 // equation to 2454 // 2455 // LB^<_k = (A^-_k - B_k)^- (U_k - 0 - 1) + (A_k - B_k)0 - B_k 1 2456 // = (A^-_k - B_k)^- (U_k - 1) - B_k 2457 // 2458 // Similar simplifications are possible for the other equations. 2459 // 2460 // When we can't determine the number of iterations for a loop, 2461 // we use NULL as an indicator for the worst case, infinity. 2462 // When computing the upper bound, NULL denotes +inf; 2463 // for the lower bound, NULL denotes -inf. 2464 // 2465 // Return true if dependence disproved. 2466 bool DependenceInfo::banerjeeMIVtest(const SCEV *Src, const SCEV *Dst, 2467 const SmallBitVector &Loops, 2468 FullDependence &Result) const { 2469 DEBUG(dbgs() << "starting Banerjee\n"); 2470 ++BanerjeeApplications; 2471 DEBUG(dbgs() << " Src = " << *Src << '\n'); 2472 const SCEV *A0; 2473 CoefficientInfo *A = collectCoeffInfo(Src, true, A0); 2474 DEBUG(dbgs() << " Dst = " << *Dst << '\n'); 2475 const SCEV *B0; 2476 CoefficientInfo *B = collectCoeffInfo(Dst, false, B0); 2477 BoundInfo *Bound = new BoundInfo[MaxLevels + 1]; 2478 const SCEV *Delta = SE->getMinusSCEV(B0, A0); 2479 DEBUG(dbgs() << "\tDelta = " << *Delta << '\n'); 2480 2481 // Compute bounds for all the * directions. 2482 DEBUG(dbgs() << "\tBounds[*]\n"); 2483 for (unsigned K = 1; K <= MaxLevels; ++K) { 2484 Bound[K].Iterations = A[K].Iterations ? A[K].Iterations : B[K].Iterations; 2485 Bound[K].Direction = Dependence::DVEntry::ALL; 2486 Bound[K].DirSet = Dependence::DVEntry::NONE; 2487 findBoundsALL(A, B, Bound, K); 2488 #ifndef NDEBUG 2489 DEBUG(dbgs() << "\t " << K << '\t'); 2490 if (Bound[K].Lower[Dependence::DVEntry::ALL]) 2491 DEBUG(dbgs() << *Bound[K].Lower[Dependence::DVEntry::ALL] << '\t'); 2492 else 2493 DEBUG(dbgs() << "-inf\t"); 2494 if (Bound[K].Upper[Dependence::DVEntry::ALL]) 2495 DEBUG(dbgs() << *Bound[K].Upper[Dependence::DVEntry::ALL] << '\n'); 2496 else 2497 DEBUG(dbgs() << "+inf\n"); 2498 #endif 2499 } 2500 2501 // Test the *, *, *, ... case. 2502 bool Disproved = false; 2503 if (testBounds(Dependence::DVEntry::ALL, 0, Bound, Delta)) { 2504 // Explore the direction vector hierarchy. 2505 unsigned DepthExpanded = 0; 2506 unsigned NewDeps = exploreDirections(1, A, B, Bound, 2507 Loops, DepthExpanded, Delta); 2508 if (NewDeps > 0) { 2509 bool Improved = false; 2510 for (unsigned K = 1; K <= CommonLevels; ++K) { 2511 if (Loops[K]) { 2512 unsigned Old = Result.DV[K - 1].Direction; 2513 Result.DV[K - 1].Direction = Old & Bound[K].DirSet; 2514 Improved |= Old != Result.DV[K - 1].Direction; 2515 if (!Result.DV[K - 1].Direction) { 2516 Improved = false; 2517 Disproved = true; 2518 break; 2519 } 2520 } 2521 } 2522 if (Improved) 2523 ++BanerjeeSuccesses; 2524 } 2525 else { 2526 ++BanerjeeIndependence; 2527 Disproved = true; 2528 } 2529 } 2530 else { 2531 ++BanerjeeIndependence; 2532 Disproved = true; 2533 } 2534 delete [] Bound; 2535 delete [] A; 2536 delete [] B; 2537 return Disproved; 2538 } 2539 2540 2541 // Hierarchically expands the direction vector 2542 // search space, combining the directions of discovered dependences 2543 // in the DirSet field of Bound. Returns the number of distinct 2544 // dependences discovered. If the dependence is disproved, 2545 // it will return 0. 2546 unsigned DependenceInfo::exploreDirections(unsigned Level, CoefficientInfo *A, 2547 CoefficientInfo *B, BoundInfo *Bound, 2548 const SmallBitVector &Loops, 2549 unsigned &DepthExpanded, 2550 const SCEV *Delta) const { 2551 if (Level > CommonLevels) { 2552 // record result 2553 DEBUG(dbgs() << "\t["); 2554 for (unsigned K = 1; K <= CommonLevels; ++K) { 2555 if (Loops[K]) { 2556 Bound[K].DirSet |= Bound[K].Direction; 2557 #ifndef NDEBUG 2558 switch (Bound[K].Direction) { 2559 case Dependence::DVEntry::LT: 2560 DEBUG(dbgs() << " <"); 2561 break; 2562 case Dependence::DVEntry::EQ: 2563 DEBUG(dbgs() << " ="); 2564 break; 2565 case Dependence::DVEntry::GT: 2566 DEBUG(dbgs() << " >"); 2567 break; 2568 case Dependence::DVEntry::ALL: 2569 DEBUG(dbgs() << " *"); 2570 break; 2571 default: 2572 llvm_unreachable("unexpected Bound[K].Direction"); 2573 } 2574 #endif 2575 } 2576 } 2577 DEBUG(dbgs() << " ]\n"); 2578 return 1; 2579 } 2580 if (Loops[Level]) { 2581 if (Level > DepthExpanded) { 2582 DepthExpanded = Level; 2583 // compute bounds for <, =, > at current level 2584 findBoundsLT(A, B, Bound, Level); 2585 findBoundsGT(A, B, Bound, Level); 2586 findBoundsEQ(A, B, Bound, Level); 2587 #ifndef NDEBUG 2588 DEBUG(dbgs() << "\tBound for level = " << Level << '\n'); 2589 DEBUG(dbgs() << "\t <\t"); 2590 if (Bound[Level].Lower[Dependence::DVEntry::LT]) 2591 DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::LT] << '\t'); 2592 else 2593 DEBUG(dbgs() << "-inf\t"); 2594 if (Bound[Level].Upper[Dependence::DVEntry::LT]) 2595 DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::LT] << '\n'); 2596 else 2597 DEBUG(dbgs() << "+inf\n"); 2598 DEBUG(dbgs() << "\t =\t"); 2599 if (Bound[Level].Lower[Dependence::DVEntry::EQ]) 2600 DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::EQ] << '\t'); 2601 else 2602 DEBUG(dbgs() << "-inf\t"); 2603 if (Bound[Level].Upper[Dependence::DVEntry::EQ]) 2604 DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::EQ] << '\n'); 2605 else 2606 DEBUG(dbgs() << "+inf\n"); 2607 DEBUG(dbgs() << "\t >\t"); 2608 if (Bound[Level].Lower[Dependence::DVEntry::GT]) 2609 DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::GT] << '\t'); 2610 else 2611 DEBUG(dbgs() << "-inf\t"); 2612 if (Bound[Level].Upper[Dependence::DVEntry::GT]) 2613 DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::GT] << '\n'); 2614 else 2615 DEBUG(dbgs() << "+inf\n"); 2616 #endif 2617 } 2618 2619 unsigned NewDeps = 0; 2620 2621 // test bounds for <, *, *, ... 2622 if (testBounds(Dependence::DVEntry::LT, Level, Bound, Delta)) 2623 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2624 Loops, DepthExpanded, Delta); 2625 2626 // Test bounds for =, *, *, ... 2627 if (testBounds(Dependence::DVEntry::EQ, Level, Bound, Delta)) 2628 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2629 Loops, DepthExpanded, Delta); 2630 2631 // test bounds for >, *, *, ... 2632 if (testBounds(Dependence::DVEntry::GT, Level, Bound, Delta)) 2633 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2634 Loops, DepthExpanded, Delta); 2635 2636 Bound[Level].Direction = Dependence::DVEntry::ALL; 2637 return NewDeps; 2638 } 2639 else 2640 return exploreDirections(Level + 1, A, B, Bound, Loops, DepthExpanded, Delta); 2641 } 2642 2643 2644 // Returns true iff the current bounds are plausible. 2645 bool DependenceInfo::testBounds(unsigned char DirKind, unsigned Level, 2646 BoundInfo *Bound, const SCEV *Delta) const { 2647 Bound[Level].Direction = DirKind; 2648 if (const SCEV *LowerBound = getLowerBound(Bound)) 2649 if (isKnownPredicate(CmpInst::ICMP_SGT, LowerBound, Delta)) 2650 return false; 2651 if (const SCEV *UpperBound = getUpperBound(Bound)) 2652 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, UpperBound)) 2653 return false; 2654 return true; 2655 } 2656 2657 2658 // Computes the upper and lower bounds for level K 2659 // using the * direction. Records them in Bound. 2660 // Wolfe gives the equations 2661 // 2662 // LB^*_k = (A^-_k - B^+_k)(U_k - L_k) + (A_k - B_k)L_k 2663 // UB^*_k = (A^+_k - B^-_k)(U_k - L_k) + (A_k - B_k)L_k 2664 // 2665 // Since we normalize loops, we can simplify these equations to 2666 // 2667 // LB^*_k = (A^-_k - B^+_k)U_k 2668 // UB^*_k = (A^+_k - B^-_k)U_k 2669 // 2670 // We must be careful to handle the case where the upper bound is unknown. 2671 // Note that the lower bound is always <= 0 2672 // and the upper bound is always >= 0. 2673 void DependenceInfo::findBoundsALL(CoefficientInfo *A, CoefficientInfo *B, 2674 BoundInfo *Bound, unsigned K) const { 2675 Bound[K].Lower[Dependence::DVEntry::ALL] = nullptr; // Default value = -infinity. 2676 Bound[K].Upper[Dependence::DVEntry::ALL] = nullptr; // Default value = +infinity. 2677 if (Bound[K].Iterations) { 2678 Bound[K].Lower[Dependence::DVEntry::ALL] = 2679 SE->getMulExpr(SE->getMinusSCEV(A[K].NegPart, B[K].PosPart), 2680 Bound[K].Iterations); 2681 Bound[K].Upper[Dependence::DVEntry::ALL] = 2682 SE->getMulExpr(SE->getMinusSCEV(A[K].PosPart, B[K].NegPart), 2683 Bound[K].Iterations); 2684 } 2685 else { 2686 // If the difference is 0, we won't need to know the number of iterations. 2687 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].NegPart, B[K].PosPart)) 2688 Bound[K].Lower[Dependence::DVEntry::ALL] = 2689 SE->getZero(A[K].Coeff->getType()); 2690 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].PosPart, B[K].NegPart)) 2691 Bound[K].Upper[Dependence::DVEntry::ALL] = 2692 SE->getZero(A[K].Coeff->getType()); 2693 } 2694 } 2695 2696 2697 // Computes the upper and lower bounds for level K 2698 // using the = direction. Records them in Bound. 2699 // Wolfe gives the equations 2700 // 2701 // LB^=_k = (A_k - B_k)^- (U_k - L_k) + (A_k - B_k)L_k 2702 // UB^=_k = (A_k - B_k)^+ (U_k - L_k) + (A_k - B_k)L_k 2703 // 2704 // Since we normalize loops, we can simplify these equations to 2705 // 2706 // LB^=_k = (A_k - B_k)^- U_k 2707 // UB^=_k = (A_k - B_k)^+ U_k 2708 // 2709 // We must be careful to handle the case where the upper bound is unknown. 2710 // Note that the lower bound is always <= 0 2711 // and the upper bound is always >= 0. 2712 void DependenceInfo::findBoundsEQ(CoefficientInfo *A, CoefficientInfo *B, 2713 BoundInfo *Bound, unsigned K) const { 2714 Bound[K].Lower[Dependence::DVEntry::EQ] = nullptr; // Default value = -infinity. 2715 Bound[K].Upper[Dependence::DVEntry::EQ] = nullptr; // Default value = +infinity. 2716 if (Bound[K].Iterations) { 2717 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); 2718 const SCEV *NegativePart = getNegativePart(Delta); 2719 Bound[K].Lower[Dependence::DVEntry::EQ] = 2720 SE->getMulExpr(NegativePart, Bound[K].Iterations); 2721 const SCEV *PositivePart = getPositivePart(Delta); 2722 Bound[K].Upper[Dependence::DVEntry::EQ] = 2723 SE->getMulExpr(PositivePart, Bound[K].Iterations); 2724 } 2725 else { 2726 // If the positive/negative part of the difference is 0, 2727 // we won't need to know the number of iterations. 2728 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); 2729 const SCEV *NegativePart = getNegativePart(Delta); 2730 if (NegativePart->isZero()) 2731 Bound[K].Lower[Dependence::DVEntry::EQ] = NegativePart; // Zero 2732 const SCEV *PositivePart = getPositivePart(Delta); 2733 if (PositivePart->isZero()) 2734 Bound[K].Upper[Dependence::DVEntry::EQ] = PositivePart; // Zero 2735 } 2736 } 2737 2738 2739 // Computes the upper and lower bounds for level K 2740 // using the < direction. Records them in Bound. 2741 // Wolfe gives the equations 2742 // 2743 // LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2744 // UB^<_k = (A^+_k - B_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2745 // 2746 // Since we normalize loops, we can simplify these equations to 2747 // 2748 // LB^<_k = (A^-_k - B_k)^- (U_k - 1) - B_k 2749 // UB^<_k = (A^+_k - B_k)^+ (U_k - 1) - B_k 2750 // 2751 // We must be careful to handle the case where the upper bound is unknown. 2752 void DependenceInfo::findBoundsLT(CoefficientInfo *A, CoefficientInfo *B, 2753 BoundInfo *Bound, unsigned K) const { 2754 Bound[K].Lower[Dependence::DVEntry::LT] = nullptr; // Default value = -infinity. 2755 Bound[K].Upper[Dependence::DVEntry::LT] = nullptr; // Default value = +infinity. 2756 if (Bound[K].Iterations) { 2757 const SCEV *Iter_1 = SE->getMinusSCEV( 2758 Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType())); 2759 const SCEV *NegPart = 2760 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); 2761 Bound[K].Lower[Dependence::DVEntry::LT] = 2762 SE->getMinusSCEV(SE->getMulExpr(NegPart, Iter_1), B[K].Coeff); 2763 const SCEV *PosPart = 2764 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); 2765 Bound[K].Upper[Dependence::DVEntry::LT] = 2766 SE->getMinusSCEV(SE->getMulExpr(PosPart, Iter_1), B[K].Coeff); 2767 } 2768 else { 2769 // If the positive/negative part of the difference is 0, 2770 // we won't need to know the number of iterations. 2771 const SCEV *NegPart = 2772 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); 2773 if (NegPart->isZero()) 2774 Bound[K].Lower[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); 2775 const SCEV *PosPart = 2776 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); 2777 if (PosPart->isZero()) 2778 Bound[K].Upper[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); 2779 } 2780 } 2781 2782 2783 // Computes the upper and lower bounds for level K 2784 // using the > direction. Records them in Bound. 2785 // Wolfe gives the equations 2786 // 2787 // LB^>_k = (A_k - B^+_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k 2788 // UB^>_k = (A_k - B^-_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k 2789 // 2790 // Since we normalize loops, we can simplify these equations to 2791 // 2792 // LB^>_k = (A_k - B^+_k)^- (U_k - 1) + A_k 2793 // UB^>_k = (A_k - B^-_k)^+ (U_k - 1) + A_k 2794 // 2795 // We must be careful to handle the case where the upper bound is unknown. 2796 void DependenceInfo::findBoundsGT(CoefficientInfo *A, CoefficientInfo *B, 2797 BoundInfo *Bound, unsigned K) const { 2798 Bound[K].Lower[Dependence::DVEntry::GT] = nullptr; // Default value = -infinity. 2799 Bound[K].Upper[Dependence::DVEntry::GT] = nullptr; // Default value = +infinity. 2800 if (Bound[K].Iterations) { 2801 const SCEV *Iter_1 = SE->getMinusSCEV( 2802 Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType())); 2803 const SCEV *NegPart = 2804 getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); 2805 Bound[K].Lower[Dependence::DVEntry::GT] = 2806 SE->getAddExpr(SE->getMulExpr(NegPart, Iter_1), A[K].Coeff); 2807 const SCEV *PosPart = 2808 getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); 2809 Bound[K].Upper[Dependence::DVEntry::GT] = 2810 SE->getAddExpr(SE->getMulExpr(PosPart, Iter_1), A[K].Coeff); 2811 } 2812 else { 2813 // If the positive/negative part of the difference is 0, 2814 // we won't need to know the number of iterations. 2815 const SCEV *NegPart = getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); 2816 if (NegPart->isZero()) 2817 Bound[K].Lower[Dependence::DVEntry::GT] = A[K].Coeff; 2818 const SCEV *PosPart = getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); 2819 if (PosPart->isZero()) 2820 Bound[K].Upper[Dependence::DVEntry::GT] = A[K].Coeff; 2821 } 2822 } 2823 2824 2825 // X^+ = max(X, 0) 2826 const SCEV *DependenceInfo::getPositivePart(const SCEV *X) const { 2827 return SE->getSMaxExpr(X, SE->getZero(X->getType())); 2828 } 2829 2830 2831 // X^- = min(X, 0) 2832 const SCEV *DependenceInfo::getNegativePart(const SCEV *X) const { 2833 return SE->getSMinExpr(X, SE->getZero(X->getType())); 2834 } 2835 2836 2837 // Walks through the subscript, 2838 // collecting each coefficient, the associated loop bounds, 2839 // and recording its positive and negative parts for later use. 2840 DependenceInfo::CoefficientInfo * 2841 DependenceInfo::collectCoeffInfo(const SCEV *Subscript, bool SrcFlag, 2842 const SCEV *&Constant) const { 2843 const SCEV *Zero = SE->getZero(Subscript->getType()); 2844 CoefficientInfo *CI = new CoefficientInfo[MaxLevels + 1]; 2845 for (unsigned K = 1; K <= MaxLevels; ++K) { 2846 CI[K].Coeff = Zero; 2847 CI[K].PosPart = Zero; 2848 CI[K].NegPart = Zero; 2849 CI[K].Iterations = nullptr; 2850 } 2851 while (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Subscript)) { 2852 const Loop *L = AddRec->getLoop(); 2853 unsigned K = SrcFlag ? mapSrcLoop(L) : mapDstLoop(L); 2854 CI[K].Coeff = AddRec->getStepRecurrence(*SE); 2855 CI[K].PosPart = getPositivePart(CI[K].Coeff); 2856 CI[K].NegPart = getNegativePart(CI[K].Coeff); 2857 CI[K].Iterations = collectUpperBound(L, Subscript->getType()); 2858 Subscript = AddRec->getStart(); 2859 } 2860 Constant = Subscript; 2861 #ifndef NDEBUG 2862 DEBUG(dbgs() << "\tCoefficient Info\n"); 2863 for (unsigned K = 1; K <= MaxLevels; ++K) { 2864 DEBUG(dbgs() << "\t " << K << "\t" << *CI[K].Coeff); 2865 DEBUG(dbgs() << "\tPos Part = "); 2866 DEBUG(dbgs() << *CI[K].PosPart); 2867 DEBUG(dbgs() << "\tNeg Part = "); 2868 DEBUG(dbgs() << *CI[K].NegPart); 2869 DEBUG(dbgs() << "\tUpper Bound = "); 2870 if (CI[K].Iterations) 2871 DEBUG(dbgs() << *CI[K].Iterations); 2872 else 2873 DEBUG(dbgs() << "+inf"); 2874 DEBUG(dbgs() << '\n'); 2875 } 2876 DEBUG(dbgs() << "\t Constant = " << *Subscript << '\n'); 2877 #endif 2878 return CI; 2879 } 2880 2881 2882 // Looks through all the bounds info and 2883 // computes the lower bound given the current direction settings 2884 // at each level. If the lower bound for any level is -inf, 2885 // the result is -inf. 2886 const SCEV *DependenceInfo::getLowerBound(BoundInfo *Bound) const { 2887 const SCEV *Sum = Bound[1].Lower[Bound[1].Direction]; 2888 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { 2889 if (Bound[K].Lower[Bound[K].Direction]) 2890 Sum = SE->getAddExpr(Sum, Bound[K].Lower[Bound[K].Direction]); 2891 else 2892 Sum = nullptr; 2893 } 2894 return Sum; 2895 } 2896 2897 2898 // Looks through all the bounds info and 2899 // computes the upper bound given the current direction settings 2900 // at each level. If the upper bound at any level is +inf, 2901 // the result is +inf. 2902 const SCEV *DependenceInfo::getUpperBound(BoundInfo *Bound) const { 2903 const SCEV *Sum = Bound[1].Upper[Bound[1].Direction]; 2904 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { 2905 if (Bound[K].Upper[Bound[K].Direction]) 2906 Sum = SE->getAddExpr(Sum, Bound[K].Upper[Bound[K].Direction]); 2907 else 2908 Sum = nullptr; 2909 } 2910 return Sum; 2911 } 2912 2913 2914 //===----------------------------------------------------------------------===// 2915 // Constraint manipulation for Delta test. 2916 2917 // Given a linear SCEV, 2918 // return the coefficient (the step) 2919 // corresponding to the specified loop. 2920 // If there isn't one, return 0. 2921 // For example, given a*i + b*j + c*k, finding the coefficient 2922 // corresponding to the j loop would yield b. 2923 const SCEV *DependenceInfo::findCoefficient(const SCEV *Expr, 2924 const Loop *TargetLoop) const { 2925 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 2926 if (!AddRec) 2927 return SE->getZero(Expr->getType()); 2928 if (AddRec->getLoop() == TargetLoop) 2929 return AddRec->getStepRecurrence(*SE); 2930 return findCoefficient(AddRec->getStart(), TargetLoop); 2931 } 2932 2933 2934 // Given a linear SCEV, 2935 // return the SCEV given by zeroing out the coefficient 2936 // corresponding to the specified loop. 2937 // For example, given a*i + b*j + c*k, zeroing the coefficient 2938 // corresponding to the j loop would yield a*i + c*k. 2939 const SCEV *DependenceInfo::zeroCoefficient(const SCEV *Expr, 2940 const Loop *TargetLoop) const { 2941 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 2942 if (!AddRec) 2943 return Expr; // ignore 2944 if (AddRec->getLoop() == TargetLoop) 2945 return AddRec->getStart(); 2946 return SE->getAddRecExpr(zeroCoefficient(AddRec->getStart(), TargetLoop), 2947 AddRec->getStepRecurrence(*SE), 2948 AddRec->getLoop(), 2949 AddRec->getNoWrapFlags()); 2950 } 2951 2952 2953 // Given a linear SCEV Expr, 2954 // return the SCEV given by adding some Value to the 2955 // coefficient corresponding to the specified TargetLoop. 2956 // For example, given a*i + b*j + c*k, adding 1 to the coefficient 2957 // corresponding to the j loop would yield a*i + (b+1)*j + c*k. 2958 const SCEV *DependenceInfo::addToCoefficient(const SCEV *Expr, 2959 const Loop *TargetLoop, 2960 const SCEV *Value) const { 2961 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 2962 if (!AddRec) // create a new addRec 2963 return SE->getAddRecExpr(Expr, 2964 Value, 2965 TargetLoop, 2966 SCEV::FlagAnyWrap); // Worst case, with no info. 2967 if (AddRec->getLoop() == TargetLoop) { 2968 const SCEV *Sum = SE->getAddExpr(AddRec->getStepRecurrence(*SE), Value); 2969 if (Sum->isZero()) 2970 return AddRec->getStart(); 2971 return SE->getAddRecExpr(AddRec->getStart(), 2972 Sum, 2973 AddRec->getLoop(), 2974 AddRec->getNoWrapFlags()); 2975 } 2976 if (SE->isLoopInvariant(AddRec, TargetLoop)) 2977 return SE->getAddRecExpr(AddRec, Value, TargetLoop, SCEV::FlagAnyWrap); 2978 return SE->getAddRecExpr( 2979 addToCoefficient(AddRec->getStart(), TargetLoop, Value), 2980 AddRec->getStepRecurrence(*SE), AddRec->getLoop(), 2981 AddRec->getNoWrapFlags()); 2982 } 2983 2984 2985 // Review the constraints, looking for opportunities 2986 // to simplify a subscript pair (Src and Dst). 2987 // Return true if some simplification occurs. 2988 // If the simplification isn't exact (that is, if it is conservative 2989 // in terms of dependence), set consistent to false. 2990 // Corresponds to Figure 5 from the paper 2991 // 2992 // Practical Dependence Testing 2993 // Goff, Kennedy, Tseng 2994 // PLDI 1991 2995 bool DependenceInfo::propagate(const SCEV *&Src, const SCEV *&Dst, 2996 SmallBitVector &Loops, 2997 SmallVectorImpl<Constraint> &Constraints, 2998 bool &Consistent) { 2999 bool Result = false; 3000 for (unsigned LI : Loops.set_bits()) { 3001 DEBUG(dbgs() << "\t Constraint[" << LI << "] is"); 3002 DEBUG(Constraints[LI].dump(dbgs())); 3003 if (Constraints[LI].isDistance()) 3004 Result |= propagateDistance(Src, Dst, Constraints[LI], Consistent); 3005 else if (Constraints[LI].isLine()) 3006 Result |= propagateLine(Src, Dst, Constraints[LI], Consistent); 3007 else if (Constraints[LI].isPoint()) 3008 Result |= propagatePoint(Src, Dst, Constraints[LI]); 3009 } 3010 return Result; 3011 } 3012 3013 3014 // Attempt to propagate a distance 3015 // constraint into a subscript pair (Src and Dst). 3016 // Return true if some simplification occurs. 3017 // If the simplification isn't exact (that is, if it is conservative 3018 // in terms of dependence), set consistent to false. 3019 bool DependenceInfo::propagateDistance(const SCEV *&Src, const SCEV *&Dst, 3020 Constraint &CurConstraint, 3021 bool &Consistent) { 3022 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3023 DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); 3024 const SCEV *A_K = findCoefficient(Src, CurLoop); 3025 if (A_K->isZero()) 3026 return false; 3027 const SCEV *DA_K = SE->getMulExpr(A_K, CurConstraint.getD()); 3028 Src = SE->getMinusSCEV(Src, DA_K); 3029 Src = zeroCoefficient(Src, CurLoop); 3030 DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); 3031 DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); 3032 Dst = addToCoefficient(Dst, CurLoop, SE->getNegativeSCEV(A_K)); 3033 DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); 3034 if (!findCoefficient(Dst, CurLoop)->isZero()) 3035 Consistent = false; 3036 return true; 3037 } 3038 3039 3040 // Attempt to propagate a line 3041 // constraint into a subscript pair (Src and Dst). 3042 // Return true if some simplification occurs. 3043 // If the simplification isn't exact (that is, if it is conservative 3044 // in terms of dependence), set consistent to false. 3045 bool DependenceInfo::propagateLine(const SCEV *&Src, const SCEV *&Dst, 3046 Constraint &CurConstraint, 3047 bool &Consistent) { 3048 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3049 const SCEV *A = CurConstraint.getA(); 3050 const SCEV *B = CurConstraint.getB(); 3051 const SCEV *C = CurConstraint.getC(); 3052 DEBUG(dbgs() << "\t\tA = " << *A << ", B = " << *B << ", C = " << *C << "\n"); 3053 DEBUG(dbgs() << "\t\tSrc = " << *Src << "\n"); 3054 DEBUG(dbgs() << "\t\tDst = " << *Dst << "\n"); 3055 if (A->isZero()) { 3056 const SCEVConstant *Bconst = dyn_cast<SCEVConstant>(B); 3057 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3058 if (!Bconst || !Cconst) return false; 3059 APInt Beta = Bconst->getAPInt(); 3060 APInt Charlie = Cconst->getAPInt(); 3061 APInt CdivB = Charlie.sdiv(Beta); 3062 assert(Charlie.srem(Beta) == 0 && "C should be evenly divisible by B"); 3063 const SCEV *AP_K = findCoefficient(Dst, CurLoop); 3064 // Src = SE->getAddExpr(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); 3065 Src = SE->getMinusSCEV(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); 3066 Dst = zeroCoefficient(Dst, CurLoop); 3067 if (!findCoefficient(Src, CurLoop)->isZero()) 3068 Consistent = false; 3069 } 3070 else if (B->isZero()) { 3071 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); 3072 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3073 if (!Aconst || !Cconst) return false; 3074 APInt Alpha = Aconst->getAPInt(); 3075 APInt Charlie = Cconst->getAPInt(); 3076 APInt CdivA = Charlie.sdiv(Alpha); 3077 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); 3078 const SCEV *A_K = findCoefficient(Src, CurLoop); 3079 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); 3080 Src = zeroCoefficient(Src, CurLoop); 3081 if (!findCoefficient(Dst, CurLoop)->isZero()) 3082 Consistent = false; 3083 } 3084 else if (isKnownPredicate(CmpInst::ICMP_EQ, A, B)) { 3085 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); 3086 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3087 if (!Aconst || !Cconst) return false; 3088 APInt Alpha = Aconst->getAPInt(); 3089 APInt Charlie = Cconst->getAPInt(); 3090 APInt CdivA = Charlie.sdiv(Alpha); 3091 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); 3092 const SCEV *A_K = findCoefficient(Src, CurLoop); 3093 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); 3094 Src = zeroCoefficient(Src, CurLoop); 3095 Dst = addToCoefficient(Dst, CurLoop, A_K); 3096 if (!findCoefficient(Dst, CurLoop)->isZero()) 3097 Consistent = false; 3098 } 3099 else { 3100 // paper is incorrect here, or perhaps just misleading 3101 const SCEV *A_K = findCoefficient(Src, CurLoop); 3102 Src = SE->getMulExpr(Src, A); 3103 Dst = SE->getMulExpr(Dst, A); 3104 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, C)); 3105 Src = zeroCoefficient(Src, CurLoop); 3106 Dst = addToCoefficient(Dst, CurLoop, SE->getMulExpr(A_K, B)); 3107 if (!findCoefficient(Dst, CurLoop)->isZero()) 3108 Consistent = false; 3109 } 3110 DEBUG(dbgs() << "\t\tnew Src = " << *Src << "\n"); 3111 DEBUG(dbgs() << "\t\tnew Dst = " << *Dst << "\n"); 3112 return true; 3113 } 3114 3115 3116 // Attempt to propagate a point 3117 // constraint into a subscript pair (Src and Dst). 3118 // Return true if some simplification occurs. 3119 bool DependenceInfo::propagatePoint(const SCEV *&Src, const SCEV *&Dst, 3120 Constraint &CurConstraint) { 3121 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3122 const SCEV *A_K = findCoefficient(Src, CurLoop); 3123 const SCEV *AP_K = findCoefficient(Dst, CurLoop); 3124 const SCEV *XA_K = SE->getMulExpr(A_K, CurConstraint.getX()); 3125 const SCEV *YAP_K = SE->getMulExpr(AP_K, CurConstraint.getY()); 3126 DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); 3127 Src = SE->getAddExpr(Src, SE->getMinusSCEV(XA_K, YAP_K)); 3128 Src = zeroCoefficient(Src, CurLoop); 3129 DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); 3130 DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); 3131 Dst = zeroCoefficient(Dst, CurLoop); 3132 DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); 3133 return true; 3134 } 3135 3136 3137 // Update direction vector entry based on the current constraint. 3138 void DependenceInfo::updateDirection(Dependence::DVEntry &Level, 3139 const Constraint &CurConstraint) const { 3140 DEBUG(dbgs() << "\tUpdate direction, constraint ="); 3141 DEBUG(CurConstraint.dump(dbgs())); 3142 if (CurConstraint.isAny()) 3143 ; // use defaults 3144 else if (CurConstraint.isDistance()) { 3145 // this one is consistent, the others aren't 3146 Level.Scalar = false; 3147 Level.Distance = CurConstraint.getD(); 3148 unsigned NewDirection = Dependence::DVEntry::NONE; 3149 if (!SE->isKnownNonZero(Level.Distance)) // if may be zero 3150 NewDirection = Dependence::DVEntry::EQ; 3151 if (!SE->isKnownNonPositive(Level.Distance)) // if may be positive 3152 NewDirection |= Dependence::DVEntry::LT; 3153 if (!SE->isKnownNonNegative(Level.Distance)) // if may be negative 3154 NewDirection |= Dependence::DVEntry::GT; 3155 Level.Direction &= NewDirection; 3156 } 3157 else if (CurConstraint.isLine()) { 3158 Level.Scalar = false; 3159 Level.Distance = nullptr; 3160 // direction should be accurate 3161 } 3162 else if (CurConstraint.isPoint()) { 3163 Level.Scalar = false; 3164 Level.Distance = nullptr; 3165 unsigned NewDirection = Dependence::DVEntry::NONE; 3166 if (!isKnownPredicate(CmpInst::ICMP_NE, 3167 CurConstraint.getY(), 3168 CurConstraint.getX())) 3169 // if X may be = Y 3170 NewDirection |= Dependence::DVEntry::EQ; 3171 if (!isKnownPredicate(CmpInst::ICMP_SLE, 3172 CurConstraint.getY(), 3173 CurConstraint.getX())) 3174 // if Y may be > X 3175 NewDirection |= Dependence::DVEntry::LT; 3176 if (!isKnownPredicate(CmpInst::ICMP_SGE, 3177 CurConstraint.getY(), 3178 CurConstraint.getX())) 3179 // if Y may be < X 3180 NewDirection |= Dependence::DVEntry::GT; 3181 Level.Direction &= NewDirection; 3182 } 3183 else 3184 llvm_unreachable("constraint has unexpected kind"); 3185 } 3186 3187 /// Check if we can delinearize the subscripts. If the SCEVs representing the 3188 /// source and destination array references are recurrences on a nested loop, 3189 /// this function flattens the nested recurrences into separate recurrences 3190 /// for each loop level. 3191 bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst, 3192 SmallVectorImpl<Subscript> &Pair) { 3193 assert(isLoadOrStore(Src) && "instruction is not load or store"); 3194 assert(isLoadOrStore(Dst) && "instruction is not load or store"); 3195 Value *SrcPtr = getLoadStorePointerOperand(Src); 3196 Value *DstPtr = getLoadStorePointerOperand(Dst); 3197 3198 Loop *SrcLoop = LI->getLoopFor(Src->getParent()); 3199 Loop *DstLoop = LI->getLoopFor(Dst->getParent()); 3200 3201 // Below code mimics the code in Delinearization.cpp 3202 const SCEV *SrcAccessFn = 3203 SE->getSCEVAtScope(SrcPtr, SrcLoop); 3204 const SCEV *DstAccessFn = 3205 SE->getSCEVAtScope(DstPtr, DstLoop); 3206 3207 const SCEVUnknown *SrcBase = 3208 dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn)); 3209 const SCEVUnknown *DstBase = 3210 dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn)); 3211 3212 if (!SrcBase || !DstBase || SrcBase != DstBase) 3213 return false; 3214 3215 const SCEV *ElementSize = SE->getElementSize(Src); 3216 if (ElementSize != SE->getElementSize(Dst)) 3217 return false; 3218 3219 const SCEV *SrcSCEV = SE->getMinusSCEV(SrcAccessFn, SrcBase); 3220 const SCEV *DstSCEV = SE->getMinusSCEV(DstAccessFn, DstBase); 3221 3222 const SCEVAddRecExpr *SrcAR = dyn_cast<SCEVAddRecExpr>(SrcSCEV); 3223 const SCEVAddRecExpr *DstAR = dyn_cast<SCEVAddRecExpr>(DstSCEV); 3224 if (!SrcAR || !DstAR || !SrcAR->isAffine() || !DstAR->isAffine()) 3225 return false; 3226 3227 // First step: collect parametric terms in both array references. 3228 SmallVector<const SCEV *, 4> Terms; 3229 SE->collectParametricTerms(SrcAR, Terms); 3230 SE->collectParametricTerms(DstAR, Terms); 3231 3232 // Second step: find subscript sizes. 3233 SmallVector<const SCEV *, 4> Sizes; 3234 SE->findArrayDimensions(Terms, Sizes, ElementSize); 3235 3236 // Third step: compute the access functions for each subscript. 3237 SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts; 3238 SE->computeAccessFunctions(SrcAR, SrcSubscripts, Sizes); 3239 SE->computeAccessFunctions(DstAR, DstSubscripts, Sizes); 3240 3241 // Fail when there is only a subscript: that's a linearized access function. 3242 if (SrcSubscripts.size() < 2 || DstSubscripts.size() < 2 || 3243 SrcSubscripts.size() != DstSubscripts.size()) 3244 return false; 3245 3246 int size = SrcSubscripts.size(); 3247 3248 DEBUG({ 3249 dbgs() << "\nSrcSubscripts: "; 3250 for (int i = 0; i < size; i++) 3251 dbgs() << *SrcSubscripts[i]; 3252 dbgs() << "\nDstSubscripts: "; 3253 for (int i = 0; i < size; i++) 3254 dbgs() << *DstSubscripts[i]; 3255 }); 3256 3257 // The delinearization transforms a single-subscript MIV dependence test into 3258 // a multi-subscript SIV dependence test that is easier to compute. So we 3259 // resize Pair to contain as many pairs of subscripts as the delinearization 3260 // has found, and then initialize the pairs following the delinearization. 3261 Pair.resize(size); 3262 for (int i = 0; i < size; ++i) { 3263 Pair[i].Src = SrcSubscripts[i]; 3264 Pair[i].Dst = DstSubscripts[i]; 3265 unifySubscriptType(&Pair[i]); 3266 3267 // FIXME: we should record the bounds SrcSizes[i] and DstSizes[i] that the 3268 // delinearization has found, and add these constraints to the dependence 3269 // check to avoid memory accesses overflow from one dimension into another. 3270 // This is related to the problem of determining the existence of data 3271 // dependences in array accesses using a different number of subscripts: in 3272 // C one can access an array A[100][100]; as A[0][9999], *A[9999], etc. 3273 } 3274 3275 return true; 3276 } 3277 3278 //===----------------------------------------------------------------------===// 3279 3280 #ifndef NDEBUG 3281 // For debugging purposes, dump a small bit vector to dbgs(). 3282 static void dumpSmallBitVector(SmallBitVector &BV) { 3283 dbgs() << "{"; 3284 for (unsigned VI : BV.set_bits()) { 3285 dbgs() << VI; 3286 if (BV.find_next(VI) >= 0) 3287 dbgs() << ' '; 3288 } 3289 dbgs() << "}\n"; 3290 } 3291 #endif 3292 3293 // depends - 3294 // Returns NULL if there is no dependence. 3295 // Otherwise, return a Dependence with as many details as possible. 3296 // Corresponds to Section 3.1 in the paper 3297 // 3298 // Practical Dependence Testing 3299 // Goff, Kennedy, Tseng 3300 // PLDI 1991 3301 // 3302 // Care is required to keep the routine below, getSplitIteration(), 3303 // up to date with respect to this routine. 3304 std::unique_ptr<Dependence> 3305 DependenceInfo::depends(Instruction *Src, Instruction *Dst, 3306 bool PossiblyLoopIndependent) { 3307 if (Src == Dst) 3308 PossiblyLoopIndependent = false; 3309 3310 if ((!Src->mayReadFromMemory() && !Src->mayWriteToMemory()) || 3311 (!Dst->mayReadFromMemory() && !Dst->mayWriteToMemory())) 3312 // if both instructions don't reference memory, there's no dependence 3313 return nullptr; 3314 3315 if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) { 3316 // can only analyze simple loads and stores, i.e., no calls, invokes, etc. 3317 DEBUG(dbgs() << "can only handle simple loads and stores\n"); 3318 return make_unique<Dependence>(Src, Dst); 3319 } 3320 3321 assert(isLoadOrStore(Src) && "instruction is not load or store"); 3322 assert(isLoadOrStore(Dst) && "instruction is not load or store"); 3323 Value *SrcPtr = getLoadStorePointerOperand(Src); 3324 Value *DstPtr = getLoadStorePointerOperand(Dst); 3325 3326 switch (underlyingObjectsAlias(AA, F->getParent()->getDataLayout(), 3327 MemoryLocation::get(Dst), 3328 MemoryLocation::get(Src))) { 3329 case MayAlias: 3330 case PartialAlias: 3331 // cannot analyse objects if we don't understand their aliasing. 3332 DEBUG(dbgs() << "can't analyze may or partial alias\n"); 3333 return make_unique<Dependence>(Src, Dst); 3334 case NoAlias: 3335 // If the objects noalias, they are distinct, accesses are independent. 3336 DEBUG(dbgs() << "no alias\n"); 3337 return nullptr; 3338 case MustAlias: 3339 break; // The underlying objects alias; test accesses for dependence. 3340 } 3341 3342 // establish loop nesting levels 3343 establishNestingLevels(Src, Dst); 3344 DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n"); 3345 DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n"); 3346 3347 FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels); 3348 ++TotalArrayPairs; 3349 3350 unsigned Pairs = 1; 3351 SmallVector<Subscript, 2> Pair(Pairs); 3352 const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); 3353 const SCEV *DstSCEV = SE->getSCEV(DstPtr); 3354 DEBUG(dbgs() << " SrcSCEV = " << *SrcSCEV << "\n"); 3355 DEBUG(dbgs() << " DstSCEV = " << *DstSCEV << "\n"); 3356 Pair[0].Src = SrcSCEV; 3357 Pair[0].Dst = DstSCEV; 3358 3359 if (Delinearize) { 3360 if (tryDelinearize(Src, Dst, Pair)) { 3361 DEBUG(dbgs() << " delinearized\n"); 3362 Pairs = Pair.size(); 3363 } 3364 } 3365 3366 for (unsigned P = 0; P < Pairs; ++P) { 3367 Pair[P].Loops.resize(MaxLevels + 1); 3368 Pair[P].GroupLoops.resize(MaxLevels + 1); 3369 Pair[P].Group.resize(Pairs); 3370 removeMatchingExtensions(&Pair[P]); 3371 Pair[P].Classification = 3372 classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), 3373 Pair[P].Dst, LI->getLoopFor(Dst->getParent()), 3374 Pair[P].Loops); 3375 Pair[P].GroupLoops = Pair[P].Loops; 3376 Pair[P].Group.set(P); 3377 DEBUG(dbgs() << " subscript " << P << "\n"); 3378 DEBUG(dbgs() << "\tsrc = " << *Pair[P].Src << "\n"); 3379 DEBUG(dbgs() << "\tdst = " << *Pair[P].Dst << "\n"); 3380 DEBUG(dbgs() << "\tclass = " << Pair[P].Classification << "\n"); 3381 DEBUG(dbgs() << "\tloops = "); 3382 DEBUG(dumpSmallBitVector(Pair[P].Loops)); 3383 } 3384 3385 SmallBitVector Separable(Pairs); 3386 SmallBitVector Coupled(Pairs); 3387 3388 // Partition subscripts into separable and minimally-coupled groups 3389 // Algorithm in paper is algorithmically better; 3390 // this may be faster in practice. Check someday. 3391 // 3392 // Here's an example of how it works. Consider this code: 3393 // 3394 // for (i = ...) { 3395 // for (j = ...) { 3396 // for (k = ...) { 3397 // for (l = ...) { 3398 // for (m = ...) { 3399 // A[i][j][k][m] = ...; 3400 // ... = A[0][j][l][i + j]; 3401 // } 3402 // } 3403 // } 3404 // } 3405 // } 3406 // 3407 // There are 4 subscripts here: 3408 // 0 [i] and [0] 3409 // 1 [j] and [j] 3410 // 2 [k] and [l] 3411 // 3 [m] and [i + j] 3412 // 3413 // We've already classified each subscript pair as ZIV, SIV, etc., 3414 // and collected all the loops mentioned by pair P in Pair[P].Loops. 3415 // In addition, we've initialized Pair[P].GroupLoops to Pair[P].Loops 3416 // and set Pair[P].Group = {P}. 3417 // 3418 // Src Dst Classification Loops GroupLoops Group 3419 // 0 [i] [0] SIV {1} {1} {0} 3420 // 1 [j] [j] SIV {2} {2} {1} 3421 // 2 [k] [l] RDIV {3,4} {3,4} {2} 3422 // 3 [m] [i + j] MIV {1,2,5} {1,2,5} {3} 3423 // 3424 // For each subscript SI 0 .. 3, we consider each remaining subscript, SJ. 3425 // So, 0 is compared against 1, 2, and 3; 1 is compared against 2 and 3, etc. 3426 // 3427 // We begin by comparing 0 and 1. The intersection of the GroupLoops is empty. 3428 // Next, 0 and 2. Again, the intersection of their GroupLoops is empty. 3429 // Next 0 and 3. The intersection of their GroupLoop = {1}, not empty, 3430 // so Pair[3].Group = {0,3} and Done = false (that is, 0 will not be added 3431 // to either Separable or Coupled). 3432 // 3433 // Next, we consider 1 and 2. The intersection of the GroupLoops is empty. 3434 // Next, 1 and 3. The intersectionof their GroupLoops = {2}, not empty, 3435 // so Pair[3].Group = {0, 1, 3} and Done = false. 3436 // 3437 // Next, we compare 2 against 3. The intersection of the GroupLoops is empty. 3438 // Since Done remains true, we add 2 to the set of Separable pairs. 3439 // 3440 // Finally, we consider 3. There's nothing to compare it with, 3441 // so Done remains true and we add it to the Coupled set. 3442 // Pair[3].Group = {0, 1, 3} and GroupLoops = {1, 2, 5}. 3443 // 3444 // In the end, we've got 1 separable subscript and 1 coupled group. 3445 for (unsigned SI = 0; SI < Pairs; ++SI) { 3446 if (Pair[SI].Classification == Subscript::NonLinear) { 3447 // ignore these, but collect loops for later 3448 ++NonlinearSubscriptPairs; 3449 collectCommonLoops(Pair[SI].Src, 3450 LI->getLoopFor(Src->getParent()), 3451 Pair[SI].Loops); 3452 collectCommonLoops(Pair[SI].Dst, 3453 LI->getLoopFor(Dst->getParent()), 3454 Pair[SI].Loops); 3455 Result.Consistent = false; 3456 } else if (Pair[SI].Classification == Subscript::ZIV) { 3457 // always separable 3458 Separable.set(SI); 3459 } 3460 else { 3461 // SIV, RDIV, or MIV, so check for coupled group 3462 bool Done = true; 3463 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { 3464 SmallBitVector Intersection = Pair[SI].GroupLoops; 3465 Intersection &= Pair[SJ].GroupLoops; 3466 if (Intersection.any()) { 3467 // accumulate set of all the loops in group 3468 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; 3469 // accumulate set of all subscripts in group 3470 Pair[SJ].Group |= Pair[SI].Group; 3471 Done = false; 3472 } 3473 } 3474 if (Done) { 3475 if (Pair[SI].Group.count() == 1) { 3476 Separable.set(SI); 3477 ++SeparableSubscriptPairs; 3478 } 3479 else { 3480 Coupled.set(SI); 3481 ++CoupledSubscriptPairs; 3482 } 3483 } 3484 } 3485 } 3486 3487 DEBUG(dbgs() << " Separable = "); 3488 DEBUG(dumpSmallBitVector(Separable)); 3489 DEBUG(dbgs() << " Coupled = "); 3490 DEBUG(dumpSmallBitVector(Coupled)); 3491 3492 Constraint NewConstraint; 3493 NewConstraint.setAny(SE); 3494 3495 // test separable subscripts 3496 for (unsigned SI : Separable.set_bits()) { 3497 DEBUG(dbgs() << "testing subscript " << SI); 3498 switch (Pair[SI].Classification) { 3499 case Subscript::ZIV: 3500 DEBUG(dbgs() << ", ZIV\n"); 3501 if (testZIV(Pair[SI].Src, Pair[SI].Dst, Result)) 3502 return nullptr; 3503 break; 3504 case Subscript::SIV: { 3505 DEBUG(dbgs() << ", SIV\n"); 3506 unsigned Level; 3507 const SCEV *SplitIter = nullptr; 3508 if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level, Result, NewConstraint, 3509 SplitIter)) 3510 return nullptr; 3511 break; 3512 } 3513 case Subscript::RDIV: 3514 DEBUG(dbgs() << ", RDIV\n"); 3515 if (testRDIV(Pair[SI].Src, Pair[SI].Dst, Result)) 3516 return nullptr; 3517 break; 3518 case Subscript::MIV: 3519 DEBUG(dbgs() << ", MIV\n"); 3520 if (testMIV(Pair[SI].Src, Pair[SI].Dst, Pair[SI].Loops, Result)) 3521 return nullptr; 3522 break; 3523 default: 3524 llvm_unreachable("subscript has unexpected classification"); 3525 } 3526 } 3527 3528 if (Coupled.count()) { 3529 // test coupled subscript groups 3530 DEBUG(dbgs() << "starting on coupled subscripts\n"); 3531 DEBUG(dbgs() << "MaxLevels + 1 = " << MaxLevels + 1 << "\n"); 3532 SmallVector<Constraint, 4> Constraints(MaxLevels + 1); 3533 for (unsigned II = 0; II <= MaxLevels; ++II) 3534 Constraints[II].setAny(SE); 3535 for (unsigned SI : Coupled.set_bits()) { 3536 DEBUG(dbgs() << "testing subscript group " << SI << " { "); 3537 SmallBitVector Group(Pair[SI].Group); 3538 SmallBitVector Sivs(Pairs); 3539 SmallBitVector Mivs(Pairs); 3540 SmallBitVector ConstrainedLevels(MaxLevels + 1); 3541 SmallVector<Subscript *, 4> PairsInGroup; 3542 for (unsigned SJ : Group.set_bits()) { 3543 DEBUG(dbgs() << SJ << " "); 3544 if (Pair[SJ].Classification == Subscript::SIV) 3545 Sivs.set(SJ); 3546 else 3547 Mivs.set(SJ); 3548 PairsInGroup.push_back(&Pair[SJ]); 3549 } 3550 unifySubscriptType(PairsInGroup); 3551 DEBUG(dbgs() << "}\n"); 3552 while (Sivs.any()) { 3553 bool Changed = false; 3554 for (unsigned SJ : Sivs.set_bits()) { 3555 DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n"); 3556 // SJ is an SIV subscript that's part of the current coupled group 3557 unsigned Level; 3558 const SCEV *SplitIter = nullptr; 3559 DEBUG(dbgs() << "SIV\n"); 3560 if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, Result, NewConstraint, 3561 SplitIter)) 3562 return nullptr; 3563 ConstrainedLevels.set(Level); 3564 if (intersectConstraints(&Constraints[Level], &NewConstraint)) { 3565 if (Constraints[Level].isEmpty()) { 3566 ++DeltaIndependence; 3567 return nullptr; 3568 } 3569 Changed = true; 3570 } 3571 Sivs.reset(SJ); 3572 } 3573 if (Changed) { 3574 // propagate, possibly creating new SIVs and ZIVs 3575 DEBUG(dbgs() << " propagating\n"); 3576 DEBUG(dbgs() << "\tMivs = "); 3577 DEBUG(dumpSmallBitVector(Mivs)); 3578 for (unsigned SJ : Mivs.set_bits()) { 3579 // SJ is an MIV subscript that's part of the current coupled group 3580 DEBUG(dbgs() << "\tSJ = " << SJ << "\n"); 3581 if (propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, 3582 Constraints, Result.Consistent)) { 3583 DEBUG(dbgs() << "\t Changed\n"); 3584 ++DeltaPropagations; 3585 Pair[SJ].Classification = 3586 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), 3587 Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), 3588 Pair[SJ].Loops); 3589 switch (Pair[SJ].Classification) { 3590 case Subscript::ZIV: 3591 DEBUG(dbgs() << "ZIV\n"); 3592 if (testZIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) 3593 return nullptr; 3594 Mivs.reset(SJ); 3595 break; 3596 case Subscript::SIV: 3597 Sivs.set(SJ); 3598 Mivs.reset(SJ); 3599 break; 3600 case Subscript::RDIV: 3601 case Subscript::MIV: 3602 break; 3603 default: 3604 llvm_unreachable("bad subscript classification"); 3605 } 3606 } 3607 } 3608 } 3609 } 3610 3611 // test & propagate remaining RDIVs 3612 for (unsigned SJ : Mivs.set_bits()) { 3613 if (Pair[SJ].Classification == Subscript::RDIV) { 3614 DEBUG(dbgs() << "RDIV test\n"); 3615 if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) 3616 return nullptr; 3617 // I don't yet understand how to propagate RDIV results 3618 Mivs.reset(SJ); 3619 } 3620 } 3621 3622 // test remaining MIVs 3623 // This code is temporary. 3624 // Better to somehow test all remaining subscripts simultaneously. 3625 for (unsigned SJ : Mivs.set_bits()) { 3626 if (Pair[SJ].Classification == Subscript::MIV) { 3627 DEBUG(dbgs() << "MIV test\n"); 3628 if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Result)) 3629 return nullptr; 3630 } 3631 else 3632 llvm_unreachable("expected only MIV subscripts at this point"); 3633 } 3634 3635 // update Result.DV from constraint vector 3636 DEBUG(dbgs() << " updating\n"); 3637 for (unsigned SJ : ConstrainedLevels.set_bits()) { 3638 if (SJ > CommonLevels) 3639 break; 3640 updateDirection(Result.DV[SJ - 1], Constraints[SJ]); 3641 if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE) 3642 return nullptr; 3643 } 3644 } 3645 } 3646 3647 // Make sure the Scalar flags are set correctly. 3648 SmallBitVector CompleteLoops(MaxLevels + 1); 3649 for (unsigned SI = 0; SI < Pairs; ++SI) 3650 CompleteLoops |= Pair[SI].Loops; 3651 for (unsigned II = 1; II <= CommonLevels; ++II) 3652 if (CompleteLoops[II]) 3653 Result.DV[II - 1].Scalar = false; 3654 3655 if (PossiblyLoopIndependent) { 3656 // Make sure the LoopIndependent flag is set correctly. 3657 // All directions must include equal, otherwise no 3658 // loop-independent dependence is possible. 3659 for (unsigned II = 1; II <= CommonLevels; ++II) { 3660 if (!(Result.getDirection(II) & Dependence::DVEntry::EQ)) { 3661 Result.LoopIndependent = false; 3662 break; 3663 } 3664 } 3665 } 3666 else { 3667 // On the other hand, if all directions are equal and there's no 3668 // loop-independent dependence possible, then no dependence exists. 3669 bool AllEqual = true; 3670 for (unsigned II = 1; II <= CommonLevels; ++II) { 3671 if (Result.getDirection(II) != Dependence::DVEntry::EQ) { 3672 AllEqual = false; 3673 break; 3674 } 3675 } 3676 if (AllEqual) 3677 return nullptr; 3678 } 3679 3680 return make_unique<FullDependence>(std::move(Result)); 3681 } 3682 3683 3684 3685 //===----------------------------------------------------------------------===// 3686 // getSplitIteration - 3687 // Rather than spend rarely-used space recording the splitting iteration 3688 // during the Weak-Crossing SIV test, we re-compute it on demand. 3689 // The re-computation is basically a repeat of the entire dependence test, 3690 // though simplified since we know that the dependence exists. 3691 // It's tedious, since we must go through all propagations, etc. 3692 // 3693 // Care is required to keep this code up to date with respect to the routine 3694 // above, depends(). 3695 // 3696 // Generally, the dependence analyzer will be used to build 3697 // a dependence graph for a function (basically a map from instructions 3698 // to dependences). Looking for cycles in the graph shows us loops 3699 // that cannot be trivially vectorized/parallelized. 3700 // 3701 // We can try to improve the situation by examining all the dependences 3702 // that make up the cycle, looking for ones we can break. 3703 // Sometimes, peeling the first or last iteration of a loop will break 3704 // dependences, and we've got flags for those possibilities. 3705 // Sometimes, splitting a loop at some other iteration will do the trick, 3706 // and we've got a flag for that case. Rather than waste the space to 3707 // record the exact iteration (since we rarely know), we provide 3708 // a method that calculates the iteration. It's a drag that it must work 3709 // from scratch, but wonderful in that it's possible. 3710 // 3711 // Here's an example: 3712 // 3713 // for (i = 0; i < 10; i++) 3714 // A[i] = ... 3715 // ... = A[11 - i] 3716 // 3717 // There's a loop-carried flow dependence from the store to the load, 3718 // found by the weak-crossing SIV test. The dependence will have a flag, 3719 // indicating that the dependence can be broken by splitting the loop. 3720 // Calling getSplitIteration will return 5. 3721 // Splitting the loop breaks the dependence, like so: 3722 // 3723 // for (i = 0; i <= 5; i++) 3724 // A[i] = ... 3725 // ... = A[11 - i] 3726 // for (i = 6; i < 10; i++) 3727 // A[i] = ... 3728 // ... = A[11 - i] 3729 // 3730 // breaks the dependence and allows us to vectorize/parallelize 3731 // both loops. 3732 const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep, 3733 unsigned SplitLevel) { 3734 assert(Dep.isSplitable(SplitLevel) && 3735 "Dep should be splitable at SplitLevel"); 3736 Instruction *Src = Dep.getSrc(); 3737 Instruction *Dst = Dep.getDst(); 3738 assert(Src->mayReadFromMemory() || Src->mayWriteToMemory()); 3739 assert(Dst->mayReadFromMemory() || Dst->mayWriteToMemory()); 3740 assert(isLoadOrStore(Src)); 3741 assert(isLoadOrStore(Dst)); 3742 Value *SrcPtr = getLoadStorePointerOperand(Src); 3743 Value *DstPtr = getLoadStorePointerOperand(Dst); 3744 assert(underlyingObjectsAlias(AA, F->getParent()->getDataLayout(), 3745 MemoryLocation::get(Dst), 3746 MemoryLocation::get(Src)) == MustAlias); 3747 3748 // establish loop nesting levels 3749 establishNestingLevels(Src, Dst); 3750 3751 FullDependence Result(Src, Dst, false, CommonLevels); 3752 3753 unsigned Pairs = 1; 3754 SmallVector<Subscript, 2> Pair(Pairs); 3755 const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); 3756 const SCEV *DstSCEV = SE->getSCEV(DstPtr); 3757 Pair[0].Src = SrcSCEV; 3758 Pair[0].Dst = DstSCEV; 3759 3760 if (Delinearize) { 3761 if (tryDelinearize(Src, Dst, Pair)) { 3762 DEBUG(dbgs() << " delinearized\n"); 3763 Pairs = Pair.size(); 3764 } 3765 } 3766 3767 for (unsigned P = 0; P < Pairs; ++P) { 3768 Pair[P].Loops.resize(MaxLevels + 1); 3769 Pair[P].GroupLoops.resize(MaxLevels + 1); 3770 Pair[P].Group.resize(Pairs); 3771 removeMatchingExtensions(&Pair[P]); 3772 Pair[P].Classification = 3773 classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), 3774 Pair[P].Dst, LI->getLoopFor(Dst->getParent()), 3775 Pair[P].Loops); 3776 Pair[P].GroupLoops = Pair[P].Loops; 3777 Pair[P].Group.set(P); 3778 } 3779 3780 SmallBitVector Separable(Pairs); 3781 SmallBitVector Coupled(Pairs); 3782 3783 // partition subscripts into separable and minimally-coupled groups 3784 for (unsigned SI = 0; SI < Pairs; ++SI) { 3785 if (Pair[SI].Classification == Subscript::NonLinear) { 3786 // ignore these, but collect loops for later 3787 collectCommonLoops(Pair[SI].Src, 3788 LI->getLoopFor(Src->getParent()), 3789 Pair[SI].Loops); 3790 collectCommonLoops(Pair[SI].Dst, 3791 LI->getLoopFor(Dst->getParent()), 3792 Pair[SI].Loops); 3793 Result.Consistent = false; 3794 } 3795 else if (Pair[SI].Classification == Subscript::ZIV) 3796 Separable.set(SI); 3797 else { 3798 // SIV, RDIV, or MIV, so check for coupled group 3799 bool Done = true; 3800 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { 3801 SmallBitVector Intersection = Pair[SI].GroupLoops; 3802 Intersection &= Pair[SJ].GroupLoops; 3803 if (Intersection.any()) { 3804 // accumulate set of all the loops in group 3805 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; 3806 // accumulate set of all subscripts in group 3807 Pair[SJ].Group |= Pair[SI].Group; 3808 Done = false; 3809 } 3810 } 3811 if (Done) { 3812 if (Pair[SI].Group.count() == 1) 3813 Separable.set(SI); 3814 else 3815 Coupled.set(SI); 3816 } 3817 } 3818 } 3819 3820 Constraint NewConstraint; 3821 NewConstraint.setAny(SE); 3822 3823 // test separable subscripts 3824 for (unsigned SI : Separable.set_bits()) { 3825 switch (Pair[SI].Classification) { 3826 case Subscript::SIV: { 3827 unsigned Level; 3828 const SCEV *SplitIter = nullptr; 3829 (void) testSIV(Pair[SI].Src, Pair[SI].Dst, Level, 3830 Result, NewConstraint, SplitIter); 3831 if (Level == SplitLevel) { 3832 assert(SplitIter != nullptr); 3833 return SplitIter; 3834 } 3835 break; 3836 } 3837 case Subscript::ZIV: 3838 case Subscript::RDIV: 3839 case Subscript::MIV: 3840 break; 3841 default: 3842 llvm_unreachable("subscript has unexpected classification"); 3843 } 3844 } 3845 3846 if (Coupled.count()) { 3847 // test coupled subscript groups 3848 SmallVector<Constraint, 4> Constraints(MaxLevels + 1); 3849 for (unsigned II = 0; II <= MaxLevels; ++II) 3850 Constraints[II].setAny(SE); 3851 for (unsigned SI : Coupled.set_bits()) { 3852 SmallBitVector Group(Pair[SI].Group); 3853 SmallBitVector Sivs(Pairs); 3854 SmallBitVector Mivs(Pairs); 3855 SmallBitVector ConstrainedLevels(MaxLevels + 1); 3856 for (unsigned SJ : Group.set_bits()) { 3857 if (Pair[SJ].Classification == Subscript::SIV) 3858 Sivs.set(SJ); 3859 else 3860 Mivs.set(SJ); 3861 } 3862 while (Sivs.any()) { 3863 bool Changed = false; 3864 for (unsigned SJ : Sivs.set_bits()) { 3865 // SJ is an SIV subscript that's part of the current coupled group 3866 unsigned Level; 3867 const SCEV *SplitIter = nullptr; 3868 (void) testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, 3869 Result, NewConstraint, SplitIter); 3870 if (Level == SplitLevel && SplitIter) 3871 return SplitIter; 3872 ConstrainedLevels.set(Level); 3873 if (intersectConstraints(&Constraints[Level], &NewConstraint)) 3874 Changed = true; 3875 Sivs.reset(SJ); 3876 } 3877 if (Changed) { 3878 // propagate, possibly creating new SIVs and ZIVs 3879 for (unsigned SJ : Mivs.set_bits()) { 3880 // SJ is an MIV subscript that's part of the current coupled group 3881 if (propagate(Pair[SJ].Src, Pair[SJ].Dst, 3882 Pair[SJ].Loops, Constraints, Result.Consistent)) { 3883 Pair[SJ].Classification = 3884 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), 3885 Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), 3886 Pair[SJ].Loops); 3887 switch (Pair[SJ].Classification) { 3888 case Subscript::ZIV: 3889 Mivs.reset(SJ); 3890 break; 3891 case Subscript::SIV: 3892 Sivs.set(SJ); 3893 Mivs.reset(SJ); 3894 break; 3895 case Subscript::RDIV: 3896 case Subscript::MIV: 3897 break; 3898 default: 3899 llvm_unreachable("bad subscript classification"); 3900 } 3901 } 3902 } 3903 } 3904 } 3905 } 3906 } 3907 llvm_unreachable("somehow reached end of routine"); 3908 return nullptr; 3909 } 3910