1 //===-- DependenceAnalysis.cpp - DA Implementation --------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // DependenceAnalysis is an LLVM pass that analyses dependences between memory 11 // accesses. Currently, it is an (incomplete) implementation of the approach 12 // described in 13 // 14 // Practical Dependence Testing 15 // Goff, Kennedy, Tseng 16 // PLDI 1991 17 // 18 // There's a single entry point that analyzes the dependence between a pair 19 // of memory references in a function, returning either NULL, for no dependence, 20 // or a more-or-less detailed description of the dependence between them. 21 // 22 // Currently, the implementation cannot propagate constraints between 23 // coupled RDIV subscripts and lacks a multi-subscript MIV test. 24 // Both of these are conservative weaknesses; 25 // that is, not a source of correctness problems. 26 // 27 // The implementation depends on the GEP instruction to differentiate 28 // subscripts. Since Clang linearizes some array subscripts, the dependence 29 // analysis is using SCEV->delinearize to recover the representation of multiple 30 // subscripts, and thus avoid the more expensive and less precise MIV tests. The 31 // delinearization is controlled by the flag -da-delinearize. 32 // 33 // We should pay some careful attention to the possibility of integer overflow 34 // in the implementation of the various tests. This could happen with Add, 35 // Subtract, or Multiply, with both APInt's and SCEV's. 36 // 37 // Some non-linear subscript pairs can be handled by the GCD test 38 // (and perhaps other tests). 39 // Should explore how often these things occur. 40 // 41 // Finally, it seems like certain test cases expose weaknesses in the SCEV 42 // simplification, especially in the handling of sign and zero extensions. 43 // It could be useful to spend time exploring these. 44 // 45 // Please note that this is work in progress and the interface is subject to 46 // change. 47 // 48 //===----------------------------------------------------------------------===// 49 // // 50 // In memory of Ken Kennedy, 1945 - 2007 // 51 // // 52 //===----------------------------------------------------------------------===// 53 54 #include "llvm/Analysis/DependenceAnalysis.h" 55 #include "llvm/ADT/STLExtras.h" 56 #include "llvm/ADT/Statistic.h" 57 #include "llvm/Analysis/AliasAnalysis.h" 58 #include "llvm/Analysis/LoopInfo.h" 59 #include "llvm/Analysis/ScalarEvolution.h" 60 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 61 #include "llvm/Analysis/ValueTracking.h" 62 #include "llvm/IR/InstIterator.h" 63 #include "llvm/IR/Module.h" 64 #include "llvm/IR/Operator.h" 65 #include "llvm/Support/CommandLine.h" 66 #include "llvm/Support/Debug.h" 67 #include "llvm/Support/ErrorHandling.h" 68 #include "llvm/Support/raw_ostream.h" 69 70 using namespace llvm; 71 72 #define DEBUG_TYPE "da" 73 74 //===----------------------------------------------------------------------===// 75 // statistics 76 77 STATISTIC(TotalArrayPairs, "Array pairs tested"); 78 STATISTIC(SeparableSubscriptPairs, "Separable subscript pairs"); 79 STATISTIC(CoupledSubscriptPairs, "Coupled subscript pairs"); 80 STATISTIC(NonlinearSubscriptPairs, "Nonlinear subscript pairs"); 81 STATISTIC(ZIVapplications, "ZIV applications"); 82 STATISTIC(ZIVindependence, "ZIV independence"); 83 STATISTIC(StrongSIVapplications, "Strong SIV applications"); 84 STATISTIC(StrongSIVsuccesses, "Strong SIV successes"); 85 STATISTIC(StrongSIVindependence, "Strong SIV independence"); 86 STATISTIC(WeakCrossingSIVapplications, "Weak-Crossing SIV applications"); 87 STATISTIC(WeakCrossingSIVsuccesses, "Weak-Crossing SIV successes"); 88 STATISTIC(WeakCrossingSIVindependence, "Weak-Crossing SIV independence"); 89 STATISTIC(ExactSIVapplications, "Exact SIV applications"); 90 STATISTIC(ExactSIVsuccesses, "Exact SIV successes"); 91 STATISTIC(ExactSIVindependence, "Exact SIV independence"); 92 STATISTIC(WeakZeroSIVapplications, "Weak-Zero SIV applications"); 93 STATISTIC(WeakZeroSIVsuccesses, "Weak-Zero SIV successes"); 94 STATISTIC(WeakZeroSIVindependence, "Weak-Zero SIV independence"); 95 STATISTIC(ExactRDIVapplications, "Exact RDIV applications"); 96 STATISTIC(ExactRDIVindependence, "Exact RDIV independence"); 97 STATISTIC(SymbolicRDIVapplications, "Symbolic RDIV applications"); 98 STATISTIC(SymbolicRDIVindependence, "Symbolic RDIV independence"); 99 STATISTIC(DeltaApplications, "Delta applications"); 100 STATISTIC(DeltaSuccesses, "Delta successes"); 101 STATISTIC(DeltaIndependence, "Delta independence"); 102 STATISTIC(DeltaPropagations, "Delta propagations"); 103 STATISTIC(GCDapplications, "GCD applications"); 104 STATISTIC(GCDsuccesses, "GCD successes"); 105 STATISTIC(GCDindependence, "GCD independence"); 106 STATISTIC(BanerjeeApplications, "Banerjee applications"); 107 STATISTIC(BanerjeeIndependence, "Banerjee independence"); 108 STATISTIC(BanerjeeSuccesses, "Banerjee successes"); 109 110 static cl::opt<bool> 111 Delinearize("da-delinearize", cl::init(false), cl::Hidden, cl::ZeroOrMore, 112 cl::desc("Try to delinearize array references.")); 113 114 //===----------------------------------------------------------------------===// 115 // basics 116 117 DependenceAnalysis::Result 118 DependenceAnalysis::run(Function &F, FunctionAnalysisManager &FAM) { 119 auto &AA = FAM.getResult<AAManager>(F); 120 auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F); 121 auto &LI = FAM.getResult<LoopAnalysis>(F); 122 return DependenceInfo(&F, &AA, &SE, &LI); 123 } 124 125 char DependenceAnalysis::PassID; 126 127 INITIALIZE_PASS_BEGIN(DependenceAnalysisWrapperPass, "da", 128 "Dependence Analysis", true, true) 129 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 130 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) 131 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 132 INITIALIZE_PASS_END(DependenceAnalysisWrapperPass, "da", "Dependence Analysis", 133 true, true) 134 135 char DependenceAnalysisWrapperPass::ID = 0; 136 137 FunctionPass *llvm::createDependenceAnalysisWrapperPass() { 138 return new DependenceAnalysisWrapperPass(); 139 } 140 141 bool DependenceAnalysisWrapperPass::runOnFunction(Function &F) { 142 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); 143 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 144 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 145 info.reset(new DependenceInfo(&F, &AA, &SE, &LI)); 146 return false; 147 } 148 149 DependenceInfo &DependenceAnalysisWrapperPass::getDI() const { return *info; } 150 151 void DependenceAnalysisWrapperPass::releaseMemory() { info.reset(); } 152 153 void DependenceAnalysisWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { 154 AU.setPreservesAll(); 155 AU.addRequiredTransitive<AAResultsWrapperPass>(); 156 AU.addRequiredTransitive<ScalarEvolutionWrapperPass>(); 157 AU.addRequiredTransitive<LoopInfoWrapperPass>(); 158 } 159 160 161 // Used to test the dependence analyzer. 162 // Looks through the function, noting loads and stores. 163 // Calls depends() on every possible pair and prints out the result. 164 // Ignores all other instructions. 165 static void dumpExampleDependence(raw_ostream &OS, DependenceInfo *DA) { 166 auto *F = DA->getFunction(); 167 for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F); SrcI != SrcE; 168 ++SrcI) { 169 if (isa<StoreInst>(*SrcI) || isa<LoadInst>(*SrcI)) { 170 for (inst_iterator DstI = SrcI, DstE = inst_end(F); 171 DstI != DstE; ++DstI) { 172 if (isa<StoreInst>(*DstI) || isa<LoadInst>(*DstI)) { 173 OS << "da analyze - "; 174 if (auto D = DA->depends(&*SrcI, &*DstI, true)) { 175 D->dump(OS); 176 for (unsigned Level = 1; Level <= D->getLevels(); Level++) { 177 if (D->isSplitable(Level)) { 178 OS << "da analyze - split level = " << Level; 179 OS << ", iteration = " << *DA->getSplitIteration(*D, Level); 180 OS << "!\n"; 181 } 182 } 183 } 184 else 185 OS << "none!\n"; 186 } 187 } 188 } 189 } 190 } 191 192 void DependenceAnalysisWrapperPass::print(raw_ostream &OS, 193 const Module *) const { 194 dumpExampleDependence(OS, info.get()); 195 } 196 197 //===----------------------------------------------------------------------===// 198 // Dependence methods 199 200 // Returns true if this is an input dependence. 201 bool Dependence::isInput() const { 202 return Src->mayReadFromMemory() && Dst->mayReadFromMemory(); 203 } 204 205 206 // Returns true if this is an output dependence. 207 bool Dependence::isOutput() const { 208 return Src->mayWriteToMemory() && Dst->mayWriteToMemory(); 209 } 210 211 212 // Returns true if this is an flow (aka true) dependence. 213 bool Dependence::isFlow() const { 214 return Src->mayWriteToMemory() && Dst->mayReadFromMemory(); 215 } 216 217 218 // Returns true if this is an anti dependence. 219 bool Dependence::isAnti() const { 220 return Src->mayReadFromMemory() && Dst->mayWriteToMemory(); 221 } 222 223 224 // Returns true if a particular level is scalar; that is, 225 // if no subscript in the source or destination mention the induction 226 // variable associated with the loop at this level. 227 // Leave this out of line, so it will serve as a virtual method anchor 228 bool Dependence::isScalar(unsigned level) const { 229 return false; 230 } 231 232 233 //===----------------------------------------------------------------------===// 234 // FullDependence methods 235 236 FullDependence::FullDependence(Instruction *Source, Instruction *Destination, 237 bool PossiblyLoopIndependent, 238 unsigned CommonLevels) 239 : Dependence(Source, Destination), Levels(CommonLevels), 240 LoopIndependent(PossiblyLoopIndependent) { 241 Consistent = true; 242 if (CommonLevels) 243 DV = make_unique<DVEntry[]>(CommonLevels); 244 } 245 246 // The rest are simple getters that hide the implementation. 247 248 // getDirection - Returns the direction associated with a particular level. 249 unsigned FullDependence::getDirection(unsigned Level) const { 250 assert(0 < Level && Level <= Levels && "Level out of range"); 251 return DV[Level - 1].Direction; 252 } 253 254 255 // Returns the distance (or NULL) associated with a particular level. 256 const SCEV *FullDependence::getDistance(unsigned Level) const { 257 assert(0 < Level && Level <= Levels && "Level out of range"); 258 return DV[Level - 1].Distance; 259 } 260 261 262 // Returns true if a particular level is scalar; that is, 263 // if no subscript in the source or destination mention the induction 264 // variable associated with the loop at this level. 265 bool FullDependence::isScalar(unsigned Level) const { 266 assert(0 < Level && Level <= Levels && "Level out of range"); 267 return DV[Level - 1].Scalar; 268 } 269 270 271 // Returns true if peeling the first iteration from this loop 272 // will break this dependence. 273 bool FullDependence::isPeelFirst(unsigned Level) const { 274 assert(0 < Level && Level <= Levels && "Level out of range"); 275 return DV[Level - 1].PeelFirst; 276 } 277 278 279 // Returns true if peeling the last iteration from this loop 280 // will break this dependence. 281 bool FullDependence::isPeelLast(unsigned Level) const { 282 assert(0 < Level && Level <= Levels && "Level out of range"); 283 return DV[Level - 1].PeelLast; 284 } 285 286 287 // Returns true if splitting this loop will break the dependence. 288 bool FullDependence::isSplitable(unsigned Level) const { 289 assert(0 < Level && Level <= Levels && "Level out of range"); 290 return DV[Level - 1].Splitable; 291 } 292 293 294 //===----------------------------------------------------------------------===// 295 // DependenceInfo::Constraint methods 296 297 // If constraint is a point <X, Y>, returns X. 298 // Otherwise assert. 299 const SCEV *DependenceInfo::Constraint::getX() const { 300 assert(Kind == Point && "Kind should be Point"); 301 return A; 302 } 303 304 305 // If constraint is a point <X, Y>, returns Y. 306 // Otherwise assert. 307 const SCEV *DependenceInfo::Constraint::getY() const { 308 assert(Kind == Point && "Kind should be Point"); 309 return B; 310 } 311 312 313 // If constraint is a line AX + BY = C, returns A. 314 // Otherwise assert. 315 const SCEV *DependenceInfo::Constraint::getA() const { 316 assert((Kind == Line || Kind == Distance) && 317 "Kind should be Line (or Distance)"); 318 return A; 319 } 320 321 322 // If constraint is a line AX + BY = C, returns B. 323 // Otherwise assert. 324 const SCEV *DependenceInfo::Constraint::getB() const { 325 assert((Kind == Line || Kind == Distance) && 326 "Kind should be Line (or Distance)"); 327 return B; 328 } 329 330 331 // If constraint is a line AX + BY = C, returns C. 332 // Otherwise assert. 333 const SCEV *DependenceInfo::Constraint::getC() const { 334 assert((Kind == Line || Kind == Distance) && 335 "Kind should be Line (or Distance)"); 336 return C; 337 } 338 339 340 // If constraint is a distance, returns D. 341 // Otherwise assert. 342 const SCEV *DependenceInfo::Constraint::getD() const { 343 assert(Kind == Distance && "Kind should be Distance"); 344 return SE->getNegativeSCEV(C); 345 } 346 347 348 // Returns the loop associated with this constraint. 349 const Loop *DependenceInfo::Constraint::getAssociatedLoop() const { 350 assert((Kind == Distance || Kind == Line || Kind == Point) && 351 "Kind should be Distance, Line, or Point"); 352 return AssociatedLoop; 353 } 354 355 void DependenceInfo::Constraint::setPoint(const SCEV *X, const SCEV *Y, 356 const Loop *CurLoop) { 357 Kind = Point; 358 A = X; 359 B = Y; 360 AssociatedLoop = CurLoop; 361 } 362 363 void DependenceInfo::Constraint::setLine(const SCEV *AA, const SCEV *BB, 364 const SCEV *CC, const Loop *CurLoop) { 365 Kind = Line; 366 A = AA; 367 B = BB; 368 C = CC; 369 AssociatedLoop = CurLoop; 370 } 371 372 void DependenceInfo::Constraint::setDistance(const SCEV *D, 373 const Loop *CurLoop) { 374 Kind = Distance; 375 A = SE->getOne(D->getType()); 376 B = SE->getNegativeSCEV(A); 377 C = SE->getNegativeSCEV(D); 378 AssociatedLoop = CurLoop; 379 } 380 381 void DependenceInfo::Constraint::setEmpty() { Kind = Empty; } 382 383 void DependenceInfo::Constraint::setAny(ScalarEvolution *NewSE) { 384 SE = NewSE; 385 Kind = Any; 386 } 387 388 389 // For debugging purposes. Dumps the constraint out to OS. 390 void DependenceInfo::Constraint::dump(raw_ostream &OS) const { 391 if (isEmpty()) 392 OS << " Empty\n"; 393 else if (isAny()) 394 OS << " Any\n"; 395 else if (isPoint()) 396 OS << " Point is <" << *getX() << ", " << *getY() << ">\n"; 397 else if (isDistance()) 398 OS << " Distance is " << *getD() << 399 " (" << *getA() << "*X + " << *getB() << "*Y = " << *getC() << ")\n"; 400 else if (isLine()) 401 OS << " Line is " << *getA() << "*X + " << 402 *getB() << "*Y = " << *getC() << "\n"; 403 else 404 llvm_unreachable("unknown constraint type in Constraint::dump"); 405 } 406 407 408 // Updates X with the intersection 409 // of the Constraints X and Y. Returns true if X has changed. 410 // Corresponds to Figure 4 from the paper 411 // 412 // Practical Dependence Testing 413 // Goff, Kennedy, Tseng 414 // PLDI 1991 415 bool DependenceInfo::intersectConstraints(Constraint *X, const Constraint *Y) { 416 ++DeltaApplications; 417 DEBUG(dbgs() << "\tintersect constraints\n"); 418 DEBUG(dbgs() << "\t X ="; X->dump(dbgs())); 419 DEBUG(dbgs() << "\t Y ="; Y->dump(dbgs())); 420 assert(!Y->isPoint() && "Y must not be a Point"); 421 if (X->isAny()) { 422 if (Y->isAny()) 423 return false; 424 *X = *Y; 425 return true; 426 } 427 if (X->isEmpty()) 428 return false; 429 if (Y->isEmpty()) { 430 X->setEmpty(); 431 return true; 432 } 433 434 if (X->isDistance() && Y->isDistance()) { 435 DEBUG(dbgs() << "\t intersect 2 distances\n"); 436 if (isKnownPredicate(CmpInst::ICMP_EQ, X->getD(), Y->getD())) 437 return false; 438 if (isKnownPredicate(CmpInst::ICMP_NE, X->getD(), Y->getD())) { 439 X->setEmpty(); 440 ++DeltaSuccesses; 441 return true; 442 } 443 // Hmmm, interesting situation. 444 // I guess if either is constant, keep it and ignore the other. 445 if (isa<SCEVConstant>(Y->getD())) { 446 *X = *Y; 447 return true; 448 } 449 return false; 450 } 451 452 // At this point, the pseudo-code in Figure 4 of the paper 453 // checks if (X->isPoint() && Y->isPoint()). 454 // This case can't occur in our implementation, 455 // since a Point can only arise as the result of intersecting 456 // two Line constraints, and the right-hand value, Y, is never 457 // the result of an intersection. 458 assert(!(X->isPoint() && Y->isPoint()) && 459 "We shouldn't ever see X->isPoint() && Y->isPoint()"); 460 461 if (X->isLine() && Y->isLine()) { 462 DEBUG(dbgs() << "\t intersect 2 lines\n"); 463 const SCEV *Prod1 = SE->getMulExpr(X->getA(), Y->getB()); 464 const SCEV *Prod2 = SE->getMulExpr(X->getB(), Y->getA()); 465 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) { 466 // slopes are equal, so lines are parallel 467 DEBUG(dbgs() << "\t\tsame slope\n"); 468 Prod1 = SE->getMulExpr(X->getC(), Y->getB()); 469 Prod2 = SE->getMulExpr(X->getB(), Y->getC()); 470 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) 471 return false; 472 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { 473 X->setEmpty(); 474 ++DeltaSuccesses; 475 return true; 476 } 477 return false; 478 } 479 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { 480 // slopes differ, so lines intersect 481 DEBUG(dbgs() << "\t\tdifferent slopes\n"); 482 const SCEV *C1B2 = SE->getMulExpr(X->getC(), Y->getB()); 483 const SCEV *C1A2 = SE->getMulExpr(X->getC(), Y->getA()); 484 const SCEV *C2B1 = SE->getMulExpr(Y->getC(), X->getB()); 485 const SCEV *C2A1 = SE->getMulExpr(Y->getC(), X->getA()); 486 const SCEV *A1B2 = SE->getMulExpr(X->getA(), Y->getB()); 487 const SCEV *A2B1 = SE->getMulExpr(Y->getA(), X->getB()); 488 const SCEVConstant *C1A2_C2A1 = 489 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1A2, C2A1)); 490 const SCEVConstant *C1B2_C2B1 = 491 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1B2, C2B1)); 492 const SCEVConstant *A1B2_A2B1 = 493 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A1B2, A2B1)); 494 const SCEVConstant *A2B1_A1B2 = 495 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A2B1, A1B2)); 496 if (!C1B2_C2B1 || !C1A2_C2A1 || 497 !A1B2_A2B1 || !A2B1_A1B2) 498 return false; 499 APInt Xtop = C1B2_C2B1->getAPInt(); 500 APInt Xbot = A1B2_A2B1->getAPInt(); 501 APInt Ytop = C1A2_C2A1->getAPInt(); 502 APInt Ybot = A2B1_A1B2->getAPInt(); 503 DEBUG(dbgs() << "\t\tXtop = " << Xtop << "\n"); 504 DEBUG(dbgs() << "\t\tXbot = " << Xbot << "\n"); 505 DEBUG(dbgs() << "\t\tYtop = " << Ytop << "\n"); 506 DEBUG(dbgs() << "\t\tYbot = " << Ybot << "\n"); 507 APInt Xq = Xtop; // these need to be initialized, even 508 APInt Xr = Xtop; // though they're just going to be overwritten 509 APInt::sdivrem(Xtop, Xbot, Xq, Xr); 510 APInt Yq = Ytop; 511 APInt Yr = Ytop; 512 APInt::sdivrem(Ytop, Ybot, Yq, Yr); 513 if (Xr != 0 || Yr != 0) { 514 X->setEmpty(); 515 ++DeltaSuccesses; 516 return true; 517 } 518 DEBUG(dbgs() << "\t\tX = " << Xq << ", Y = " << Yq << "\n"); 519 if (Xq.slt(0) || Yq.slt(0)) { 520 X->setEmpty(); 521 ++DeltaSuccesses; 522 return true; 523 } 524 if (const SCEVConstant *CUB = 525 collectConstantUpperBound(X->getAssociatedLoop(), Prod1->getType())) { 526 APInt UpperBound = CUB->getAPInt(); 527 DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n"); 528 if (Xq.sgt(UpperBound) || Yq.sgt(UpperBound)) { 529 X->setEmpty(); 530 ++DeltaSuccesses; 531 return true; 532 } 533 } 534 X->setPoint(SE->getConstant(Xq), 535 SE->getConstant(Yq), 536 X->getAssociatedLoop()); 537 ++DeltaSuccesses; 538 return true; 539 } 540 return false; 541 } 542 543 // if (X->isLine() && Y->isPoint()) This case can't occur. 544 assert(!(X->isLine() && Y->isPoint()) && "This case should never occur"); 545 546 if (X->isPoint() && Y->isLine()) { 547 DEBUG(dbgs() << "\t intersect Point and Line\n"); 548 const SCEV *A1X1 = SE->getMulExpr(Y->getA(), X->getX()); 549 const SCEV *B1Y1 = SE->getMulExpr(Y->getB(), X->getY()); 550 const SCEV *Sum = SE->getAddExpr(A1X1, B1Y1); 551 if (isKnownPredicate(CmpInst::ICMP_EQ, Sum, Y->getC())) 552 return false; 553 if (isKnownPredicate(CmpInst::ICMP_NE, Sum, Y->getC())) { 554 X->setEmpty(); 555 ++DeltaSuccesses; 556 return true; 557 } 558 return false; 559 } 560 561 llvm_unreachable("shouldn't reach the end of Constraint intersection"); 562 return false; 563 } 564 565 566 //===----------------------------------------------------------------------===// 567 // DependenceInfo methods 568 569 // For debugging purposes. Dumps a dependence to OS. 570 void Dependence::dump(raw_ostream &OS) const { 571 bool Splitable = false; 572 if (isConfused()) 573 OS << "confused"; 574 else { 575 if (isConsistent()) 576 OS << "consistent "; 577 if (isFlow()) 578 OS << "flow"; 579 else if (isOutput()) 580 OS << "output"; 581 else if (isAnti()) 582 OS << "anti"; 583 else if (isInput()) 584 OS << "input"; 585 unsigned Levels = getLevels(); 586 OS << " ["; 587 for (unsigned II = 1; II <= Levels; ++II) { 588 if (isSplitable(II)) 589 Splitable = true; 590 if (isPeelFirst(II)) 591 OS << 'p'; 592 const SCEV *Distance = getDistance(II); 593 if (Distance) 594 OS << *Distance; 595 else if (isScalar(II)) 596 OS << "S"; 597 else { 598 unsigned Direction = getDirection(II); 599 if (Direction == DVEntry::ALL) 600 OS << "*"; 601 else { 602 if (Direction & DVEntry::LT) 603 OS << "<"; 604 if (Direction & DVEntry::EQ) 605 OS << "="; 606 if (Direction & DVEntry::GT) 607 OS << ">"; 608 } 609 } 610 if (isPeelLast(II)) 611 OS << 'p'; 612 if (II < Levels) 613 OS << " "; 614 } 615 if (isLoopIndependent()) 616 OS << "|<"; 617 OS << "]"; 618 if (Splitable) 619 OS << " splitable"; 620 } 621 OS << "!\n"; 622 } 623 624 static AliasResult underlyingObjectsAlias(AliasAnalysis *AA, 625 const DataLayout &DL, const Value *A, 626 const Value *B) { 627 const Value *AObj = GetUnderlyingObject(A, DL); 628 const Value *BObj = GetUnderlyingObject(B, DL); 629 return AA->alias(AObj, DL.getTypeStoreSize(AObj->getType()), 630 BObj, DL.getTypeStoreSize(BObj->getType())); 631 } 632 633 634 // Returns true if the load or store can be analyzed. Atomic and volatile 635 // operations have properties which this analysis does not understand. 636 static 637 bool isLoadOrStore(const Instruction *I) { 638 if (const LoadInst *LI = dyn_cast<LoadInst>(I)) 639 return LI->isUnordered(); 640 else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) 641 return SI->isUnordered(); 642 return false; 643 } 644 645 646 static 647 Value *getPointerOperand(Instruction *I) { 648 if (LoadInst *LI = dyn_cast<LoadInst>(I)) 649 return LI->getPointerOperand(); 650 if (StoreInst *SI = dyn_cast<StoreInst>(I)) 651 return SI->getPointerOperand(); 652 llvm_unreachable("Value is not load or store instruction"); 653 return nullptr; 654 } 655 656 657 // Examines the loop nesting of the Src and Dst 658 // instructions and establishes their shared loops. Sets the variables 659 // CommonLevels, SrcLevels, and MaxLevels. 660 // The source and destination instructions needn't be contained in the same 661 // loop. The routine establishNestingLevels finds the level of most deeply 662 // nested loop that contains them both, CommonLevels. An instruction that's 663 // not contained in a loop is at level = 0. MaxLevels is equal to the level 664 // of the source plus the level of the destination, minus CommonLevels. 665 // This lets us allocate vectors MaxLevels in length, with room for every 666 // distinct loop referenced in both the source and destination subscripts. 667 // The variable SrcLevels is the nesting depth of the source instruction. 668 // It's used to help calculate distinct loops referenced by the destination. 669 // Here's the map from loops to levels: 670 // 0 - unused 671 // 1 - outermost common loop 672 // ... - other common loops 673 // CommonLevels - innermost common loop 674 // ... - loops containing Src but not Dst 675 // SrcLevels - innermost loop containing Src but not Dst 676 // ... - loops containing Dst but not Src 677 // MaxLevels - innermost loops containing Dst but not Src 678 // Consider the follow code fragment: 679 // for (a = ...) { 680 // for (b = ...) { 681 // for (c = ...) { 682 // for (d = ...) { 683 // A[] = ...; 684 // } 685 // } 686 // for (e = ...) { 687 // for (f = ...) { 688 // for (g = ...) { 689 // ... = A[]; 690 // } 691 // } 692 // } 693 // } 694 // } 695 // If we're looking at the possibility of a dependence between the store 696 // to A (the Src) and the load from A (the Dst), we'll note that they 697 // have 2 loops in common, so CommonLevels will equal 2 and the direction 698 // vector for Result will have 2 entries. SrcLevels = 4 and MaxLevels = 7. 699 // A map from loop names to loop numbers would look like 700 // a - 1 701 // b - 2 = CommonLevels 702 // c - 3 703 // d - 4 = SrcLevels 704 // e - 5 705 // f - 6 706 // g - 7 = MaxLevels 707 void DependenceInfo::establishNestingLevels(const Instruction *Src, 708 const Instruction *Dst) { 709 const BasicBlock *SrcBlock = Src->getParent(); 710 const BasicBlock *DstBlock = Dst->getParent(); 711 unsigned SrcLevel = LI->getLoopDepth(SrcBlock); 712 unsigned DstLevel = LI->getLoopDepth(DstBlock); 713 const Loop *SrcLoop = LI->getLoopFor(SrcBlock); 714 const Loop *DstLoop = LI->getLoopFor(DstBlock); 715 SrcLevels = SrcLevel; 716 MaxLevels = SrcLevel + DstLevel; 717 while (SrcLevel > DstLevel) { 718 SrcLoop = SrcLoop->getParentLoop(); 719 SrcLevel--; 720 } 721 while (DstLevel > SrcLevel) { 722 DstLoop = DstLoop->getParentLoop(); 723 DstLevel--; 724 } 725 while (SrcLoop != DstLoop) { 726 SrcLoop = SrcLoop->getParentLoop(); 727 DstLoop = DstLoop->getParentLoop(); 728 SrcLevel--; 729 } 730 CommonLevels = SrcLevel; 731 MaxLevels -= CommonLevels; 732 } 733 734 735 // Given one of the loops containing the source, return 736 // its level index in our numbering scheme. 737 unsigned DependenceInfo::mapSrcLoop(const Loop *SrcLoop) const { 738 return SrcLoop->getLoopDepth(); 739 } 740 741 742 // Given one of the loops containing the destination, 743 // return its level index in our numbering scheme. 744 unsigned DependenceInfo::mapDstLoop(const Loop *DstLoop) const { 745 unsigned D = DstLoop->getLoopDepth(); 746 if (D > CommonLevels) 747 return D - CommonLevels + SrcLevels; 748 else 749 return D; 750 } 751 752 753 // Returns true if Expression is loop invariant in LoopNest. 754 bool DependenceInfo::isLoopInvariant(const SCEV *Expression, 755 const Loop *LoopNest) const { 756 if (!LoopNest) 757 return true; 758 return SE->isLoopInvariant(Expression, LoopNest) && 759 isLoopInvariant(Expression, LoopNest->getParentLoop()); 760 } 761 762 763 764 // Finds the set of loops from the LoopNest that 765 // have a level <= CommonLevels and are referred to by the SCEV Expression. 766 void DependenceInfo::collectCommonLoops(const SCEV *Expression, 767 const Loop *LoopNest, 768 SmallBitVector &Loops) const { 769 while (LoopNest) { 770 unsigned Level = LoopNest->getLoopDepth(); 771 if (Level <= CommonLevels && !SE->isLoopInvariant(Expression, LoopNest)) 772 Loops.set(Level); 773 LoopNest = LoopNest->getParentLoop(); 774 } 775 } 776 777 void DependenceInfo::unifySubscriptType(ArrayRef<Subscript *> Pairs) { 778 779 unsigned widestWidthSeen = 0; 780 Type *widestType; 781 782 // Go through each pair and find the widest bit to which we need 783 // to extend all of them. 784 for (unsigned i = 0; i < Pairs.size(); i++) { 785 const SCEV *Src = Pairs[i]->Src; 786 const SCEV *Dst = Pairs[i]->Dst; 787 IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType()); 788 IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType()); 789 if (SrcTy == nullptr || DstTy == nullptr) { 790 assert(SrcTy == DstTy && "This function only unify integer types and " 791 "expect Src and Dst share the same type " 792 "otherwise."); 793 continue; 794 } 795 if (SrcTy->getBitWidth() > widestWidthSeen) { 796 widestWidthSeen = SrcTy->getBitWidth(); 797 widestType = SrcTy; 798 } 799 if (DstTy->getBitWidth() > widestWidthSeen) { 800 widestWidthSeen = DstTy->getBitWidth(); 801 widestType = DstTy; 802 } 803 } 804 805 806 assert(widestWidthSeen > 0); 807 808 // Now extend each pair to the widest seen. 809 for (unsigned i = 0; i < Pairs.size(); i++) { 810 const SCEV *Src = Pairs[i]->Src; 811 const SCEV *Dst = Pairs[i]->Dst; 812 IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType()); 813 IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType()); 814 if (SrcTy == nullptr || DstTy == nullptr) { 815 assert(SrcTy == DstTy && "This function only unify integer types and " 816 "expect Src and Dst share the same type " 817 "otherwise."); 818 continue; 819 } 820 if (SrcTy->getBitWidth() < widestWidthSeen) 821 // Sign-extend Src to widestType 822 Pairs[i]->Src = SE->getSignExtendExpr(Src, widestType); 823 if (DstTy->getBitWidth() < widestWidthSeen) { 824 // Sign-extend Dst to widestType 825 Pairs[i]->Dst = SE->getSignExtendExpr(Dst, widestType); 826 } 827 } 828 } 829 830 // removeMatchingExtensions - Examines a subscript pair. 831 // If the source and destination are identically sign (or zero) 832 // extended, it strips off the extension in an effect to simplify 833 // the actual analysis. 834 void DependenceInfo::removeMatchingExtensions(Subscript *Pair) { 835 const SCEV *Src = Pair->Src; 836 const SCEV *Dst = Pair->Dst; 837 if ((isa<SCEVZeroExtendExpr>(Src) && isa<SCEVZeroExtendExpr>(Dst)) || 838 (isa<SCEVSignExtendExpr>(Src) && isa<SCEVSignExtendExpr>(Dst))) { 839 const SCEVCastExpr *SrcCast = cast<SCEVCastExpr>(Src); 840 const SCEVCastExpr *DstCast = cast<SCEVCastExpr>(Dst); 841 const SCEV *SrcCastOp = SrcCast->getOperand(); 842 const SCEV *DstCastOp = DstCast->getOperand(); 843 if (SrcCastOp->getType() == DstCastOp->getType()) { 844 Pair->Src = SrcCastOp; 845 Pair->Dst = DstCastOp; 846 } 847 } 848 } 849 850 851 // Examine the scev and return true iff it's linear. 852 // Collect any loops mentioned in the set of "Loops". 853 bool DependenceInfo::checkSrcSubscript(const SCEV *Src, const Loop *LoopNest, 854 SmallBitVector &Loops) { 855 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Src); 856 if (!AddRec) 857 return isLoopInvariant(Src, LoopNest); 858 const SCEV *Start = AddRec->getStart(); 859 const SCEV *Step = AddRec->getStepRecurrence(*SE); 860 const SCEV *UB = SE->getBackedgeTakenCount(AddRec->getLoop()); 861 if (!isa<SCEVCouldNotCompute>(UB)) { 862 if (SE->getTypeSizeInBits(Start->getType()) < 863 SE->getTypeSizeInBits(UB->getType())) { 864 if (!AddRec->getNoWrapFlags()) 865 return false; 866 } 867 } 868 if (!isLoopInvariant(Step, LoopNest)) 869 return false; 870 Loops.set(mapSrcLoop(AddRec->getLoop())); 871 return checkSrcSubscript(Start, LoopNest, Loops); 872 } 873 874 875 876 // Examine the scev and return true iff it's linear. 877 // Collect any loops mentioned in the set of "Loops". 878 bool DependenceInfo::checkDstSubscript(const SCEV *Dst, const Loop *LoopNest, 879 SmallBitVector &Loops) { 880 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Dst); 881 if (!AddRec) 882 return isLoopInvariant(Dst, LoopNest); 883 const SCEV *Start = AddRec->getStart(); 884 const SCEV *Step = AddRec->getStepRecurrence(*SE); 885 const SCEV *UB = SE->getBackedgeTakenCount(AddRec->getLoop()); 886 if (!isa<SCEVCouldNotCompute>(UB)) { 887 if (SE->getTypeSizeInBits(Start->getType()) < 888 SE->getTypeSizeInBits(UB->getType())) { 889 if (!AddRec->getNoWrapFlags()) 890 return false; 891 } 892 } 893 if (!isLoopInvariant(Step, LoopNest)) 894 return false; 895 Loops.set(mapDstLoop(AddRec->getLoop())); 896 return checkDstSubscript(Start, LoopNest, Loops); 897 } 898 899 900 // Examines the subscript pair (the Src and Dst SCEVs) 901 // and classifies it as either ZIV, SIV, RDIV, MIV, or Nonlinear. 902 // Collects the associated loops in a set. 903 DependenceInfo::Subscript::ClassificationKind 904 DependenceInfo::classifyPair(const SCEV *Src, const Loop *SrcLoopNest, 905 const SCEV *Dst, const Loop *DstLoopNest, 906 SmallBitVector &Loops) { 907 SmallBitVector SrcLoops(MaxLevels + 1); 908 SmallBitVector DstLoops(MaxLevels + 1); 909 if (!checkSrcSubscript(Src, SrcLoopNest, SrcLoops)) 910 return Subscript::NonLinear; 911 if (!checkDstSubscript(Dst, DstLoopNest, DstLoops)) 912 return Subscript::NonLinear; 913 Loops = SrcLoops; 914 Loops |= DstLoops; 915 unsigned N = Loops.count(); 916 if (N == 0) 917 return Subscript::ZIV; 918 if (N == 1) 919 return Subscript::SIV; 920 if (N == 2 && (SrcLoops.count() == 0 || 921 DstLoops.count() == 0 || 922 (SrcLoops.count() == 1 && DstLoops.count() == 1))) 923 return Subscript::RDIV; 924 return Subscript::MIV; 925 } 926 927 928 // A wrapper around SCEV::isKnownPredicate. 929 // Looks for cases where we're interested in comparing for equality. 930 // If both X and Y have been identically sign or zero extended, 931 // it strips off the (confusing) extensions before invoking 932 // SCEV::isKnownPredicate. Perhaps, someday, the ScalarEvolution package 933 // will be similarly updated. 934 // 935 // If SCEV::isKnownPredicate can't prove the predicate, 936 // we try simple subtraction, which seems to help in some cases 937 // involving symbolics. 938 bool DependenceInfo::isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *X, 939 const SCEV *Y) const { 940 if (Pred == CmpInst::ICMP_EQ || 941 Pred == CmpInst::ICMP_NE) { 942 if ((isa<SCEVSignExtendExpr>(X) && 943 isa<SCEVSignExtendExpr>(Y)) || 944 (isa<SCEVZeroExtendExpr>(X) && 945 isa<SCEVZeroExtendExpr>(Y))) { 946 const SCEVCastExpr *CX = cast<SCEVCastExpr>(X); 947 const SCEVCastExpr *CY = cast<SCEVCastExpr>(Y); 948 const SCEV *Xop = CX->getOperand(); 949 const SCEV *Yop = CY->getOperand(); 950 if (Xop->getType() == Yop->getType()) { 951 X = Xop; 952 Y = Yop; 953 } 954 } 955 } 956 if (SE->isKnownPredicate(Pred, X, Y)) 957 return true; 958 // If SE->isKnownPredicate can't prove the condition, 959 // we try the brute-force approach of subtracting 960 // and testing the difference. 961 // By testing with SE->isKnownPredicate first, we avoid 962 // the possibility of overflow when the arguments are constants. 963 const SCEV *Delta = SE->getMinusSCEV(X, Y); 964 switch (Pred) { 965 case CmpInst::ICMP_EQ: 966 return Delta->isZero(); 967 case CmpInst::ICMP_NE: 968 return SE->isKnownNonZero(Delta); 969 case CmpInst::ICMP_SGE: 970 return SE->isKnownNonNegative(Delta); 971 case CmpInst::ICMP_SLE: 972 return SE->isKnownNonPositive(Delta); 973 case CmpInst::ICMP_SGT: 974 return SE->isKnownPositive(Delta); 975 case CmpInst::ICMP_SLT: 976 return SE->isKnownNegative(Delta); 977 default: 978 llvm_unreachable("unexpected predicate in isKnownPredicate"); 979 } 980 } 981 982 983 // All subscripts are all the same type. 984 // Loop bound may be smaller (e.g., a char). 985 // Should zero extend loop bound, since it's always >= 0. 986 // This routine collects upper bound and extends or truncates if needed. 987 // Truncating is safe when subscripts are known not to wrap. Cases without 988 // nowrap flags should have been rejected earlier. 989 // Return null if no bound available. 990 const SCEV *DependenceInfo::collectUpperBound(const Loop *L, Type *T) const { 991 if (SE->hasLoopInvariantBackedgeTakenCount(L)) { 992 const SCEV *UB = SE->getBackedgeTakenCount(L); 993 return SE->getTruncateOrZeroExtend(UB, T); 994 } 995 return nullptr; 996 } 997 998 999 // Calls collectUpperBound(), then attempts to cast it to SCEVConstant. 1000 // If the cast fails, returns NULL. 1001 const SCEVConstant *DependenceInfo::collectConstantUpperBound(const Loop *L, 1002 Type *T) const { 1003 if (const SCEV *UB = collectUpperBound(L, T)) 1004 return dyn_cast<SCEVConstant>(UB); 1005 return nullptr; 1006 } 1007 1008 1009 // testZIV - 1010 // When we have a pair of subscripts of the form [c1] and [c2], 1011 // where c1 and c2 are both loop invariant, we attack it using 1012 // the ZIV test. Basically, we test by comparing the two values, 1013 // but there are actually three possible results: 1014 // 1) the values are equal, so there's a dependence 1015 // 2) the values are different, so there's no dependence 1016 // 3) the values might be equal, so we have to assume a dependence. 1017 // 1018 // Return true if dependence disproved. 1019 bool DependenceInfo::testZIV(const SCEV *Src, const SCEV *Dst, 1020 FullDependence &Result) const { 1021 DEBUG(dbgs() << " src = " << *Src << "\n"); 1022 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 1023 ++ZIVapplications; 1024 if (isKnownPredicate(CmpInst::ICMP_EQ, Src, Dst)) { 1025 DEBUG(dbgs() << " provably dependent\n"); 1026 return false; // provably dependent 1027 } 1028 if (isKnownPredicate(CmpInst::ICMP_NE, Src, Dst)) { 1029 DEBUG(dbgs() << " provably independent\n"); 1030 ++ZIVindependence; 1031 return true; // provably independent 1032 } 1033 DEBUG(dbgs() << " possibly dependent\n"); 1034 Result.Consistent = false; 1035 return false; // possibly dependent 1036 } 1037 1038 1039 // strongSIVtest - 1040 // From the paper, Practical Dependence Testing, Section 4.2.1 1041 // 1042 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 + a*i], 1043 // where i is an induction variable, c1 and c2 are loop invariant, 1044 // and a is a constant, we can solve it exactly using the Strong SIV test. 1045 // 1046 // Can prove independence. Failing that, can compute distance (and direction). 1047 // In the presence of symbolic terms, we can sometimes make progress. 1048 // 1049 // If there's a dependence, 1050 // 1051 // c1 + a*i = c2 + a*i' 1052 // 1053 // The dependence distance is 1054 // 1055 // d = i' - i = (c1 - c2)/a 1056 // 1057 // A dependence only exists if d is an integer and abs(d) <= U, where U is the 1058 // loop's upper bound. If a dependence exists, the dependence direction is 1059 // defined as 1060 // 1061 // { < if d > 0 1062 // direction = { = if d = 0 1063 // { > if d < 0 1064 // 1065 // Return true if dependence disproved. 1066 bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst, 1067 const SCEV *DstConst, const Loop *CurLoop, 1068 unsigned Level, FullDependence &Result, 1069 Constraint &NewConstraint) const { 1070 DEBUG(dbgs() << "\tStrong SIV test\n"); 1071 DEBUG(dbgs() << "\t Coeff = " << *Coeff); 1072 DEBUG(dbgs() << ", " << *Coeff->getType() << "\n"); 1073 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst); 1074 DEBUG(dbgs() << ", " << *SrcConst->getType() << "\n"); 1075 DEBUG(dbgs() << "\t DstConst = " << *DstConst); 1076 DEBUG(dbgs() << ", " << *DstConst->getType() << "\n"); 1077 ++StrongSIVapplications; 1078 assert(0 < Level && Level <= CommonLevels && "level out of range"); 1079 Level--; 1080 1081 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); 1082 DEBUG(dbgs() << "\t Delta = " << *Delta); 1083 DEBUG(dbgs() << ", " << *Delta->getType() << "\n"); 1084 1085 // check that |Delta| < iteration count 1086 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1087 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound); 1088 DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n"); 1089 const SCEV *AbsDelta = 1090 SE->isKnownNonNegative(Delta) ? Delta : SE->getNegativeSCEV(Delta); 1091 const SCEV *AbsCoeff = 1092 SE->isKnownNonNegative(Coeff) ? Coeff : SE->getNegativeSCEV(Coeff); 1093 const SCEV *Product = SE->getMulExpr(UpperBound, AbsCoeff); 1094 if (isKnownPredicate(CmpInst::ICMP_SGT, AbsDelta, Product)) { 1095 // Distance greater than trip count - no dependence 1096 ++StrongSIVindependence; 1097 ++StrongSIVsuccesses; 1098 return true; 1099 } 1100 } 1101 1102 // Can we compute distance? 1103 if (isa<SCEVConstant>(Delta) && isa<SCEVConstant>(Coeff)) { 1104 APInt ConstDelta = cast<SCEVConstant>(Delta)->getAPInt(); 1105 APInt ConstCoeff = cast<SCEVConstant>(Coeff)->getAPInt(); 1106 APInt Distance = ConstDelta; // these need to be initialized 1107 APInt Remainder = ConstDelta; 1108 APInt::sdivrem(ConstDelta, ConstCoeff, Distance, Remainder); 1109 DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); 1110 DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1111 // Make sure Coeff divides Delta exactly 1112 if (Remainder != 0) { 1113 // Coeff doesn't divide Distance, no dependence 1114 ++StrongSIVindependence; 1115 ++StrongSIVsuccesses; 1116 return true; 1117 } 1118 Result.DV[Level].Distance = SE->getConstant(Distance); 1119 NewConstraint.setDistance(SE->getConstant(Distance), CurLoop); 1120 if (Distance.sgt(0)) 1121 Result.DV[Level].Direction &= Dependence::DVEntry::LT; 1122 else if (Distance.slt(0)) 1123 Result.DV[Level].Direction &= Dependence::DVEntry::GT; 1124 else 1125 Result.DV[Level].Direction &= Dependence::DVEntry::EQ; 1126 ++StrongSIVsuccesses; 1127 } 1128 else if (Delta->isZero()) { 1129 // since 0/X == 0 1130 Result.DV[Level].Distance = Delta; 1131 NewConstraint.setDistance(Delta, CurLoop); 1132 Result.DV[Level].Direction &= Dependence::DVEntry::EQ; 1133 ++StrongSIVsuccesses; 1134 } 1135 else { 1136 if (Coeff->isOne()) { 1137 DEBUG(dbgs() << "\t Distance = " << *Delta << "\n"); 1138 Result.DV[Level].Distance = Delta; // since X/1 == X 1139 NewConstraint.setDistance(Delta, CurLoop); 1140 } 1141 else { 1142 Result.Consistent = false; 1143 NewConstraint.setLine(Coeff, 1144 SE->getNegativeSCEV(Coeff), 1145 SE->getNegativeSCEV(Delta), CurLoop); 1146 } 1147 1148 // maybe we can get a useful direction 1149 bool DeltaMaybeZero = !SE->isKnownNonZero(Delta); 1150 bool DeltaMaybePositive = !SE->isKnownNonPositive(Delta); 1151 bool DeltaMaybeNegative = !SE->isKnownNonNegative(Delta); 1152 bool CoeffMaybePositive = !SE->isKnownNonPositive(Coeff); 1153 bool CoeffMaybeNegative = !SE->isKnownNonNegative(Coeff); 1154 // The double negatives above are confusing. 1155 // It helps to read !SE->isKnownNonZero(Delta) 1156 // as "Delta might be Zero" 1157 unsigned NewDirection = Dependence::DVEntry::NONE; 1158 if ((DeltaMaybePositive && CoeffMaybePositive) || 1159 (DeltaMaybeNegative && CoeffMaybeNegative)) 1160 NewDirection = Dependence::DVEntry::LT; 1161 if (DeltaMaybeZero) 1162 NewDirection |= Dependence::DVEntry::EQ; 1163 if ((DeltaMaybeNegative && CoeffMaybePositive) || 1164 (DeltaMaybePositive && CoeffMaybeNegative)) 1165 NewDirection |= Dependence::DVEntry::GT; 1166 if (NewDirection < Result.DV[Level].Direction) 1167 ++StrongSIVsuccesses; 1168 Result.DV[Level].Direction &= NewDirection; 1169 } 1170 return false; 1171 } 1172 1173 1174 // weakCrossingSIVtest - 1175 // From the paper, Practical Dependence Testing, Section 4.2.2 1176 // 1177 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 - a*i], 1178 // where i is an induction variable, c1 and c2 are loop invariant, 1179 // and a is a constant, we can solve it exactly using the 1180 // Weak-Crossing SIV test. 1181 // 1182 // Given c1 + a*i = c2 - a*i', we can look for the intersection of 1183 // the two lines, where i = i', yielding 1184 // 1185 // c1 + a*i = c2 - a*i 1186 // 2a*i = c2 - c1 1187 // i = (c2 - c1)/2a 1188 // 1189 // If i < 0, there is no dependence. 1190 // If i > upperbound, there is no dependence. 1191 // If i = 0 (i.e., if c1 = c2), there's a dependence with distance = 0. 1192 // If i = upperbound, there's a dependence with distance = 0. 1193 // If i is integral, there's a dependence (all directions). 1194 // If the non-integer part = 1/2, there's a dependence (<> directions). 1195 // Otherwise, there's no dependence. 1196 // 1197 // Can prove independence. Failing that, 1198 // can sometimes refine the directions. 1199 // Can determine iteration for splitting. 1200 // 1201 // Return true if dependence disproved. 1202 bool DependenceInfo::weakCrossingSIVtest( 1203 const SCEV *Coeff, const SCEV *SrcConst, const SCEV *DstConst, 1204 const Loop *CurLoop, unsigned Level, FullDependence &Result, 1205 Constraint &NewConstraint, const SCEV *&SplitIter) const { 1206 DEBUG(dbgs() << "\tWeak-Crossing SIV test\n"); 1207 DEBUG(dbgs() << "\t Coeff = " << *Coeff << "\n"); 1208 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1209 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1210 ++WeakCrossingSIVapplications; 1211 assert(0 < Level && Level <= CommonLevels && "Level out of range"); 1212 Level--; 1213 Result.Consistent = false; 1214 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1215 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1216 NewConstraint.setLine(Coeff, Coeff, Delta, CurLoop); 1217 if (Delta->isZero()) { 1218 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT); 1219 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT); 1220 ++WeakCrossingSIVsuccesses; 1221 if (!Result.DV[Level].Direction) { 1222 ++WeakCrossingSIVindependence; 1223 return true; 1224 } 1225 Result.DV[Level].Distance = Delta; // = 0 1226 return false; 1227 } 1228 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Coeff); 1229 if (!ConstCoeff) 1230 return false; 1231 1232 Result.DV[Level].Splitable = true; 1233 if (SE->isKnownNegative(ConstCoeff)) { 1234 ConstCoeff = dyn_cast<SCEVConstant>(SE->getNegativeSCEV(ConstCoeff)); 1235 assert(ConstCoeff && 1236 "dynamic cast of negative of ConstCoeff should yield constant"); 1237 Delta = SE->getNegativeSCEV(Delta); 1238 } 1239 assert(SE->isKnownPositive(ConstCoeff) && "ConstCoeff should be positive"); 1240 1241 // compute SplitIter for use by DependenceInfo::getSplitIteration() 1242 SplitIter = SE->getUDivExpr( 1243 SE->getSMaxExpr(SE->getZero(Delta->getType()), Delta), 1244 SE->getMulExpr(SE->getConstant(Delta->getType(), 2), ConstCoeff)); 1245 DEBUG(dbgs() << "\t Split iter = " << *SplitIter << "\n"); 1246 1247 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1248 if (!ConstDelta) 1249 return false; 1250 1251 // We're certain that ConstCoeff > 0; therefore, 1252 // if Delta < 0, then no dependence. 1253 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1254 DEBUG(dbgs() << "\t ConstCoeff = " << *ConstCoeff << "\n"); 1255 if (SE->isKnownNegative(Delta)) { 1256 // No dependence, Delta < 0 1257 ++WeakCrossingSIVindependence; 1258 ++WeakCrossingSIVsuccesses; 1259 return true; 1260 } 1261 1262 // We're certain that Delta > 0 and ConstCoeff > 0. 1263 // Check Delta/(2*ConstCoeff) against upper loop bound 1264 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1265 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1266 const SCEV *ConstantTwo = SE->getConstant(UpperBound->getType(), 2); 1267 const SCEV *ML = SE->getMulExpr(SE->getMulExpr(ConstCoeff, UpperBound), 1268 ConstantTwo); 1269 DEBUG(dbgs() << "\t ML = " << *ML << "\n"); 1270 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, ML)) { 1271 // Delta too big, no dependence 1272 ++WeakCrossingSIVindependence; 1273 ++WeakCrossingSIVsuccesses; 1274 return true; 1275 } 1276 if (isKnownPredicate(CmpInst::ICMP_EQ, Delta, ML)) { 1277 // i = i' = UB 1278 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT); 1279 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT); 1280 ++WeakCrossingSIVsuccesses; 1281 if (!Result.DV[Level].Direction) { 1282 ++WeakCrossingSIVindependence; 1283 return true; 1284 } 1285 Result.DV[Level].Splitable = false; 1286 Result.DV[Level].Distance = SE->getZero(Delta->getType()); 1287 return false; 1288 } 1289 } 1290 1291 // check that Coeff divides Delta 1292 APInt APDelta = ConstDelta->getAPInt(); 1293 APInt APCoeff = ConstCoeff->getAPInt(); 1294 APInt Distance = APDelta; // these need to be initialzed 1295 APInt Remainder = APDelta; 1296 APInt::sdivrem(APDelta, APCoeff, Distance, Remainder); 1297 DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1298 if (Remainder != 0) { 1299 // Coeff doesn't divide Delta, no dependence 1300 ++WeakCrossingSIVindependence; 1301 ++WeakCrossingSIVsuccesses; 1302 return true; 1303 } 1304 DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); 1305 1306 // if 2*Coeff doesn't divide Delta, then the equal direction isn't possible 1307 APInt Two = APInt(Distance.getBitWidth(), 2, true); 1308 Remainder = Distance.srem(Two); 1309 DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1310 if (Remainder != 0) { 1311 // Equal direction isn't possible 1312 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::EQ); 1313 ++WeakCrossingSIVsuccesses; 1314 } 1315 return false; 1316 } 1317 1318 1319 // Kirch's algorithm, from 1320 // 1321 // Optimizing Supercompilers for Supercomputers 1322 // Michael Wolfe 1323 // MIT Press, 1989 1324 // 1325 // Program 2.1, page 29. 1326 // Computes the GCD of AM and BM. 1327 // Also finds a solution to the equation ax - by = gcd(a, b). 1328 // Returns true if dependence disproved; i.e., gcd does not divide Delta. 1329 static 1330 bool findGCD(unsigned Bits, APInt AM, APInt BM, APInt Delta, 1331 APInt &G, APInt &X, APInt &Y) { 1332 APInt A0(Bits, 1, true), A1(Bits, 0, true); 1333 APInt B0(Bits, 0, true), B1(Bits, 1, true); 1334 APInt G0 = AM.abs(); 1335 APInt G1 = BM.abs(); 1336 APInt Q = G0; // these need to be initialized 1337 APInt R = G0; 1338 APInt::sdivrem(G0, G1, Q, R); 1339 while (R != 0) { 1340 APInt A2 = A0 - Q*A1; A0 = A1; A1 = A2; 1341 APInt B2 = B0 - Q*B1; B0 = B1; B1 = B2; 1342 G0 = G1; G1 = R; 1343 APInt::sdivrem(G0, G1, Q, R); 1344 } 1345 G = G1; 1346 DEBUG(dbgs() << "\t GCD = " << G << "\n"); 1347 X = AM.slt(0) ? -A1 : A1; 1348 Y = BM.slt(0) ? B1 : -B1; 1349 1350 // make sure gcd divides Delta 1351 R = Delta.srem(G); 1352 if (R != 0) 1353 return true; // gcd doesn't divide Delta, no dependence 1354 Q = Delta.sdiv(G); 1355 X *= Q; 1356 Y *= Q; 1357 return false; 1358 } 1359 1360 1361 static 1362 APInt floorOfQuotient(APInt A, APInt B) { 1363 APInt Q = A; // these need to be initialized 1364 APInt R = A; 1365 APInt::sdivrem(A, B, Q, R); 1366 if (R == 0) 1367 return Q; 1368 if ((A.sgt(0) && B.sgt(0)) || 1369 (A.slt(0) && B.slt(0))) 1370 return Q; 1371 else 1372 return Q - 1; 1373 } 1374 1375 1376 static 1377 APInt ceilingOfQuotient(APInt A, APInt B) { 1378 APInt Q = A; // these need to be initialized 1379 APInt R = A; 1380 APInt::sdivrem(A, B, Q, R); 1381 if (R == 0) 1382 return Q; 1383 if ((A.sgt(0) && B.sgt(0)) || 1384 (A.slt(0) && B.slt(0))) 1385 return Q + 1; 1386 else 1387 return Q; 1388 } 1389 1390 1391 static 1392 APInt maxAPInt(APInt A, APInt B) { 1393 return A.sgt(B) ? A : B; 1394 } 1395 1396 1397 static 1398 APInt minAPInt(APInt A, APInt B) { 1399 return A.slt(B) ? A : B; 1400 } 1401 1402 1403 // exactSIVtest - 1404 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i], 1405 // where i is an induction variable, c1 and c2 are loop invariant, and a1 1406 // and a2 are constant, we can solve it exactly using an algorithm developed 1407 // by Banerjee and Wolfe. See Section 2.5.3 in 1408 // 1409 // Optimizing Supercompilers for Supercomputers 1410 // Michael Wolfe 1411 // MIT Press, 1989 1412 // 1413 // It's slower than the specialized tests (strong SIV, weak-zero SIV, etc), 1414 // so use them if possible. They're also a bit better with symbolics and, 1415 // in the case of the strong SIV test, can compute Distances. 1416 // 1417 // Return true if dependence disproved. 1418 bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, 1419 const SCEV *SrcConst, const SCEV *DstConst, 1420 const Loop *CurLoop, unsigned Level, 1421 FullDependence &Result, 1422 Constraint &NewConstraint) const { 1423 DEBUG(dbgs() << "\tExact SIV test\n"); 1424 DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); 1425 DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); 1426 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1427 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1428 ++ExactSIVapplications; 1429 assert(0 < Level && Level <= CommonLevels && "Level out of range"); 1430 Level--; 1431 Result.Consistent = false; 1432 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1433 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1434 NewConstraint.setLine(SrcCoeff, SE->getNegativeSCEV(DstCoeff), 1435 Delta, CurLoop); 1436 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1437 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1438 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1439 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) 1440 return false; 1441 1442 // find gcd 1443 APInt G, X, Y; 1444 APInt AM = ConstSrcCoeff->getAPInt(); 1445 APInt BM = ConstDstCoeff->getAPInt(); 1446 unsigned Bits = AM.getBitWidth(); 1447 if (findGCD(Bits, AM, BM, ConstDelta->getAPInt(), G, X, Y)) { 1448 // gcd doesn't divide Delta, no dependence 1449 ++ExactSIVindependence; 1450 ++ExactSIVsuccesses; 1451 return true; 1452 } 1453 1454 DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); 1455 1456 // since SCEV construction normalizes, LM = 0 1457 APInt UM(Bits, 1, true); 1458 bool UMvalid = false; 1459 // UM is perhaps unavailable, let's check 1460 if (const SCEVConstant *CUB = 1461 collectConstantUpperBound(CurLoop, Delta->getType())) { 1462 UM = CUB->getAPInt(); 1463 DEBUG(dbgs() << "\t UM = " << UM << "\n"); 1464 UMvalid = true; 1465 } 1466 1467 APInt TU(APInt::getSignedMaxValue(Bits)); 1468 APInt TL(APInt::getSignedMinValue(Bits)); 1469 1470 // test(BM/G, LM-X) and test(-BM/G, X-UM) 1471 APInt TMUL = BM.sdiv(G); 1472 if (TMUL.sgt(0)) { 1473 TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL)); 1474 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1475 if (UMvalid) { 1476 TU = minAPInt(TU, floorOfQuotient(UM - X, TMUL)); 1477 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1478 } 1479 } 1480 else { 1481 TU = minAPInt(TU, floorOfQuotient(-X, TMUL)); 1482 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1483 if (UMvalid) { 1484 TL = maxAPInt(TL, ceilingOfQuotient(UM - X, TMUL)); 1485 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1486 } 1487 } 1488 1489 // test(AM/G, LM-Y) and test(-AM/G, Y-UM) 1490 TMUL = AM.sdiv(G); 1491 if (TMUL.sgt(0)) { 1492 TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL)); 1493 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1494 if (UMvalid) { 1495 TU = minAPInt(TU, floorOfQuotient(UM - Y, TMUL)); 1496 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1497 } 1498 } 1499 else { 1500 TU = minAPInt(TU, floorOfQuotient(-Y, TMUL)); 1501 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1502 if (UMvalid) { 1503 TL = maxAPInt(TL, ceilingOfQuotient(UM - Y, TMUL)); 1504 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1505 } 1506 } 1507 if (TL.sgt(TU)) { 1508 ++ExactSIVindependence; 1509 ++ExactSIVsuccesses; 1510 return true; 1511 } 1512 1513 // explore directions 1514 unsigned NewDirection = Dependence::DVEntry::NONE; 1515 1516 // less than 1517 APInt SaveTU(TU); // save these 1518 APInt SaveTL(TL); 1519 DEBUG(dbgs() << "\t exploring LT direction\n"); 1520 TMUL = AM - BM; 1521 if (TMUL.sgt(0)) { 1522 TL = maxAPInt(TL, ceilingOfQuotient(X - Y + 1, TMUL)); 1523 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1524 } 1525 else { 1526 TU = minAPInt(TU, floorOfQuotient(X - Y + 1, TMUL)); 1527 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1528 } 1529 if (TL.sle(TU)) { 1530 NewDirection |= Dependence::DVEntry::LT; 1531 ++ExactSIVsuccesses; 1532 } 1533 1534 // equal 1535 TU = SaveTU; // restore 1536 TL = SaveTL; 1537 DEBUG(dbgs() << "\t exploring EQ direction\n"); 1538 if (TMUL.sgt(0)) { 1539 TL = maxAPInt(TL, ceilingOfQuotient(X - Y, TMUL)); 1540 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1541 } 1542 else { 1543 TU = minAPInt(TU, floorOfQuotient(X - Y, TMUL)); 1544 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1545 } 1546 TMUL = BM - AM; 1547 if (TMUL.sgt(0)) { 1548 TL = maxAPInt(TL, ceilingOfQuotient(Y - X, TMUL)); 1549 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1550 } 1551 else { 1552 TU = minAPInt(TU, floorOfQuotient(Y - X, TMUL)); 1553 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1554 } 1555 if (TL.sle(TU)) { 1556 NewDirection |= Dependence::DVEntry::EQ; 1557 ++ExactSIVsuccesses; 1558 } 1559 1560 // greater than 1561 TU = SaveTU; // restore 1562 TL = SaveTL; 1563 DEBUG(dbgs() << "\t exploring GT direction\n"); 1564 if (TMUL.sgt(0)) { 1565 TL = maxAPInt(TL, ceilingOfQuotient(Y - X + 1, TMUL)); 1566 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1567 } 1568 else { 1569 TU = minAPInt(TU, floorOfQuotient(Y - X + 1, TMUL)); 1570 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1571 } 1572 if (TL.sle(TU)) { 1573 NewDirection |= Dependence::DVEntry::GT; 1574 ++ExactSIVsuccesses; 1575 } 1576 1577 // finished 1578 Result.DV[Level].Direction &= NewDirection; 1579 if (Result.DV[Level].Direction == Dependence::DVEntry::NONE) 1580 ++ExactSIVindependence; 1581 return Result.DV[Level].Direction == Dependence::DVEntry::NONE; 1582 } 1583 1584 1585 1586 // Return true if the divisor evenly divides the dividend. 1587 static 1588 bool isRemainderZero(const SCEVConstant *Dividend, 1589 const SCEVConstant *Divisor) { 1590 APInt ConstDividend = Dividend->getAPInt(); 1591 APInt ConstDivisor = Divisor->getAPInt(); 1592 return ConstDividend.srem(ConstDivisor) == 0; 1593 } 1594 1595 1596 // weakZeroSrcSIVtest - 1597 // From the paper, Practical Dependence Testing, Section 4.2.2 1598 // 1599 // When we have a pair of subscripts of the form [c1] and [c2 + a*i], 1600 // where i is an induction variable, c1 and c2 are loop invariant, 1601 // and a is a constant, we can solve it exactly using the 1602 // Weak-Zero SIV test. 1603 // 1604 // Given 1605 // 1606 // c1 = c2 + a*i 1607 // 1608 // we get 1609 // 1610 // (c1 - c2)/a = i 1611 // 1612 // If i is not an integer, there's no dependence. 1613 // If i < 0 or > UB, there's no dependence. 1614 // If i = 0, the direction is <= and peeling the 1615 // 1st iteration will break the dependence. 1616 // If i = UB, the direction is >= and peeling the 1617 // last iteration will break the dependence. 1618 // Otherwise, the direction is *. 1619 // 1620 // Can prove independence. Failing that, we can sometimes refine 1621 // the directions. Can sometimes show that first or last 1622 // iteration carries all the dependences (so worth peeling). 1623 // 1624 // (see also weakZeroDstSIVtest) 1625 // 1626 // Return true if dependence disproved. 1627 bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *DstCoeff, 1628 const SCEV *SrcConst, 1629 const SCEV *DstConst, 1630 const Loop *CurLoop, unsigned Level, 1631 FullDependence &Result, 1632 Constraint &NewConstraint) const { 1633 // For the WeakSIV test, it's possible the loop isn't common to 1634 // the Src and Dst loops. If it isn't, then there's no need to 1635 // record a direction. 1636 DEBUG(dbgs() << "\tWeak-Zero (src) SIV test\n"); 1637 DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << "\n"); 1638 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1639 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1640 ++WeakZeroSIVapplications; 1641 assert(0 < Level && Level <= MaxLevels && "Level out of range"); 1642 Level--; 1643 Result.Consistent = false; 1644 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); 1645 NewConstraint.setLine(SE->getZero(Delta->getType()), DstCoeff, Delta, 1646 CurLoop); 1647 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1648 if (isKnownPredicate(CmpInst::ICMP_EQ, SrcConst, DstConst)) { 1649 if (Level < CommonLevels) { 1650 Result.DV[Level].Direction &= Dependence::DVEntry::LE; 1651 Result.DV[Level].PeelFirst = true; 1652 ++WeakZeroSIVsuccesses; 1653 } 1654 return false; // dependences caused by first iteration 1655 } 1656 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1657 if (!ConstCoeff) 1658 return false; 1659 const SCEV *AbsCoeff = 1660 SE->isKnownNegative(ConstCoeff) ? 1661 SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; 1662 const SCEV *NewDelta = 1663 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta; 1664 1665 // check that Delta/SrcCoeff < iteration count 1666 // really check NewDelta < count*AbsCoeff 1667 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1668 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1669 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); 1670 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { 1671 ++WeakZeroSIVindependence; 1672 ++WeakZeroSIVsuccesses; 1673 return true; 1674 } 1675 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { 1676 // dependences caused by last iteration 1677 if (Level < CommonLevels) { 1678 Result.DV[Level].Direction &= Dependence::DVEntry::GE; 1679 Result.DV[Level].PeelLast = true; 1680 ++WeakZeroSIVsuccesses; 1681 } 1682 return false; 1683 } 1684 } 1685 1686 // check that Delta/SrcCoeff >= 0 1687 // really check that NewDelta >= 0 1688 if (SE->isKnownNegative(NewDelta)) { 1689 // No dependence, newDelta < 0 1690 ++WeakZeroSIVindependence; 1691 ++WeakZeroSIVsuccesses; 1692 return true; 1693 } 1694 1695 // if SrcCoeff doesn't divide Delta, then no dependence 1696 if (isa<SCEVConstant>(Delta) && 1697 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { 1698 ++WeakZeroSIVindependence; 1699 ++WeakZeroSIVsuccesses; 1700 return true; 1701 } 1702 return false; 1703 } 1704 1705 1706 // weakZeroDstSIVtest - 1707 // From the paper, Practical Dependence Testing, Section 4.2.2 1708 // 1709 // When we have a pair of subscripts of the form [c1 + a*i] and [c2], 1710 // where i is an induction variable, c1 and c2 are loop invariant, 1711 // and a is a constant, we can solve it exactly using the 1712 // Weak-Zero SIV test. 1713 // 1714 // Given 1715 // 1716 // c1 + a*i = c2 1717 // 1718 // we get 1719 // 1720 // i = (c2 - c1)/a 1721 // 1722 // If i is not an integer, there's no dependence. 1723 // If i < 0 or > UB, there's no dependence. 1724 // If i = 0, the direction is <= and peeling the 1725 // 1st iteration will break the dependence. 1726 // If i = UB, the direction is >= and peeling the 1727 // last iteration will break the dependence. 1728 // Otherwise, the direction is *. 1729 // 1730 // Can prove independence. Failing that, we can sometimes refine 1731 // the directions. Can sometimes show that first or last 1732 // iteration carries all the dependences (so worth peeling). 1733 // 1734 // (see also weakZeroSrcSIVtest) 1735 // 1736 // Return true if dependence disproved. 1737 bool DependenceInfo::weakZeroDstSIVtest(const SCEV *SrcCoeff, 1738 const SCEV *SrcConst, 1739 const SCEV *DstConst, 1740 const Loop *CurLoop, unsigned Level, 1741 FullDependence &Result, 1742 Constraint &NewConstraint) const { 1743 // For the WeakSIV test, it's possible the loop isn't common to the 1744 // Src and Dst loops. If it isn't, then there's no need to record a direction. 1745 DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n"); 1746 DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << "\n"); 1747 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1748 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1749 ++WeakZeroSIVapplications; 1750 assert(0 < Level && Level <= SrcLevels && "Level out of range"); 1751 Level--; 1752 Result.Consistent = false; 1753 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1754 NewConstraint.setLine(SrcCoeff, SE->getZero(Delta->getType()), Delta, 1755 CurLoop); 1756 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1757 if (isKnownPredicate(CmpInst::ICMP_EQ, DstConst, SrcConst)) { 1758 if (Level < CommonLevels) { 1759 Result.DV[Level].Direction &= Dependence::DVEntry::LE; 1760 Result.DV[Level].PeelFirst = true; 1761 ++WeakZeroSIVsuccesses; 1762 } 1763 return false; // dependences caused by first iteration 1764 } 1765 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1766 if (!ConstCoeff) 1767 return false; 1768 const SCEV *AbsCoeff = 1769 SE->isKnownNegative(ConstCoeff) ? 1770 SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; 1771 const SCEV *NewDelta = 1772 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta; 1773 1774 // check that Delta/SrcCoeff < iteration count 1775 // really check NewDelta < count*AbsCoeff 1776 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1777 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1778 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); 1779 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { 1780 ++WeakZeroSIVindependence; 1781 ++WeakZeroSIVsuccesses; 1782 return true; 1783 } 1784 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { 1785 // dependences caused by last iteration 1786 if (Level < CommonLevels) { 1787 Result.DV[Level].Direction &= Dependence::DVEntry::GE; 1788 Result.DV[Level].PeelLast = true; 1789 ++WeakZeroSIVsuccesses; 1790 } 1791 return false; 1792 } 1793 } 1794 1795 // check that Delta/SrcCoeff >= 0 1796 // really check that NewDelta >= 0 1797 if (SE->isKnownNegative(NewDelta)) { 1798 // No dependence, newDelta < 0 1799 ++WeakZeroSIVindependence; 1800 ++WeakZeroSIVsuccesses; 1801 return true; 1802 } 1803 1804 // if SrcCoeff doesn't divide Delta, then no dependence 1805 if (isa<SCEVConstant>(Delta) && 1806 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { 1807 ++WeakZeroSIVindependence; 1808 ++WeakZeroSIVsuccesses; 1809 return true; 1810 } 1811 return false; 1812 } 1813 1814 1815 // exactRDIVtest - Tests the RDIV subscript pair for dependence. 1816 // Things of the form [c1 + a*i] and [c2 + b*j], 1817 // where i and j are induction variable, c1 and c2 are loop invariant, 1818 // and a and b are constants. 1819 // Returns true if any possible dependence is disproved. 1820 // Marks the result as inconsistent. 1821 // Works in some cases that symbolicRDIVtest doesn't, and vice versa. 1822 bool DependenceInfo::exactRDIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, 1823 const SCEV *SrcConst, const SCEV *DstConst, 1824 const Loop *SrcLoop, const Loop *DstLoop, 1825 FullDependence &Result) const { 1826 DEBUG(dbgs() << "\tExact RDIV test\n"); 1827 DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); 1828 DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); 1829 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1830 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1831 ++ExactRDIVapplications; 1832 Result.Consistent = false; 1833 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1834 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1835 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1836 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1837 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1838 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) 1839 return false; 1840 1841 // find gcd 1842 APInt G, X, Y; 1843 APInt AM = ConstSrcCoeff->getAPInt(); 1844 APInt BM = ConstDstCoeff->getAPInt(); 1845 unsigned Bits = AM.getBitWidth(); 1846 if (findGCD(Bits, AM, BM, ConstDelta->getAPInt(), G, X, Y)) { 1847 // gcd doesn't divide Delta, no dependence 1848 ++ExactRDIVindependence; 1849 return true; 1850 } 1851 1852 DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); 1853 1854 // since SCEV construction seems to normalize, LM = 0 1855 APInt SrcUM(Bits, 1, true); 1856 bool SrcUMvalid = false; 1857 // SrcUM is perhaps unavailable, let's check 1858 if (const SCEVConstant *UpperBound = 1859 collectConstantUpperBound(SrcLoop, Delta->getType())) { 1860 SrcUM = UpperBound->getAPInt(); 1861 DEBUG(dbgs() << "\t SrcUM = " << SrcUM << "\n"); 1862 SrcUMvalid = true; 1863 } 1864 1865 APInt DstUM(Bits, 1, true); 1866 bool DstUMvalid = false; 1867 // UM is perhaps unavailable, let's check 1868 if (const SCEVConstant *UpperBound = 1869 collectConstantUpperBound(DstLoop, Delta->getType())) { 1870 DstUM = UpperBound->getAPInt(); 1871 DEBUG(dbgs() << "\t DstUM = " << DstUM << "\n"); 1872 DstUMvalid = true; 1873 } 1874 1875 APInt TU(APInt::getSignedMaxValue(Bits)); 1876 APInt TL(APInt::getSignedMinValue(Bits)); 1877 1878 // test(BM/G, LM-X) and test(-BM/G, X-UM) 1879 APInt TMUL = BM.sdiv(G); 1880 if (TMUL.sgt(0)) { 1881 TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL)); 1882 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1883 if (SrcUMvalid) { 1884 TU = minAPInt(TU, floorOfQuotient(SrcUM - X, TMUL)); 1885 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1886 } 1887 } 1888 else { 1889 TU = minAPInt(TU, floorOfQuotient(-X, TMUL)); 1890 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1891 if (SrcUMvalid) { 1892 TL = maxAPInt(TL, ceilingOfQuotient(SrcUM - X, TMUL)); 1893 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1894 } 1895 } 1896 1897 // test(AM/G, LM-Y) and test(-AM/G, Y-UM) 1898 TMUL = AM.sdiv(G); 1899 if (TMUL.sgt(0)) { 1900 TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL)); 1901 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1902 if (DstUMvalid) { 1903 TU = minAPInt(TU, floorOfQuotient(DstUM - Y, TMUL)); 1904 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1905 } 1906 } 1907 else { 1908 TU = minAPInt(TU, floorOfQuotient(-Y, TMUL)); 1909 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1910 if (DstUMvalid) { 1911 TL = maxAPInt(TL, ceilingOfQuotient(DstUM - Y, TMUL)); 1912 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1913 } 1914 } 1915 if (TL.sgt(TU)) 1916 ++ExactRDIVindependence; 1917 return TL.sgt(TU); 1918 } 1919 1920 1921 // symbolicRDIVtest - 1922 // In Section 4.5 of the Practical Dependence Testing paper,the authors 1923 // introduce a special case of Banerjee's Inequalities (also called the 1924 // Extreme-Value Test) that can handle some of the SIV and RDIV cases, 1925 // particularly cases with symbolics. Since it's only able to disprove 1926 // dependence (not compute distances or directions), we'll use it as a 1927 // fall back for the other tests. 1928 // 1929 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] 1930 // where i and j are induction variables and c1 and c2 are loop invariants, 1931 // we can use the symbolic tests to disprove some dependences, serving as a 1932 // backup for the RDIV test. Note that i and j can be the same variable, 1933 // letting this test serve as a backup for the various SIV tests. 1934 // 1935 // For a dependence to exist, c1 + a1*i must equal c2 + a2*j for some 1936 // 0 <= i <= N1 and some 0 <= j <= N2, where N1 and N2 are the (normalized) 1937 // loop bounds for the i and j loops, respectively. So, ... 1938 // 1939 // c1 + a1*i = c2 + a2*j 1940 // a1*i - a2*j = c2 - c1 1941 // 1942 // To test for a dependence, we compute c2 - c1 and make sure it's in the 1943 // range of the maximum and minimum possible values of a1*i - a2*j. 1944 // Considering the signs of a1 and a2, we have 4 possible cases: 1945 // 1946 // 1) If a1 >= 0 and a2 >= 0, then 1947 // a1*0 - a2*N2 <= c2 - c1 <= a1*N1 - a2*0 1948 // -a2*N2 <= c2 - c1 <= a1*N1 1949 // 1950 // 2) If a1 >= 0 and a2 <= 0, then 1951 // a1*0 - a2*0 <= c2 - c1 <= a1*N1 - a2*N2 1952 // 0 <= c2 - c1 <= a1*N1 - a2*N2 1953 // 1954 // 3) If a1 <= 0 and a2 >= 0, then 1955 // a1*N1 - a2*N2 <= c2 - c1 <= a1*0 - a2*0 1956 // a1*N1 - a2*N2 <= c2 - c1 <= 0 1957 // 1958 // 4) If a1 <= 0 and a2 <= 0, then 1959 // a1*N1 - a2*0 <= c2 - c1 <= a1*0 - a2*N2 1960 // a1*N1 <= c2 - c1 <= -a2*N2 1961 // 1962 // return true if dependence disproved 1963 bool DependenceInfo::symbolicRDIVtest(const SCEV *A1, const SCEV *A2, 1964 const SCEV *C1, const SCEV *C2, 1965 const Loop *Loop1, 1966 const Loop *Loop2) const { 1967 ++SymbolicRDIVapplications; 1968 DEBUG(dbgs() << "\ttry symbolic RDIV test\n"); 1969 DEBUG(dbgs() << "\t A1 = " << *A1); 1970 DEBUG(dbgs() << ", type = " << *A1->getType() << "\n"); 1971 DEBUG(dbgs() << "\t A2 = " << *A2 << "\n"); 1972 DEBUG(dbgs() << "\t C1 = " << *C1 << "\n"); 1973 DEBUG(dbgs() << "\t C2 = " << *C2 << "\n"); 1974 const SCEV *N1 = collectUpperBound(Loop1, A1->getType()); 1975 const SCEV *N2 = collectUpperBound(Loop2, A1->getType()); 1976 DEBUG(if (N1) dbgs() << "\t N1 = " << *N1 << "\n"); 1977 DEBUG(if (N2) dbgs() << "\t N2 = " << *N2 << "\n"); 1978 const SCEV *C2_C1 = SE->getMinusSCEV(C2, C1); 1979 const SCEV *C1_C2 = SE->getMinusSCEV(C1, C2); 1980 DEBUG(dbgs() << "\t C2 - C1 = " << *C2_C1 << "\n"); 1981 DEBUG(dbgs() << "\t C1 - C2 = " << *C1_C2 << "\n"); 1982 if (SE->isKnownNonNegative(A1)) { 1983 if (SE->isKnownNonNegative(A2)) { 1984 // A1 >= 0 && A2 >= 0 1985 if (N1) { 1986 // make sure that c2 - c1 <= a1*N1 1987 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 1988 DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); 1989 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1)) { 1990 ++SymbolicRDIVindependence; 1991 return true; 1992 } 1993 } 1994 if (N2) { 1995 // make sure that -a2*N2 <= c2 - c1, or a2*N2 >= c1 - c2 1996 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 1997 DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); 1998 if (isKnownPredicate(CmpInst::ICMP_SLT, A2N2, C1_C2)) { 1999 ++SymbolicRDIVindependence; 2000 return true; 2001 } 2002 } 2003 } 2004 else if (SE->isKnownNonPositive(A2)) { 2005 // a1 >= 0 && a2 <= 0 2006 if (N1 && N2) { 2007 // make sure that c2 - c1 <= a1*N1 - a2*N2 2008 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2009 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2010 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); 2011 DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); 2012 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1_A2N2)) { 2013 ++SymbolicRDIVindependence; 2014 return true; 2015 } 2016 } 2017 // make sure that 0 <= c2 - c1 2018 if (SE->isKnownNegative(C2_C1)) { 2019 ++SymbolicRDIVindependence; 2020 return true; 2021 } 2022 } 2023 } 2024 else if (SE->isKnownNonPositive(A1)) { 2025 if (SE->isKnownNonNegative(A2)) { 2026 // a1 <= 0 && a2 >= 0 2027 if (N1 && N2) { 2028 // make sure that a1*N1 - a2*N2 <= c2 - c1 2029 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2030 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2031 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); 2032 DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); 2033 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1_A2N2, C2_C1)) { 2034 ++SymbolicRDIVindependence; 2035 return true; 2036 } 2037 } 2038 // make sure that c2 - c1 <= 0 2039 if (SE->isKnownPositive(C2_C1)) { 2040 ++SymbolicRDIVindependence; 2041 return true; 2042 } 2043 } 2044 else if (SE->isKnownNonPositive(A2)) { 2045 // a1 <= 0 && a2 <= 0 2046 if (N1) { 2047 // make sure that a1*N1 <= c2 - c1 2048 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2049 DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); 2050 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1, C2_C1)) { 2051 ++SymbolicRDIVindependence; 2052 return true; 2053 } 2054 } 2055 if (N2) { 2056 // make sure that c2 - c1 <= -a2*N2, or c1 - c2 >= a2*N2 2057 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2058 DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); 2059 if (isKnownPredicate(CmpInst::ICMP_SLT, C1_C2, A2N2)) { 2060 ++SymbolicRDIVindependence; 2061 return true; 2062 } 2063 } 2064 } 2065 } 2066 return false; 2067 } 2068 2069 2070 // testSIV - 2071 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 - a2*i] 2072 // where i is an induction variable, c1 and c2 are loop invariant, and a1 and 2073 // a2 are constant, we attack it with an SIV test. While they can all be 2074 // solved with the Exact SIV test, it's worthwhile to use simpler tests when 2075 // they apply; they're cheaper and sometimes more precise. 2076 // 2077 // Return true if dependence disproved. 2078 bool DependenceInfo::testSIV(const SCEV *Src, const SCEV *Dst, unsigned &Level, 2079 FullDependence &Result, Constraint &NewConstraint, 2080 const SCEV *&SplitIter) const { 2081 DEBUG(dbgs() << " src = " << *Src << "\n"); 2082 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2083 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); 2084 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); 2085 if (SrcAddRec && DstAddRec) { 2086 const SCEV *SrcConst = SrcAddRec->getStart(); 2087 const SCEV *DstConst = DstAddRec->getStart(); 2088 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2089 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); 2090 const Loop *CurLoop = SrcAddRec->getLoop(); 2091 assert(CurLoop == DstAddRec->getLoop() && 2092 "both loops in SIV should be same"); 2093 Level = mapSrcLoop(CurLoop); 2094 bool disproven; 2095 if (SrcCoeff == DstCoeff) 2096 disproven = strongSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2097 Level, Result, NewConstraint); 2098 else if (SrcCoeff == SE->getNegativeSCEV(DstCoeff)) 2099 disproven = weakCrossingSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2100 Level, Result, NewConstraint, SplitIter); 2101 else 2102 disproven = exactSIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, 2103 Level, Result, NewConstraint); 2104 return disproven || 2105 gcdMIVtest(Src, Dst, Result) || 2106 symbolicRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, CurLoop); 2107 } 2108 if (SrcAddRec) { 2109 const SCEV *SrcConst = SrcAddRec->getStart(); 2110 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2111 const SCEV *DstConst = Dst; 2112 const Loop *CurLoop = SrcAddRec->getLoop(); 2113 Level = mapSrcLoop(CurLoop); 2114 return weakZeroDstSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2115 Level, Result, NewConstraint) || 2116 gcdMIVtest(Src, Dst, Result); 2117 } 2118 if (DstAddRec) { 2119 const SCEV *DstConst = DstAddRec->getStart(); 2120 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); 2121 const SCEV *SrcConst = Src; 2122 const Loop *CurLoop = DstAddRec->getLoop(); 2123 Level = mapDstLoop(CurLoop); 2124 return weakZeroSrcSIVtest(DstCoeff, SrcConst, DstConst, 2125 CurLoop, Level, Result, NewConstraint) || 2126 gcdMIVtest(Src, Dst, Result); 2127 } 2128 llvm_unreachable("SIV test expected at least one AddRec"); 2129 return false; 2130 } 2131 2132 2133 // testRDIV - 2134 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] 2135 // where i and j are induction variables, c1 and c2 are loop invariant, 2136 // and a1 and a2 are constant, we can solve it exactly with an easy adaptation 2137 // of the Exact SIV test, the Restricted Double Index Variable (RDIV) test. 2138 // It doesn't make sense to talk about distance or direction in this case, 2139 // so there's no point in making special versions of the Strong SIV test or 2140 // the Weak-crossing SIV test. 2141 // 2142 // With minor algebra, this test can also be used for things like 2143 // [c1 + a1*i + a2*j][c2]. 2144 // 2145 // Return true if dependence disproved. 2146 bool DependenceInfo::testRDIV(const SCEV *Src, const SCEV *Dst, 2147 FullDependence &Result) const { 2148 // we have 3 possible situations here: 2149 // 1) [a*i + b] and [c*j + d] 2150 // 2) [a*i + c*j + b] and [d] 2151 // 3) [b] and [a*i + c*j + d] 2152 // We need to find what we've got and get organized 2153 2154 const SCEV *SrcConst, *DstConst; 2155 const SCEV *SrcCoeff, *DstCoeff; 2156 const Loop *SrcLoop, *DstLoop; 2157 2158 DEBUG(dbgs() << " src = " << *Src << "\n"); 2159 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2160 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); 2161 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); 2162 if (SrcAddRec && DstAddRec) { 2163 SrcConst = SrcAddRec->getStart(); 2164 SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2165 SrcLoop = SrcAddRec->getLoop(); 2166 DstConst = DstAddRec->getStart(); 2167 DstCoeff = DstAddRec->getStepRecurrence(*SE); 2168 DstLoop = DstAddRec->getLoop(); 2169 } 2170 else if (SrcAddRec) { 2171 if (const SCEVAddRecExpr *tmpAddRec = 2172 dyn_cast<SCEVAddRecExpr>(SrcAddRec->getStart())) { 2173 SrcConst = tmpAddRec->getStart(); 2174 SrcCoeff = tmpAddRec->getStepRecurrence(*SE); 2175 SrcLoop = tmpAddRec->getLoop(); 2176 DstConst = Dst; 2177 DstCoeff = SE->getNegativeSCEV(SrcAddRec->getStepRecurrence(*SE)); 2178 DstLoop = SrcAddRec->getLoop(); 2179 } 2180 else 2181 llvm_unreachable("RDIV reached by surprising SCEVs"); 2182 } 2183 else if (DstAddRec) { 2184 if (const SCEVAddRecExpr *tmpAddRec = 2185 dyn_cast<SCEVAddRecExpr>(DstAddRec->getStart())) { 2186 DstConst = tmpAddRec->getStart(); 2187 DstCoeff = tmpAddRec->getStepRecurrence(*SE); 2188 DstLoop = tmpAddRec->getLoop(); 2189 SrcConst = Src; 2190 SrcCoeff = SE->getNegativeSCEV(DstAddRec->getStepRecurrence(*SE)); 2191 SrcLoop = DstAddRec->getLoop(); 2192 } 2193 else 2194 llvm_unreachable("RDIV reached by surprising SCEVs"); 2195 } 2196 else 2197 llvm_unreachable("RDIV expected at least one AddRec"); 2198 return exactRDIVtest(SrcCoeff, DstCoeff, 2199 SrcConst, DstConst, 2200 SrcLoop, DstLoop, 2201 Result) || 2202 gcdMIVtest(Src, Dst, Result) || 2203 symbolicRDIVtest(SrcCoeff, DstCoeff, 2204 SrcConst, DstConst, 2205 SrcLoop, DstLoop); 2206 } 2207 2208 2209 // Tests the single-subscript MIV pair (Src and Dst) for dependence. 2210 // Return true if dependence disproved. 2211 // Can sometimes refine direction vectors. 2212 bool DependenceInfo::testMIV(const SCEV *Src, const SCEV *Dst, 2213 const SmallBitVector &Loops, 2214 FullDependence &Result) const { 2215 DEBUG(dbgs() << " src = " << *Src << "\n"); 2216 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2217 Result.Consistent = false; 2218 return gcdMIVtest(Src, Dst, Result) || 2219 banerjeeMIVtest(Src, Dst, Loops, Result); 2220 } 2221 2222 2223 // Given a product, e.g., 10*X*Y, returns the first constant operand, 2224 // in this case 10. If there is no constant part, returns NULL. 2225 static 2226 const SCEVConstant *getConstantPart(const SCEV *Expr) { 2227 if (const auto *Constant = dyn_cast<SCEVConstant>(Expr)) 2228 return Constant; 2229 else if (const auto *Product = dyn_cast<SCEVMulExpr>(Expr)) 2230 if (const auto *Constant = dyn_cast<SCEVConstant>(Product->getOperand(0))) 2231 return Constant; 2232 return nullptr; 2233 } 2234 2235 2236 //===----------------------------------------------------------------------===// 2237 // gcdMIVtest - 2238 // Tests an MIV subscript pair for dependence. 2239 // Returns true if any possible dependence is disproved. 2240 // Marks the result as inconsistent. 2241 // Can sometimes disprove the equal direction for 1 or more loops, 2242 // as discussed in Michael Wolfe's book, 2243 // High Performance Compilers for Parallel Computing, page 235. 2244 // 2245 // We spend some effort (code!) to handle cases like 2246 // [10*i + 5*N*j + 15*M + 6], where i and j are induction variables, 2247 // but M and N are just loop-invariant variables. 2248 // This should help us handle linearized subscripts; 2249 // also makes this test a useful backup to the various SIV tests. 2250 // 2251 // It occurs to me that the presence of loop-invariant variables 2252 // changes the nature of the test from "greatest common divisor" 2253 // to "a common divisor". 2254 bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst, 2255 FullDependence &Result) const { 2256 DEBUG(dbgs() << "starting gcd\n"); 2257 ++GCDapplications; 2258 unsigned BitWidth = SE->getTypeSizeInBits(Src->getType()); 2259 APInt RunningGCD = APInt::getNullValue(BitWidth); 2260 2261 // Examine Src coefficients. 2262 // Compute running GCD and record source constant. 2263 // Because we're looking for the constant at the end of the chain, 2264 // we can't quit the loop just because the GCD == 1. 2265 const SCEV *Coefficients = Src; 2266 while (const SCEVAddRecExpr *AddRec = 2267 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2268 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2269 // If the coefficient is the product of a constant and other stuff, 2270 // we can use the constant in the GCD computation. 2271 const auto *Constant = getConstantPart(Coeff); 2272 if (!Constant) 2273 return false; 2274 APInt ConstCoeff = Constant->getAPInt(); 2275 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2276 Coefficients = AddRec->getStart(); 2277 } 2278 const SCEV *SrcConst = Coefficients; 2279 2280 // Examine Dst coefficients. 2281 // Compute running GCD and record destination constant. 2282 // Because we're looking for the constant at the end of the chain, 2283 // we can't quit the loop just because the GCD == 1. 2284 Coefficients = Dst; 2285 while (const SCEVAddRecExpr *AddRec = 2286 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2287 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2288 // If the coefficient is the product of a constant and other stuff, 2289 // we can use the constant in the GCD computation. 2290 const auto *Constant = getConstantPart(Coeff); 2291 if (!Constant) 2292 return false; 2293 APInt ConstCoeff = Constant->getAPInt(); 2294 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2295 Coefficients = AddRec->getStart(); 2296 } 2297 const SCEV *DstConst = Coefficients; 2298 2299 APInt ExtraGCD = APInt::getNullValue(BitWidth); 2300 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 2301 DEBUG(dbgs() << " Delta = " << *Delta << "\n"); 2302 const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Delta); 2303 if (const SCEVAddExpr *Sum = dyn_cast<SCEVAddExpr>(Delta)) { 2304 // If Delta is a sum of products, we may be able to make further progress. 2305 for (unsigned Op = 0, Ops = Sum->getNumOperands(); Op < Ops; Op++) { 2306 const SCEV *Operand = Sum->getOperand(Op); 2307 if (isa<SCEVConstant>(Operand)) { 2308 assert(!Constant && "Surprised to find multiple constants"); 2309 Constant = cast<SCEVConstant>(Operand); 2310 } 2311 else if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Operand)) { 2312 // Search for constant operand to participate in GCD; 2313 // If none found; return false. 2314 const SCEVConstant *ConstOp = getConstantPart(Product); 2315 if (!ConstOp) 2316 return false; 2317 APInt ConstOpValue = ConstOp->getAPInt(); 2318 ExtraGCD = APIntOps::GreatestCommonDivisor(ExtraGCD, 2319 ConstOpValue.abs()); 2320 } 2321 else 2322 return false; 2323 } 2324 } 2325 if (!Constant) 2326 return false; 2327 APInt ConstDelta = cast<SCEVConstant>(Constant)->getAPInt(); 2328 DEBUG(dbgs() << " ConstDelta = " << ConstDelta << "\n"); 2329 if (ConstDelta == 0) 2330 return false; 2331 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ExtraGCD); 2332 DEBUG(dbgs() << " RunningGCD = " << RunningGCD << "\n"); 2333 APInt Remainder = ConstDelta.srem(RunningGCD); 2334 if (Remainder != 0) { 2335 ++GCDindependence; 2336 return true; 2337 } 2338 2339 // Try to disprove equal directions. 2340 // For example, given a subscript pair [3*i + 2*j] and [i' + 2*j' - 1], 2341 // the code above can't disprove the dependence because the GCD = 1. 2342 // So we consider what happen if i = i' and what happens if j = j'. 2343 // If i = i', we can simplify the subscript to [2*i + 2*j] and [2*j' - 1], 2344 // which is infeasible, so we can disallow the = direction for the i level. 2345 // Setting j = j' doesn't help matters, so we end up with a direction vector 2346 // of [<>, *] 2347 // 2348 // Given A[5*i + 10*j*M + 9*M*N] and A[15*i + 20*j*M - 21*N*M + 5], 2349 // we need to remember that the constant part is 5 and the RunningGCD should 2350 // be initialized to ExtraGCD = 30. 2351 DEBUG(dbgs() << " ExtraGCD = " << ExtraGCD << '\n'); 2352 2353 bool Improved = false; 2354 Coefficients = Src; 2355 while (const SCEVAddRecExpr *AddRec = 2356 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2357 Coefficients = AddRec->getStart(); 2358 const Loop *CurLoop = AddRec->getLoop(); 2359 RunningGCD = ExtraGCD; 2360 const SCEV *SrcCoeff = AddRec->getStepRecurrence(*SE); 2361 const SCEV *DstCoeff = SE->getMinusSCEV(SrcCoeff, SrcCoeff); 2362 const SCEV *Inner = Src; 2363 while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { 2364 AddRec = cast<SCEVAddRecExpr>(Inner); 2365 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2366 if (CurLoop == AddRec->getLoop()) 2367 ; // SrcCoeff == Coeff 2368 else { 2369 // If the coefficient is the product of a constant and other stuff, 2370 // we can use the constant in the GCD computation. 2371 Constant = getConstantPart(Coeff); 2372 if (!Constant) 2373 return false; 2374 APInt ConstCoeff = Constant->getAPInt(); 2375 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2376 } 2377 Inner = AddRec->getStart(); 2378 } 2379 Inner = Dst; 2380 while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { 2381 AddRec = cast<SCEVAddRecExpr>(Inner); 2382 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2383 if (CurLoop == AddRec->getLoop()) 2384 DstCoeff = Coeff; 2385 else { 2386 // If the coefficient is the product of a constant and other stuff, 2387 // we can use the constant in the GCD computation. 2388 Constant = getConstantPart(Coeff); 2389 if (!Constant) 2390 return false; 2391 APInt ConstCoeff = Constant->getAPInt(); 2392 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2393 } 2394 Inner = AddRec->getStart(); 2395 } 2396 Delta = SE->getMinusSCEV(SrcCoeff, DstCoeff); 2397 // If the coefficient is the product of a constant and other stuff, 2398 // we can use the constant in the GCD computation. 2399 Constant = getConstantPart(Delta); 2400 if (!Constant) 2401 // The difference of the two coefficients might not be a product 2402 // or constant, in which case we give up on this direction. 2403 continue; 2404 APInt ConstCoeff = Constant->getAPInt(); 2405 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2406 DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n"); 2407 if (RunningGCD != 0) { 2408 Remainder = ConstDelta.srem(RunningGCD); 2409 DEBUG(dbgs() << "\tRemainder = " << Remainder << "\n"); 2410 if (Remainder != 0) { 2411 unsigned Level = mapSrcLoop(CurLoop); 2412 Result.DV[Level - 1].Direction &= unsigned(~Dependence::DVEntry::EQ); 2413 Improved = true; 2414 } 2415 } 2416 } 2417 if (Improved) 2418 ++GCDsuccesses; 2419 DEBUG(dbgs() << "all done\n"); 2420 return false; 2421 } 2422 2423 2424 //===----------------------------------------------------------------------===// 2425 // banerjeeMIVtest - 2426 // Use Banerjee's Inequalities to test an MIV subscript pair. 2427 // (Wolfe, in the race-car book, calls this the Extreme Value Test.) 2428 // Generally follows the discussion in Section 2.5.2 of 2429 // 2430 // Optimizing Supercompilers for Supercomputers 2431 // Michael Wolfe 2432 // 2433 // The inequalities given on page 25 are simplified in that loops are 2434 // normalized so that the lower bound is always 0 and the stride is always 1. 2435 // For example, Wolfe gives 2436 // 2437 // LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2438 // 2439 // where A_k is the coefficient of the kth index in the source subscript, 2440 // B_k is the coefficient of the kth index in the destination subscript, 2441 // U_k is the upper bound of the kth index, L_k is the lower bound of the Kth 2442 // index, and N_k is the stride of the kth index. Since all loops are normalized 2443 // by the SCEV package, N_k = 1 and L_k = 0, allowing us to simplify the 2444 // equation to 2445 // 2446 // LB^<_k = (A^-_k - B_k)^- (U_k - 0 - 1) + (A_k - B_k)0 - B_k 1 2447 // = (A^-_k - B_k)^- (U_k - 1) - B_k 2448 // 2449 // Similar simplifications are possible for the other equations. 2450 // 2451 // When we can't determine the number of iterations for a loop, 2452 // we use NULL as an indicator for the worst case, infinity. 2453 // When computing the upper bound, NULL denotes +inf; 2454 // for the lower bound, NULL denotes -inf. 2455 // 2456 // Return true if dependence disproved. 2457 bool DependenceInfo::banerjeeMIVtest(const SCEV *Src, const SCEV *Dst, 2458 const SmallBitVector &Loops, 2459 FullDependence &Result) const { 2460 DEBUG(dbgs() << "starting Banerjee\n"); 2461 ++BanerjeeApplications; 2462 DEBUG(dbgs() << " Src = " << *Src << '\n'); 2463 const SCEV *A0; 2464 CoefficientInfo *A = collectCoeffInfo(Src, true, A0); 2465 DEBUG(dbgs() << " Dst = " << *Dst << '\n'); 2466 const SCEV *B0; 2467 CoefficientInfo *B = collectCoeffInfo(Dst, false, B0); 2468 BoundInfo *Bound = new BoundInfo[MaxLevels + 1]; 2469 const SCEV *Delta = SE->getMinusSCEV(B0, A0); 2470 DEBUG(dbgs() << "\tDelta = " << *Delta << '\n'); 2471 2472 // Compute bounds for all the * directions. 2473 DEBUG(dbgs() << "\tBounds[*]\n"); 2474 for (unsigned K = 1; K <= MaxLevels; ++K) { 2475 Bound[K].Iterations = A[K].Iterations ? A[K].Iterations : B[K].Iterations; 2476 Bound[K].Direction = Dependence::DVEntry::ALL; 2477 Bound[K].DirSet = Dependence::DVEntry::NONE; 2478 findBoundsALL(A, B, Bound, K); 2479 #ifndef NDEBUG 2480 DEBUG(dbgs() << "\t " << K << '\t'); 2481 if (Bound[K].Lower[Dependence::DVEntry::ALL]) 2482 DEBUG(dbgs() << *Bound[K].Lower[Dependence::DVEntry::ALL] << '\t'); 2483 else 2484 DEBUG(dbgs() << "-inf\t"); 2485 if (Bound[K].Upper[Dependence::DVEntry::ALL]) 2486 DEBUG(dbgs() << *Bound[K].Upper[Dependence::DVEntry::ALL] << '\n'); 2487 else 2488 DEBUG(dbgs() << "+inf\n"); 2489 #endif 2490 } 2491 2492 // Test the *, *, *, ... case. 2493 bool Disproved = false; 2494 if (testBounds(Dependence::DVEntry::ALL, 0, Bound, Delta)) { 2495 // Explore the direction vector hierarchy. 2496 unsigned DepthExpanded = 0; 2497 unsigned NewDeps = exploreDirections(1, A, B, Bound, 2498 Loops, DepthExpanded, Delta); 2499 if (NewDeps > 0) { 2500 bool Improved = false; 2501 for (unsigned K = 1; K <= CommonLevels; ++K) { 2502 if (Loops[K]) { 2503 unsigned Old = Result.DV[K - 1].Direction; 2504 Result.DV[K - 1].Direction = Old & Bound[K].DirSet; 2505 Improved |= Old != Result.DV[K - 1].Direction; 2506 if (!Result.DV[K - 1].Direction) { 2507 Improved = false; 2508 Disproved = true; 2509 break; 2510 } 2511 } 2512 } 2513 if (Improved) 2514 ++BanerjeeSuccesses; 2515 } 2516 else { 2517 ++BanerjeeIndependence; 2518 Disproved = true; 2519 } 2520 } 2521 else { 2522 ++BanerjeeIndependence; 2523 Disproved = true; 2524 } 2525 delete [] Bound; 2526 delete [] A; 2527 delete [] B; 2528 return Disproved; 2529 } 2530 2531 2532 // Hierarchically expands the direction vector 2533 // search space, combining the directions of discovered dependences 2534 // in the DirSet field of Bound. Returns the number of distinct 2535 // dependences discovered. If the dependence is disproved, 2536 // it will return 0. 2537 unsigned DependenceInfo::exploreDirections(unsigned Level, CoefficientInfo *A, 2538 CoefficientInfo *B, BoundInfo *Bound, 2539 const SmallBitVector &Loops, 2540 unsigned &DepthExpanded, 2541 const SCEV *Delta) const { 2542 if (Level > CommonLevels) { 2543 // record result 2544 DEBUG(dbgs() << "\t["); 2545 for (unsigned K = 1; K <= CommonLevels; ++K) { 2546 if (Loops[K]) { 2547 Bound[K].DirSet |= Bound[K].Direction; 2548 #ifndef NDEBUG 2549 switch (Bound[K].Direction) { 2550 case Dependence::DVEntry::LT: 2551 DEBUG(dbgs() << " <"); 2552 break; 2553 case Dependence::DVEntry::EQ: 2554 DEBUG(dbgs() << " ="); 2555 break; 2556 case Dependence::DVEntry::GT: 2557 DEBUG(dbgs() << " >"); 2558 break; 2559 case Dependence::DVEntry::ALL: 2560 DEBUG(dbgs() << " *"); 2561 break; 2562 default: 2563 llvm_unreachable("unexpected Bound[K].Direction"); 2564 } 2565 #endif 2566 } 2567 } 2568 DEBUG(dbgs() << " ]\n"); 2569 return 1; 2570 } 2571 if (Loops[Level]) { 2572 if (Level > DepthExpanded) { 2573 DepthExpanded = Level; 2574 // compute bounds for <, =, > at current level 2575 findBoundsLT(A, B, Bound, Level); 2576 findBoundsGT(A, B, Bound, Level); 2577 findBoundsEQ(A, B, Bound, Level); 2578 #ifndef NDEBUG 2579 DEBUG(dbgs() << "\tBound for level = " << Level << '\n'); 2580 DEBUG(dbgs() << "\t <\t"); 2581 if (Bound[Level].Lower[Dependence::DVEntry::LT]) 2582 DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::LT] << '\t'); 2583 else 2584 DEBUG(dbgs() << "-inf\t"); 2585 if (Bound[Level].Upper[Dependence::DVEntry::LT]) 2586 DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::LT] << '\n'); 2587 else 2588 DEBUG(dbgs() << "+inf\n"); 2589 DEBUG(dbgs() << "\t =\t"); 2590 if (Bound[Level].Lower[Dependence::DVEntry::EQ]) 2591 DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::EQ] << '\t'); 2592 else 2593 DEBUG(dbgs() << "-inf\t"); 2594 if (Bound[Level].Upper[Dependence::DVEntry::EQ]) 2595 DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::EQ] << '\n'); 2596 else 2597 DEBUG(dbgs() << "+inf\n"); 2598 DEBUG(dbgs() << "\t >\t"); 2599 if (Bound[Level].Lower[Dependence::DVEntry::GT]) 2600 DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::GT] << '\t'); 2601 else 2602 DEBUG(dbgs() << "-inf\t"); 2603 if (Bound[Level].Upper[Dependence::DVEntry::GT]) 2604 DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::GT] << '\n'); 2605 else 2606 DEBUG(dbgs() << "+inf\n"); 2607 #endif 2608 } 2609 2610 unsigned NewDeps = 0; 2611 2612 // test bounds for <, *, *, ... 2613 if (testBounds(Dependence::DVEntry::LT, Level, Bound, Delta)) 2614 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2615 Loops, DepthExpanded, Delta); 2616 2617 // Test bounds for =, *, *, ... 2618 if (testBounds(Dependence::DVEntry::EQ, Level, Bound, Delta)) 2619 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2620 Loops, DepthExpanded, Delta); 2621 2622 // test bounds for >, *, *, ... 2623 if (testBounds(Dependence::DVEntry::GT, Level, Bound, Delta)) 2624 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2625 Loops, DepthExpanded, Delta); 2626 2627 Bound[Level].Direction = Dependence::DVEntry::ALL; 2628 return NewDeps; 2629 } 2630 else 2631 return exploreDirections(Level + 1, A, B, Bound, Loops, DepthExpanded, Delta); 2632 } 2633 2634 2635 // Returns true iff the current bounds are plausible. 2636 bool DependenceInfo::testBounds(unsigned char DirKind, unsigned Level, 2637 BoundInfo *Bound, const SCEV *Delta) const { 2638 Bound[Level].Direction = DirKind; 2639 if (const SCEV *LowerBound = getLowerBound(Bound)) 2640 if (isKnownPredicate(CmpInst::ICMP_SGT, LowerBound, Delta)) 2641 return false; 2642 if (const SCEV *UpperBound = getUpperBound(Bound)) 2643 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, UpperBound)) 2644 return false; 2645 return true; 2646 } 2647 2648 2649 // Computes the upper and lower bounds for level K 2650 // using the * direction. Records them in Bound. 2651 // Wolfe gives the equations 2652 // 2653 // LB^*_k = (A^-_k - B^+_k)(U_k - L_k) + (A_k - B_k)L_k 2654 // UB^*_k = (A^+_k - B^-_k)(U_k - L_k) + (A_k - B_k)L_k 2655 // 2656 // Since we normalize loops, we can simplify these equations to 2657 // 2658 // LB^*_k = (A^-_k - B^+_k)U_k 2659 // UB^*_k = (A^+_k - B^-_k)U_k 2660 // 2661 // We must be careful to handle the case where the upper bound is unknown. 2662 // Note that the lower bound is always <= 0 2663 // and the upper bound is always >= 0. 2664 void DependenceInfo::findBoundsALL(CoefficientInfo *A, CoefficientInfo *B, 2665 BoundInfo *Bound, unsigned K) const { 2666 Bound[K].Lower[Dependence::DVEntry::ALL] = nullptr; // Default value = -infinity. 2667 Bound[K].Upper[Dependence::DVEntry::ALL] = nullptr; // Default value = +infinity. 2668 if (Bound[K].Iterations) { 2669 Bound[K].Lower[Dependence::DVEntry::ALL] = 2670 SE->getMulExpr(SE->getMinusSCEV(A[K].NegPart, B[K].PosPart), 2671 Bound[K].Iterations); 2672 Bound[K].Upper[Dependence::DVEntry::ALL] = 2673 SE->getMulExpr(SE->getMinusSCEV(A[K].PosPart, B[K].NegPart), 2674 Bound[K].Iterations); 2675 } 2676 else { 2677 // If the difference is 0, we won't need to know the number of iterations. 2678 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].NegPart, B[K].PosPart)) 2679 Bound[K].Lower[Dependence::DVEntry::ALL] = 2680 SE->getZero(A[K].Coeff->getType()); 2681 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].PosPart, B[K].NegPart)) 2682 Bound[K].Upper[Dependence::DVEntry::ALL] = 2683 SE->getZero(A[K].Coeff->getType()); 2684 } 2685 } 2686 2687 2688 // Computes the upper and lower bounds for level K 2689 // using the = direction. Records them in Bound. 2690 // Wolfe gives the equations 2691 // 2692 // LB^=_k = (A_k - B_k)^- (U_k - L_k) + (A_k - B_k)L_k 2693 // UB^=_k = (A_k - B_k)^+ (U_k - L_k) + (A_k - B_k)L_k 2694 // 2695 // Since we normalize loops, we can simplify these equations to 2696 // 2697 // LB^=_k = (A_k - B_k)^- U_k 2698 // UB^=_k = (A_k - B_k)^+ U_k 2699 // 2700 // We must be careful to handle the case where the upper bound is unknown. 2701 // Note that the lower bound is always <= 0 2702 // and the upper bound is always >= 0. 2703 void DependenceInfo::findBoundsEQ(CoefficientInfo *A, CoefficientInfo *B, 2704 BoundInfo *Bound, unsigned K) const { 2705 Bound[K].Lower[Dependence::DVEntry::EQ] = nullptr; // Default value = -infinity. 2706 Bound[K].Upper[Dependence::DVEntry::EQ] = nullptr; // Default value = +infinity. 2707 if (Bound[K].Iterations) { 2708 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); 2709 const SCEV *NegativePart = getNegativePart(Delta); 2710 Bound[K].Lower[Dependence::DVEntry::EQ] = 2711 SE->getMulExpr(NegativePart, Bound[K].Iterations); 2712 const SCEV *PositivePart = getPositivePart(Delta); 2713 Bound[K].Upper[Dependence::DVEntry::EQ] = 2714 SE->getMulExpr(PositivePart, Bound[K].Iterations); 2715 } 2716 else { 2717 // If the positive/negative part of the difference is 0, 2718 // we won't need to know the number of iterations. 2719 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); 2720 const SCEV *NegativePart = getNegativePart(Delta); 2721 if (NegativePart->isZero()) 2722 Bound[K].Lower[Dependence::DVEntry::EQ] = NegativePart; // Zero 2723 const SCEV *PositivePart = getPositivePart(Delta); 2724 if (PositivePart->isZero()) 2725 Bound[K].Upper[Dependence::DVEntry::EQ] = PositivePart; // Zero 2726 } 2727 } 2728 2729 2730 // Computes the upper and lower bounds for level K 2731 // using the < direction. Records them in Bound. 2732 // Wolfe gives the equations 2733 // 2734 // LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2735 // UB^<_k = (A^+_k - B_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2736 // 2737 // Since we normalize loops, we can simplify these equations to 2738 // 2739 // LB^<_k = (A^-_k - B_k)^- (U_k - 1) - B_k 2740 // UB^<_k = (A^+_k - B_k)^+ (U_k - 1) - B_k 2741 // 2742 // We must be careful to handle the case where the upper bound is unknown. 2743 void DependenceInfo::findBoundsLT(CoefficientInfo *A, CoefficientInfo *B, 2744 BoundInfo *Bound, unsigned K) const { 2745 Bound[K].Lower[Dependence::DVEntry::LT] = nullptr; // Default value = -infinity. 2746 Bound[K].Upper[Dependence::DVEntry::LT] = nullptr; // Default value = +infinity. 2747 if (Bound[K].Iterations) { 2748 const SCEV *Iter_1 = SE->getMinusSCEV( 2749 Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType())); 2750 const SCEV *NegPart = 2751 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); 2752 Bound[K].Lower[Dependence::DVEntry::LT] = 2753 SE->getMinusSCEV(SE->getMulExpr(NegPart, Iter_1), B[K].Coeff); 2754 const SCEV *PosPart = 2755 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); 2756 Bound[K].Upper[Dependence::DVEntry::LT] = 2757 SE->getMinusSCEV(SE->getMulExpr(PosPart, Iter_1), B[K].Coeff); 2758 } 2759 else { 2760 // If the positive/negative part of the difference is 0, 2761 // we won't need to know the number of iterations. 2762 const SCEV *NegPart = 2763 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); 2764 if (NegPart->isZero()) 2765 Bound[K].Lower[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); 2766 const SCEV *PosPart = 2767 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); 2768 if (PosPart->isZero()) 2769 Bound[K].Upper[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); 2770 } 2771 } 2772 2773 2774 // Computes the upper and lower bounds for level K 2775 // using the > direction. Records them in Bound. 2776 // Wolfe gives the equations 2777 // 2778 // LB^>_k = (A_k - B^+_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k 2779 // UB^>_k = (A_k - B^-_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k 2780 // 2781 // Since we normalize loops, we can simplify these equations to 2782 // 2783 // LB^>_k = (A_k - B^+_k)^- (U_k - 1) + A_k 2784 // UB^>_k = (A_k - B^-_k)^+ (U_k - 1) + A_k 2785 // 2786 // We must be careful to handle the case where the upper bound is unknown. 2787 void DependenceInfo::findBoundsGT(CoefficientInfo *A, CoefficientInfo *B, 2788 BoundInfo *Bound, unsigned K) const { 2789 Bound[K].Lower[Dependence::DVEntry::GT] = nullptr; // Default value = -infinity. 2790 Bound[K].Upper[Dependence::DVEntry::GT] = nullptr; // Default value = +infinity. 2791 if (Bound[K].Iterations) { 2792 const SCEV *Iter_1 = SE->getMinusSCEV( 2793 Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType())); 2794 const SCEV *NegPart = 2795 getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); 2796 Bound[K].Lower[Dependence::DVEntry::GT] = 2797 SE->getAddExpr(SE->getMulExpr(NegPart, Iter_1), A[K].Coeff); 2798 const SCEV *PosPart = 2799 getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); 2800 Bound[K].Upper[Dependence::DVEntry::GT] = 2801 SE->getAddExpr(SE->getMulExpr(PosPart, Iter_1), A[K].Coeff); 2802 } 2803 else { 2804 // If the positive/negative part of the difference is 0, 2805 // we won't need to know the number of iterations. 2806 const SCEV *NegPart = getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); 2807 if (NegPart->isZero()) 2808 Bound[K].Lower[Dependence::DVEntry::GT] = A[K].Coeff; 2809 const SCEV *PosPart = getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); 2810 if (PosPart->isZero()) 2811 Bound[K].Upper[Dependence::DVEntry::GT] = A[K].Coeff; 2812 } 2813 } 2814 2815 2816 // X^+ = max(X, 0) 2817 const SCEV *DependenceInfo::getPositivePart(const SCEV *X) const { 2818 return SE->getSMaxExpr(X, SE->getZero(X->getType())); 2819 } 2820 2821 2822 // X^- = min(X, 0) 2823 const SCEV *DependenceInfo::getNegativePart(const SCEV *X) const { 2824 return SE->getSMinExpr(X, SE->getZero(X->getType())); 2825 } 2826 2827 2828 // Walks through the subscript, 2829 // collecting each coefficient, the associated loop bounds, 2830 // and recording its positive and negative parts for later use. 2831 DependenceInfo::CoefficientInfo * 2832 DependenceInfo::collectCoeffInfo(const SCEV *Subscript, bool SrcFlag, 2833 const SCEV *&Constant) const { 2834 const SCEV *Zero = SE->getZero(Subscript->getType()); 2835 CoefficientInfo *CI = new CoefficientInfo[MaxLevels + 1]; 2836 for (unsigned K = 1; K <= MaxLevels; ++K) { 2837 CI[K].Coeff = Zero; 2838 CI[K].PosPart = Zero; 2839 CI[K].NegPart = Zero; 2840 CI[K].Iterations = nullptr; 2841 } 2842 while (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Subscript)) { 2843 const Loop *L = AddRec->getLoop(); 2844 unsigned K = SrcFlag ? mapSrcLoop(L) : mapDstLoop(L); 2845 CI[K].Coeff = AddRec->getStepRecurrence(*SE); 2846 CI[K].PosPart = getPositivePart(CI[K].Coeff); 2847 CI[K].NegPart = getNegativePart(CI[K].Coeff); 2848 CI[K].Iterations = collectUpperBound(L, Subscript->getType()); 2849 Subscript = AddRec->getStart(); 2850 } 2851 Constant = Subscript; 2852 #ifndef NDEBUG 2853 DEBUG(dbgs() << "\tCoefficient Info\n"); 2854 for (unsigned K = 1; K <= MaxLevels; ++K) { 2855 DEBUG(dbgs() << "\t " << K << "\t" << *CI[K].Coeff); 2856 DEBUG(dbgs() << "\tPos Part = "); 2857 DEBUG(dbgs() << *CI[K].PosPart); 2858 DEBUG(dbgs() << "\tNeg Part = "); 2859 DEBUG(dbgs() << *CI[K].NegPart); 2860 DEBUG(dbgs() << "\tUpper Bound = "); 2861 if (CI[K].Iterations) 2862 DEBUG(dbgs() << *CI[K].Iterations); 2863 else 2864 DEBUG(dbgs() << "+inf"); 2865 DEBUG(dbgs() << '\n'); 2866 } 2867 DEBUG(dbgs() << "\t Constant = " << *Subscript << '\n'); 2868 #endif 2869 return CI; 2870 } 2871 2872 2873 // Looks through all the bounds info and 2874 // computes the lower bound given the current direction settings 2875 // at each level. If the lower bound for any level is -inf, 2876 // the result is -inf. 2877 const SCEV *DependenceInfo::getLowerBound(BoundInfo *Bound) const { 2878 const SCEV *Sum = Bound[1].Lower[Bound[1].Direction]; 2879 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { 2880 if (Bound[K].Lower[Bound[K].Direction]) 2881 Sum = SE->getAddExpr(Sum, Bound[K].Lower[Bound[K].Direction]); 2882 else 2883 Sum = nullptr; 2884 } 2885 return Sum; 2886 } 2887 2888 2889 // Looks through all the bounds info and 2890 // computes the upper bound given the current direction settings 2891 // at each level. If the upper bound at any level is +inf, 2892 // the result is +inf. 2893 const SCEV *DependenceInfo::getUpperBound(BoundInfo *Bound) const { 2894 const SCEV *Sum = Bound[1].Upper[Bound[1].Direction]; 2895 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { 2896 if (Bound[K].Upper[Bound[K].Direction]) 2897 Sum = SE->getAddExpr(Sum, Bound[K].Upper[Bound[K].Direction]); 2898 else 2899 Sum = nullptr; 2900 } 2901 return Sum; 2902 } 2903 2904 2905 //===----------------------------------------------------------------------===// 2906 // Constraint manipulation for Delta test. 2907 2908 // Given a linear SCEV, 2909 // return the coefficient (the step) 2910 // corresponding to the specified loop. 2911 // If there isn't one, return 0. 2912 // For example, given a*i + b*j + c*k, finding the coefficient 2913 // corresponding to the j loop would yield b. 2914 const SCEV *DependenceInfo::findCoefficient(const SCEV *Expr, 2915 const Loop *TargetLoop) const { 2916 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 2917 if (!AddRec) 2918 return SE->getZero(Expr->getType()); 2919 if (AddRec->getLoop() == TargetLoop) 2920 return AddRec->getStepRecurrence(*SE); 2921 return findCoefficient(AddRec->getStart(), TargetLoop); 2922 } 2923 2924 2925 // Given a linear SCEV, 2926 // return the SCEV given by zeroing out the coefficient 2927 // corresponding to the specified loop. 2928 // For example, given a*i + b*j + c*k, zeroing the coefficient 2929 // corresponding to the j loop would yield a*i + c*k. 2930 const SCEV *DependenceInfo::zeroCoefficient(const SCEV *Expr, 2931 const Loop *TargetLoop) const { 2932 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 2933 if (!AddRec) 2934 return Expr; // ignore 2935 if (AddRec->getLoop() == TargetLoop) 2936 return AddRec->getStart(); 2937 return SE->getAddRecExpr(zeroCoefficient(AddRec->getStart(), TargetLoop), 2938 AddRec->getStepRecurrence(*SE), 2939 AddRec->getLoop(), 2940 AddRec->getNoWrapFlags()); 2941 } 2942 2943 2944 // Given a linear SCEV Expr, 2945 // return the SCEV given by adding some Value to the 2946 // coefficient corresponding to the specified TargetLoop. 2947 // For example, given a*i + b*j + c*k, adding 1 to the coefficient 2948 // corresponding to the j loop would yield a*i + (b+1)*j + c*k. 2949 const SCEV *DependenceInfo::addToCoefficient(const SCEV *Expr, 2950 const Loop *TargetLoop, 2951 const SCEV *Value) const { 2952 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 2953 if (!AddRec) // create a new addRec 2954 return SE->getAddRecExpr(Expr, 2955 Value, 2956 TargetLoop, 2957 SCEV::FlagAnyWrap); // Worst case, with no info. 2958 if (AddRec->getLoop() == TargetLoop) { 2959 const SCEV *Sum = SE->getAddExpr(AddRec->getStepRecurrence(*SE), Value); 2960 if (Sum->isZero()) 2961 return AddRec->getStart(); 2962 return SE->getAddRecExpr(AddRec->getStart(), 2963 Sum, 2964 AddRec->getLoop(), 2965 AddRec->getNoWrapFlags()); 2966 } 2967 if (SE->isLoopInvariant(AddRec, TargetLoop)) 2968 return SE->getAddRecExpr(AddRec, Value, TargetLoop, SCEV::FlagAnyWrap); 2969 return SE->getAddRecExpr( 2970 addToCoefficient(AddRec->getStart(), TargetLoop, Value), 2971 AddRec->getStepRecurrence(*SE), AddRec->getLoop(), 2972 AddRec->getNoWrapFlags()); 2973 } 2974 2975 2976 // Review the constraints, looking for opportunities 2977 // to simplify a subscript pair (Src and Dst). 2978 // Return true if some simplification occurs. 2979 // If the simplification isn't exact (that is, if it is conservative 2980 // in terms of dependence), set consistent to false. 2981 // Corresponds to Figure 5 from the paper 2982 // 2983 // Practical Dependence Testing 2984 // Goff, Kennedy, Tseng 2985 // PLDI 1991 2986 bool DependenceInfo::propagate(const SCEV *&Src, const SCEV *&Dst, 2987 SmallBitVector &Loops, 2988 SmallVectorImpl<Constraint> &Constraints, 2989 bool &Consistent) { 2990 bool Result = false; 2991 for (int LI = Loops.find_first(); LI >= 0; LI = Loops.find_next(LI)) { 2992 DEBUG(dbgs() << "\t Constraint[" << LI << "] is"); 2993 DEBUG(Constraints[LI].dump(dbgs())); 2994 if (Constraints[LI].isDistance()) 2995 Result |= propagateDistance(Src, Dst, Constraints[LI], Consistent); 2996 else if (Constraints[LI].isLine()) 2997 Result |= propagateLine(Src, Dst, Constraints[LI], Consistent); 2998 else if (Constraints[LI].isPoint()) 2999 Result |= propagatePoint(Src, Dst, Constraints[LI]); 3000 } 3001 return Result; 3002 } 3003 3004 3005 // Attempt to propagate a distance 3006 // constraint into a subscript pair (Src and Dst). 3007 // Return true if some simplification occurs. 3008 // If the simplification isn't exact (that is, if it is conservative 3009 // in terms of dependence), set consistent to false. 3010 bool DependenceInfo::propagateDistance(const SCEV *&Src, const SCEV *&Dst, 3011 Constraint &CurConstraint, 3012 bool &Consistent) { 3013 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3014 DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); 3015 const SCEV *A_K = findCoefficient(Src, CurLoop); 3016 if (A_K->isZero()) 3017 return false; 3018 const SCEV *DA_K = SE->getMulExpr(A_K, CurConstraint.getD()); 3019 Src = SE->getMinusSCEV(Src, DA_K); 3020 Src = zeroCoefficient(Src, CurLoop); 3021 DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); 3022 DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); 3023 Dst = addToCoefficient(Dst, CurLoop, SE->getNegativeSCEV(A_K)); 3024 DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); 3025 if (!findCoefficient(Dst, CurLoop)->isZero()) 3026 Consistent = false; 3027 return true; 3028 } 3029 3030 3031 // Attempt to propagate a line 3032 // constraint into a subscript pair (Src and Dst). 3033 // Return true if some simplification occurs. 3034 // If the simplification isn't exact (that is, if it is conservative 3035 // in terms of dependence), set consistent to false. 3036 bool DependenceInfo::propagateLine(const SCEV *&Src, const SCEV *&Dst, 3037 Constraint &CurConstraint, 3038 bool &Consistent) { 3039 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3040 const SCEV *A = CurConstraint.getA(); 3041 const SCEV *B = CurConstraint.getB(); 3042 const SCEV *C = CurConstraint.getC(); 3043 DEBUG(dbgs() << "\t\tA = " << *A << ", B = " << *B << ", C = " << *C << "\n"); 3044 DEBUG(dbgs() << "\t\tSrc = " << *Src << "\n"); 3045 DEBUG(dbgs() << "\t\tDst = " << *Dst << "\n"); 3046 if (A->isZero()) { 3047 const SCEVConstant *Bconst = dyn_cast<SCEVConstant>(B); 3048 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3049 if (!Bconst || !Cconst) return false; 3050 APInt Beta = Bconst->getAPInt(); 3051 APInt Charlie = Cconst->getAPInt(); 3052 APInt CdivB = Charlie.sdiv(Beta); 3053 assert(Charlie.srem(Beta) == 0 && "C should be evenly divisible by B"); 3054 const SCEV *AP_K = findCoefficient(Dst, CurLoop); 3055 // Src = SE->getAddExpr(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); 3056 Src = SE->getMinusSCEV(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); 3057 Dst = zeroCoefficient(Dst, CurLoop); 3058 if (!findCoefficient(Src, CurLoop)->isZero()) 3059 Consistent = false; 3060 } 3061 else if (B->isZero()) { 3062 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); 3063 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3064 if (!Aconst || !Cconst) return false; 3065 APInt Alpha = Aconst->getAPInt(); 3066 APInt Charlie = Cconst->getAPInt(); 3067 APInt CdivA = Charlie.sdiv(Alpha); 3068 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); 3069 const SCEV *A_K = findCoefficient(Src, CurLoop); 3070 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); 3071 Src = zeroCoefficient(Src, CurLoop); 3072 if (!findCoefficient(Dst, CurLoop)->isZero()) 3073 Consistent = false; 3074 } 3075 else if (isKnownPredicate(CmpInst::ICMP_EQ, A, B)) { 3076 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); 3077 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3078 if (!Aconst || !Cconst) return false; 3079 APInt Alpha = Aconst->getAPInt(); 3080 APInt Charlie = Cconst->getAPInt(); 3081 APInt CdivA = Charlie.sdiv(Alpha); 3082 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); 3083 const SCEV *A_K = findCoefficient(Src, CurLoop); 3084 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); 3085 Src = zeroCoefficient(Src, CurLoop); 3086 Dst = addToCoefficient(Dst, CurLoop, A_K); 3087 if (!findCoefficient(Dst, CurLoop)->isZero()) 3088 Consistent = false; 3089 } 3090 else { 3091 // paper is incorrect here, or perhaps just misleading 3092 const SCEV *A_K = findCoefficient(Src, CurLoop); 3093 Src = SE->getMulExpr(Src, A); 3094 Dst = SE->getMulExpr(Dst, A); 3095 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, C)); 3096 Src = zeroCoefficient(Src, CurLoop); 3097 Dst = addToCoefficient(Dst, CurLoop, SE->getMulExpr(A_K, B)); 3098 if (!findCoefficient(Dst, CurLoop)->isZero()) 3099 Consistent = false; 3100 } 3101 DEBUG(dbgs() << "\t\tnew Src = " << *Src << "\n"); 3102 DEBUG(dbgs() << "\t\tnew Dst = " << *Dst << "\n"); 3103 return true; 3104 } 3105 3106 3107 // Attempt to propagate a point 3108 // constraint into a subscript pair (Src and Dst). 3109 // Return true if some simplification occurs. 3110 bool DependenceInfo::propagatePoint(const SCEV *&Src, const SCEV *&Dst, 3111 Constraint &CurConstraint) { 3112 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3113 const SCEV *A_K = findCoefficient(Src, CurLoop); 3114 const SCEV *AP_K = findCoefficient(Dst, CurLoop); 3115 const SCEV *XA_K = SE->getMulExpr(A_K, CurConstraint.getX()); 3116 const SCEV *YAP_K = SE->getMulExpr(AP_K, CurConstraint.getY()); 3117 DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); 3118 Src = SE->getAddExpr(Src, SE->getMinusSCEV(XA_K, YAP_K)); 3119 Src = zeroCoefficient(Src, CurLoop); 3120 DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); 3121 DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); 3122 Dst = zeroCoefficient(Dst, CurLoop); 3123 DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); 3124 return true; 3125 } 3126 3127 3128 // Update direction vector entry based on the current constraint. 3129 void DependenceInfo::updateDirection(Dependence::DVEntry &Level, 3130 const Constraint &CurConstraint) const { 3131 DEBUG(dbgs() << "\tUpdate direction, constraint ="); 3132 DEBUG(CurConstraint.dump(dbgs())); 3133 if (CurConstraint.isAny()) 3134 ; // use defaults 3135 else if (CurConstraint.isDistance()) { 3136 // this one is consistent, the others aren't 3137 Level.Scalar = false; 3138 Level.Distance = CurConstraint.getD(); 3139 unsigned NewDirection = Dependence::DVEntry::NONE; 3140 if (!SE->isKnownNonZero(Level.Distance)) // if may be zero 3141 NewDirection = Dependence::DVEntry::EQ; 3142 if (!SE->isKnownNonPositive(Level.Distance)) // if may be positive 3143 NewDirection |= Dependence::DVEntry::LT; 3144 if (!SE->isKnownNonNegative(Level.Distance)) // if may be negative 3145 NewDirection |= Dependence::DVEntry::GT; 3146 Level.Direction &= NewDirection; 3147 } 3148 else if (CurConstraint.isLine()) { 3149 Level.Scalar = false; 3150 Level.Distance = nullptr; 3151 // direction should be accurate 3152 } 3153 else if (CurConstraint.isPoint()) { 3154 Level.Scalar = false; 3155 Level.Distance = nullptr; 3156 unsigned NewDirection = Dependence::DVEntry::NONE; 3157 if (!isKnownPredicate(CmpInst::ICMP_NE, 3158 CurConstraint.getY(), 3159 CurConstraint.getX())) 3160 // if X may be = Y 3161 NewDirection |= Dependence::DVEntry::EQ; 3162 if (!isKnownPredicate(CmpInst::ICMP_SLE, 3163 CurConstraint.getY(), 3164 CurConstraint.getX())) 3165 // if Y may be > X 3166 NewDirection |= Dependence::DVEntry::LT; 3167 if (!isKnownPredicate(CmpInst::ICMP_SGE, 3168 CurConstraint.getY(), 3169 CurConstraint.getX())) 3170 // if Y may be < X 3171 NewDirection |= Dependence::DVEntry::GT; 3172 Level.Direction &= NewDirection; 3173 } 3174 else 3175 llvm_unreachable("constraint has unexpected kind"); 3176 } 3177 3178 /// Check if we can delinearize the subscripts. If the SCEVs representing the 3179 /// source and destination array references are recurrences on a nested loop, 3180 /// this function flattens the nested recurrences into separate recurrences 3181 /// for each loop level. 3182 bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst, 3183 SmallVectorImpl<Subscript> &Pair) { 3184 Value *SrcPtr = getPointerOperand(Src); 3185 Value *DstPtr = getPointerOperand(Dst); 3186 3187 Loop *SrcLoop = LI->getLoopFor(Src->getParent()); 3188 Loop *DstLoop = LI->getLoopFor(Dst->getParent()); 3189 3190 // Below code mimics the code in Delinearization.cpp 3191 const SCEV *SrcAccessFn = 3192 SE->getSCEVAtScope(SrcPtr, SrcLoop); 3193 const SCEV *DstAccessFn = 3194 SE->getSCEVAtScope(DstPtr, DstLoop); 3195 3196 const SCEVUnknown *SrcBase = 3197 dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn)); 3198 const SCEVUnknown *DstBase = 3199 dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn)); 3200 3201 if (!SrcBase || !DstBase || SrcBase != DstBase) 3202 return false; 3203 3204 const SCEV *ElementSize = SE->getElementSize(Src); 3205 if (ElementSize != SE->getElementSize(Dst)) 3206 return false; 3207 3208 const SCEV *SrcSCEV = SE->getMinusSCEV(SrcAccessFn, SrcBase); 3209 const SCEV *DstSCEV = SE->getMinusSCEV(DstAccessFn, DstBase); 3210 3211 const SCEVAddRecExpr *SrcAR = dyn_cast<SCEVAddRecExpr>(SrcSCEV); 3212 const SCEVAddRecExpr *DstAR = dyn_cast<SCEVAddRecExpr>(DstSCEV); 3213 if (!SrcAR || !DstAR || !SrcAR->isAffine() || !DstAR->isAffine()) 3214 return false; 3215 3216 // First step: collect parametric terms in both array references. 3217 SmallVector<const SCEV *, 4> Terms; 3218 SE->collectParametricTerms(SrcAR, Terms); 3219 SE->collectParametricTerms(DstAR, Terms); 3220 3221 // Second step: find subscript sizes. 3222 SmallVector<const SCEV *, 4> Sizes; 3223 SE->findArrayDimensions(Terms, Sizes, ElementSize); 3224 3225 // Third step: compute the access functions for each subscript. 3226 SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts; 3227 SE->computeAccessFunctions(SrcAR, SrcSubscripts, Sizes); 3228 SE->computeAccessFunctions(DstAR, DstSubscripts, Sizes); 3229 3230 // Fail when there is only a subscript: that's a linearized access function. 3231 if (SrcSubscripts.size() < 2 || DstSubscripts.size() < 2 || 3232 SrcSubscripts.size() != DstSubscripts.size()) 3233 return false; 3234 3235 int size = SrcSubscripts.size(); 3236 3237 DEBUG({ 3238 dbgs() << "\nSrcSubscripts: "; 3239 for (int i = 0; i < size; i++) 3240 dbgs() << *SrcSubscripts[i]; 3241 dbgs() << "\nDstSubscripts: "; 3242 for (int i = 0; i < size; i++) 3243 dbgs() << *DstSubscripts[i]; 3244 }); 3245 3246 // The delinearization transforms a single-subscript MIV dependence test into 3247 // a multi-subscript SIV dependence test that is easier to compute. So we 3248 // resize Pair to contain as many pairs of subscripts as the delinearization 3249 // has found, and then initialize the pairs following the delinearization. 3250 Pair.resize(size); 3251 for (int i = 0; i < size; ++i) { 3252 Pair[i].Src = SrcSubscripts[i]; 3253 Pair[i].Dst = DstSubscripts[i]; 3254 unifySubscriptType(&Pair[i]); 3255 3256 // FIXME: we should record the bounds SrcSizes[i] and DstSizes[i] that the 3257 // delinearization has found, and add these constraints to the dependence 3258 // check to avoid memory accesses overflow from one dimension into another. 3259 // This is related to the problem of determining the existence of data 3260 // dependences in array accesses using a different number of subscripts: in 3261 // C one can access an array A[100][100]; as A[0][9999], *A[9999], etc. 3262 } 3263 3264 return true; 3265 } 3266 3267 //===----------------------------------------------------------------------===// 3268 3269 #ifndef NDEBUG 3270 // For debugging purposes, dump a small bit vector to dbgs(). 3271 static void dumpSmallBitVector(SmallBitVector &BV) { 3272 dbgs() << "{"; 3273 for (int VI = BV.find_first(); VI >= 0; VI = BV.find_next(VI)) { 3274 dbgs() << VI; 3275 if (BV.find_next(VI) >= 0) 3276 dbgs() << ' '; 3277 } 3278 dbgs() << "}\n"; 3279 } 3280 #endif 3281 3282 // depends - 3283 // Returns NULL if there is no dependence. 3284 // Otherwise, return a Dependence with as many details as possible. 3285 // Corresponds to Section 3.1 in the paper 3286 // 3287 // Practical Dependence Testing 3288 // Goff, Kennedy, Tseng 3289 // PLDI 1991 3290 // 3291 // Care is required to keep the routine below, getSplitIteration(), 3292 // up to date with respect to this routine. 3293 std::unique_ptr<Dependence> 3294 DependenceInfo::depends(Instruction *Src, Instruction *Dst, 3295 bool PossiblyLoopIndependent) { 3296 if (Src == Dst) 3297 PossiblyLoopIndependent = false; 3298 3299 if ((!Src->mayReadFromMemory() && !Src->mayWriteToMemory()) || 3300 (!Dst->mayReadFromMemory() && !Dst->mayWriteToMemory())) 3301 // if both instructions don't reference memory, there's no dependence 3302 return nullptr; 3303 3304 if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) { 3305 // can only analyze simple loads and stores, i.e., no calls, invokes, etc. 3306 DEBUG(dbgs() << "can only handle simple loads and stores\n"); 3307 return make_unique<Dependence>(Src, Dst); 3308 } 3309 3310 Value *SrcPtr = getPointerOperand(Src); 3311 Value *DstPtr = getPointerOperand(Dst); 3312 3313 switch (underlyingObjectsAlias(AA, F->getParent()->getDataLayout(), DstPtr, 3314 SrcPtr)) { 3315 case MayAlias: 3316 case PartialAlias: 3317 // cannot analyse objects if we don't understand their aliasing. 3318 DEBUG(dbgs() << "can't analyze may or partial alias\n"); 3319 return make_unique<Dependence>(Src, Dst); 3320 case NoAlias: 3321 // If the objects noalias, they are distinct, accesses are independent. 3322 DEBUG(dbgs() << "no alias\n"); 3323 return nullptr; 3324 case MustAlias: 3325 break; // The underlying objects alias; test accesses for dependence. 3326 } 3327 3328 // establish loop nesting levels 3329 establishNestingLevels(Src, Dst); 3330 DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n"); 3331 DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n"); 3332 3333 FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels); 3334 ++TotalArrayPairs; 3335 3336 // See if there are GEPs we can use. 3337 bool UsefulGEP = false; 3338 GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr); 3339 GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr); 3340 if (SrcGEP && DstGEP && 3341 SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()) { 3342 const SCEV *SrcPtrSCEV = SE->getSCEV(SrcGEP->getPointerOperand()); 3343 const SCEV *DstPtrSCEV = SE->getSCEV(DstGEP->getPointerOperand()); 3344 DEBUG(dbgs() << " SrcPtrSCEV = " << *SrcPtrSCEV << "\n"); 3345 DEBUG(dbgs() << " DstPtrSCEV = " << *DstPtrSCEV << "\n"); 3346 3347 UsefulGEP = isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) && 3348 isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent())) && 3349 (SrcGEP->getNumOperands() == DstGEP->getNumOperands()); 3350 } 3351 unsigned Pairs = UsefulGEP ? SrcGEP->idx_end() - SrcGEP->idx_begin() : 1; 3352 SmallVector<Subscript, 4> Pair(Pairs); 3353 if (UsefulGEP) { 3354 DEBUG(dbgs() << " using GEPs\n"); 3355 unsigned P = 0; 3356 for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(), 3357 SrcEnd = SrcGEP->idx_end(), 3358 DstIdx = DstGEP->idx_begin(); 3359 SrcIdx != SrcEnd; 3360 ++SrcIdx, ++DstIdx, ++P) { 3361 Pair[P].Src = SE->getSCEV(*SrcIdx); 3362 Pair[P].Dst = SE->getSCEV(*DstIdx); 3363 unifySubscriptType(&Pair[P]); 3364 } 3365 } 3366 else { 3367 DEBUG(dbgs() << " ignoring GEPs\n"); 3368 const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); 3369 const SCEV *DstSCEV = SE->getSCEV(DstPtr); 3370 DEBUG(dbgs() << " SrcSCEV = " << *SrcSCEV << "\n"); 3371 DEBUG(dbgs() << " DstSCEV = " << *DstSCEV << "\n"); 3372 Pair[0].Src = SrcSCEV; 3373 Pair[0].Dst = DstSCEV; 3374 } 3375 3376 if (Delinearize && CommonLevels > 1) { 3377 if (tryDelinearize(Src, Dst, Pair)) { 3378 DEBUG(dbgs() << " delinerized GEP\n"); 3379 Pairs = Pair.size(); 3380 } 3381 } 3382 3383 for (unsigned P = 0; P < Pairs; ++P) { 3384 Pair[P].Loops.resize(MaxLevels + 1); 3385 Pair[P].GroupLoops.resize(MaxLevels + 1); 3386 Pair[P].Group.resize(Pairs); 3387 removeMatchingExtensions(&Pair[P]); 3388 Pair[P].Classification = 3389 classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), 3390 Pair[P].Dst, LI->getLoopFor(Dst->getParent()), 3391 Pair[P].Loops); 3392 Pair[P].GroupLoops = Pair[P].Loops; 3393 Pair[P].Group.set(P); 3394 DEBUG(dbgs() << " subscript " << P << "\n"); 3395 DEBUG(dbgs() << "\tsrc = " << *Pair[P].Src << "\n"); 3396 DEBUG(dbgs() << "\tdst = " << *Pair[P].Dst << "\n"); 3397 DEBUG(dbgs() << "\tclass = " << Pair[P].Classification << "\n"); 3398 DEBUG(dbgs() << "\tloops = "); 3399 DEBUG(dumpSmallBitVector(Pair[P].Loops)); 3400 } 3401 3402 SmallBitVector Separable(Pairs); 3403 SmallBitVector Coupled(Pairs); 3404 3405 // Partition subscripts into separable and minimally-coupled groups 3406 // Algorithm in paper is algorithmically better; 3407 // this may be faster in practice. Check someday. 3408 // 3409 // Here's an example of how it works. Consider this code: 3410 // 3411 // for (i = ...) { 3412 // for (j = ...) { 3413 // for (k = ...) { 3414 // for (l = ...) { 3415 // for (m = ...) { 3416 // A[i][j][k][m] = ...; 3417 // ... = A[0][j][l][i + j]; 3418 // } 3419 // } 3420 // } 3421 // } 3422 // } 3423 // 3424 // There are 4 subscripts here: 3425 // 0 [i] and [0] 3426 // 1 [j] and [j] 3427 // 2 [k] and [l] 3428 // 3 [m] and [i + j] 3429 // 3430 // We've already classified each subscript pair as ZIV, SIV, etc., 3431 // and collected all the loops mentioned by pair P in Pair[P].Loops. 3432 // In addition, we've initialized Pair[P].GroupLoops to Pair[P].Loops 3433 // and set Pair[P].Group = {P}. 3434 // 3435 // Src Dst Classification Loops GroupLoops Group 3436 // 0 [i] [0] SIV {1} {1} {0} 3437 // 1 [j] [j] SIV {2} {2} {1} 3438 // 2 [k] [l] RDIV {3,4} {3,4} {2} 3439 // 3 [m] [i + j] MIV {1,2,5} {1,2,5} {3} 3440 // 3441 // For each subscript SI 0 .. 3, we consider each remaining subscript, SJ. 3442 // So, 0 is compared against 1, 2, and 3; 1 is compared against 2 and 3, etc. 3443 // 3444 // We begin by comparing 0 and 1. The intersection of the GroupLoops is empty. 3445 // Next, 0 and 2. Again, the intersection of their GroupLoops is empty. 3446 // Next 0 and 3. The intersection of their GroupLoop = {1}, not empty, 3447 // so Pair[3].Group = {0,3} and Done = false (that is, 0 will not be added 3448 // to either Separable or Coupled). 3449 // 3450 // Next, we consider 1 and 2. The intersection of the GroupLoops is empty. 3451 // Next, 1 and 3. The intersectionof their GroupLoops = {2}, not empty, 3452 // so Pair[3].Group = {0, 1, 3} and Done = false. 3453 // 3454 // Next, we compare 2 against 3. The intersection of the GroupLoops is empty. 3455 // Since Done remains true, we add 2 to the set of Separable pairs. 3456 // 3457 // Finally, we consider 3. There's nothing to compare it with, 3458 // so Done remains true and we add it to the Coupled set. 3459 // Pair[3].Group = {0, 1, 3} and GroupLoops = {1, 2, 5}. 3460 // 3461 // In the end, we've got 1 separable subscript and 1 coupled group. 3462 for (unsigned SI = 0; SI < Pairs; ++SI) { 3463 if (Pair[SI].Classification == Subscript::NonLinear) { 3464 // ignore these, but collect loops for later 3465 ++NonlinearSubscriptPairs; 3466 collectCommonLoops(Pair[SI].Src, 3467 LI->getLoopFor(Src->getParent()), 3468 Pair[SI].Loops); 3469 collectCommonLoops(Pair[SI].Dst, 3470 LI->getLoopFor(Dst->getParent()), 3471 Pair[SI].Loops); 3472 Result.Consistent = false; 3473 } else if (Pair[SI].Classification == Subscript::ZIV) { 3474 // always separable 3475 Separable.set(SI); 3476 } 3477 else { 3478 // SIV, RDIV, or MIV, so check for coupled group 3479 bool Done = true; 3480 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { 3481 SmallBitVector Intersection = Pair[SI].GroupLoops; 3482 Intersection &= Pair[SJ].GroupLoops; 3483 if (Intersection.any()) { 3484 // accumulate set of all the loops in group 3485 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; 3486 // accumulate set of all subscripts in group 3487 Pair[SJ].Group |= Pair[SI].Group; 3488 Done = false; 3489 } 3490 } 3491 if (Done) { 3492 if (Pair[SI].Group.count() == 1) { 3493 Separable.set(SI); 3494 ++SeparableSubscriptPairs; 3495 } 3496 else { 3497 Coupled.set(SI); 3498 ++CoupledSubscriptPairs; 3499 } 3500 } 3501 } 3502 } 3503 3504 DEBUG(dbgs() << " Separable = "); 3505 DEBUG(dumpSmallBitVector(Separable)); 3506 DEBUG(dbgs() << " Coupled = "); 3507 DEBUG(dumpSmallBitVector(Coupled)); 3508 3509 Constraint NewConstraint; 3510 NewConstraint.setAny(SE); 3511 3512 // test separable subscripts 3513 for (int SI = Separable.find_first(); SI >= 0; SI = Separable.find_next(SI)) { 3514 DEBUG(dbgs() << "testing subscript " << SI); 3515 switch (Pair[SI].Classification) { 3516 case Subscript::ZIV: 3517 DEBUG(dbgs() << ", ZIV\n"); 3518 if (testZIV(Pair[SI].Src, Pair[SI].Dst, Result)) 3519 return nullptr; 3520 break; 3521 case Subscript::SIV: { 3522 DEBUG(dbgs() << ", SIV\n"); 3523 unsigned Level; 3524 const SCEV *SplitIter = nullptr; 3525 if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level, Result, NewConstraint, 3526 SplitIter)) 3527 return nullptr; 3528 break; 3529 } 3530 case Subscript::RDIV: 3531 DEBUG(dbgs() << ", RDIV\n"); 3532 if (testRDIV(Pair[SI].Src, Pair[SI].Dst, Result)) 3533 return nullptr; 3534 break; 3535 case Subscript::MIV: 3536 DEBUG(dbgs() << ", MIV\n"); 3537 if (testMIV(Pair[SI].Src, Pair[SI].Dst, Pair[SI].Loops, Result)) 3538 return nullptr; 3539 break; 3540 default: 3541 llvm_unreachable("subscript has unexpected classification"); 3542 } 3543 } 3544 3545 if (Coupled.count()) { 3546 // test coupled subscript groups 3547 DEBUG(dbgs() << "starting on coupled subscripts\n"); 3548 DEBUG(dbgs() << "MaxLevels + 1 = " << MaxLevels + 1 << "\n"); 3549 SmallVector<Constraint, 4> Constraints(MaxLevels + 1); 3550 for (unsigned II = 0; II <= MaxLevels; ++II) 3551 Constraints[II].setAny(SE); 3552 for (int SI = Coupled.find_first(); SI >= 0; SI = Coupled.find_next(SI)) { 3553 DEBUG(dbgs() << "testing subscript group " << SI << " { "); 3554 SmallBitVector Group(Pair[SI].Group); 3555 SmallBitVector Sivs(Pairs); 3556 SmallBitVector Mivs(Pairs); 3557 SmallBitVector ConstrainedLevels(MaxLevels + 1); 3558 SmallVector<Subscript *, 4> PairsInGroup; 3559 for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) { 3560 DEBUG(dbgs() << SJ << " "); 3561 if (Pair[SJ].Classification == Subscript::SIV) 3562 Sivs.set(SJ); 3563 else 3564 Mivs.set(SJ); 3565 PairsInGroup.push_back(&Pair[SJ]); 3566 } 3567 unifySubscriptType(PairsInGroup); 3568 DEBUG(dbgs() << "}\n"); 3569 while (Sivs.any()) { 3570 bool Changed = false; 3571 for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) { 3572 DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n"); 3573 // SJ is an SIV subscript that's part of the current coupled group 3574 unsigned Level; 3575 const SCEV *SplitIter = nullptr; 3576 DEBUG(dbgs() << "SIV\n"); 3577 if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, Result, NewConstraint, 3578 SplitIter)) 3579 return nullptr; 3580 ConstrainedLevels.set(Level); 3581 if (intersectConstraints(&Constraints[Level], &NewConstraint)) { 3582 if (Constraints[Level].isEmpty()) { 3583 ++DeltaIndependence; 3584 return nullptr; 3585 } 3586 Changed = true; 3587 } 3588 Sivs.reset(SJ); 3589 } 3590 if (Changed) { 3591 // propagate, possibly creating new SIVs and ZIVs 3592 DEBUG(dbgs() << " propagating\n"); 3593 DEBUG(dbgs() << "\tMivs = "); 3594 DEBUG(dumpSmallBitVector(Mivs)); 3595 for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { 3596 // SJ is an MIV subscript that's part of the current coupled group 3597 DEBUG(dbgs() << "\tSJ = " << SJ << "\n"); 3598 if (propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, 3599 Constraints, Result.Consistent)) { 3600 DEBUG(dbgs() << "\t Changed\n"); 3601 ++DeltaPropagations; 3602 Pair[SJ].Classification = 3603 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), 3604 Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), 3605 Pair[SJ].Loops); 3606 switch (Pair[SJ].Classification) { 3607 case Subscript::ZIV: 3608 DEBUG(dbgs() << "ZIV\n"); 3609 if (testZIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) 3610 return nullptr; 3611 Mivs.reset(SJ); 3612 break; 3613 case Subscript::SIV: 3614 Sivs.set(SJ); 3615 Mivs.reset(SJ); 3616 break; 3617 case Subscript::RDIV: 3618 case Subscript::MIV: 3619 break; 3620 default: 3621 llvm_unreachable("bad subscript classification"); 3622 } 3623 } 3624 } 3625 } 3626 } 3627 3628 // test & propagate remaining RDIVs 3629 for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { 3630 if (Pair[SJ].Classification == Subscript::RDIV) { 3631 DEBUG(dbgs() << "RDIV test\n"); 3632 if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) 3633 return nullptr; 3634 // I don't yet understand how to propagate RDIV results 3635 Mivs.reset(SJ); 3636 } 3637 } 3638 3639 // test remaining MIVs 3640 // This code is temporary. 3641 // Better to somehow test all remaining subscripts simultaneously. 3642 for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { 3643 if (Pair[SJ].Classification == Subscript::MIV) { 3644 DEBUG(dbgs() << "MIV test\n"); 3645 if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Result)) 3646 return nullptr; 3647 } 3648 else 3649 llvm_unreachable("expected only MIV subscripts at this point"); 3650 } 3651 3652 // update Result.DV from constraint vector 3653 DEBUG(dbgs() << " updating\n"); 3654 for (int SJ = ConstrainedLevels.find_first(); SJ >= 0; 3655 SJ = ConstrainedLevels.find_next(SJ)) { 3656 if (SJ > (int)CommonLevels) 3657 break; 3658 updateDirection(Result.DV[SJ - 1], Constraints[SJ]); 3659 if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE) 3660 return nullptr; 3661 } 3662 } 3663 } 3664 3665 // Make sure the Scalar flags are set correctly. 3666 SmallBitVector CompleteLoops(MaxLevels + 1); 3667 for (unsigned SI = 0; SI < Pairs; ++SI) 3668 CompleteLoops |= Pair[SI].Loops; 3669 for (unsigned II = 1; II <= CommonLevels; ++II) 3670 if (CompleteLoops[II]) 3671 Result.DV[II - 1].Scalar = false; 3672 3673 if (PossiblyLoopIndependent) { 3674 // Make sure the LoopIndependent flag is set correctly. 3675 // All directions must include equal, otherwise no 3676 // loop-independent dependence is possible. 3677 for (unsigned II = 1; II <= CommonLevels; ++II) { 3678 if (!(Result.getDirection(II) & Dependence::DVEntry::EQ)) { 3679 Result.LoopIndependent = false; 3680 break; 3681 } 3682 } 3683 } 3684 else { 3685 // On the other hand, if all directions are equal and there's no 3686 // loop-independent dependence possible, then no dependence exists. 3687 bool AllEqual = true; 3688 for (unsigned II = 1; II <= CommonLevels; ++II) { 3689 if (Result.getDirection(II) != Dependence::DVEntry::EQ) { 3690 AllEqual = false; 3691 break; 3692 } 3693 } 3694 if (AllEqual) 3695 return nullptr; 3696 } 3697 3698 return make_unique<FullDependence>(std::move(Result)); 3699 } 3700 3701 3702 3703 //===----------------------------------------------------------------------===// 3704 // getSplitIteration - 3705 // Rather than spend rarely-used space recording the splitting iteration 3706 // during the Weak-Crossing SIV test, we re-compute it on demand. 3707 // The re-computation is basically a repeat of the entire dependence test, 3708 // though simplified since we know that the dependence exists. 3709 // It's tedious, since we must go through all propagations, etc. 3710 // 3711 // Care is required to keep this code up to date with respect to the routine 3712 // above, depends(). 3713 // 3714 // Generally, the dependence analyzer will be used to build 3715 // a dependence graph for a function (basically a map from instructions 3716 // to dependences). Looking for cycles in the graph shows us loops 3717 // that cannot be trivially vectorized/parallelized. 3718 // 3719 // We can try to improve the situation by examining all the dependences 3720 // that make up the cycle, looking for ones we can break. 3721 // Sometimes, peeling the first or last iteration of a loop will break 3722 // dependences, and we've got flags for those possibilities. 3723 // Sometimes, splitting a loop at some other iteration will do the trick, 3724 // and we've got a flag for that case. Rather than waste the space to 3725 // record the exact iteration (since we rarely know), we provide 3726 // a method that calculates the iteration. It's a drag that it must work 3727 // from scratch, but wonderful in that it's possible. 3728 // 3729 // Here's an example: 3730 // 3731 // for (i = 0; i < 10; i++) 3732 // A[i] = ... 3733 // ... = A[11 - i] 3734 // 3735 // There's a loop-carried flow dependence from the store to the load, 3736 // found by the weak-crossing SIV test. The dependence will have a flag, 3737 // indicating that the dependence can be broken by splitting the loop. 3738 // Calling getSplitIteration will return 5. 3739 // Splitting the loop breaks the dependence, like so: 3740 // 3741 // for (i = 0; i <= 5; i++) 3742 // A[i] = ... 3743 // ... = A[11 - i] 3744 // for (i = 6; i < 10; i++) 3745 // A[i] = ... 3746 // ... = A[11 - i] 3747 // 3748 // breaks the dependence and allows us to vectorize/parallelize 3749 // both loops. 3750 const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep, 3751 unsigned SplitLevel) { 3752 assert(Dep.isSplitable(SplitLevel) && 3753 "Dep should be splitable at SplitLevel"); 3754 Instruction *Src = Dep.getSrc(); 3755 Instruction *Dst = Dep.getDst(); 3756 assert(Src->mayReadFromMemory() || Src->mayWriteToMemory()); 3757 assert(Dst->mayReadFromMemory() || Dst->mayWriteToMemory()); 3758 assert(isLoadOrStore(Src)); 3759 assert(isLoadOrStore(Dst)); 3760 Value *SrcPtr = getPointerOperand(Src); 3761 Value *DstPtr = getPointerOperand(Dst); 3762 assert(underlyingObjectsAlias(AA, F->getParent()->getDataLayout(), DstPtr, 3763 SrcPtr) == MustAlias); 3764 3765 // establish loop nesting levels 3766 establishNestingLevels(Src, Dst); 3767 3768 FullDependence Result(Src, Dst, false, CommonLevels); 3769 3770 // See if there are GEPs we can use. 3771 bool UsefulGEP = false; 3772 GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr); 3773 GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr); 3774 if (SrcGEP && DstGEP && 3775 SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()) { 3776 const SCEV *SrcPtrSCEV = SE->getSCEV(SrcGEP->getPointerOperand()); 3777 const SCEV *DstPtrSCEV = SE->getSCEV(DstGEP->getPointerOperand()); 3778 UsefulGEP = isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) && 3779 isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent())) && 3780 (SrcGEP->getNumOperands() == DstGEP->getNumOperands()); 3781 } 3782 unsigned Pairs = UsefulGEP ? SrcGEP->idx_end() - SrcGEP->idx_begin() : 1; 3783 SmallVector<Subscript, 4> Pair(Pairs); 3784 if (UsefulGEP) { 3785 unsigned P = 0; 3786 for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(), 3787 SrcEnd = SrcGEP->idx_end(), 3788 DstIdx = DstGEP->idx_begin(); 3789 SrcIdx != SrcEnd; 3790 ++SrcIdx, ++DstIdx, ++P) { 3791 Pair[P].Src = SE->getSCEV(*SrcIdx); 3792 Pair[P].Dst = SE->getSCEV(*DstIdx); 3793 } 3794 } 3795 else { 3796 const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); 3797 const SCEV *DstSCEV = SE->getSCEV(DstPtr); 3798 Pair[0].Src = SrcSCEV; 3799 Pair[0].Dst = DstSCEV; 3800 } 3801 3802 if (Delinearize && CommonLevels > 1) { 3803 if (tryDelinearize(Src, Dst, Pair)) { 3804 DEBUG(dbgs() << " delinerized GEP\n"); 3805 Pairs = Pair.size(); 3806 } 3807 } 3808 3809 for (unsigned P = 0; P < Pairs; ++P) { 3810 Pair[P].Loops.resize(MaxLevels + 1); 3811 Pair[P].GroupLoops.resize(MaxLevels + 1); 3812 Pair[P].Group.resize(Pairs); 3813 removeMatchingExtensions(&Pair[P]); 3814 Pair[P].Classification = 3815 classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), 3816 Pair[P].Dst, LI->getLoopFor(Dst->getParent()), 3817 Pair[P].Loops); 3818 Pair[P].GroupLoops = Pair[P].Loops; 3819 Pair[P].Group.set(P); 3820 } 3821 3822 SmallBitVector Separable(Pairs); 3823 SmallBitVector Coupled(Pairs); 3824 3825 // partition subscripts into separable and minimally-coupled groups 3826 for (unsigned SI = 0; SI < Pairs; ++SI) { 3827 if (Pair[SI].Classification == Subscript::NonLinear) { 3828 // ignore these, but collect loops for later 3829 collectCommonLoops(Pair[SI].Src, 3830 LI->getLoopFor(Src->getParent()), 3831 Pair[SI].Loops); 3832 collectCommonLoops(Pair[SI].Dst, 3833 LI->getLoopFor(Dst->getParent()), 3834 Pair[SI].Loops); 3835 Result.Consistent = false; 3836 } 3837 else if (Pair[SI].Classification == Subscript::ZIV) 3838 Separable.set(SI); 3839 else { 3840 // SIV, RDIV, or MIV, so check for coupled group 3841 bool Done = true; 3842 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { 3843 SmallBitVector Intersection = Pair[SI].GroupLoops; 3844 Intersection &= Pair[SJ].GroupLoops; 3845 if (Intersection.any()) { 3846 // accumulate set of all the loops in group 3847 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; 3848 // accumulate set of all subscripts in group 3849 Pair[SJ].Group |= Pair[SI].Group; 3850 Done = false; 3851 } 3852 } 3853 if (Done) { 3854 if (Pair[SI].Group.count() == 1) 3855 Separable.set(SI); 3856 else 3857 Coupled.set(SI); 3858 } 3859 } 3860 } 3861 3862 Constraint NewConstraint; 3863 NewConstraint.setAny(SE); 3864 3865 // test separable subscripts 3866 for (int SI = Separable.find_first(); SI >= 0; SI = Separable.find_next(SI)) { 3867 switch (Pair[SI].Classification) { 3868 case Subscript::SIV: { 3869 unsigned Level; 3870 const SCEV *SplitIter = nullptr; 3871 (void) testSIV(Pair[SI].Src, Pair[SI].Dst, Level, 3872 Result, NewConstraint, SplitIter); 3873 if (Level == SplitLevel) { 3874 assert(SplitIter != nullptr); 3875 return SplitIter; 3876 } 3877 break; 3878 } 3879 case Subscript::ZIV: 3880 case Subscript::RDIV: 3881 case Subscript::MIV: 3882 break; 3883 default: 3884 llvm_unreachable("subscript has unexpected classification"); 3885 } 3886 } 3887 3888 if (Coupled.count()) { 3889 // test coupled subscript groups 3890 SmallVector<Constraint, 4> Constraints(MaxLevels + 1); 3891 for (unsigned II = 0; II <= MaxLevels; ++II) 3892 Constraints[II].setAny(SE); 3893 for (int SI = Coupled.find_first(); SI >= 0; SI = Coupled.find_next(SI)) { 3894 SmallBitVector Group(Pair[SI].Group); 3895 SmallBitVector Sivs(Pairs); 3896 SmallBitVector Mivs(Pairs); 3897 SmallBitVector ConstrainedLevels(MaxLevels + 1); 3898 for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) { 3899 if (Pair[SJ].Classification == Subscript::SIV) 3900 Sivs.set(SJ); 3901 else 3902 Mivs.set(SJ); 3903 } 3904 while (Sivs.any()) { 3905 bool Changed = false; 3906 for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) { 3907 // SJ is an SIV subscript that's part of the current coupled group 3908 unsigned Level; 3909 const SCEV *SplitIter = nullptr; 3910 (void) testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, 3911 Result, NewConstraint, SplitIter); 3912 if (Level == SplitLevel && SplitIter) 3913 return SplitIter; 3914 ConstrainedLevels.set(Level); 3915 if (intersectConstraints(&Constraints[Level], &NewConstraint)) 3916 Changed = true; 3917 Sivs.reset(SJ); 3918 } 3919 if (Changed) { 3920 // propagate, possibly creating new SIVs and ZIVs 3921 for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { 3922 // SJ is an MIV subscript that's part of the current coupled group 3923 if (propagate(Pair[SJ].Src, Pair[SJ].Dst, 3924 Pair[SJ].Loops, Constraints, Result.Consistent)) { 3925 Pair[SJ].Classification = 3926 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), 3927 Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), 3928 Pair[SJ].Loops); 3929 switch (Pair[SJ].Classification) { 3930 case Subscript::ZIV: 3931 Mivs.reset(SJ); 3932 break; 3933 case Subscript::SIV: 3934 Sivs.set(SJ); 3935 Mivs.reset(SJ); 3936 break; 3937 case Subscript::RDIV: 3938 case Subscript::MIV: 3939 break; 3940 default: 3941 llvm_unreachable("bad subscript classification"); 3942 } 3943 } 3944 } 3945 } 3946 } 3947 } 3948 } 3949 llvm_unreachable("somehow reached end of routine"); 3950 return nullptr; 3951 } 3952