1 //===-- DependenceAnalysis.cpp - DA Implementation --------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // DependenceAnalysis is an LLVM pass that analyses dependences between memory 11 // accesses. Currently, it is an (incomplete) implementation of the approach 12 // described in 13 // 14 // Practical Dependence Testing 15 // Goff, Kennedy, Tseng 16 // PLDI 1991 17 // 18 // There's a single entry point that analyzes the dependence between a pair 19 // of memory references in a function, returning either NULL, for no dependence, 20 // or a more-or-less detailed description of the dependence between them. 21 // 22 // Currently, the implementation cannot propagate constraints between 23 // coupled RDIV subscripts and lacks a multi-subscript MIV test. 24 // Both of these are conservative weaknesses; 25 // that is, not a source of correctness problems. 26 // 27 // Since Clang linearizes some array subscripts, the dependence 28 // analysis is using SCEV->delinearize to recover the representation of multiple 29 // subscripts, and thus avoid the more expensive and less precise MIV tests. The 30 // delinearization is controlled by the flag -da-delinearize. 31 // 32 // We should pay some careful attention to the possibility of integer overflow 33 // in the implementation of the various tests. This could happen with Add, 34 // Subtract, or Multiply, with both APInt's and SCEV's. 35 // 36 // Some non-linear subscript pairs can be handled by the GCD test 37 // (and perhaps other tests). 38 // Should explore how often these things occur. 39 // 40 // Finally, it seems like certain test cases expose weaknesses in the SCEV 41 // simplification, especially in the handling of sign and zero extensions. 42 // It could be useful to spend time exploring these. 43 // 44 // Please note that this is work in progress and the interface is subject to 45 // change. 46 // 47 //===----------------------------------------------------------------------===// 48 // // 49 // In memory of Ken Kennedy, 1945 - 2007 // 50 // // 51 //===----------------------------------------------------------------------===// 52 53 #include "llvm/Analysis/DependenceAnalysis.h" 54 #include "llvm/ADT/STLExtras.h" 55 #include "llvm/ADT/Statistic.h" 56 #include "llvm/Analysis/AliasAnalysis.h" 57 #include "llvm/Analysis/LoopInfo.h" 58 #include "llvm/Analysis/ScalarEvolution.h" 59 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 60 #include "llvm/Analysis/ValueTracking.h" 61 #include "llvm/IR/InstIterator.h" 62 #include "llvm/IR/Module.h" 63 #include "llvm/IR/Operator.h" 64 #include "llvm/Support/CommandLine.h" 65 #include "llvm/Support/Debug.h" 66 #include "llvm/Support/ErrorHandling.h" 67 #include "llvm/Support/raw_ostream.h" 68 69 using namespace llvm; 70 71 #define DEBUG_TYPE "da" 72 73 //===----------------------------------------------------------------------===// 74 // statistics 75 76 STATISTIC(TotalArrayPairs, "Array pairs tested"); 77 STATISTIC(SeparableSubscriptPairs, "Separable subscript pairs"); 78 STATISTIC(CoupledSubscriptPairs, "Coupled subscript pairs"); 79 STATISTIC(NonlinearSubscriptPairs, "Nonlinear subscript pairs"); 80 STATISTIC(ZIVapplications, "ZIV applications"); 81 STATISTIC(ZIVindependence, "ZIV independence"); 82 STATISTIC(StrongSIVapplications, "Strong SIV applications"); 83 STATISTIC(StrongSIVsuccesses, "Strong SIV successes"); 84 STATISTIC(StrongSIVindependence, "Strong SIV independence"); 85 STATISTIC(WeakCrossingSIVapplications, "Weak-Crossing SIV applications"); 86 STATISTIC(WeakCrossingSIVsuccesses, "Weak-Crossing SIV successes"); 87 STATISTIC(WeakCrossingSIVindependence, "Weak-Crossing SIV independence"); 88 STATISTIC(ExactSIVapplications, "Exact SIV applications"); 89 STATISTIC(ExactSIVsuccesses, "Exact SIV successes"); 90 STATISTIC(ExactSIVindependence, "Exact SIV independence"); 91 STATISTIC(WeakZeroSIVapplications, "Weak-Zero SIV applications"); 92 STATISTIC(WeakZeroSIVsuccesses, "Weak-Zero SIV successes"); 93 STATISTIC(WeakZeroSIVindependence, "Weak-Zero SIV independence"); 94 STATISTIC(ExactRDIVapplications, "Exact RDIV applications"); 95 STATISTIC(ExactRDIVindependence, "Exact RDIV independence"); 96 STATISTIC(SymbolicRDIVapplications, "Symbolic RDIV applications"); 97 STATISTIC(SymbolicRDIVindependence, "Symbolic RDIV independence"); 98 STATISTIC(DeltaApplications, "Delta applications"); 99 STATISTIC(DeltaSuccesses, "Delta successes"); 100 STATISTIC(DeltaIndependence, "Delta independence"); 101 STATISTIC(DeltaPropagations, "Delta propagations"); 102 STATISTIC(GCDapplications, "GCD applications"); 103 STATISTIC(GCDsuccesses, "GCD successes"); 104 STATISTIC(GCDindependence, "GCD independence"); 105 STATISTIC(BanerjeeApplications, "Banerjee applications"); 106 STATISTIC(BanerjeeIndependence, "Banerjee independence"); 107 STATISTIC(BanerjeeSuccesses, "Banerjee successes"); 108 109 static cl::opt<bool> 110 Delinearize("da-delinearize", cl::init(false), cl::Hidden, cl::ZeroOrMore, 111 cl::desc("Try to delinearize array references.")); 112 113 //===----------------------------------------------------------------------===// 114 // basics 115 116 DependenceAnalysis::Result 117 DependenceAnalysis::run(Function &F, FunctionAnalysisManager &FAM) { 118 auto &AA = FAM.getResult<AAManager>(F); 119 auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F); 120 auto &LI = FAM.getResult<LoopAnalysis>(F); 121 return DependenceInfo(&F, &AA, &SE, &LI); 122 } 123 124 AnalysisKey DependenceAnalysis::Key; 125 126 INITIALIZE_PASS_BEGIN(DependenceAnalysisWrapperPass, "da", 127 "Dependence Analysis", true, true) 128 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 129 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) 130 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 131 INITIALIZE_PASS_END(DependenceAnalysisWrapperPass, "da", "Dependence Analysis", 132 true, true) 133 134 char DependenceAnalysisWrapperPass::ID = 0; 135 136 FunctionPass *llvm::createDependenceAnalysisWrapperPass() { 137 return new DependenceAnalysisWrapperPass(); 138 } 139 140 bool DependenceAnalysisWrapperPass::runOnFunction(Function &F) { 141 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); 142 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 143 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 144 info.reset(new DependenceInfo(&F, &AA, &SE, &LI)); 145 return false; 146 } 147 148 DependenceInfo &DependenceAnalysisWrapperPass::getDI() const { return *info; } 149 150 void DependenceAnalysisWrapperPass::releaseMemory() { info.reset(); } 151 152 void DependenceAnalysisWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { 153 AU.setPreservesAll(); 154 AU.addRequiredTransitive<AAResultsWrapperPass>(); 155 AU.addRequiredTransitive<ScalarEvolutionWrapperPass>(); 156 AU.addRequiredTransitive<LoopInfoWrapperPass>(); 157 } 158 159 160 // Used to test the dependence analyzer. 161 // Looks through the function, noting loads and stores. 162 // Calls depends() on every possible pair and prints out the result. 163 // Ignores all other instructions. 164 static void dumpExampleDependence(raw_ostream &OS, DependenceInfo *DA) { 165 auto *F = DA->getFunction(); 166 for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F); SrcI != SrcE; 167 ++SrcI) { 168 if (isa<StoreInst>(*SrcI) || isa<LoadInst>(*SrcI)) { 169 for (inst_iterator DstI = SrcI, DstE = inst_end(F); 170 DstI != DstE; ++DstI) { 171 if (isa<StoreInst>(*DstI) || isa<LoadInst>(*DstI)) { 172 OS << "da analyze - "; 173 if (auto D = DA->depends(&*SrcI, &*DstI, true)) { 174 D->dump(OS); 175 for (unsigned Level = 1; Level <= D->getLevels(); Level++) { 176 if (D->isSplitable(Level)) { 177 OS << "da analyze - split level = " << Level; 178 OS << ", iteration = " << *DA->getSplitIteration(*D, Level); 179 OS << "!\n"; 180 } 181 } 182 } 183 else 184 OS << "none!\n"; 185 } 186 } 187 } 188 } 189 } 190 191 void DependenceAnalysisWrapperPass::print(raw_ostream &OS, 192 const Module *) const { 193 dumpExampleDependence(OS, info.get()); 194 } 195 196 //===----------------------------------------------------------------------===// 197 // Dependence methods 198 199 // Returns true if this is an input dependence. 200 bool Dependence::isInput() const { 201 return Src->mayReadFromMemory() && Dst->mayReadFromMemory(); 202 } 203 204 205 // Returns true if this is an output dependence. 206 bool Dependence::isOutput() const { 207 return Src->mayWriteToMemory() && Dst->mayWriteToMemory(); 208 } 209 210 211 // Returns true if this is an flow (aka true) dependence. 212 bool Dependence::isFlow() const { 213 return Src->mayWriteToMemory() && Dst->mayReadFromMemory(); 214 } 215 216 217 // Returns true if this is an anti dependence. 218 bool Dependence::isAnti() const { 219 return Src->mayReadFromMemory() && Dst->mayWriteToMemory(); 220 } 221 222 223 // Returns true if a particular level is scalar; that is, 224 // if no subscript in the source or destination mention the induction 225 // variable associated with the loop at this level. 226 // Leave this out of line, so it will serve as a virtual method anchor 227 bool Dependence::isScalar(unsigned level) const { 228 return false; 229 } 230 231 232 //===----------------------------------------------------------------------===// 233 // FullDependence methods 234 235 FullDependence::FullDependence(Instruction *Source, Instruction *Destination, 236 bool PossiblyLoopIndependent, 237 unsigned CommonLevels) 238 : Dependence(Source, Destination), Levels(CommonLevels), 239 LoopIndependent(PossiblyLoopIndependent) { 240 Consistent = true; 241 if (CommonLevels) 242 DV = make_unique<DVEntry[]>(CommonLevels); 243 } 244 245 // The rest are simple getters that hide the implementation. 246 247 // getDirection - Returns the direction associated with a particular level. 248 unsigned FullDependence::getDirection(unsigned Level) const { 249 assert(0 < Level && Level <= Levels && "Level out of range"); 250 return DV[Level - 1].Direction; 251 } 252 253 254 // Returns the distance (or NULL) associated with a particular level. 255 const SCEV *FullDependence::getDistance(unsigned Level) const { 256 assert(0 < Level && Level <= Levels && "Level out of range"); 257 return DV[Level - 1].Distance; 258 } 259 260 261 // Returns true if a particular level is scalar; that is, 262 // if no subscript in the source or destination mention the induction 263 // variable associated with the loop at this level. 264 bool FullDependence::isScalar(unsigned Level) const { 265 assert(0 < Level && Level <= Levels && "Level out of range"); 266 return DV[Level - 1].Scalar; 267 } 268 269 270 // Returns true if peeling the first iteration from this loop 271 // will break this dependence. 272 bool FullDependence::isPeelFirst(unsigned Level) const { 273 assert(0 < Level && Level <= Levels && "Level out of range"); 274 return DV[Level - 1].PeelFirst; 275 } 276 277 278 // Returns true if peeling the last iteration from this loop 279 // will break this dependence. 280 bool FullDependence::isPeelLast(unsigned Level) const { 281 assert(0 < Level && Level <= Levels && "Level out of range"); 282 return DV[Level - 1].PeelLast; 283 } 284 285 286 // Returns true if splitting this loop will break the dependence. 287 bool FullDependence::isSplitable(unsigned Level) const { 288 assert(0 < Level && Level <= Levels && "Level out of range"); 289 return DV[Level - 1].Splitable; 290 } 291 292 293 //===----------------------------------------------------------------------===// 294 // DependenceInfo::Constraint methods 295 296 // If constraint is a point <X, Y>, returns X. 297 // Otherwise assert. 298 const SCEV *DependenceInfo::Constraint::getX() const { 299 assert(Kind == Point && "Kind should be Point"); 300 return A; 301 } 302 303 304 // If constraint is a point <X, Y>, returns Y. 305 // Otherwise assert. 306 const SCEV *DependenceInfo::Constraint::getY() const { 307 assert(Kind == Point && "Kind should be Point"); 308 return B; 309 } 310 311 312 // If constraint is a line AX + BY = C, returns A. 313 // Otherwise assert. 314 const SCEV *DependenceInfo::Constraint::getA() const { 315 assert((Kind == Line || Kind == Distance) && 316 "Kind should be Line (or Distance)"); 317 return A; 318 } 319 320 321 // If constraint is a line AX + BY = C, returns B. 322 // Otherwise assert. 323 const SCEV *DependenceInfo::Constraint::getB() const { 324 assert((Kind == Line || Kind == Distance) && 325 "Kind should be Line (or Distance)"); 326 return B; 327 } 328 329 330 // If constraint is a line AX + BY = C, returns C. 331 // Otherwise assert. 332 const SCEV *DependenceInfo::Constraint::getC() const { 333 assert((Kind == Line || Kind == Distance) && 334 "Kind should be Line (or Distance)"); 335 return C; 336 } 337 338 339 // If constraint is a distance, returns D. 340 // Otherwise assert. 341 const SCEV *DependenceInfo::Constraint::getD() const { 342 assert(Kind == Distance && "Kind should be Distance"); 343 return SE->getNegativeSCEV(C); 344 } 345 346 347 // Returns the loop associated with this constraint. 348 const Loop *DependenceInfo::Constraint::getAssociatedLoop() const { 349 assert((Kind == Distance || Kind == Line || Kind == Point) && 350 "Kind should be Distance, Line, or Point"); 351 return AssociatedLoop; 352 } 353 354 void DependenceInfo::Constraint::setPoint(const SCEV *X, const SCEV *Y, 355 const Loop *CurLoop) { 356 Kind = Point; 357 A = X; 358 B = Y; 359 AssociatedLoop = CurLoop; 360 } 361 362 void DependenceInfo::Constraint::setLine(const SCEV *AA, const SCEV *BB, 363 const SCEV *CC, const Loop *CurLoop) { 364 Kind = Line; 365 A = AA; 366 B = BB; 367 C = CC; 368 AssociatedLoop = CurLoop; 369 } 370 371 void DependenceInfo::Constraint::setDistance(const SCEV *D, 372 const Loop *CurLoop) { 373 Kind = Distance; 374 A = SE->getOne(D->getType()); 375 B = SE->getNegativeSCEV(A); 376 C = SE->getNegativeSCEV(D); 377 AssociatedLoop = CurLoop; 378 } 379 380 void DependenceInfo::Constraint::setEmpty() { Kind = Empty; } 381 382 void DependenceInfo::Constraint::setAny(ScalarEvolution *NewSE) { 383 SE = NewSE; 384 Kind = Any; 385 } 386 387 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 388 // For debugging purposes. Dumps the constraint out to OS. 389 LLVM_DUMP_METHOD void DependenceInfo::Constraint::dump(raw_ostream &OS) const { 390 if (isEmpty()) 391 OS << " Empty\n"; 392 else if (isAny()) 393 OS << " Any\n"; 394 else if (isPoint()) 395 OS << " Point is <" << *getX() << ", " << *getY() << ">\n"; 396 else if (isDistance()) 397 OS << " Distance is " << *getD() << 398 " (" << *getA() << "*X + " << *getB() << "*Y = " << *getC() << ")\n"; 399 else if (isLine()) 400 OS << " Line is " << *getA() << "*X + " << 401 *getB() << "*Y = " << *getC() << "\n"; 402 else 403 llvm_unreachable("unknown constraint type in Constraint::dump"); 404 } 405 #endif 406 407 408 // Updates X with the intersection 409 // of the Constraints X and Y. Returns true if X has changed. 410 // Corresponds to Figure 4 from the paper 411 // 412 // Practical Dependence Testing 413 // Goff, Kennedy, Tseng 414 // PLDI 1991 415 bool DependenceInfo::intersectConstraints(Constraint *X, const Constraint *Y) { 416 ++DeltaApplications; 417 DEBUG(dbgs() << "\tintersect constraints\n"); 418 DEBUG(dbgs() << "\t X ="; X->dump(dbgs())); 419 DEBUG(dbgs() << "\t Y ="; Y->dump(dbgs())); 420 assert(!Y->isPoint() && "Y must not be a Point"); 421 if (X->isAny()) { 422 if (Y->isAny()) 423 return false; 424 *X = *Y; 425 return true; 426 } 427 if (X->isEmpty()) 428 return false; 429 if (Y->isEmpty()) { 430 X->setEmpty(); 431 return true; 432 } 433 434 if (X->isDistance() && Y->isDistance()) { 435 DEBUG(dbgs() << "\t intersect 2 distances\n"); 436 if (isKnownPredicate(CmpInst::ICMP_EQ, X->getD(), Y->getD())) 437 return false; 438 if (isKnownPredicate(CmpInst::ICMP_NE, X->getD(), Y->getD())) { 439 X->setEmpty(); 440 ++DeltaSuccesses; 441 return true; 442 } 443 // Hmmm, interesting situation. 444 // I guess if either is constant, keep it and ignore the other. 445 if (isa<SCEVConstant>(Y->getD())) { 446 *X = *Y; 447 return true; 448 } 449 return false; 450 } 451 452 // At this point, the pseudo-code in Figure 4 of the paper 453 // checks if (X->isPoint() && Y->isPoint()). 454 // This case can't occur in our implementation, 455 // since a Point can only arise as the result of intersecting 456 // two Line constraints, and the right-hand value, Y, is never 457 // the result of an intersection. 458 assert(!(X->isPoint() && Y->isPoint()) && 459 "We shouldn't ever see X->isPoint() && Y->isPoint()"); 460 461 if (X->isLine() && Y->isLine()) { 462 DEBUG(dbgs() << "\t intersect 2 lines\n"); 463 const SCEV *Prod1 = SE->getMulExpr(X->getA(), Y->getB()); 464 const SCEV *Prod2 = SE->getMulExpr(X->getB(), Y->getA()); 465 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) { 466 // slopes are equal, so lines are parallel 467 DEBUG(dbgs() << "\t\tsame slope\n"); 468 Prod1 = SE->getMulExpr(X->getC(), Y->getB()); 469 Prod2 = SE->getMulExpr(X->getB(), Y->getC()); 470 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) 471 return false; 472 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { 473 X->setEmpty(); 474 ++DeltaSuccesses; 475 return true; 476 } 477 return false; 478 } 479 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { 480 // slopes differ, so lines intersect 481 DEBUG(dbgs() << "\t\tdifferent slopes\n"); 482 const SCEV *C1B2 = SE->getMulExpr(X->getC(), Y->getB()); 483 const SCEV *C1A2 = SE->getMulExpr(X->getC(), Y->getA()); 484 const SCEV *C2B1 = SE->getMulExpr(Y->getC(), X->getB()); 485 const SCEV *C2A1 = SE->getMulExpr(Y->getC(), X->getA()); 486 const SCEV *A1B2 = SE->getMulExpr(X->getA(), Y->getB()); 487 const SCEV *A2B1 = SE->getMulExpr(Y->getA(), X->getB()); 488 const SCEVConstant *C1A2_C2A1 = 489 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1A2, C2A1)); 490 const SCEVConstant *C1B2_C2B1 = 491 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1B2, C2B1)); 492 const SCEVConstant *A1B2_A2B1 = 493 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A1B2, A2B1)); 494 const SCEVConstant *A2B1_A1B2 = 495 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A2B1, A1B2)); 496 if (!C1B2_C2B1 || !C1A2_C2A1 || 497 !A1B2_A2B1 || !A2B1_A1B2) 498 return false; 499 APInt Xtop = C1B2_C2B1->getAPInt(); 500 APInt Xbot = A1B2_A2B1->getAPInt(); 501 APInt Ytop = C1A2_C2A1->getAPInt(); 502 APInt Ybot = A2B1_A1B2->getAPInt(); 503 DEBUG(dbgs() << "\t\tXtop = " << Xtop << "\n"); 504 DEBUG(dbgs() << "\t\tXbot = " << Xbot << "\n"); 505 DEBUG(dbgs() << "\t\tYtop = " << Ytop << "\n"); 506 DEBUG(dbgs() << "\t\tYbot = " << Ybot << "\n"); 507 APInt Xq = Xtop; // these need to be initialized, even 508 APInt Xr = Xtop; // though they're just going to be overwritten 509 APInt::sdivrem(Xtop, Xbot, Xq, Xr); 510 APInt Yq = Ytop; 511 APInt Yr = Ytop; 512 APInt::sdivrem(Ytop, Ybot, Yq, Yr); 513 if (Xr != 0 || Yr != 0) { 514 X->setEmpty(); 515 ++DeltaSuccesses; 516 return true; 517 } 518 DEBUG(dbgs() << "\t\tX = " << Xq << ", Y = " << Yq << "\n"); 519 if (Xq.slt(0) || Yq.slt(0)) { 520 X->setEmpty(); 521 ++DeltaSuccesses; 522 return true; 523 } 524 if (const SCEVConstant *CUB = 525 collectConstantUpperBound(X->getAssociatedLoop(), Prod1->getType())) { 526 const APInt &UpperBound = CUB->getAPInt(); 527 DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n"); 528 if (Xq.sgt(UpperBound) || Yq.sgt(UpperBound)) { 529 X->setEmpty(); 530 ++DeltaSuccesses; 531 return true; 532 } 533 } 534 X->setPoint(SE->getConstant(Xq), 535 SE->getConstant(Yq), 536 X->getAssociatedLoop()); 537 ++DeltaSuccesses; 538 return true; 539 } 540 return false; 541 } 542 543 // if (X->isLine() && Y->isPoint()) This case can't occur. 544 assert(!(X->isLine() && Y->isPoint()) && "This case should never occur"); 545 546 if (X->isPoint() && Y->isLine()) { 547 DEBUG(dbgs() << "\t intersect Point and Line\n"); 548 const SCEV *A1X1 = SE->getMulExpr(Y->getA(), X->getX()); 549 const SCEV *B1Y1 = SE->getMulExpr(Y->getB(), X->getY()); 550 const SCEV *Sum = SE->getAddExpr(A1X1, B1Y1); 551 if (isKnownPredicate(CmpInst::ICMP_EQ, Sum, Y->getC())) 552 return false; 553 if (isKnownPredicate(CmpInst::ICMP_NE, Sum, Y->getC())) { 554 X->setEmpty(); 555 ++DeltaSuccesses; 556 return true; 557 } 558 return false; 559 } 560 561 llvm_unreachable("shouldn't reach the end of Constraint intersection"); 562 return false; 563 } 564 565 566 //===----------------------------------------------------------------------===// 567 // DependenceInfo methods 568 569 // For debugging purposes. Dumps a dependence to OS. 570 void Dependence::dump(raw_ostream &OS) const { 571 bool Splitable = false; 572 if (isConfused()) 573 OS << "confused"; 574 else { 575 if (isConsistent()) 576 OS << "consistent "; 577 if (isFlow()) 578 OS << "flow"; 579 else if (isOutput()) 580 OS << "output"; 581 else if (isAnti()) 582 OS << "anti"; 583 else if (isInput()) 584 OS << "input"; 585 unsigned Levels = getLevels(); 586 OS << " ["; 587 for (unsigned II = 1; II <= Levels; ++II) { 588 if (isSplitable(II)) 589 Splitable = true; 590 if (isPeelFirst(II)) 591 OS << 'p'; 592 const SCEV *Distance = getDistance(II); 593 if (Distance) 594 OS << *Distance; 595 else if (isScalar(II)) 596 OS << "S"; 597 else { 598 unsigned Direction = getDirection(II); 599 if (Direction == DVEntry::ALL) 600 OS << "*"; 601 else { 602 if (Direction & DVEntry::LT) 603 OS << "<"; 604 if (Direction & DVEntry::EQ) 605 OS << "="; 606 if (Direction & DVEntry::GT) 607 OS << ">"; 608 } 609 } 610 if (isPeelLast(II)) 611 OS << 'p'; 612 if (II < Levels) 613 OS << " "; 614 } 615 if (isLoopIndependent()) 616 OS << "|<"; 617 OS << "]"; 618 if (Splitable) 619 OS << " splitable"; 620 } 621 OS << "!\n"; 622 } 623 624 static AliasResult underlyingObjectsAlias(AliasAnalysis *AA, 625 const DataLayout &DL, const Value *A, 626 const Value *B) { 627 const Value *AObj = GetUnderlyingObject(A, DL); 628 const Value *BObj = GetUnderlyingObject(B, DL); 629 return AA->alias(AObj, DL.getTypeStoreSize(AObj->getType()), 630 BObj, DL.getTypeStoreSize(BObj->getType())); 631 } 632 633 634 // Returns true if the load or store can be analyzed. Atomic and volatile 635 // operations have properties which this analysis does not understand. 636 static 637 bool isLoadOrStore(const Instruction *I) { 638 if (const LoadInst *LI = dyn_cast<LoadInst>(I)) 639 return LI->isUnordered(); 640 else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) 641 return SI->isUnordered(); 642 return false; 643 } 644 645 646 // Examines the loop nesting of the Src and Dst 647 // instructions and establishes their shared loops. Sets the variables 648 // CommonLevels, SrcLevels, and MaxLevels. 649 // The source and destination instructions needn't be contained in the same 650 // loop. The routine establishNestingLevels finds the level of most deeply 651 // nested loop that contains them both, CommonLevels. An instruction that's 652 // not contained in a loop is at level = 0. MaxLevels is equal to the level 653 // of the source plus the level of the destination, minus CommonLevels. 654 // This lets us allocate vectors MaxLevels in length, with room for every 655 // distinct loop referenced in both the source and destination subscripts. 656 // The variable SrcLevels is the nesting depth of the source instruction. 657 // It's used to help calculate distinct loops referenced by the destination. 658 // Here's the map from loops to levels: 659 // 0 - unused 660 // 1 - outermost common loop 661 // ... - other common loops 662 // CommonLevels - innermost common loop 663 // ... - loops containing Src but not Dst 664 // SrcLevels - innermost loop containing Src but not Dst 665 // ... - loops containing Dst but not Src 666 // MaxLevels - innermost loops containing Dst but not Src 667 // Consider the follow code fragment: 668 // for (a = ...) { 669 // for (b = ...) { 670 // for (c = ...) { 671 // for (d = ...) { 672 // A[] = ...; 673 // } 674 // } 675 // for (e = ...) { 676 // for (f = ...) { 677 // for (g = ...) { 678 // ... = A[]; 679 // } 680 // } 681 // } 682 // } 683 // } 684 // If we're looking at the possibility of a dependence between the store 685 // to A (the Src) and the load from A (the Dst), we'll note that they 686 // have 2 loops in common, so CommonLevels will equal 2 and the direction 687 // vector for Result will have 2 entries. SrcLevels = 4 and MaxLevels = 7. 688 // A map from loop names to loop numbers would look like 689 // a - 1 690 // b - 2 = CommonLevels 691 // c - 3 692 // d - 4 = SrcLevels 693 // e - 5 694 // f - 6 695 // g - 7 = MaxLevels 696 void DependenceInfo::establishNestingLevels(const Instruction *Src, 697 const Instruction *Dst) { 698 const BasicBlock *SrcBlock = Src->getParent(); 699 const BasicBlock *DstBlock = Dst->getParent(); 700 unsigned SrcLevel = LI->getLoopDepth(SrcBlock); 701 unsigned DstLevel = LI->getLoopDepth(DstBlock); 702 const Loop *SrcLoop = LI->getLoopFor(SrcBlock); 703 const Loop *DstLoop = LI->getLoopFor(DstBlock); 704 SrcLevels = SrcLevel; 705 MaxLevels = SrcLevel + DstLevel; 706 while (SrcLevel > DstLevel) { 707 SrcLoop = SrcLoop->getParentLoop(); 708 SrcLevel--; 709 } 710 while (DstLevel > SrcLevel) { 711 DstLoop = DstLoop->getParentLoop(); 712 DstLevel--; 713 } 714 while (SrcLoop != DstLoop) { 715 SrcLoop = SrcLoop->getParentLoop(); 716 DstLoop = DstLoop->getParentLoop(); 717 SrcLevel--; 718 } 719 CommonLevels = SrcLevel; 720 MaxLevels -= CommonLevels; 721 } 722 723 724 // Given one of the loops containing the source, return 725 // its level index in our numbering scheme. 726 unsigned DependenceInfo::mapSrcLoop(const Loop *SrcLoop) const { 727 return SrcLoop->getLoopDepth(); 728 } 729 730 731 // Given one of the loops containing the destination, 732 // return its level index in our numbering scheme. 733 unsigned DependenceInfo::mapDstLoop(const Loop *DstLoop) const { 734 unsigned D = DstLoop->getLoopDepth(); 735 if (D > CommonLevels) 736 return D - CommonLevels + SrcLevels; 737 else 738 return D; 739 } 740 741 742 // Returns true if Expression is loop invariant in LoopNest. 743 bool DependenceInfo::isLoopInvariant(const SCEV *Expression, 744 const Loop *LoopNest) const { 745 if (!LoopNest) 746 return true; 747 return SE->isLoopInvariant(Expression, LoopNest) && 748 isLoopInvariant(Expression, LoopNest->getParentLoop()); 749 } 750 751 752 753 // Finds the set of loops from the LoopNest that 754 // have a level <= CommonLevels and are referred to by the SCEV Expression. 755 void DependenceInfo::collectCommonLoops(const SCEV *Expression, 756 const Loop *LoopNest, 757 SmallBitVector &Loops) const { 758 while (LoopNest) { 759 unsigned Level = LoopNest->getLoopDepth(); 760 if (Level <= CommonLevels && !SE->isLoopInvariant(Expression, LoopNest)) 761 Loops.set(Level); 762 LoopNest = LoopNest->getParentLoop(); 763 } 764 } 765 766 void DependenceInfo::unifySubscriptType(ArrayRef<Subscript *> Pairs) { 767 768 unsigned widestWidthSeen = 0; 769 Type *widestType; 770 771 // Go through each pair and find the widest bit to which we need 772 // to extend all of them. 773 for (Subscript *Pair : Pairs) { 774 const SCEV *Src = Pair->Src; 775 const SCEV *Dst = Pair->Dst; 776 IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType()); 777 IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType()); 778 if (SrcTy == nullptr || DstTy == nullptr) { 779 assert(SrcTy == DstTy && "This function only unify integer types and " 780 "expect Src and Dst share the same type " 781 "otherwise."); 782 continue; 783 } 784 if (SrcTy->getBitWidth() > widestWidthSeen) { 785 widestWidthSeen = SrcTy->getBitWidth(); 786 widestType = SrcTy; 787 } 788 if (DstTy->getBitWidth() > widestWidthSeen) { 789 widestWidthSeen = DstTy->getBitWidth(); 790 widestType = DstTy; 791 } 792 } 793 794 795 assert(widestWidthSeen > 0); 796 797 // Now extend each pair to the widest seen. 798 for (Subscript *Pair : Pairs) { 799 const SCEV *Src = Pair->Src; 800 const SCEV *Dst = Pair->Dst; 801 IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType()); 802 IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType()); 803 if (SrcTy == nullptr || DstTy == nullptr) { 804 assert(SrcTy == DstTy && "This function only unify integer types and " 805 "expect Src and Dst share the same type " 806 "otherwise."); 807 continue; 808 } 809 if (SrcTy->getBitWidth() < widestWidthSeen) 810 // Sign-extend Src to widestType 811 Pair->Src = SE->getSignExtendExpr(Src, widestType); 812 if (DstTy->getBitWidth() < widestWidthSeen) { 813 // Sign-extend Dst to widestType 814 Pair->Dst = SE->getSignExtendExpr(Dst, widestType); 815 } 816 } 817 } 818 819 // removeMatchingExtensions - Examines a subscript pair. 820 // If the source and destination are identically sign (or zero) 821 // extended, it strips off the extension in an effect to simplify 822 // the actual analysis. 823 void DependenceInfo::removeMatchingExtensions(Subscript *Pair) { 824 const SCEV *Src = Pair->Src; 825 const SCEV *Dst = Pair->Dst; 826 if ((isa<SCEVZeroExtendExpr>(Src) && isa<SCEVZeroExtendExpr>(Dst)) || 827 (isa<SCEVSignExtendExpr>(Src) && isa<SCEVSignExtendExpr>(Dst))) { 828 const SCEVCastExpr *SrcCast = cast<SCEVCastExpr>(Src); 829 const SCEVCastExpr *DstCast = cast<SCEVCastExpr>(Dst); 830 const SCEV *SrcCastOp = SrcCast->getOperand(); 831 const SCEV *DstCastOp = DstCast->getOperand(); 832 if (SrcCastOp->getType() == DstCastOp->getType()) { 833 Pair->Src = SrcCastOp; 834 Pair->Dst = DstCastOp; 835 } 836 } 837 } 838 839 840 // Examine the scev and return true iff it's linear. 841 // Collect any loops mentioned in the set of "Loops". 842 bool DependenceInfo::checkSrcSubscript(const SCEV *Src, const Loop *LoopNest, 843 SmallBitVector &Loops) { 844 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Src); 845 if (!AddRec) 846 return isLoopInvariant(Src, LoopNest); 847 const SCEV *Start = AddRec->getStart(); 848 const SCEV *Step = AddRec->getStepRecurrence(*SE); 849 const SCEV *UB = SE->getBackedgeTakenCount(AddRec->getLoop()); 850 if (!isa<SCEVCouldNotCompute>(UB)) { 851 if (SE->getTypeSizeInBits(Start->getType()) < 852 SE->getTypeSizeInBits(UB->getType())) { 853 if (!AddRec->getNoWrapFlags()) 854 return false; 855 } 856 } 857 if (!isLoopInvariant(Step, LoopNest)) 858 return false; 859 Loops.set(mapSrcLoop(AddRec->getLoop())); 860 return checkSrcSubscript(Start, LoopNest, Loops); 861 } 862 863 864 865 // Examine the scev and return true iff it's linear. 866 // Collect any loops mentioned in the set of "Loops". 867 bool DependenceInfo::checkDstSubscript(const SCEV *Dst, const Loop *LoopNest, 868 SmallBitVector &Loops) { 869 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Dst); 870 if (!AddRec) 871 return isLoopInvariant(Dst, LoopNest); 872 const SCEV *Start = AddRec->getStart(); 873 const SCEV *Step = AddRec->getStepRecurrence(*SE); 874 const SCEV *UB = SE->getBackedgeTakenCount(AddRec->getLoop()); 875 if (!isa<SCEVCouldNotCompute>(UB)) { 876 if (SE->getTypeSizeInBits(Start->getType()) < 877 SE->getTypeSizeInBits(UB->getType())) { 878 if (!AddRec->getNoWrapFlags()) 879 return false; 880 } 881 } 882 if (!isLoopInvariant(Step, LoopNest)) 883 return false; 884 Loops.set(mapDstLoop(AddRec->getLoop())); 885 return checkDstSubscript(Start, LoopNest, Loops); 886 } 887 888 889 // Examines the subscript pair (the Src and Dst SCEVs) 890 // and classifies it as either ZIV, SIV, RDIV, MIV, or Nonlinear. 891 // Collects the associated loops in a set. 892 DependenceInfo::Subscript::ClassificationKind 893 DependenceInfo::classifyPair(const SCEV *Src, const Loop *SrcLoopNest, 894 const SCEV *Dst, const Loop *DstLoopNest, 895 SmallBitVector &Loops) { 896 SmallBitVector SrcLoops(MaxLevels + 1); 897 SmallBitVector DstLoops(MaxLevels + 1); 898 if (!checkSrcSubscript(Src, SrcLoopNest, SrcLoops)) 899 return Subscript::NonLinear; 900 if (!checkDstSubscript(Dst, DstLoopNest, DstLoops)) 901 return Subscript::NonLinear; 902 Loops = SrcLoops; 903 Loops |= DstLoops; 904 unsigned N = Loops.count(); 905 if (N == 0) 906 return Subscript::ZIV; 907 if (N == 1) 908 return Subscript::SIV; 909 if (N == 2 && (SrcLoops.count() == 0 || 910 DstLoops.count() == 0 || 911 (SrcLoops.count() == 1 && DstLoops.count() == 1))) 912 return Subscript::RDIV; 913 return Subscript::MIV; 914 } 915 916 917 // A wrapper around SCEV::isKnownPredicate. 918 // Looks for cases where we're interested in comparing for equality. 919 // If both X and Y have been identically sign or zero extended, 920 // it strips off the (confusing) extensions before invoking 921 // SCEV::isKnownPredicate. Perhaps, someday, the ScalarEvolution package 922 // will be similarly updated. 923 // 924 // If SCEV::isKnownPredicate can't prove the predicate, 925 // we try simple subtraction, which seems to help in some cases 926 // involving symbolics. 927 bool DependenceInfo::isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *X, 928 const SCEV *Y) const { 929 if (Pred == CmpInst::ICMP_EQ || 930 Pred == CmpInst::ICMP_NE) { 931 if ((isa<SCEVSignExtendExpr>(X) && 932 isa<SCEVSignExtendExpr>(Y)) || 933 (isa<SCEVZeroExtendExpr>(X) && 934 isa<SCEVZeroExtendExpr>(Y))) { 935 const SCEVCastExpr *CX = cast<SCEVCastExpr>(X); 936 const SCEVCastExpr *CY = cast<SCEVCastExpr>(Y); 937 const SCEV *Xop = CX->getOperand(); 938 const SCEV *Yop = CY->getOperand(); 939 if (Xop->getType() == Yop->getType()) { 940 X = Xop; 941 Y = Yop; 942 } 943 } 944 } 945 if (SE->isKnownPredicate(Pred, X, Y)) 946 return true; 947 // If SE->isKnownPredicate can't prove the condition, 948 // we try the brute-force approach of subtracting 949 // and testing the difference. 950 // By testing with SE->isKnownPredicate first, we avoid 951 // the possibility of overflow when the arguments are constants. 952 const SCEV *Delta = SE->getMinusSCEV(X, Y); 953 switch (Pred) { 954 case CmpInst::ICMP_EQ: 955 return Delta->isZero(); 956 case CmpInst::ICMP_NE: 957 return SE->isKnownNonZero(Delta); 958 case CmpInst::ICMP_SGE: 959 return SE->isKnownNonNegative(Delta); 960 case CmpInst::ICMP_SLE: 961 return SE->isKnownNonPositive(Delta); 962 case CmpInst::ICMP_SGT: 963 return SE->isKnownPositive(Delta); 964 case CmpInst::ICMP_SLT: 965 return SE->isKnownNegative(Delta); 966 default: 967 llvm_unreachable("unexpected predicate in isKnownPredicate"); 968 } 969 } 970 971 972 // All subscripts are all the same type. 973 // Loop bound may be smaller (e.g., a char). 974 // Should zero extend loop bound, since it's always >= 0. 975 // This routine collects upper bound and extends or truncates if needed. 976 // Truncating is safe when subscripts are known not to wrap. Cases without 977 // nowrap flags should have been rejected earlier. 978 // Return null if no bound available. 979 const SCEV *DependenceInfo::collectUpperBound(const Loop *L, Type *T) const { 980 if (SE->hasLoopInvariantBackedgeTakenCount(L)) { 981 const SCEV *UB = SE->getBackedgeTakenCount(L); 982 return SE->getTruncateOrZeroExtend(UB, T); 983 } 984 return nullptr; 985 } 986 987 988 // Calls collectUpperBound(), then attempts to cast it to SCEVConstant. 989 // If the cast fails, returns NULL. 990 const SCEVConstant *DependenceInfo::collectConstantUpperBound(const Loop *L, 991 Type *T) const { 992 if (const SCEV *UB = collectUpperBound(L, T)) 993 return dyn_cast<SCEVConstant>(UB); 994 return nullptr; 995 } 996 997 998 // testZIV - 999 // When we have a pair of subscripts of the form [c1] and [c2], 1000 // where c1 and c2 are both loop invariant, we attack it using 1001 // the ZIV test. Basically, we test by comparing the two values, 1002 // but there are actually three possible results: 1003 // 1) the values are equal, so there's a dependence 1004 // 2) the values are different, so there's no dependence 1005 // 3) the values might be equal, so we have to assume a dependence. 1006 // 1007 // Return true if dependence disproved. 1008 bool DependenceInfo::testZIV(const SCEV *Src, const SCEV *Dst, 1009 FullDependence &Result) const { 1010 DEBUG(dbgs() << " src = " << *Src << "\n"); 1011 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 1012 ++ZIVapplications; 1013 if (isKnownPredicate(CmpInst::ICMP_EQ, Src, Dst)) { 1014 DEBUG(dbgs() << " provably dependent\n"); 1015 return false; // provably dependent 1016 } 1017 if (isKnownPredicate(CmpInst::ICMP_NE, Src, Dst)) { 1018 DEBUG(dbgs() << " provably independent\n"); 1019 ++ZIVindependence; 1020 return true; // provably independent 1021 } 1022 DEBUG(dbgs() << " possibly dependent\n"); 1023 Result.Consistent = false; 1024 return false; // possibly dependent 1025 } 1026 1027 1028 // strongSIVtest - 1029 // From the paper, Practical Dependence Testing, Section 4.2.1 1030 // 1031 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 + a*i], 1032 // where i is an induction variable, c1 and c2 are loop invariant, 1033 // and a is a constant, we can solve it exactly using the Strong SIV test. 1034 // 1035 // Can prove independence. Failing that, can compute distance (and direction). 1036 // In the presence of symbolic terms, we can sometimes make progress. 1037 // 1038 // If there's a dependence, 1039 // 1040 // c1 + a*i = c2 + a*i' 1041 // 1042 // The dependence distance is 1043 // 1044 // d = i' - i = (c1 - c2)/a 1045 // 1046 // A dependence only exists if d is an integer and abs(d) <= U, where U is the 1047 // loop's upper bound. If a dependence exists, the dependence direction is 1048 // defined as 1049 // 1050 // { < if d > 0 1051 // direction = { = if d = 0 1052 // { > if d < 0 1053 // 1054 // Return true if dependence disproved. 1055 bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst, 1056 const SCEV *DstConst, const Loop *CurLoop, 1057 unsigned Level, FullDependence &Result, 1058 Constraint &NewConstraint) const { 1059 DEBUG(dbgs() << "\tStrong SIV test\n"); 1060 DEBUG(dbgs() << "\t Coeff = " << *Coeff); 1061 DEBUG(dbgs() << ", " << *Coeff->getType() << "\n"); 1062 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst); 1063 DEBUG(dbgs() << ", " << *SrcConst->getType() << "\n"); 1064 DEBUG(dbgs() << "\t DstConst = " << *DstConst); 1065 DEBUG(dbgs() << ", " << *DstConst->getType() << "\n"); 1066 ++StrongSIVapplications; 1067 assert(0 < Level && Level <= CommonLevels && "level out of range"); 1068 Level--; 1069 1070 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); 1071 DEBUG(dbgs() << "\t Delta = " << *Delta); 1072 DEBUG(dbgs() << ", " << *Delta->getType() << "\n"); 1073 1074 // check that |Delta| < iteration count 1075 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1076 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound); 1077 DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n"); 1078 const SCEV *AbsDelta = 1079 SE->isKnownNonNegative(Delta) ? Delta : SE->getNegativeSCEV(Delta); 1080 const SCEV *AbsCoeff = 1081 SE->isKnownNonNegative(Coeff) ? Coeff : SE->getNegativeSCEV(Coeff); 1082 const SCEV *Product = SE->getMulExpr(UpperBound, AbsCoeff); 1083 if (isKnownPredicate(CmpInst::ICMP_SGT, AbsDelta, Product)) { 1084 // Distance greater than trip count - no dependence 1085 ++StrongSIVindependence; 1086 ++StrongSIVsuccesses; 1087 return true; 1088 } 1089 } 1090 1091 // Can we compute distance? 1092 if (isa<SCEVConstant>(Delta) && isa<SCEVConstant>(Coeff)) { 1093 APInt ConstDelta = cast<SCEVConstant>(Delta)->getAPInt(); 1094 APInt ConstCoeff = cast<SCEVConstant>(Coeff)->getAPInt(); 1095 APInt Distance = ConstDelta; // these need to be initialized 1096 APInt Remainder = ConstDelta; 1097 APInt::sdivrem(ConstDelta, ConstCoeff, Distance, Remainder); 1098 DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); 1099 DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1100 // Make sure Coeff divides Delta exactly 1101 if (Remainder != 0) { 1102 // Coeff doesn't divide Distance, no dependence 1103 ++StrongSIVindependence; 1104 ++StrongSIVsuccesses; 1105 return true; 1106 } 1107 Result.DV[Level].Distance = SE->getConstant(Distance); 1108 NewConstraint.setDistance(SE->getConstant(Distance), CurLoop); 1109 if (Distance.sgt(0)) 1110 Result.DV[Level].Direction &= Dependence::DVEntry::LT; 1111 else if (Distance.slt(0)) 1112 Result.DV[Level].Direction &= Dependence::DVEntry::GT; 1113 else 1114 Result.DV[Level].Direction &= Dependence::DVEntry::EQ; 1115 ++StrongSIVsuccesses; 1116 } 1117 else if (Delta->isZero()) { 1118 // since 0/X == 0 1119 Result.DV[Level].Distance = Delta; 1120 NewConstraint.setDistance(Delta, CurLoop); 1121 Result.DV[Level].Direction &= Dependence::DVEntry::EQ; 1122 ++StrongSIVsuccesses; 1123 } 1124 else { 1125 if (Coeff->isOne()) { 1126 DEBUG(dbgs() << "\t Distance = " << *Delta << "\n"); 1127 Result.DV[Level].Distance = Delta; // since X/1 == X 1128 NewConstraint.setDistance(Delta, CurLoop); 1129 } 1130 else { 1131 Result.Consistent = false; 1132 NewConstraint.setLine(Coeff, 1133 SE->getNegativeSCEV(Coeff), 1134 SE->getNegativeSCEV(Delta), CurLoop); 1135 } 1136 1137 // maybe we can get a useful direction 1138 bool DeltaMaybeZero = !SE->isKnownNonZero(Delta); 1139 bool DeltaMaybePositive = !SE->isKnownNonPositive(Delta); 1140 bool DeltaMaybeNegative = !SE->isKnownNonNegative(Delta); 1141 bool CoeffMaybePositive = !SE->isKnownNonPositive(Coeff); 1142 bool CoeffMaybeNegative = !SE->isKnownNonNegative(Coeff); 1143 // The double negatives above are confusing. 1144 // It helps to read !SE->isKnownNonZero(Delta) 1145 // as "Delta might be Zero" 1146 unsigned NewDirection = Dependence::DVEntry::NONE; 1147 if ((DeltaMaybePositive && CoeffMaybePositive) || 1148 (DeltaMaybeNegative && CoeffMaybeNegative)) 1149 NewDirection = Dependence::DVEntry::LT; 1150 if (DeltaMaybeZero) 1151 NewDirection |= Dependence::DVEntry::EQ; 1152 if ((DeltaMaybeNegative && CoeffMaybePositive) || 1153 (DeltaMaybePositive && CoeffMaybeNegative)) 1154 NewDirection |= Dependence::DVEntry::GT; 1155 if (NewDirection < Result.DV[Level].Direction) 1156 ++StrongSIVsuccesses; 1157 Result.DV[Level].Direction &= NewDirection; 1158 } 1159 return false; 1160 } 1161 1162 1163 // weakCrossingSIVtest - 1164 // From the paper, Practical Dependence Testing, Section 4.2.2 1165 // 1166 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 - a*i], 1167 // where i is an induction variable, c1 and c2 are loop invariant, 1168 // and a is a constant, we can solve it exactly using the 1169 // Weak-Crossing SIV test. 1170 // 1171 // Given c1 + a*i = c2 - a*i', we can look for the intersection of 1172 // the two lines, where i = i', yielding 1173 // 1174 // c1 + a*i = c2 - a*i 1175 // 2a*i = c2 - c1 1176 // i = (c2 - c1)/2a 1177 // 1178 // If i < 0, there is no dependence. 1179 // If i > upperbound, there is no dependence. 1180 // If i = 0 (i.e., if c1 = c2), there's a dependence with distance = 0. 1181 // If i = upperbound, there's a dependence with distance = 0. 1182 // If i is integral, there's a dependence (all directions). 1183 // If the non-integer part = 1/2, there's a dependence (<> directions). 1184 // Otherwise, there's no dependence. 1185 // 1186 // Can prove independence. Failing that, 1187 // can sometimes refine the directions. 1188 // Can determine iteration for splitting. 1189 // 1190 // Return true if dependence disproved. 1191 bool DependenceInfo::weakCrossingSIVtest( 1192 const SCEV *Coeff, const SCEV *SrcConst, const SCEV *DstConst, 1193 const Loop *CurLoop, unsigned Level, FullDependence &Result, 1194 Constraint &NewConstraint, const SCEV *&SplitIter) const { 1195 DEBUG(dbgs() << "\tWeak-Crossing SIV test\n"); 1196 DEBUG(dbgs() << "\t Coeff = " << *Coeff << "\n"); 1197 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1198 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1199 ++WeakCrossingSIVapplications; 1200 assert(0 < Level && Level <= CommonLevels && "Level out of range"); 1201 Level--; 1202 Result.Consistent = false; 1203 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1204 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1205 NewConstraint.setLine(Coeff, Coeff, Delta, CurLoop); 1206 if (Delta->isZero()) { 1207 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT); 1208 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT); 1209 ++WeakCrossingSIVsuccesses; 1210 if (!Result.DV[Level].Direction) { 1211 ++WeakCrossingSIVindependence; 1212 return true; 1213 } 1214 Result.DV[Level].Distance = Delta; // = 0 1215 return false; 1216 } 1217 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Coeff); 1218 if (!ConstCoeff) 1219 return false; 1220 1221 Result.DV[Level].Splitable = true; 1222 if (SE->isKnownNegative(ConstCoeff)) { 1223 ConstCoeff = dyn_cast<SCEVConstant>(SE->getNegativeSCEV(ConstCoeff)); 1224 assert(ConstCoeff && 1225 "dynamic cast of negative of ConstCoeff should yield constant"); 1226 Delta = SE->getNegativeSCEV(Delta); 1227 } 1228 assert(SE->isKnownPositive(ConstCoeff) && "ConstCoeff should be positive"); 1229 1230 // compute SplitIter for use by DependenceInfo::getSplitIteration() 1231 SplitIter = SE->getUDivExpr( 1232 SE->getSMaxExpr(SE->getZero(Delta->getType()), Delta), 1233 SE->getMulExpr(SE->getConstant(Delta->getType(), 2), ConstCoeff)); 1234 DEBUG(dbgs() << "\t Split iter = " << *SplitIter << "\n"); 1235 1236 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1237 if (!ConstDelta) 1238 return false; 1239 1240 // We're certain that ConstCoeff > 0; therefore, 1241 // if Delta < 0, then no dependence. 1242 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1243 DEBUG(dbgs() << "\t ConstCoeff = " << *ConstCoeff << "\n"); 1244 if (SE->isKnownNegative(Delta)) { 1245 // No dependence, Delta < 0 1246 ++WeakCrossingSIVindependence; 1247 ++WeakCrossingSIVsuccesses; 1248 return true; 1249 } 1250 1251 // We're certain that Delta > 0 and ConstCoeff > 0. 1252 // Check Delta/(2*ConstCoeff) against upper loop bound 1253 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1254 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1255 const SCEV *ConstantTwo = SE->getConstant(UpperBound->getType(), 2); 1256 const SCEV *ML = SE->getMulExpr(SE->getMulExpr(ConstCoeff, UpperBound), 1257 ConstantTwo); 1258 DEBUG(dbgs() << "\t ML = " << *ML << "\n"); 1259 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, ML)) { 1260 // Delta too big, no dependence 1261 ++WeakCrossingSIVindependence; 1262 ++WeakCrossingSIVsuccesses; 1263 return true; 1264 } 1265 if (isKnownPredicate(CmpInst::ICMP_EQ, Delta, ML)) { 1266 // i = i' = UB 1267 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT); 1268 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT); 1269 ++WeakCrossingSIVsuccesses; 1270 if (!Result.DV[Level].Direction) { 1271 ++WeakCrossingSIVindependence; 1272 return true; 1273 } 1274 Result.DV[Level].Splitable = false; 1275 Result.DV[Level].Distance = SE->getZero(Delta->getType()); 1276 return false; 1277 } 1278 } 1279 1280 // check that Coeff divides Delta 1281 APInt APDelta = ConstDelta->getAPInt(); 1282 APInt APCoeff = ConstCoeff->getAPInt(); 1283 APInt Distance = APDelta; // these need to be initialzed 1284 APInt Remainder = APDelta; 1285 APInt::sdivrem(APDelta, APCoeff, Distance, Remainder); 1286 DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1287 if (Remainder != 0) { 1288 // Coeff doesn't divide Delta, no dependence 1289 ++WeakCrossingSIVindependence; 1290 ++WeakCrossingSIVsuccesses; 1291 return true; 1292 } 1293 DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); 1294 1295 // if 2*Coeff doesn't divide Delta, then the equal direction isn't possible 1296 APInt Two = APInt(Distance.getBitWidth(), 2, true); 1297 Remainder = Distance.srem(Two); 1298 DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1299 if (Remainder != 0) { 1300 // Equal direction isn't possible 1301 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::EQ); 1302 ++WeakCrossingSIVsuccesses; 1303 } 1304 return false; 1305 } 1306 1307 1308 // Kirch's algorithm, from 1309 // 1310 // Optimizing Supercompilers for Supercomputers 1311 // Michael Wolfe 1312 // MIT Press, 1989 1313 // 1314 // Program 2.1, page 29. 1315 // Computes the GCD of AM and BM. 1316 // Also finds a solution to the equation ax - by = gcd(a, b). 1317 // Returns true if dependence disproved; i.e., gcd does not divide Delta. 1318 static bool findGCD(unsigned Bits, const APInt &AM, const APInt &BM, 1319 const APInt &Delta, APInt &G, APInt &X, APInt &Y) { 1320 APInt A0(Bits, 1, true), A1(Bits, 0, true); 1321 APInt B0(Bits, 0, true), B1(Bits, 1, true); 1322 APInt G0 = AM.abs(); 1323 APInt G1 = BM.abs(); 1324 APInt Q = G0; // these need to be initialized 1325 APInt R = G0; 1326 APInt::sdivrem(G0, G1, Q, R); 1327 while (R != 0) { 1328 APInt A2 = A0 - Q*A1; A0 = A1; A1 = A2; 1329 APInt B2 = B0 - Q*B1; B0 = B1; B1 = B2; 1330 G0 = G1; G1 = R; 1331 APInt::sdivrem(G0, G1, Q, R); 1332 } 1333 G = G1; 1334 DEBUG(dbgs() << "\t GCD = " << G << "\n"); 1335 X = AM.slt(0) ? -A1 : A1; 1336 Y = BM.slt(0) ? B1 : -B1; 1337 1338 // make sure gcd divides Delta 1339 R = Delta.srem(G); 1340 if (R != 0) 1341 return true; // gcd doesn't divide Delta, no dependence 1342 Q = Delta.sdiv(G); 1343 X *= Q; 1344 Y *= Q; 1345 return false; 1346 } 1347 1348 static APInt floorOfQuotient(const APInt &A, const APInt &B) { 1349 APInt Q = A; // these need to be initialized 1350 APInt R = A; 1351 APInt::sdivrem(A, B, Q, R); 1352 if (R == 0) 1353 return Q; 1354 if ((A.sgt(0) && B.sgt(0)) || 1355 (A.slt(0) && B.slt(0))) 1356 return Q; 1357 else 1358 return Q - 1; 1359 } 1360 1361 static APInt ceilingOfQuotient(const APInt &A, const APInt &B) { 1362 APInt Q = A; // these need to be initialized 1363 APInt R = A; 1364 APInt::sdivrem(A, B, Q, R); 1365 if (R == 0) 1366 return Q; 1367 if ((A.sgt(0) && B.sgt(0)) || 1368 (A.slt(0) && B.slt(0))) 1369 return Q + 1; 1370 else 1371 return Q; 1372 } 1373 1374 1375 static 1376 APInt maxAPInt(APInt A, APInt B) { 1377 return A.sgt(B) ? A : B; 1378 } 1379 1380 1381 static 1382 APInt minAPInt(APInt A, APInt B) { 1383 return A.slt(B) ? A : B; 1384 } 1385 1386 1387 // exactSIVtest - 1388 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i], 1389 // where i is an induction variable, c1 and c2 are loop invariant, and a1 1390 // and a2 are constant, we can solve it exactly using an algorithm developed 1391 // by Banerjee and Wolfe. See Section 2.5.3 in 1392 // 1393 // Optimizing Supercompilers for Supercomputers 1394 // Michael Wolfe 1395 // MIT Press, 1989 1396 // 1397 // It's slower than the specialized tests (strong SIV, weak-zero SIV, etc), 1398 // so use them if possible. They're also a bit better with symbolics and, 1399 // in the case of the strong SIV test, can compute Distances. 1400 // 1401 // Return true if dependence disproved. 1402 bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, 1403 const SCEV *SrcConst, const SCEV *DstConst, 1404 const Loop *CurLoop, unsigned Level, 1405 FullDependence &Result, 1406 Constraint &NewConstraint) const { 1407 DEBUG(dbgs() << "\tExact SIV test\n"); 1408 DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); 1409 DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); 1410 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1411 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1412 ++ExactSIVapplications; 1413 assert(0 < Level && Level <= CommonLevels && "Level out of range"); 1414 Level--; 1415 Result.Consistent = false; 1416 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1417 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1418 NewConstraint.setLine(SrcCoeff, SE->getNegativeSCEV(DstCoeff), 1419 Delta, CurLoop); 1420 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1421 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1422 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1423 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) 1424 return false; 1425 1426 // find gcd 1427 APInt G, X, Y; 1428 APInt AM = ConstSrcCoeff->getAPInt(); 1429 APInt BM = ConstDstCoeff->getAPInt(); 1430 unsigned Bits = AM.getBitWidth(); 1431 if (findGCD(Bits, AM, BM, ConstDelta->getAPInt(), G, X, Y)) { 1432 // gcd doesn't divide Delta, no dependence 1433 ++ExactSIVindependence; 1434 ++ExactSIVsuccesses; 1435 return true; 1436 } 1437 1438 DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); 1439 1440 // since SCEV construction normalizes, LM = 0 1441 APInt UM(Bits, 1, true); 1442 bool UMvalid = false; 1443 // UM is perhaps unavailable, let's check 1444 if (const SCEVConstant *CUB = 1445 collectConstantUpperBound(CurLoop, Delta->getType())) { 1446 UM = CUB->getAPInt(); 1447 DEBUG(dbgs() << "\t UM = " << UM << "\n"); 1448 UMvalid = true; 1449 } 1450 1451 APInt TU(APInt::getSignedMaxValue(Bits)); 1452 APInt TL(APInt::getSignedMinValue(Bits)); 1453 1454 // test(BM/G, LM-X) and test(-BM/G, X-UM) 1455 APInt TMUL = BM.sdiv(G); 1456 if (TMUL.sgt(0)) { 1457 TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL)); 1458 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1459 if (UMvalid) { 1460 TU = minAPInt(TU, floorOfQuotient(UM - X, TMUL)); 1461 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1462 } 1463 } 1464 else { 1465 TU = minAPInt(TU, floorOfQuotient(-X, TMUL)); 1466 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1467 if (UMvalid) { 1468 TL = maxAPInt(TL, ceilingOfQuotient(UM - X, TMUL)); 1469 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1470 } 1471 } 1472 1473 // test(AM/G, LM-Y) and test(-AM/G, Y-UM) 1474 TMUL = AM.sdiv(G); 1475 if (TMUL.sgt(0)) { 1476 TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL)); 1477 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1478 if (UMvalid) { 1479 TU = minAPInt(TU, floorOfQuotient(UM - Y, TMUL)); 1480 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1481 } 1482 } 1483 else { 1484 TU = minAPInt(TU, floorOfQuotient(-Y, TMUL)); 1485 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1486 if (UMvalid) { 1487 TL = maxAPInt(TL, ceilingOfQuotient(UM - Y, TMUL)); 1488 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1489 } 1490 } 1491 if (TL.sgt(TU)) { 1492 ++ExactSIVindependence; 1493 ++ExactSIVsuccesses; 1494 return true; 1495 } 1496 1497 // explore directions 1498 unsigned NewDirection = Dependence::DVEntry::NONE; 1499 1500 // less than 1501 APInt SaveTU(TU); // save these 1502 APInt SaveTL(TL); 1503 DEBUG(dbgs() << "\t exploring LT direction\n"); 1504 TMUL = AM - BM; 1505 if (TMUL.sgt(0)) { 1506 TL = maxAPInt(TL, ceilingOfQuotient(X - Y + 1, TMUL)); 1507 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1508 } 1509 else { 1510 TU = minAPInt(TU, floorOfQuotient(X - Y + 1, TMUL)); 1511 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1512 } 1513 if (TL.sle(TU)) { 1514 NewDirection |= Dependence::DVEntry::LT; 1515 ++ExactSIVsuccesses; 1516 } 1517 1518 // equal 1519 TU = SaveTU; // restore 1520 TL = SaveTL; 1521 DEBUG(dbgs() << "\t exploring EQ direction\n"); 1522 if (TMUL.sgt(0)) { 1523 TL = maxAPInt(TL, ceilingOfQuotient(X - Y, TMUL)); 1524 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1525 } 1526 else { 1527 TU = minAPInt(TU, floorOfQuotient(X - Y, TMUL)); 1528 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1529 } 1530 TMUL = BM - AM; 1531 if (TMUL.sgt(0)) { 1532 TL = maxAPInt(TL, ceilingOfQuotient(Y - X, TMUL)); 1533 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1534 } 1535 else { 1536 TU = minAPInt(TU, floorOfQuotient(Y - X, TMUL)); 1537 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1538 } 1539 if (TL.sle(TU)) { 1540 NewDirection |= Dependence::DVEntry::EQ; 1541 ++ExactSIVsuccesses; 1542 } 1543 1544 // greater than 1545 TU = SaveTU; // restore 1546 TL = SaveTL; 1547 DEBUG(dbgs() << "\t exploring GT direction\n"); 1548 if (TMUL.sgt(0)) { 1549 TL = maxAPInt(TL, ceilingOfQuotient(Y - X + 1, TMUL)); 1550 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1551 } 1552 else { 1553 TU = minAPInt(TU, floorOfQuotient(Y - X + 1, TMUL)); 1554 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1555 } 1556 if (TL.sle(TU)) { 1557 NewDirection |= Dependence::DVEntry::GT; 1558 ++ExactSIVsuccesses; 1559 } 1560 1561 // finished 1562 Result.DV[Level].Direction &= NewDirection; 1563 if (Result.DV[Level].Direction == Dependence::DVEntry::NONE) 1564 ++ExactSIVindependence; 1565 return Result.DV[Level].Direction == Dependence::DVEntry::NONE; 1566 } 1567 1568 1569 1570 // Return true if the divisor evenly divides the dividend. 1571 static 1572 bool isRemainderZero(const SCEVConstant *Dividend, 1573 const SCEVConstant *Divisor) { 1574 const APInt &ConstDividend = Dividend->getAPInt(); 1575 const APInt &ConstDivisor = Divisor->getAPInt(); 1576 return ConstDividend.srem(ConstDivisor) == 0; 1577 } 1578 1579 1580 // weakZeroSrcSIVtest - 1581 // From the paper, Practical Dependence Testing, Section 4.2.2 1582 // 1583 // When we have a pair of subscripts of the form [c1] and [c2 + a*i], 1584 // where i is an induction variable, c1 and c2 are loop invariant, 1585 // and a is a constant, we can solve it exactly using the 1586 // Weak-Zero SIV test. 1587 // 1588 // Given 1589 // 1590 // c1 = c2 + a*i 1591 // 1592 // we get 1593 // 1594 // (c1 - c2)/a = i 1595 // 1596 // If i is not an integer, there's no dependence. 1597 // If i < 0 or > UB, there's no dependence. 1598 // If i = 0, the direction is <= and peeling the 1599 // 1st iteration will break the dependence. 1600 // If i = UB, the direction is >= and peeling the 1601 // last iteration will break the dependence. 1602 // Otherwise, the direction is *. 1603 // 1604 // Can prove independence. Failing that, we can sometimes refine 1605 // the directions. Can sometimes show that first or last 1606 // iteration carries all the dependences (so worth peeling). 1607 // 1608 // (see also weakZeroDstSIVtest) 1609 // 1610 // Return true if dependence disproved. 1611 bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *DstCoeff, 1612 const SCEV *SrcConst, 1613 const SCEV *DstConst, 1614 const Loop *CurLoop, unsigned Level, 1615 FullDependence &Result, 1616 Constraint &NewConstraint) const { 1617 // For the WeakSIV test, it's possible the loop isn't common to 1618 // the Src and Dst loops. If it isn't, then there's no need to 1619 // record a direction. 1620 DEBUG(dbgs() << "\tWeak-Zero (src) SIV test\n"); 1621 DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << "\n"); 1622 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1623 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1624 ++WeakZeroSIVapplications; 1625 assert(0 < Level && Level <= MaxLevels && "Level out of range"); 1626 Level--; 1627 Result.Consistent = false; 1628 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); 1629 NewConstraint.setLine(SE->getZero(Delta->getType()), DstCoeff, Delta, 1630 CurLoop); 1631 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1632 if (isKnownPredicate(CmpInst::ICMP_EQ, SrcConst, DstConst)) { 1633 if (Level < CommonLevels) { 1634 Result.DV[Level].Direction &= Dependence::DVEntry::LE; 1635 Result.DV[Level].PeelFirst = true; 1636 ++WeakZeroSIVsuccesses; 1637 } 1638 return false; // dependences caused by first iteration 1639 } 1640 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1641 if (!ConstCoeff) 1642 return false; 1643 const SCEV *AbsCoeff = 1644 SE->isKnownNegative(ConstCoeff) ? 1645 SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; 1646 const SCEV *NewDelta = 1647 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta; 1648 1649 // check that Delta/SrcCoeff < iteration count 1650 // really check NewDelta < count*AbsCoeff 1651 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1652 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1653 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); 1654 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { 1655 ++WeakZeroSIVindependence; 1656 ++WeakZeroSIVsuccesses; 1657 return true; 1658 } 1659 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { 1660 // dependences caused by last iteration 1661 if (Level < CommonLevels) { 1662 Result.DV[Level].Direction &= Dependence::DVEntry::GE; 1663 Result.DV[Level].PeelLast = true; 1664 ++WeakZeroSIVsuccesses; 1665 } 1666 return false; 1667 } 1668 } 1669 1670 // check that Delta/SrcCoeff >= 0 1671 // really check that NewDelta >= 0 1672 if (SE->isKnownNegative(NewDelta)) { 1673 // No dependence, newDelta < 0 1674 ++WeakZeroSIVindependence; 1675 ++WeakZeroSIVsuccesses; 1676 return true; 1677 } 1678 1679 // if SrcCoeff doesn't divide Delta, then no dependence 1680 if (isa<SCEVConstant>(Delta) && 1681 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { 1682 ++WeakZeroSIVindependence; 1683 ++WeakZeroSIVsuccesses; 1684 return true; 1685 } 1686 return false; 1687 } 1688 1689 1690 // weakZeroDstSIVtest - 1691 // From the paper, Practical Dependence Testing, Section 4.2.2 1692 // 1693 // When we have a pair of subscripts of the form [c1 + a*i] and [c2], 1694 // where i is an induction variable, c1 and c2 are loop invariant, 1695 // and a is a constant, we can solve it exactly using the 1696 // Weak-Zero SIV test. 1697 // 1698 // Given 1699 // 1700 // c1 + a*i = c2 1701 // 1702 // we get 1703 // 1704 // i = (c2 - c1)/a 1705 // 1706 // If i is not an integer, there's no dependence. 1707 // If i < 0 or > UB, there's no dependence. 1708 // If i = 0, the direction is <= and peeling the 1709 // 1st iteration will break the dependence. 1710 // If i = UB, the direction is >= and peeling the 1711 // last iteration will break the dependence. 1712 // Otherwise, the direction is *. 1713 // 1714 // Can prove independence. Failing that, we can sometimes refine 1715 // the directions. Can sometimes show that first or last 1716 // iteration carries all the dependences (so worth peeling). 1717 // 1718 // (see also weakZeroSrcSIVtest) 1719 // 1720 // Return true if dependence disproved. 1721 bool DependenceInfo::weakZeroDstSIVtest(const SCEV *SrcCoeff, 1722 const SCEV *SrcConst, 1723 const SCEV *DstConst, 1724 const Loop *CurLoop, unsigned Level, 1725 FullDependence &Result, 1726 Constraint &NewConstraint) const { 1727 // For the WeakSIV test, it's possible the loop isn't common to the 1728 // Src and Dst loops. If it isn't, then there's no need to record a direction. 1729 DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n"); 1730 DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << "\n"); 1731 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1732 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1733 ++WeakZeroSIVapplications; 1734 assert(0 < Level && Level <= SrcLevels && "Level out of range"); 1735 Level--; 1736 Result.Consistent = false; 1737 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1738 NewConstraint.setLine(SrcCoeff, SE->getZero(Delta->getType()), Delta, 1739 CurLoop); 1740 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1741 if (isKnownPredicate(CmpInst::ICMP_EQ, DstConst, SrcConst)) { 1742 if (Level < CommonLevels) { 1743 Result.DV[Level].Direction &= Dependence::DVEntry::LE; 1744 Result.DV[Level].PeelFirst = true; 1745 ++WeakZeroSIVsuccesses; 1746 } 1747 return false; // dependences caused by first iteration 1748 } 1749 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1750 if (!ConstCoeff) 1751 return false; 1752 const SCEV *AbsCoeff = 1753 SE->isKnownNegative(ConstCoeff) ? 1754 SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; 1755 const SCEV *NewDelta = 1756 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta; 1757 1758 // check that Delta/SrcCoeff < iteration count 1759 // really check NewDelta < count*AbsCoeff 1760 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1761 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1762 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); 1763 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { 1764 ++WeakZeroSIVindependence; 1765 ++WeakZeroSIVsuccesses; 1766 return true; 1767 } 1768 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { 1769 // dependences caused by last iteration 1770 if (Level < CommonLevels) { 1771 Result.DV[Level].Direction &= Dependence::DVEntry::GE; 1772 Result.DV[Level].PeelLast = true; 1773 ++WeakZeroSIVsuccesses; 1774 } 1775 return false; 1776 } 1777 } 1778 1779 // check that Delta/SrcCoeff >= 0 1780 // really check that NewDelta >= 0 1781 if (SE->isKnownNegative(NewDelta)) { 1782 // No dependence, newDelta < 0 1783 ++WeakZeroSIVindependence; 1784 ++WeakZeroSIVsuccesses; 1785 return true; 1786 } 1787 1788 // if SrcCoeff doesn't divide Delta, then no dependence 1789 if (isa<SCEVConstant>(Delta) && 1790 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { 1791 ++WeakZeroSIVindependence; 1792 ++WeakZeroSIVsuccesses; 1793 return true; 1794 } 1795 return false; 1796 } 1797 1798 1799 // exactRDIVtest - Tests the RDIV subscript pair for dependence. 1800 // Things of the form [c1 + a*i] and [c2 + b*j], 1801 // where i and j are induction variable, c1 and c2 are loop invariant, 1802 // and a and b are constants. 1803 // Returns true if any possible dependence is disproved. 1804 // Marks the result as inconsistent. 1805 // Works in some cases that symbolicRDIVtest doesn't, and vice versa. 1806 bool DependenceInfo::exactRDIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, 1807 const SCEV *SrcConst, const SCEV *DstConst, 1808 const Loop *SrcLoop, const Loop *DstLoop, 1809 FullDependence &Result) const { 1810 DEBUG(dbgs() << "\tExact RDIV test\n"); 1811 DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); 1812 DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); 1813 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1814 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1815 ++ExactRDIVapplications; 1816 Result.Consistent = false; 1817 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1818 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1819 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1820 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1821 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1822 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) 1823 return false; 1824 1825 // find gcd 1826 APInt G, X, Y; 1827 APInt AM = ConstSrcCoeff->getAPInt(); 1828 APInt BM = ConstDstCoeff->getAPInt(); 1829 unsigned Bits = AM.getBitWidth(); 1830 if (findGCD(Bits, AM, BM, ConstDelta->getAPInt(), G, X, Y)) { 1831 // gcd doesn't divide Delta, no dependence 1832 ++ExactRDIVindependence; 1833 return true; 1834 } 1835 1836 DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); 1837 1838 // since SCEV construction seems to normalize, LM = 0 1839 APInt SrcUM(Bits, 1, true); 1840 bool SrcUMvalid = false; 1841 // SrcUM is perhaps unavailable, let's check 1842 if (const SCEVConstant *UpperBound = 1843 collectConstantUpperBound(SrcLoop, Delta->getType())) { 1844 SrcUM = UpperBound->getAPInt(); 1845 DEBUG(dbgs() << "\t SrcUM = " << SrcUM << "\n"); 1846 SrcUMvalid = true; 1847 } 1848 1849 APInt DstUM(Bits, 1, true); 1850 bool DstUMvalid = false; 1851 // UM is perhaps unavailable, let's check 1852 if (const SCEVConstant *UpperBound = 1853 collectConstantUpperBound(DstLoop, Delta->getType())) { 1854 DstUM = UpperBound->getAPInt(); 1855 DEBUG(dbgs() << "\t DstUM = " << DstUM << "\n"); 1856 DstUMvalid = true; 1857 } 1858 1859 APInt TU(APInt::getSignedMaxValue(Bits)); 1860 APInt TL(APInt::getSignedMinValue(Bits)); 1861 1862 // test(BM/G, LM-X) and test(-BM/G, X-UM) 1863 APInt TMUL = BM.sdiv(G); 1864 if (TMUL.sgt(0)) { 1865 TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL)); 1866 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1867 if (SrcUMvalid) { 1868 TU = minAPInt(TU, floorOfQuotient(SrcUM - X, TMUL)); 1869 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1870 } 1871 } 1872 else { 1873 TU = minAPInt(TU, floorOfQuotient(-X, TMUL)); 1874 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1875 if (SrcUMvalid) { 1876 TL = maxAPInt(TL, ceilingOfQuotient(SrcUM - X, TMUL)); 1877 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1878 } 1879 } 1880 1881 // test(AM/G, LM-Y) and test(-AM/G, Y-UM) 1882 TMUL = AM.sdiv(G); 1883 if (TMUL.sgt(0)) { 1884 TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL)); 1885 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1886 if (DstUMvalid) { 1887 TU = minAPInt(TU, floorOfQuotient(DstUM - Y, TMUL)); 1888 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1889 } 1890 } 1891 else { 1892 TU = minAPInt(TU, floorOfQuotient(-Y, TMUL)); 1893 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1894 if (DstUMvalid) { 1895 TL = maxAPInt(TL, ceilingOfQuotient(DstUM - Y, TMUL)); 1896 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1897 } 1898 } 1899 if (TL.sgt(TU)) 1900 ++ExactRDIVindependence; 1901 return TL.sgt(TU); 1902 } 1903 1904 1905 // symbolicRDIVtest - 1906 // In Section 4.5 of the Practical Dependence Testing paper,the authors 1907 // introduce a special case of Banerjee's Inequalities (also called the 1908 // Extreme-Value Test) that can handle some of the SIV and RDIV cases, 1909 // particularly cases with symbolics. Since it's only able to disprove 1910 // dependence (not compute distances or directions), we'll use it as a 1911 // fall back for the other tests. 1912 // 1913 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] 1914 // where i and j are induction variables and c1 and c2 are loop invariants, 1915 // we can use the symbolic tests to disprove some dependences, serving as a 1916 // backup for the RDIV test. Note that i and j can be the same variable, 1917 // letting this test serve as a backup for the various SIV tests. 1918 // 1919 // For a dependence to exist, c1 + a1*i must equal c2 + a2*j for some 1920 // 0 <= i <= N1 and some 0 <= j <= N2, where N1 and N2 are the (normalized) 1921 // loop bounds for the i and j loops, respectively. So, ... 1922 // 1923 // c1 + a1*i = c2 + a2*j 1924 // a1*i - a2*j = c2 - c1 1925 // 1926 // To test for a dependence, we compute c2 - c1 and make sure it's in the 1927 // range of the maximum and minimum possible values of a1*i - a2*j. 1928 // Considering the signs of a1 and a2, we have 4 possible cases: 1929 // 1930 // 1) If a1 >= 0 and a2 >= 0, then 1931 // a1*0 - a2*N2 <= c2 - c1 <= a1*N1 - a2*0 1932 // -a2*N2 <= c2 - c1 <= a1*N1 1933 // 1934 // 2) If a1 >= 0 and a2 <= 0, then 1935 // a1*0 - a2*0 <= c2 - c1 <= a1*N1 - a2*N2 1936 // 0 <= c2 - c1 <= a1*N1 - a2*N2 1937 // 1938 // 3) If a1 <= 0 and a2 >= 0, then 1939 // a1*N1 - a2*N2 <= c2 - c1 <= a1*0 - a2*0 1940 // a1*N1 - a2*N2 <= c2 - c1 <= 0 1941 // 1942 // 4) If a1 <= 0 and a2 <= 0, then 1943 // a1*N1 - a2*0 <= c2 - c1 <= a1*0 - a2*N2 1944 // a1*N1 <= c2 - c1 <= -a2*N2 1945 // 1946 // return true if dependence disproved 1947 bool DependenceInfo::symbolicRDIVtest(const SCEV *A1, const SCEV *A2, 1948 const SCEV *C1, const SCEV *C2, 1949 const Loop *Loop1, 1950 const Loop *Loop2) const { 1951 ++SymbolicRDIVapplications; 1952 DEBUG(dbgs() << "\ttry symbolic RDIV test\n"); 1953 DEBUG(dbgs() << "\t A1 = " << *A1); 1954 DEBUG(dbgs() << ", type = " << *A1->getType() << "\n"); 1955 DEBUG(dbgs() << "\t A2 = " << *A2 << "\n"); 1956 DEBUG(dbgs() << "\t C1 = " << *C1 << "\n"); 1957 DEBUG(dbgs() << "\t C2 = " << *C2 << "\n"); 1958 const SCEV *N1 = collectUpperBound(Loop1, A1->getType()); 1959 const SCEV *N2 = collectUpperBound(Loop2, A1->getType()); 1960 DEBUG(if (N1) dbgs() << "\t N1 = " << *N1 << "\n"); 1961 DEBUG(if (N2) dbgs() << "\t N2 = " << *N2 << "\n"); 1962 const SCEV *C2_C1 = SE->getMinusSCEV(C2, C1); 1963 const SCEV *C1_C2 = SE->getMinusSCEV(C1, C2); 1964 DEBUG(dbgs() << "\t C2 - C1 = " << *C2_C1 << "\n"); 1965 DEBUG(dbgs() << "\t C1 - C2 = " << *C1_C2 << "\n"); 1966 if (SE->isKnownNonNegative(A1)) { 1967 if (SE->isKnownNonNegative(A2)) { 1968 // A1 >= 0 && A2 >= 0 1969 if (N1) { 1970 // make sure that c2 - c1 <= a1*N1 1971 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 1972 DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); 1973 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1)) { 1974 ++SymbolicRDIVindependence; 1975 return true; 1976 } 1977 } 1978 if (N2) { 1979 // make sure that -a2*N2 <= c2 - c1, or a2*N2 >= c1 - c2 1980 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 1981 DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); 1982 if (isKnownPredicate(CmpInst::ICMP_SLT, A2N2, C1_C2)) { 1983 ++SymbolicRDIVindependence; 1984 return true; 1985 } 1986 } 1987 } 1988 else if (SE->isKnownNonPositive(A2)) { 1989 // a1 >= 0 && a2 <= 0 1990 if (N1 && N2) { 1991 // make sure that c2 - c1 <= a1*N1 - a2*N2 1992 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 1993 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 1994 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); 1995 DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); 1996 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1_A2N2)) { 1997 ++SymbolicRDIVindependence; 1998 return true; 1999 } 2000 } 2001 // make sure that 0 <= c2 - c1 2002 if (SE->isKnownNegative(C2_C1)) { 2003 ++SymbolicRDIVindependence; 2004 return true; 2005 } 2006 } 2007 } 2008 else if (SE->isKnownNonPositive(A1)) { 2009 if (SE->isKnownNonNegative(A2)) { 2010 // a1 <= 0 && a2 >= 0 2011 if (N1 && N2) { 2012 // make sure that a1*N1 - a2*N2 <= c2 - c1 2013 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2014 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2015 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); 2016 DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); 2017 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1_A2N2, C2_C1)) { 2018 ++SymbolicRDIVindependence; 2019 return true; 2020 } 2021 } 2022 // make sure that c2 - c1 <= 0 2023 if (SE->isKnownPositive(C2_C1)) { 2024 ++SymbolicRDIVindependence; 2025 return true; 2026 } 2027 } 2028 else if (SE->isKnownNonPositive(A2)) { 2029 // a1 <= 0 && a2 <= 0 2030 if (N1) { 2031 // make sure that a1*N1 <= c2 - c1 2032 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2033 DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); 2034 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1, C2_C1)) { 2035 ++SymbolicRDIVindependence; 2036 return true; 2037 } 2038 } 2039 if (N2) { 2040 // make sure that c2 - c1 <= -a2*N2, or c1 - c2 >= a2*N2 2041 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2042 DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); 2043 if (isKnownPredicate(CmpInst::ICMP_SLT, C1_C2, A2N2)) { 2044 ++SymbolicRDIVindependence; 2045 return true; 2046 } 2047 } 2048 } 2049 } 2050 return false; 2051 } 2052 2053 2054 // testSIV - 2055 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 - a2*i] 2056 // where i is an induction variable, c1 and c2 are loop invariant, and a1 and 2057 // a2 are constant, we attack it with an SIV test. While they can all be 2058 // solved with the Exact SIV test, it's worthwhile to use simpler tests when 2059 // they apply; they're cheaper and sometimes more precise. 2060 // 2061 // Return true if dependence disproved. 2062 bool DependenceInfo::testSIV(const SCEV *Src, const SCEV *Dst, unsigned &Level, 2063 FullDependence &Result, Constraint &NewConstraint, 2064 const SCEV *&SplitIter) const { 2065 DEBUG(dbgs() << " src = " << *Src << "\n"); 2066 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2067 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); 2068 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); 2069 if (SrcAddRec && DstAddRec) { 2070 const SCEV *SrcConst = SrcAddRec->getStart(); 2071 const SCEV *DstConst = DstAddRec->getStart(); 2072 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2073 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); 2074 const Loop *CurLoop = SrcAddRec->getLoop(); 2075 assert(CurLoop == DstAddRec->getLoop() && 2076 "both loops in SIV should be same"); 2077 Level = mapSrcLoop(CurLoop); 2078 bool disproven; 2079 if (SrcCoeff == DstCoeff) 2080 disproven = strongSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2081 Level, Result, NewConstraint); 2082 else if (SrcCoeff == SE->getNegativeSCEV(DstCoeff)) 2083 disproven = weakCrossingSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2084 Level, Result, NewConstraint, SplitIter); 2085 else 2086 disproven = exactSIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, 2087 Level, Result, NewConstraint); 2088 return disproven || 2089 gcdMIVtest(Src, Dst, Result) || 2090 symbolicRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, CurLoop); 2091 } 2092 if (SrcAddRec) { 2093 const SCEV *SrcConst = SrcAddRec->getStart(); 2094 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2095 const SCEV *DstConst = Dst; 2096 const Loop *CurLoop = SrcAddRec->getLoop(); 2097 Level = mapSrcLoop(CurLoop); 2098 return weakZeroDstSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2099 Level, Result, NewConstraint) || 2100 gcdMIVtest(Src, Dst, Result); 2101 } 2102 if (DstAddRec) { 2103 const SCEV *DstConst = DstAddRec->getStart(); 2104 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); 2105 const SCEV *SrcConst = Src; 2106 const Loop *CurLoop = DstAddRec->getLoop(); 2107 Level = mapDstLoop(CurLoop); 2108 return weakZeroSrcSIVtest(DstCoeff, SrcConst, DstConst, 2109 CurLoop, Level, Result, NewConstraint) || 2110 gcdMIVtest(Src, Dst, Result); 2111 } 2112 llvm_unreachable("SIV test expected at least one AddRec"); 2113 return false; 2114 } 2115 2116 2117 // testRDIV - 2118 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] 2119 // where i and j are induction variables, c1 and c2 are loop invariant, 2120 // and a1 and a2 are constant, we can solve it exactly with an easy adaptation 2121 // of the Exact SIV test, the Restricted Double Index Variable (RDIV) test. 2122 // It doesn't make sense to talk about distance or direction in this case, 2123 // so there's no point in making special versions of the Strong SIV test or 2124 // the Weak-crossing SIV test. 2125 // 2126 // With minor algebra, this test can also be used for things like 2127 // [c1 + a1*i + a2*j][c2]. 2128 // 2129 // Return true if dependence disproved. 2130 bool DependenceInfo::testRDIV(const SCEV *Src, const SCEV *Dst, 2131 FullDependence &Result) const { 2132 // we have 3 possible situations here: 2133 // 1) [a*i + b] and [c*j + d] 2134 // 2) [a*i + c*j + b] and [d] 2135 // 3) [b] and [a*i + c*j + d] 2136 // We need to find what we've got and get organized 2137 2138 const SCEV *SrcConst, *DstConst; 2139 const SCEV *SrcCoeff, *DstCoeff; 2140 const Loop *SrcLoop, *DstLoop; 2141 2142 DEBUG(dbgs() << " src = " << *Src << "\n"); 2143 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2144 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); 2145 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); 2146 if (SrcAddRec && DstAddRec) { 2147 SrcConst = SrcAddRec->getStart(); 2148 SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2149 SrcLoop = SrcAddRec->getLoop(); 2150 DstConst = DstAddRec->getStart(); 2151 DstCoeff = DstAddRec->getStepRecurrence(*SE); 2152 DstLoop = DstAddRec->getLoop(); 2153 } 2154 else if (SrcAddRec) { 2155 if (const SCEVAddRecExpr *tmpAddRec = 2156 dyn_cast<SCEVAddRecExpr>(SrcAddRec->getStart())) { 2157 SrcConst = tmpAddRec->getStart(); 2158 SrcCoeff = tmpAddRec->getStepRecurrence(*SE); 2159 SrcLoop = tmpAddRec->getLoop(); 2160 DstConst = Dst; 2161 DstCoeff = SE->getNegativeSCEV(SrcAddRec->getStepRecurrence(*SE)); 2162 DstLoop = SrcAddRec->getLoop(); 2163 } 2164 else 2165 llvm_unreachable("RDIV reached by surprising SCEVs"); 2166 } 2167 else if (DstAddRec) { 2168 if (const SCEVAddRecExpr *tmpAddRec = 2169 dyn_cast<SCEVAddRecExpr>(DstAddRec->getStart())) { 2170 DstConst = tmpAddRec->getStart(); 2171 DstCoeff = tmpAddRec->getStepRecurrence(*SE); 2172 DstLoop = tmpAddRec->getLoop(); 2173 SrcConst = Src; 2174 SrcCoeff = SE->getNegativeSCEV(DstAddRec->getStepRecurrence(*SE)); 2175 SrcLoop = DstAddRec->getLoop(); 2176 } 2177 else 2178 llvm_unreachable("RDIV reached by surprising SCEVs"); 2179 } 2180 else 2181 llvm_unreachable("RDIV expected at least one AddRec"); 2182 return exactRDIVtest(SrcCoeff, DstCoeff, 2183 SrcConst, DstConst, 2184 SrcLoop, DstLoop, 2185 Result) || 2186 gcdMIVtest(Src, Dst, Result) || 2187 symbolicRDIVtest(SrcCoeff, DstCoeff, 2188 SrcConst, DstConst, 2189 SrcLoop, DstLoop); 2190 } 2191 2192 2193 // Tests the single-subscript MIV pair (Src and Dst) for dependence. 2194 // Return true if dependence disproved. 2195 // Can sometimes refine direction vectors. 2196 bool DependenceInfo::testMIV(const SCEV *Src, const SCEV *Dst, 2197 const SmallBitVector &Loops, 2198 FullDependence &Result) const { 2199 DEBUG(dbgs() << " src = " << *Src << "\n"); 2200 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2201 Result.Consistent = false; 2202 return gcdMIVtest(Src, Dst, Result) || 2203 banerjeeMIVtest(Src, Dst, Loops, Result); 2204 } 2205 2206 2207 // Given a product, e.g., 10*X*Y, returns the first constant operand, 2208 // in this case 10. If there is no constant part, returns NULL. 2209 static 2210 const SCEVConstant *getConstantPart(const SCEV *Expr) { 2211 if (const auto *Constant = dyn_cast<SCEVConstant>(Expr)) 2212 return Constant; 2213 else if (const auto *Product = dyn_cast<SCEVMulExpr>(Expr)) 2214 if (const auto *Constant = dyn_cast<SCEVConstant>(Product->getOperand(0))) 2215 return Constant; 2216 return nullptr; 2217 } 2218 2219 2220 //===----------------------------------------------------------------------===// 2221 // gcdMIVtest - 2222 // Tests an MIV subscript pair for dependence. 2223 // Returns true if any possible dependence is disproved. 2224 // Marks the result as inconsistent. 2225 // Can sometimes disprove the equal direction for 1 or more loops, 2226 // as discussed in Michael Wolfe's book, 2227 // High Performance Compilers for Parallel Computing, page 235. 2228 // 2229 // We spend some effort (code!) to handle cases like 2230 // [10*i + 5*N*j + 15*M + 6], where i and j are induction variables, 2231 // but M and N are just loop-invariant variables. 2232 // This should help us handle linearized subscripts; 2233 // also makes this test a useful backup to the various SIV tests. 2234 // 2235 // It occurs to me that the presence of loop-invariant variables 2236 // changes the nature of the test from "greatest common divisor" 2237 // to "a common divisor". 2238 bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst, 2239 FullDependence &Result) const { 2240 DEBUG(dbgs() << "starting gcd\n"); 2241 ++GCDapplications; 2242 unsigned BitWidth = SE->getTypeSizeInBits(Src->getType()); 2243 APInt RunningGCD = APInt::getNullValue(BitWidth); 2244 2245 // Examine Src coefficients. 2246 // Compute running GCD and record source constant. 2247 // Because we're looking for the constant at the end of the chain, 2248 // we can't quit the loop just because the GCD == 1. 2249 const SCEV *Coefficients = Src; 2250 while (const SCEVAddRecExpr *AddRec = 2251 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2252 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2253 // If the coefficient is the product of a constant and other stuff, 2254 // we can use the constant in the GCD computation. 2255 const auto *Constant = getConstantPart(Coeff); 2256 if (!Constant) 2257 return false; 2258 APInt ConstCoeff = Constant->getAPInt(); 2259 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2260 Coefficients = AddRec->getStart(); 2261 } 2262 const SCEV *SrcConst = Coefficients; 2263 2264 // Examine Dst coefficients. 2265 // Compute running GCD and record destination constant. 2266 // Because we're looking for the constant at the end of the chain, 2267 // we can't quit the loop just because the GCD == 1. 2268 Coefficients = Dst; 2269 while (const SCEVAddRecExpr *AddRec = 2270 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2271 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2272 // If the coefficient is the product of a constant and other stuff, 2273 // we can use the constant in the GCD computation. 2274 const auto *Constant = getConstantPart(Coeff); 2275 if (!Constant) 2276 return false; 2277 APInt ConstCoeff = Constant->getAPInt(); 2278 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2279 Coefficients = AddRec->getStart(); 2280 } 2281 const SCEV *DstConst = Coefficients; 2282 2283 APInt ExtraGCD = APInt::getNullValue(BitWidth); 2284 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 2285 DEBUG(dbgs() << " Delta = " << *Delta << "\n"); 2286 const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Delta); 2287 if (const SCEVAddExpr *Sum = dyn_cast<SCEVAddExpr>(Delta)) { 2288 // If Delta is a sum of products, we may be able to make further progress. 2289 for (unsigned Op = 0, Ops = Sum->getNumOperands(); Op < Ops; Op++) { 2290 const SCEV *Operand = Sum->getOperand(Op); 2291 if (isa<SCEVConstant>(Operand)) { 2292 assert(!Constant && "Surprised to find multiple constants"); 2293 Constant = cast<SCEVConstant>(Operand); 2294 } 2295 else if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Operand)) { 2296 // Search for constant operand to participate in GCD; 2297 // If none found; return false. 2298 const SCEVConstant *ConstOp = getConstantPart(Product); 2299 if (!ConstOp) 2300 return false; 2301 APInt ConstOpValue = ConstOp->getAPInt(); 2302 ExtraGCD = APIntOps::GreatestCommonDivisor(ExtraGCD, 2303 ConstOpValue.abs()); 2304 } 2305 else 2306 return false; 2307 } 2308 } 2309 if (!Constant) 2310 return false; 2311 APInt ConstDelta = cast<SCEVConstant>(Constant)->getAPInt(); 2312 DEBUG(dbgs() << " ConstDelta = " << ConstDelta << "\n"); 2313 if (ConstDelta == 0) 2314 return false; 2315 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ExtraGCD); 2316 DEBUG(dbgs() << " RunningGCD = " << RunningGCD << "\n"); 2317 APInt Remainder = ConstDelta.srem(RunningGCD); 2318 if (Remainder != 0) { 2319 ++GCDindependence; 2320 return true; 2321 } 2322 2323 // Try to disprove equal directions. 2324 // For example, given a subscript pair [3*i + 2*j] and [i' + 2*j' - 1], 2325 // the code above can't disprove the dependence because the GCD = 1. 2326 // So we consider what happen if i = i' and what happens if j = j'. 2327 // If i = i', we can simplify the subscript to [2*i + 2*j] and [2*j' - 1], 2328 // which is infeasible, so we can disallow the = direction for the i level. 2329 // Setting j = j' doesn't help matters, so we end up with a direction vector 2330 // of [<>, *] 2331 // 2332 // Given A[5*i + 10*j*M + 9*M*N] and A[15*i + 20*j*M - 21*N*M + 5], 2333 // we need to remember that the constant part is 5 and the RunningGCD should 2334 // be initialized to ExtraGCD = 30. 2335 DEBUG(dbgs() << " ExtraGCD = " << ExtraGCD << '\n'); 2336 2337 bool Improved = false; 2338 Coefficients = Src; 2339 while (const SCEVAddRecExpr *AddRec = 2340 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2341 Coefficients = AddRec->getStart(); 2342 const Loop *CurLoop = AddRec->getLoop(); 2343 RunningGCD = ExtraGCD; 2344 const SCEV *SrcCoeff = AddRec->getStepRecurrence(*SE); 2345 const SCEV *DstCoeff = SE->getMinusSCEV(SrcCoeff, SrcCoeff); 2346 const SCEV *Inner = Src; 2347 while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { 2348 AddRec = cast<SCEVAddRecExpr>(Inner); 2349 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2350 if (CurLoop == AddRec->getLoop()) 2351 ; // SrcCoeff == Coeff 2352 else { 2353 // If the coefficient is the product of a constant and other stuff, 2354 // we can use the constant in the GCD computation. 2355 Constant = getConstantPart(Coeff); 2356 if (!Constant) 2357 return false; 2358 APInt ConstCoeff = Constant->getAPInt(); 2359 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2360 } 2361 Inner = AddRec->getStart(); 2362 } 2363 Inner = Dst; 2364 while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { 2365 AddRec = cast<SCEVAddRecExpr>(Inner); 2366 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2367 if (CurLoop == AddRec->getLoop()) 2368 DstCoeff = Coeff; 2369 else { 2370 // If the coefficient is the product of a constant and other stuff, 2371 // we can use the constant in the GCD computation. 2372 Constant = getConstantPart(Coeff); 2373 if (!Constant) 2374 return false; 2375 APInt ConstCoeff = Constant->getAPInt(); 2376 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2377 } 2378 Inner = AddRec->getStart(); 2379 } 2380 Delta = SE->getMinusSCEV(SrcCoeff, DstCoeff); 2381 // If the coefficient is the product of a constant and other stuff, 2382 // we can use the constant in the GCD computation. 2383 Constant = getConstantPart(Delta); 2384 if (!Constant) 2385 // The difference of the two coefficients might not be a product 2386 // or constant, in which case we give up on this direction. 2387 continue; 2388 APInt ConstCoeff = Constant->getAPInt(); 2389 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2390 DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n"); 2391 if (RunningGCD != 0) { 2392 Remainder = ConstDelta.srem(RunningGCD); 2393 DEBUG(dbgs() << "\tRemainder = " << Remainder << "\n"); 2394 if (Remainder != 0) { 2395 unsigned Level = mapSrcLoop(CurLoop); 2396 Result.DV[Level - 1].Direction &= unsigned(~Dependence::DVEntry::EQ); 2397 Improved = true; 2398 } 2399 } 2400 } 2401 if (Improved) 2402 ++GCDsuccesses; 2403 DEBUG(dbgs() << "all done\n"); 2404 return false; 2405 } 2406 2407 2408 //===----------------------------------------------------------------------===// 2409 // banerjeeMIVtest - 2410 // Use Banerjee's Inequalities to test an MIV subscript pair. 2411 // (Wolfe, in the race-car book, calls this the Extreme Value Test.) 2412 // Generally follows the discussion in Section 2.5.2 of 2413 // 2414 // Optimizing Supercompilers for Supercomputers 2415 // Michael Wolfe 2416 // 2417 // The inequalities given on page 25 are simplified in that loops are 2418 // normalized so that the lower bound is always 0 and the stride is always 1. 2419 // For example, Wolfe gives 2420 // 2421 // LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2422 // 2423 // where A_k is the coefficient of the kth index in the source subscript, 2424 // B_k is the coefficient of the kth index in the destination subscript, 2425 // U_k is the upper bound of the kth index, L_k is the lower bound of the Kth 2426 // index, and N_k is the stride of the kth index. Since all loops are normalized 2427 // by the SCEV package, N_k = 1 and L_k = 0, allowing us to simplify the 2428 // equation to 2429 // 2430 // LB^<_k = (A^-_k - B_k)^- (U_k - 0 - 1) + (A_k - B_k)0 - B_k 1 2431 // = (A^-_k - B_k)^- (U_k - 1) - B_k 2432 // 2433 // Similar simplifications are possible for the other equations. 2434 // 2435 // When we can't determine the number of iterations for a loop, 2436 // we use NULL as an indicator for the worst case, infinity. 2437 // When computing the upper bound, NULL denotes +inf; 2438 // for the lower bound, NULL denotes -inf. 2439 // 2440 // Return true if dependence disproved. 2441 bool DependenceInfo::banerjeeMIVtest(const SCEV *Src, const SCEV *Dst, 2442 const SmallBitVector &Loops, 2443 FullDependence &Result) const { 2444 DEBUG(dbgs() << "starting Banerjee\n"); 2445 ++BanerjeeApplications; 2446 DEBUG(dbgs() << " Src = " << *Src << '\n'); 2447 const SCEV *A0; 2448 CoefficientInfo *A = collectCoeffInfo(Src, true, A0); 2449 DEBUG(dbgs() << " Dst = " << *Dst << '\n'); 2450 const SCEV *B0; 2451 CoefficientInfo *B = collectCoeffInfo(Dst, false, B0); 2452 BoundInfo *Bound = new BoundInfo[MaxLevels + 1]; 2453 const SCEV *Delta = SE->getMinusSCEV(B0, A0); 2454 DEBUG(dbgs() << "\tDelta = " << *Delta << '\n'); 2455 2456 // Compute bounds for all the * directions. 2457 DEBUG(dbgs() << "\tBounds[*]\n"); 2458 for (unsigned K = 1; K <= MaxLevels; ++K) { 2459 Bound[K].Iterations = A[K].Iterations ? A[K].Iterations : B[K].Iterations; 2460 Bound[K].Direction = Dependence::DVEntry::ALL; 2461 Bound[K].DirSet = Dependence::DVEntry::NONE; 2462 findBoundsALL(A, B, Bound, K); 2463 #ifndef NDEBUG 2464 DEBUG(dbgs() << "\t " << K << '\t'); 2465 if (Bound[K].Lower[Dependence::DVEntry::ALL]) 2466 DEBUG(dbgs() << *Bound[K].Lower[Dependence::DVEntry::ALL] << '\t'); 2467 else 2468 DEBUG(dbgs() << "-inf\t"); 2469 if (Bound[K].Upper[Dependence::DVEntry::ALL]) 2470 DEBUG(dbgs() << *Bound[K].Upper[Dependence::DVEntry::ALL] << '\n'); 2471 else 2472 DEBUG(dbgs() << "+inf\n"); 2473 #endif 2474 } 2475 2476 // Test the *, *, *, ... case. 2477 bool Disproved = false; 2478 if (testBounds(Dependence::DVEntry::ALL, 0, Bound, Delta)) { 2479 // Explore the direction vector hierarchy. 2480 unsigned DepthExpanded = 0; 2481 unsigned NewDeps = exploreDirections(1, A, B, Bound, 2482 Loops, DepthExpanded, Delta); 2483 if (NewDeps > 0) { 2484 bool Improved = false; 2485 for (unsigned K = 1; K <= CommonLevels; ++K) { 2486 if (Loops[K]) { 2487 unsigned Old = Result.DV[K - 1].Direction; 2488 Result.DV[K - 1].Direction = Old & Bound[K].DirSet; 2489 Improved |= Old != Result.DV[K - 1].Direction; 2490 if (!Result.DV[K - 1].Direction) { 2491 Improved = false; 2492 Disproved = true; 2493 break; 2494 } 2495 } 2496 } 2497 if (Improved) 2498 ++BanerjeeSuccesses; 2499 } 2500 else { 2501 ++BanerjeeIndependence; 2502 Disproved = true; 2503 } 2504 } 2505 else { 2506 ++BanerjeeIndependence; 2507 Disproved = true; 2508 } 2509 delete [] Bound; 2510 delete [] A; 2511 delete [] B; 2512 return Disproved; 2513 } 2514 2515 2516 // Hierarchically expands the direction vector 2517 // search space, combining the directions of discovered dependences 2518 // in the DirSet field of Bound. Returns the number of distinct 2519 // dependences discovered. If the dependence is disproved, 2520 // it will return 0. 2521 unsigned DependenceInfo::exploreDirections(unsigned Level, CoefficientInfo *A, 2522 CoefficientInfo *B, BoundInfo *Bound, 2523 const SmallBitVector &Loops, 2524 unsigned &DepthExpanded, 2525 const SCEV *Delta) const { 2526 if (Level > CommonLevels) { 2527 // record result 2528 DEBUG(dbgs() << "\t["); 2529 for (unsigned K = 1; K <= CommonLevels; ++K) { 2530 if (Loops[K]) { 2531 Bound[K].DirSet |= Bound[K].Direction; 2532 #ifndef NDEBUG 2533 switch (Bound[K].Direction) { 2534 case Dependence::DVEntry::LT: 2535 DEBUG(dbgs() << " <"); 2536 break; 2537 case Dependence::DVEntry::EQ: 2538 DEBUG(dbgs() << " ="); 2539 break; 2540 case Dependence::DVEntry::GT: 2541 DEBUG(dbgs() << " >"); 2542 break; 2543 case Dependence::DVEntry::ALL: 2544 DEBUG(dbgs() << " *"); 2545 break; 2546 default: 2547 llvm_unreachable("unexpected Bound[K].Direction"); 2548 } 2549 #endif 2550 } 2551 } 2552 DEBUG(dbgs() << " ]\n"); 2553 return 1; 2554 } 2555 if (Loops[Level]) { 2556 if (Level > DepthExpanded) { 2557 DepthExpanded = Level; 2558 // compute bounds for <, =, > at current level 2559 findBoundsLT(A, B, Bound, Level); 2560 findBoundsGT(A, B, Bound, Level); 2561 findBoundsEQ(A, B, Bound, Level); 2562 #ifndef NDEBUG 2563 DEBUG(dbgs() << "\tBound for level = " << Level << '\n'); 2564 DEBUG(dbgs() << "\t <\t"); 2565 if (Bound[Level].Lower[Dependence::DVEntry::LT]) 2566 DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::LT] << '\t'); 2567 else 2568 DEBUG(dbgs() << "-inf\t"); 2569 if (Bound[Level].Upper[Dependence::DVEntry::LT]) 2570 DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::LT] << '\n'); 2571 else 2572 DEBUG(dbgs() << "+inf\n"); 2573 DEBUG(dbgs() << "\t =\t"); 2574 if (Bound[Level].Lower[Dependence::DVEntry::EQ]) 2575 DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::EQ] << '\t'); 2576 else 2577 DEBUG(dbgs() << "-inf\t"); 2578 if (Bound[Level].Upper[Dependence::DVEntry::EQ]) 2579 DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::EQ] << '\n'); 2580 else 2581 DEBUG(dbgs() << "+inf\n"); 2582 DEBUG(dbgs() << "\t >\t"); 2583 if (Bound[Level].Lower[Dependence::DVEntry::GT]) 2584 DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::GT] << '\t'); 2585 else 2586 DEBUG(dbgs() << "-inf\t"); 2587 if (Bound[Level].Upper[Dependence::DVEntry::GT]) 2588 DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::GT] << '\n'); 2589 else 2590 DEBUG(dbgs() << "+inf\n"); 2591 #endif 2592 } 2593 2594 unsigned NewDeps = 0; 2595 2596 // test bounds for <, *, *, ... 2597 if (testBounds(Dependence::DVEntry::LT, Level, Bound, Delta)) 2598 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2599 Loops, DepthExpanded, Delta); 2600 2601 // Test bounds for =, *, *, ... 2602 if (testBounds(Dependence::DVEntry::EQ, Level, Bound, Delta)) 2603 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2604 Loops, DepthExpanded, Delta); 2605 2606 // test bounds for >, *, *, ... 2607 if (testBounds(Dependence::DVEntry::GT, Level, Bound, Delta)) 2608 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2609 Loops, DepthExpanded, Delta); 2610 2611 Bound[Level].Direction = Dependence::DVEntry::ALL; 2612 return NewDeps; 2613 } 2614 else 2615 return exploreDirections(Level + 1, A, B, Bound, Loops, DepthExpanded, Delta); 2616 } 2617 2618 2619 // Returns true iff the current bounds are plausible. 2620 bool DependenceInfo::testBounds(unsigned char DirKind, unsigned Level, 2621 BoundInfo *Bound, const SCEV *Delta) const { 2622 Bound[Level].Direction = DirKind; 2623 if (const SCEV *LowerBound = getLowerBound(Bound)) 2624 if (isKnownPredicate(CmpInst::ICMP_SGT, LowerBound, Delta)) 2625 return false; 2626 if (const SCEV *UpperBound = getUpperBound(Bound)) 2627 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, UpperBound)) 2628 return false; 2629 return true; 2630 } 2631 2632 2633 // Computes the upper and lower bounds for level K 2634 // using the * direction. Records them in Bound. 2635 // Wolfe gives the equations 2636 // 2637 // LB^*_k = (A^-_k - B^+_k)(U_k - L_k) + (A_k - B_k)L_k 2638 // UB^*_k = (A^+_k - B^-_k)(U_k - L_k) + (A_k - B_k)L_k 2639 // 2640 // Since we normalize loops, we can simplify these equations to 2641 // 2642 // LB^*_k = (A^-_k - B^+_k)U_k 2643 // UB^*_k = (A^+_k - B^-_k)U_k 2644 // 2645 // We must be careful to handle the case where the upper bound is unknown. 2646 // Note that the lower bound is always <= 0 2647 // and the upper bound is always >= 0. 2648 void DependenceInfo::findBoundsALL(CoefficientInfo *A, CoefficientInfo *B, 2649 BoundInfo *Bound, unsigned K) const { 2650 Bound[K].Lower[Dependence::DVEntry::ALL] = nullptr; // Default value = -infinity. 2651 Bound[K].Upper[Dependence::DVEntry::ALL] = nullptr; // Default value = +infinity. 2652 if (Bound[K].Iterations) { 2653 Bound[K].Lower[Dependence::DVEntry::ALL] = 2654 SE->getMulExpr(SE->getMinusSCEV(A[K].NegPart, B[K].PosPart), 2655 Bound[K].Iterations); 2656 Bound[K].Upper[Dependence::DVEntry::ALL] = 2657 SE->getMulExpr(SE->getMinusSCEV(A[K].PosPart, B[K].NegPart), 2658 Bound[K].Iterations); 2659 } 2660 else { 2661 // If the difference is 0, we won't need to know the number of iterations. 2662 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].NegPart, B[K].PosPart)) 2663 Bound[K].Lower[Dependence::DVEntry::ALL] = 2664 SE->getZero(A[K].Coeff->getType()); 2665 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].PosPart, B[K].NegPart)) 2666 Bound[K].Upper[Dependence::DVEntry::ALL] = 2667 SE->getZero(A[K].Coeff->getType()); 2668 } 2669 } 2670 2671 2672 // Computes the upper and lower bounds for level K 2673 // using the = direction. Records them in Bound. 2674 // Wolfe gives the equations 2675 // 2676 // LB^=_k = (A_k - B_k)^- (U_k - L_k) + (A_k - B_k)L_k 2677 // UB^=_k = (A_k - B_k)^+ (U_k - L_k) + (A_k - B_k)L_k 2678 // 2679 // Since we normalize loops, we can simplify these equations to 2680 // 2681 // LB^=_k = (A_k - B_k)^- U_k 2682 // UB^=_k = (A_k - B_k)^+ U_k 2683 // 2684 // We must be careful to handle the case where the upper bound is unknown. 2685 // Note that the lower bound is always <= 0 2686 // and the upper bound is always >= 0. 2687 void DependenceInfo::findBoundsEQ(CoefficientInfo *A, CoefficientInfo *B, 2688 BoundInfo *Bound, unsigned K) const { 2689 Bound[K].Lower[Dependence::DVEntry::EQ] = nullptr; // Default value = -infinity. 2690 Bound[K].Upper[Dependence::DVEntry::EQ] = nullptr; // Default value = +infinity. 2691 if (Bound[K].Iterations) { 2692 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); 2693 const SCEV *NegativePart = getNegativePart(Delta); 2694 Bound[K].Lower[Dependence::DVEntry::EQ] = 2695 SE->getMulExpr(NegativePart, Bound[K].Iterations); 2696 const SCEV *PositivePart = getPositivePart(Delta); 2697 Bound[K].Upper[Dependence::DVEntry::EQ] = 2698 SE->getMulExpr(PositivePart, Bound[K].Iterations); 2699 } 2700 else { 2701 // If the positive/negative part of the difference is 0, 2702 // we won't need to know the number of iterations. 2703 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); 2704 const SCEV *NegativePart = getNegativePart(Delta); 2705 if (NegativePart->isZero()) 2706 Bound[K].Lower[Dependence::DVEntry::EQ] = NegativePart; // Zero 2707 const SCEV *PositivePart = getPositivePart(Delta); 2708 if (PositivePart->isZero()) 2709 Bound[K].Upper[Dependence::DVEntry::EQ] = PositivePart; // Zero 2710 } 2711 } 2712 2713 2714 // Computes the upper and lower bounds for level K 2715 // using the < direction. Records them in Bound. 2716 // Wolfe gives the equations 2717 // 2718 // LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2719 // UB^<_k = (A^+_k - B_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2720 // 2721 // Since we normalize loops, we can simplify these equations to 2722 // 2723 // LB^<_k = (A^-_k - B_k)^- (U_k - 1) - B_k 2724 // UB^<_k = (A^+_k - B_k)^+ (U_k - 1) - B_k 2725 // 2726 // We must be careful to handle the case where the upper bound is unknown. 2727 void DependenceInfo::findBoundsLT(CoefficientInfo *A, CoefficientInfo *B, 2728 BoundInfo *Bound, unsigned K) const { 2729 Bound[K].Lower[Dependence::DVEntry::LT] = nullptr; // Default value = -infinity. 2730 Bound[K].Upper[Dependence::DVEntry::LT] = nullptr; // Default value = +infinity. 2731 if (Bound[K].Iterations) { 2732 const SCEV *Iter_1 = SE->getMinusSCEV( 2733 Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType())); 2734 const SCEV *NegPart = 2735 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); 2736 Bound[K].Lower[Dependence::DVEntry::LT] = 2737 SE->getMinusSCEV(SE->getMulExpr(NegPart, Iter_1), B[K].Coeff); 2738 const SCEV *PosPart = 2739 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); 2740 Bound[K].Upper[Dependence::DVEntry::LT] = 2741 SE->getMinusSCEV(SE->getMulExpr(PosPart, Iter_1), B[K].Coeff); 2742 } 2743 else { 2744 // If the positive/negative part of the difference is 0, 2745 // we won't need to know the number of iterations. 2746 const SCEV *NegPart = 2747 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); 2748 if (NegPart->isZero()) 2749 Bound[K].Lower[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); 2750 const SCEV *PosPart = 2751 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); 2752 if (PosPart->isZero()) 2753 Bound[K].Upper[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); 2754 } 2755 } 2756 2757 2758 // Computes the upper and lower bounds for level K 2759 // using the > direction. Records them in Bound. 2760 // Wolfe gives the equations 2761 // 2762 // LB^>_k = (A_k - B^+_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k 2763 // UB^>_k = (A_k - B^-_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k 2764 // 2765 // Since we normalize loops, we can simplify these equations to 2766 // 2767 // LB^>_k = (A_k - B^+_k)^- (U_k - 1) + A_k 2768 // UB^>_k = (A_k - B^-_k)^+ (U_k - 1) + A_k 2769 // 2770 // We must be careful to handle the case where the upper bound is unknown. 2771 void DependenceInfo::findBoundsGT(CoefficientInfo *A, CoefficientInfo *B, 2772 BoundInfo *Bound, unsigned K) const { 2773 Bound[K].Lower[Dependence::DVEntry::GT] = nullptr; // Default value = -infinity. 2774 Bound[K].Upper[Dependence::DVEntry::GT] = nullptr; // Default value = +infinity. 2775 if (Bound[K].Iterations) { 2776 const SCEV *Iter_1 = SE->getMinusSCEV( 2777 Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType())); 2778 const SCEV *NegPart = 2779 getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); 2780 Bound[K].Lower[Dependence::DVEntry::GT] = 2781 SE->getAddExpr(SE->getMulExpr(NegPart, Iter_1), A[K].Coeff); 2782 const SCEV *PosPart = 2783 getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); 2784 Bound[K].Upper[Dependence::DVEntry::GT] = 2785 SE->getAddExpr(SE->getMulExpr(PosPart, Iter_1), A[K].Coeff); 2786 } 2787 else { 2788 // If the positive/negative part of the difference is 0, 2789 // we won't need to know the number of iterations. 2790 const SCEV *NegPart = getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); 2791 if (NegPart->isZero()) 2792 Bound[K].Lower[Dependence::DVEntry::GT] = A[K].Coeff; 2793 const SCEV *PosPart = getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); 2794 if (PosPart->isZero()) 2795 Bound[K].Upper[Dependence::DVEntry::GT] = A[K].Coeff; 2796 } 2797 } 2798 2799 2800 // X^+ = max(X, 0) 2801 const SCEV *DependenceInfo::getPositivePart(const SCEV *X) const { 2802 return SE->getSMaxExpr(X, SE->getZero(X->getType())); 2803 } 2804 2805 2806 // X^- = min(X, 0) 2807 const SCEV *DependenceInfo::getNegativePart(const SCEV *X) const { 2808 return SE->getSMinExpr(X, SE->getZero(X->getType())); 2809 } 2810 2811 2812 // Walks through the subscript, 2813 // collecting each coefficient, the associated loop bounds, 2814 // and recording its positive and negative parts for later use. 2815 DependenceInfo::CoefficientInfo * 2816 DependenceInfo::collectCoeffInfo(const SCEV *Subscript, bool SrcFlag, 2817 const SCEV *&Constant) const { 2818 const SCEV *Zero = SE->getZero(Subscript->getType()); 2819 CoefficientInfo *CI = new CoefficientInfo[MaxLevels + 1]; 2820 for (unsigned K = 1; K <= MaxLevels; ++K) { 2821 CI[K].Coeff = Zero; 2822 CI[K].PosPart = Zero; 2823 CI[K].NegPart = Zero; 2824 CI[K].Iterations = nullptr; 2825 } 2826 while (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Subscript)) { 2827 const Loop *L = AddRec->getLoop(); 2828 unsigned K = SrcFlag ? mapSrcLoop(L) : mapDstLoop(L); 2829 CI[K].Coeff = AddRec->getStepRecurrence(*SE); 2830 CI[K].PosPart = getPositivePart(CI[K].Coeff); 2831 CI[K].NegPart = getNegativePart(CI[K].Coeff); 2832 CI[K].Iterations = collectUpperBound(L, Subscript->getType()); 2833 Subscript = AddRec->getStart(); 2834 } 2835 Constant = Subscript; 2836 #ifndef NDEBUG 2837 DEBUG(dbgs() << "\tCoefficient Info\n"); 2838 for (unsigned K = 1; K <= MaxLevels; ++K) { 2839 DEBUG(dbgs() << "\t " << K << "\t" << *CI[K].Coeff); 2840 DEBUG(dbgs() << "\tPos Part = "); 2841 DEBUG(dbgs() << *CI[K].PosPart); 2842 DEBUG(dbgs() << "\tNeg Part = "); 2843 DEBUG(dbgs() << *CI[K].NegPart); 2844 DEBUG(dbgs() << "\tUpper Bound = "); 2845 if (CI[K].Iterations) 2846 DEBUG(dbgs() << *CI[K].Iterations); 2847 else 2848 DEBUG(dbgs() << "+inf"); 2849 DEBUG(dbgs() << '\n'); 2850 } 2851 DEBUG(dbgs() << "\t Constant = " << *Subscript << '\n'); 2852 #endif 2853 return CI; 2854 } 2855 2856 2857 // Looks through all the bounds info and 2858 // computes the lower bound given the current direction settings 2859 // at each level. If the lower bound for any level is -inf, 2860 // the result is -inf. 2861 const SCEV *DependenceInfo::getLowerBound(BoundInfo *Bound) const { 2862 const SCEV *Sum = Bound[1].Lower[Bound[1].Direction]; 2863 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { 2864 if (Bound[K].Lower[Bound[K].Direction]) 2865 Sum = SE->getAddExpr(Sum, Bound[K].Lower[Bound[K].Direction]); 2866 else 2867 Sum = nullptr; 2868 } 2869 return Sum; 2870 } 2871 2872 2873 // Looks through all the bounds info and 2874 // computes the upper bound given the current direction settings 2875 // at each level. If the upper bound at any level is +inf, 2876 // the result is +inf. 2877 const SCEV *DependenceInfo::getUpperBound(BoundInfo *Bound) const { 2878 const SCEV *Sum = Bound[1].Upper[Bound[1].Direction]; 2879 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { 2880 if (Bound[K].Upper[Bound[K].Direction]) 2881 Sum = SE->getAddExpr(Sum, Bound[K].Upper[Bound[K].Direction]); 2882 else 2883 Sum = nullptr; 2884 } 2885 return Sum; 2886 } 2887 2888 2889 //===----------------------------------------------------------------------===// 2890 // Constraint manipulation for Delta test. 2891 2892 // Given a linear SCEV, 2893 // return the coefficient (the step) 2894 // corresponding to the specified loop. 2895 // If there isn't one, return 0. 2896 // For example, given a*i + b*j + c*k, finding the coefficient 2897 // corresponding to the j loop would yield b. 2898 const SCEV *DependenceInfo::findCoefficient(const SCEV *Expr, 2899 const Loop *TargetLoop) const { 2900 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 2901 if (!AddRec) 2902 return SE->getZero(Expr->getType()); 2903 if (AddRec->getLoop() == TargetLoop) 2904 return AddRec->getStepRecurrence(*SE); 2905 return findCoefficient(AddRec->getStart(), TargetLoop); 2906 } 2907 2908 2909 // Given a linear SCEV, 2910 // return the SCEV given by zeroing out the coefficient 2911 // corresponding to the specified loop. 2912 // For example, given a*i + b*j + c*k, zeroing the coefficient 2913 // corresponding to the j loop would yield a*i + c*k. 2914 const SCEV *DependenceInfo::zeroCoefficient(const SCEV *Expr, 2915 const Loop *TargetLoop) const { 2916 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 2917 if (!AddRec) 2918 return Expr; // ignore 2919 if (AddRec->getLoop() == TargetLoop) 2920 return AddRec->getStart(); 2921 return SE->getAddRecExpr(zeroCoefficient(AddRec->getStart(), TargetLoop), 2922 AddRec->getStepRecurrence(*SE), 2923 AddRec->getLoop(), 2924 AddRec->getNoWrapFlags()); 2925 } 2926 2927 2928 // Given a linear SCEV Expr, 2929 // return the SCEV given by adding some Value to the 2930 // coefficient corresponding to the specified TargetLoop. 2931 // For example, given a*i + b*j + c*k, adding 1 to the coefficient 2932 // corresponding to the j loop would yield a*i + (b+1)*j + c*k. 2933 const SCEV *DependenceInfo::addToCoefficient(const SCEV *Expr, 2934 const Loop *TargetLoop, 2935 const SCEV *Value) const { 2936 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 2937 if (!AddRec) // create a new addRec 2938 return SE->getAddRecExpr(Expr, 2939 Value, 2940 TargetLoop, 2941 SCEV::FlagAnyWrap); // Worst case, with no info. 2942 if (AddRec->getLoop() == TargetLoop) { 2943 const SCEV *Sum = SE->getAddExpr(AddRec->getStepRecurrence(*SE), Value); 2944 if (Sum->isZero()) 2945 return AddRec->getStart(); 2946 return SE->getAddRecExpr(AddRec->getStart(), 2947 Sum, 2948 AddRec->getLoop(), 2949 AddRec->getNoWrapFlags()); 2950 } 2951 if (SE->isLoopInvariant(AddRec, TargetLoop)) 2952 return SE->getAddRecExpr(AddRec, Value, TargetLoop, SCEV::FlagAnyWrap); 2953 return SE->getAddRecExpr( 2954 addToCoefficient(AddRec->getStart(), TargetLoop, Value), 2955 AddRec->getStepRecurrence(*SE), AddRec->getLoop(), 2956 AddRec->getNoWrapFlags()); 2957 } 2958 2959 2960 // Review the constraints, looking for opportunities 2961 // to simplify a subscript pair (Src and Dst). 2962 // Return true if some simplification occurs. 2963 // If the simplification isn't exact (that is, if it is conservative 2964 // in terms of dependence), set consistent to false. 2965 // Corresponds to Figure 5 from the paper 2966 // 2967 // Practical Dependence Testing 2968 // Goff, Kennedy, Tseng 2969 // PLDI 1991 2970 bool DependenceInfo::propagate(const SCEV *&Src, const SCEV *&Dst, 2971 SmallBitVector &Loops, 2972 SmallVectorImpl<Constraint> &Constraints, 2973 bool &Consistent) { 2974 bool Result = false; 2975 for (unsigned LI : Loops.set_bits()) { 2976 DEBUG(dbgs() << "\t Constraint[" << LI << "] is"); 2977 DEBUG(Constraints[LI].dump(dbgs())); 2978 if (Constraints[LI].isDistance()) 2979 Result |= propagateDistance(Src, Dst, Constraints[LI], Consistent); 2980 else if (Constraints[LI].isLine()) 2981 Result |= propagateLine(Src, Dst, Constraints[LI], Consistent); 2982 else if (Constraints[LI].isPoint()) 2983 Result |= propagatePoint(Src, Dst, Constraints[LI]); 2984 } 2985 return Result; 2986 } 2987 2988 2989 // Attempt to propagate a distance 2990 // constraint into a subscript pair (Src and Dst). 2991 // Return true if some simplification occurs. 2992 // If the simplification isn't exact (that is, if it is conservative 2993 // in terms of dependence), set consistent to false. 2994 bool DependenceInfo::propagateDistance(const SCEV *&Src, const SCEV *&Dst, 2995 Constraint &CurConstraint, 2996 bool &Consistent) { 2997 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 2998 DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); 2999 const SCEV *A_K = findCoefficient(Src, CurLoop); 3000 if (A_K->isZero()) 3001 return false; 3002 const SCEV *DA_K = SE->getMulExpr(A_K, CurConstraint.getD()); 3003 Src = SE->getMinusSCEV(Src, DA_K); 3004 Src = zeroCoefficient(Src, CurLoop); 3005 DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); 3006 DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); 3007 Dst = addToCoefficient(Dst, CurLoop, SE->getNegativeSCEV(A_K)); 3008 DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); 3009 if (!findCoefficient(Dst, CurLoop)->isZero()) 3010 Consistent = false; 3011 return true; 3012 } 3013 3014 3015 // Attempt to propagate a line 3016 // constraint into a subscript pair (Src and Dst). 3017 // Return true if some simplification occurs. 3018 // If the simplification isn't exact (that is, if it is conservative 3019 // in terms of dependence), set consistent to false. 3020 bool DependenceInfo::propagateLine(const SCEV *&Src, const SCEV *&Dst, 3021 Constraint &CurConstraint, 3022 bool &Consistent) { 3023 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3024 const SCEV *A = CurConstraint.getA(); 3025 const SCEV *B = CurConstraint.getB(); 3026 const SCEV *C = CurConstraint.getC(); 3027 DEBUG(dbgs() << "\t\tA = " << *A << ", B = " << *B << ", C = " << *C << "\n"); 3028 DEBUG(dbgs() << "\t\tSrc = " << *Src << "\n"); 3029 DEBUG(dbgs() << "\t\tDst = " << *Dst << "\n"); 3030 if (A->isZero()) { 3031 const SCEVConstant *Bconst = dyn_cast<SCEVConstant>(B); 3032 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3033 if (!Bconst || !Cconst) return false; 3034 APInt Beta = Bconst->getAPInt(); 3035 APInt Charlie = Cconst->getAPInt(); 3036 APInt CdivB = Charlie.sdiv(Beta); 3037 assert(Charlie.srem(Beta) == 0 && "C should be evenly divisible by B"); 3038 const SCEV *AP_K = findCoefficient(Dst, CurLoop); 3039 // Src = SE->getAddExpr(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); 3040 Src = SE->getMinusSCEV(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); 3041 Dst = zeroCoefficient(Dst, CurLoop); 3042 if (!findCoefficient(Src, CurLoop)->isZero()) 3043 Consistent = false; 3044 } 3045 else if (B->isZero()) { 3046 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); 3047 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3048 if (!Aconst || !Cconst) return false; 3049 APInt Alpha = Aconst->getAPInt(); 3050 APInt Charlie = Cconst->getAPInt(); 3051 APInt CdivA = Charlie.sdiv(Alpha); 3052 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); 3053 const SCEV *A_K = findCoefficient(Src, CurLoop); 3054 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); 3055 Src = zeroCoefficient(Src, CurLoop); 3056 if (!findCoefficient(Dst, CurLoop)->isZero()) 3057 Consistent = false; 3058 } 3059 else if (isKnownPredicate(CmpInst::ICMP_EQ, A, B)) { 3060 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); 3061 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3062 if (!Aconst || !Cconst) return false; 3063 APInt Alpha = Aconst->getAPInt(); 3064 APInt Charlie = Cconst->getAPInt(); 3065 APInt CdivA = Charlie.sdiv(Alpha); 3066 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); 3067 const SCEV *A_K = findCoefficient(Src, CurLoop); 3068 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); 3069 Src = zeroCoefficient(Src, CurLoop); 3070 Dst = addToCoefficient(Dst, CurLoop, A_K); 3071 if (!findCoefficient(Dst, CurLoop)->isZero()) 3072 Consistent = false; 3073 } 3074 else { 3075 // paper is incorrect here, or perhaps just misleading 3076 const SCEV *A_K = findCoefficient(Src, CurLoop); 3077 Src = SE->getMulExpr(Src, A); 3078 Dst = SE->getMulExpr(Dst, A); 3079 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, C)); 3080 Src = zeroCoefficient(Src, CurLoop); 3081 Dst = addToCoefficient(Dst, CurLoop, SE->getMulExpr(A_K, B)); 3082 if (!findCoefficient(Dst, CurLoop)->isZero()) 3083 Consistent = false; 3084 } 3085 DEBUG(dbgs() << "\t\tnew Src = " << *Src << "\n"); 3086 DEBUG(dbgs() << "\t\tnew Dst = " << *Dst << "\n"); 3087 return true; 3088 } 3089 3090 3091 // Attempt to propagate a point 3092 // constraint into a subscript pair (Src and Dst). 3093 // Return true if some simplification occurs. 3094 bool DependenceInfo::propagatePoint(const SCEV *&Src, const SCEV *&Dst, 3095 Constraint &CurConstraint) { 3096 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3097 const SCEV *A_K = findCoefficient(Src, CurLoop); 3098 const SCEV *AP_K = findCoefficient(Dst, CurLoop); 3099 const SCEV *XA_K = SE->getMulExpr(A_K, CurConstraint.getX()); 3100 const SCEV *YAP_K = SE->getMulExpr(AP_K, CurConstraint.getY()); 3101 DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); 3102 Src = SE->getAddExpr(Src, SE->getMinusSCEV(XA_K, YAP_K)); 3103 Src = zeroCoefficient(Src, CurLoop); 3104 DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); 3105 DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); 3106 Dst = zeroCoefficient(Dst, CurLoop); 3107 DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); 3108 return true; 3109 } 3110 3111 3112 // Update direction vector entry based on the current constraint. 3113 void DependenceInfo::updateDirection(Dependence::DVEntry &Level, 3114 const Constraint &CurConstraint) const { 3115 DEBUG(dbgs() << "\tUpdate direction, constraint ="); 3116 DEBUG(CurConstraint.dump(dbgs())); 3117 if (CurConstraint.isAny()) 3118 ; // use defaults 3119 else if (CurConstraint.isDistance()) { 3120 // this one is consistent, the others aren't 3121 Level.Scalar = false; 3122 Level.Distance = CurConstraint.getD(); 3123 unsigned NewDirection = Dependence::DVEntry::NONE; 3124 if (!SE->isKnownNonZero(Level.Distance)) // if may be zero 3125 NewDirection = Dependence::DVEntry::EQ; 3126 if (!SE->isKnownNonPositive(Level.Distance)) // if may be positive 3127 NewDirection |= Dependence::DVEntry::LT; 3128 if (!SE->isKnownNonNegative(Level.Distance)) // if may be negative 3129 NewDirection |= Dependence::DVEntry::GT; 3130 Level.Direction &= NewDirection; 3131 } 3132 else if (CurConstraint.isLine()) { 3133 Level.Scalar = false; 3134 Level.Distance = nullptr; 3135 // direction should be accurate 3136 } 3137 else if (CurConstraint.isPoint()) { 3138 Level.Scalar = false; 3139 Level.Distance = nullptr; 3140 unsigned NewDirection = Dependence::DVEntry::NONE; 3141 if (!isKnownPredicate(CmpInst::ICMP_NE, 3142 CurConstraint.getY(), 3143 CurConstraint.getX())) 3144 // if X may be = Y 3145 NewDirection |= Dependence::DVEntry::EQ; 3146 if (!isKnownPredicate(CmpInst::ICMP_SLE, 3147 CurConstraint.getY(), 3148 CurConstraint.getX())) 3149 // if Y may be > X 3150 NewDirection |= Dependence::DVEntry::LT; 3151 if (!isKnownPredicate(CmpInst::ICMP_SGE, 3152 CurConstraint.getY(), 3153 CurConstraint.getX())) 3154 // if Y may be < X 3155 NewDirection |= Dependence::DVEntry::GT; 3156 Level.Direction &= NewDirection; 3157 } 3158 else 3159 llvm_unreachable("constraint has unexpected kind"); 3160 } 3161 3162 /// Check if we can delinearize the subscripts. If the SCEVs representing the 3163 /// source and destination array references are recurrences on a nested loop, 3164 /// this function flattens the nested recurrences into separate recurrences 3165 /// for each loop level. 3166 bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst, 3167 SmallVectorImpl<Subscript> &Pair) { 3168 assert(isLoadOrStore(Src) && "instruction is not load or store"); 3169 assert(isLoadOrStore(Dst) && "instruction is not load or store"); 3170 Value *SrcPtr = getLoadStorePointerOperand(Src); 3171 Value *DstPtr = getLoadStorePointerOperand(Dst); 3172 3173 Loop *SrcLoop = LI->getLoopFor(Src->getParent()); 3174 Loop *DstLoop = LI->getLoopFor(Dst->getParent()); 3175 3176 // Below code mimics the code in Delinearization.cpp 3177 const SCEV *SrcAccessFn = 3178 SE->getSCEVAtScope(SrcPtr, SrcLoop); 3179 const SCEV *DstAccessFn = 3180 SE->getSCEVAtScope(DstPtr, DstLoop); 3181 3182 const SCEVUnknown *SrcBase = 3183 dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn)); 3184 const SCEVUnknown *DstBase = 3185 dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn)); 3186 3187 if (!SrcBase || !DstBase || SrcBase != DstBase) 3188 return false; 3189 3190 const SCEV *ElementSize = SE->getElementSize(Src); 3191 if (ElementSize != SE->getElementSize(Dst)) 3192 return false; 3193 3194 const SCEV *SrcSCEV = SE->getMinusSCEV(SrcAccessFn, SrcBase); 3195 const SCEV *DstSCEV = SE->getMinusSCEV(DstAccessFn, DstBase); 3196 3197 const SCEVAddRecExpr *SrcAR = dyn_cast<SCEVAddRecExpr>(SrcSCEV); 3198 const SCEVAddRecExpr *DstAR = dyn_cast<SCEVAddRecExpr>(DstSCEV); 3199 if (!SrcAR || !DstAR || !SrcAR->isAffine() || !DstAR->isAffine()) 3200 return false; 3201 3202 // First step: collect parametric terms in both array references. 3203 SmallVector<const SCEV *, 4> Terms; 3204 SE->collectParametricTerms(SrcAR, Terms); 3205 SE->collectParametricTerms(DstAR, Terms); 3206 3207 // Second step: find subscript sizes. 3208 SmallVector<const SCEV *, 4> Sizes; 3209 SE->findArrayDimensions(Terms, Sizes, ElementSize); 3210 3211 // Third step: compute the access functions for each subscript. 3212 SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts; 3213 SE->computeAccessFunctions(SrcAR, SrcSubscripts, Sizes); 3214 SE->computeAccessFunctions(DstAR, DstSubscripts, Sizes); 3215 3216 // Fail when there is only a subscript: that's a linearized access function. 3217 if (SrcSubscripts.size() < 2 || DstSubscripts.size() < 2 || 3218 SrcSubscripts.size() != DstSubscripts.size()) 3219 return false; 3220 3221 int size = SrcSubscripts.size(); 3222 3223 DEBUG({ 3224 dbgs() << "\nSrcSubscripts: "; 3225 for (int i = 0; i < size; i++) 3226 dbgs() << *SrcSubscripts[i]; 3227 dbgs() << "\nDstSubscripts: "; 3228 for (int i = 0; i < size; i++) 3229 dbgs() << *DstSubscripts[i]; 3230 }); 3231 3232 // The delinearization transforms a single-subscript MIV dependence test into 3233 // a multi-subscript SIV dependence test that is easier to compute. So we 3234 // resize Pair to contain as many pairs of subscripts as the delinearization 3235 // has found, and then initialize the pairs following the delinearization. 3236 Pair.resize(size); 3237 for (int i = 0; i < size; ++i) { 3238 Pair[i].Src = SrcSubscripts[i]; 3239 Pair[i].Dst = DstSubscripts[i]; 3240 unifySubscriptType(&Pair[i]); 3241 3242 // FIXME: we should record the bounds SrcSizes[i] and DstSizes[i] that the 3243 // delinearization has found, and add these constraints to the dependence 3244 // check to avoid memory accesses overflow from one dimension into another. 3245 // This is related to the problem of determining the existence of data 3246 // dependences in array accesses using a different number of subscripts: in 3247 // C one can access an array A[100][100]; as A[0][9999], *A[9999], etc. 3248 } 3249 3250 return true; 3251 } 3252 3253 //===----------------------------------------------------------------------===// 3254 3255 #ifndef NDEBUG 3256 // For debugging purposes, dump a small bit vector to dbgs(). 3257 static void dumpSmallBitVector(SmallBitVector &BV) { 3258 dbgs() << "{"; 3259 for (unsigned VI : BV.set_bits()) { 3260 dbgs() << VI; 3261 if (BV.find_next(VI) >= 0) 3262 dbgs() << ' '; 3263 } 3264 dbgs() << "}\n"; 3265 } 3266 #endif 3267 3268 // depends - 3269 // Returns NULL if there is no dependence. 3270 // Otherwise, return a Dependence with as many details as possible. 3271 // Corresponds to Section 3.1 in the paper 3272 // 3273 // Practical Dependence Testing 3274 // Goff, Kennedy, Tseng 3275 // PLDI 1991 3276 // 3277 // Care is required to keep the routine below, getSplitIteration(), 3278 // up to date with respect to this routine. 3279 std::unique_ptr<Dependence> 3280 DependenceInfo::depends(Instruction *Src, Instruction *Dst, 3281 bool PossiblyLoopIndependent) { 3282 if (Src == Dst) 3283 PossiblyLoopIndependent = false; 3284 3285 if ((!Src->mayReadFromMemory() && !Src->mayWriteToMemory()) || 3286 (!Dst->mayReadFromMemory() && !Dst->mayWriteToMemory())) 3287 // if both instructions don't reference memory, there's no dependence 3288 return nullptr; 3289 3290 if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) { 3291 // can only analyze simple loads and stores, i.e., no calls, invokes, etc. 3292 DEBUG(dbgs() << "can only handle simple loads and stores\n"); 3293 return make_unique<Dependence>(Src, Dst); 3294 } 3295 3296 assert(isLoadOrStore(Src) && "instruction is not load or store"); 3297 assert(isLoadOrStore(Dst) && "instruction is not load or store"); 3298 Value *SrcPtr = getLoadStorePointerOperand(Src); 3299 Value *DstPtr = getLoadStorePointerOperand(Dst); 3300 3301 switch (underlyingObjectsAlias(AA, F->getParent()->getDataLayout(), DstPtr, 3302 SrcPtr)) { 3303 case MayAlias: 3304 case PartialAlias: 3305 // cannot analyse objects if we don't understand their aliasing. 3306 DEBUG(dbgs() << "can't analyze may or partial alias\n"); 3307 return make_unique<Dependence>(Src, Dst); 3308 case NoAlias: 3309 // If the objects noalias, they are distinct, accesses are independent. 3310 DEBUG(dbgs() << "no alias\n"); 3311 return nullptr; 3312 case MustAlias: 3313 break; // The underlying objects alias; test accesses for dependence. 3314 } 3315 3316 // establish loop nesting levels 3317 establishNestingLevels(Src, Dst); 3318 DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n"); 3319 DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n"); 3320 3321 FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels); 3322 ++TotalArrayPairs; 3323 3324 unsigned Pairs = 1; 3325 SmallVector<Subscript, 2> Pair(Pairs); 3326 const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); 3327 const SCEV *DstSCEV = SE->getSCEV(DstPtr); 3328 DEBUG(dbgs() << " SrcSCEV = " << *SrcSCEV << "\n"); 3329 DEBUG(dbgs() << " DstSCEV = " << *DstSCEV << "\n"); 3330 Pair[0].Src = SrcSCEV; 3331 Pair[0].Dst = DstSCEV; 3332 3333 if (Delinearize) { 3334 if (tryDelinearize(Src, Dst, Pair)) { 3335 DEBUG(dbgs() << " delinearized\n"); 3336 Pairs = Pair.size(); 3337 } 3338 } 3339 3340 for (unsigned P = 0; P < Pairs; ++P) { 3341 Pair[P].Loops.resize(MaxLevels + 1); 3342 Pair[P].GroupLoops.resize(MaxLevels + 1); 3343 Pair[P].Group.resize(Pairs); 3344 removeMatchingExtensions(&Pair[P]); 3345 Pair[P].Classification = 3346 classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), 3347 Pair[P].Dst, LI->getLoopFor(Dst->getParent()), 3348 Pair[P].Loops); 3349 Pair[P].GroupLoops = Pair[P].Loops; 3350 Pair[P].Group.set(P); 3351 DEBUG(dbgs() << " subscript " << P << "\n"); 3352 DEBUG(dbgs() << "\tsrc = " << *Pair[P].Src << "\n"); 3353 DEBUG(dbgs() << "\tdst = " << *Pair[P].Dst << "\n"); 3354 DEBUG(dbgs() << "\tclass = " << Pair[P].Classification << "\n"); 3355 DEBUG(dbgs() << "\tloops = "); 3356 DEBUG(dumpSmallBitVector(Pair[P].Loops)); 3357 } 3358 3359 SmallBitVector Separable(Pairs); 3360 SmallBitVector Coupled(Pairs); 3361 3362 // Partition subscripts into separable and minimally-coupled groups 3363 // Algorithm in paper is algorithmically better; 3364 // this may be faster in practice. Check someday. 3365 // 3366 // Here's an example of how it works. Consider this code: 3367 // 3368 // for (i = ...) { 3369 // for (j = ...) { 3370 // for (k = ...) { 3371 // for (l = ...) { 3372 // for (m = ...) { 3373 // A[i][j][k][m] = ...; 3374 // ... = A[0][j][l][i + j]; 3375 // } 3376 // } 3377 // } 3378 // } 3379 // } 3380 // 3381 // There are 4 subscripts here: 3382 // 0 [i] and [0] 3383 // 1 [j] and [j] 3384 // 2 [k] and [l] 3385 // 3 [m] and [i + j] 3386 // 3387 // We've already classified each subscript pair as ZIV, SIV, etc., 3388 // and collected all the loops mentioned by pair P in Pair[P].Loops. 3389 // In addition, we've initialized Pair[P].GroupLoops to Pair[P].Loops 3390 // and set Pair[P].Group = {P}. 3391 // 3392 // Src Dst Classification Loops GroupLoops Group 3393 // 0 [i] [0] SIV {1} {1} {0} 3394 // 1 [j] [j] SIV {2} {2} {1} 3395 // 2 [k] [l] RDIV {3,4} {3,4} {2} 3396 // 3 [m] [i + j] MIV {1,2,5} {1,2,5} {3} 3397 // 3398 // For each subscript SI 0 .. 3, we consider each remaining subscript, SJ. 3399 // So, 0 is compared against 1, 2, and 3; 1 is compared against 2 and 3, etc. 3400 // 3401 // We begin by comparing 0 and 1. The intersection of the GroupLoops is empty. 3402 // Next, 0 and 2. Again, the intersection of their GroupLoops is empty. 3403 // Next 0 and 3. The intersection of their GroupLoop = {1}, not empty, 3404 // so Pair[3].Group = {0,3} and Done = false (that is, 0 will not be added 3405 // to either Separable or Coupled). 3406 // 3407 // Next, we consider 1 and 2. The intersection of the GroupLoops is empty. 3408 // Next, 1 and 3. The intersectionof their GroupLoops = {2}, not empty, 3409 // so Pair[3].Group = {0, 1, 3} and Done = false. 3410 // 3411 // Next, we compare 2 against 3. The intersection of the GroupLoops is empty. 3412 // Since Done remains true, we add 2 to the set of Separable pairs. 3413 // 3414 // Finally, we consider 3. There's nothing to compare it with, 3415 // so Done remains true and we add it to the Coupled set. 3416 // Pair[3].Group = {0, 1, 3} and GroupLoops = {1, 2, 5}. 3417 // 3418 // In the end, we've got 1 separable subscript and 1 coupled group. 3419 for (unsigned SI = 0; SI < Pairs; ++SI) { 3420 if (Pair[SI].Classification == Subscript::NonLinear) { 3421 // ignore these, but collect loops for later 3422 ++NonlinearSubscriptPairs; 3423 collectCommonLoops(Pair[SI].Src, 3424 LI->getLoopFor(Src->getParent()), 3425 Pair[SI].Loops); 3426 collectCommonLoops(Pair[SI].Dst, 3427 LI->getLoopFor(Dst->getParent()), 3428 Pair[SI].Loops); 3429 Result.Consistent = false; 3430 } else if (Pair[SI].Classification == Subscript::ZIV) { 3431 // always separable 3432 Separable.set(SI); 3433 } 3434 else { 3435 // SIV, RDIV, or MIV, so check for coupled group 3436 bool Done = true; 3437 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { 3438 SmallBitVector Intersection = Pair[SI].GroupLoops; 3439 Intersection &= Pair[SJ].GroupLoops; 3440 if (Intersection.any()) { 3441 // accumulate set of all the loops in group 3442 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; 3443 // accumulate set of all subscripts in group 3444 Pair[SJ].Group |= Pair[SI].Group; 3445 Done = false; 3446 } 3447 } 3448 if (Done) { 3449 if (Pair[SI].Group.count() == 1) { 3450 Separable.set(SI); 3451 ++SeparableSubscriptPairs; 3452 } 3453 else { 3454 Coupled.set(SI); 3455 ++CoupledSubscriptPairs; 3456 } 3457 } 3458 } 3459 } 3460 3461 DEBUG(dbgs() << " Separable = "); 3462 DEBUG(dumpSmallBitVector(Separable)); 3463 DEBUG(dbgs() << " Coupled = "); 3464 DEBUG(dumpSmallBitVector(Coupled)); 3465 3466 Constraint NewConstraint; 3467 NewConstraint.setAny(SE); 3468 3469 // test separable subscripts 3470 for (unsigned SI : Separable.set_bits()) { 3471 DEBUG(dbgs() << "testing subscript " << SI); 3472 switch (Pair[SI].Classification) { 3473 case Subscript::ZIV: 3474 DEBUG(dbgs() << ", ZIV\n"); 3475 if (testZIV(Pair[SI].Src, Pair[SI].Dst, Result)) 3476 return nullptr; 3477 break; 3478 case Subscript::SIV: { 3479 DEBUG(dbgs() << ", SIV\n"); 3480 unsigned Level; 3481 const SCEV *SplitIter = nullptr; 3482 if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level, Result, NewConstraint, 3483 SplitIter)) 3484 return nullptr; 3485 break; 3486 } 3487 case Subscript::RDIV: 3488 DEBUG(dbgs() << ", RDIV\n"); 3489 if (testRDIV(Pair[SI].Src, Pair[SI].Dst, Result)) 3490 return nullptr; 3491 break; 3492 case Subscript::MIV: 3493 DEBUG(dbgs() << ", MIV\n"); 3494 if (testMIV(Pair[SI].Src, Pair[SI].Dst, Pair[SI].Loops, Result)) 3495 return nullptr; 3496 break; 3497 default: 3498 llvm_unreachable("subscript has unexpected classification"); 3499 } 3500 } 3501 3502 if (Coupled.count()) { 3503 // test coupled subscript groups 3504 DEBUG(dbgs() << "starting on coupled subscripts\n"); 3505 DEBUG(dbgs() << "MaxLevels + 1 = " << MaxLevels + 1 << "\n"); 3506 SmallVector<Constraint, 4> Constraints(MaxLevels + 1); 3507 for (unsigned II = 0; II <= MaxLevels; ++II) 3508 Constraints[II].setAny(SE); 3509 for (unsigned SI : Coupled.set_bits()) { 3510 DEBUG(dbgs() << "testing subscript group " << SI << " { "); 3511 SmallBitVector Group(Pair[SI].Group); 3512 SmallBitVector Sivs(Pairs); 3513 SmallBitVector Mivs(Pairs); 3514 SmallBitVector ConstrainedLevels(MaxLevels + 1); 3515 SmallVector<Subscript *, 4> PairsInGroup; 3516 for (unsigned SJ : Group.set_bits()) { 3517 DEBUG(dbgs() << SJ << " "); 3518 if (Pair[SJ].Classification == Subscript::SIV) 3519 Sivs.set(SJ); 3520 else 3521 Mivs.set(SJ); 3522 PairsInGroup.push_back(&Pair[SJ]); 3523 } 3524 unifySubscriptType(PairsInGroup); 3525 DEBUG(dbgs() << "}\n"); 3526 while (Sivs.any()) { 3527 bool Changed = false; 3528 for (unsigned SJ : Sivs.set_bits()) { 3529 DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n"); 3530 // SJ is an SIV subscript that's part of the current coupled group 3531 unsigned Level; 3532 const SCEV *SplitIter = nullptr; 3533 DEBUG(dbgs() << "SIV\n"); 3534 if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, Result, NewConstraint, 3535 SplitIter)) 3536 return nullptr; 3537 ConstrainedLevels.set(Level); 3538 if (intersectConstraints(&Constraints[Level], &NewConstraint)) { 3539 if (Constraints[Level].isEmpty()) { 3540 ++DeltaIndependence; 3541 return nullptr; 3542 } 3543 Changed = true; 3544 } 3545 Sivs.reset(SJ); 3546 } 3547 if (Changed) { 3548 // propagate, possibly creating new SIVs and ZIVs 3549 DEBUG(dbgs() << " propagating\n"); 3550 DEBUG(dbgs() << "\tMivs = "); 3551 DEBUG(dumpSmallBitVector(Mivs)); 3552 for (unsigned SJ : Mivs.set_bits()) { 3553 // SJ is an MIV subscript that's part of the current coupled group 3554 DEBUG(dbgs() << "\tSJ = " << SJ << "\n"); 3555 if (propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, 3556 Constraints, Result.Consistent)) { 3557 DEBUG(dbgs() << "\t Changed\n"); 3558 ++DeltaPropagations; 3559 Pair[SJ].Classification = 3560 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), 3561 Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), 3562 Pair[SJ].Loops); 3563 switch (Pair[SJ].Classification) { 3564 case Subscript::ZIV: 3565 DEBUG(dbgs() << "ZIV\n"); 3566 if (testZIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) 3567 return nullptr; 3568 Mivs.reset(SJ); 3569 break; 3570 case Subscript::SIV: 3571 Sivs.set(SJ); 3572 Mivs.reset(SJ); 3573 break; 3574 case Subscript::RDIV: 3575 case Subscript::MIV: 3576 break; 3577 default: 3578 llvm_unreachable("bad subscript classification"); 3579 } 3580 } 3581 } 3582 } 3583 } 3584 3585 // test & propagate remaining RDIVs 3586 for (unsigned SJ : Mivs.set_bits()) { 3587 if (Pair[SJ].Classification == Subscript::RDIV) { 3588 DEBUG(dbgs() << "RDIV test\n"); 3589 if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) 3590 return nullptr; 3591 // I don't yet understand how to propagate RDIV results 3592 Mivs.reset(SJ); 3593 } 3594 } 3595 3596 // test remaining MIVs 3597 // This code is temporary. 3598 // Better to somehow test all remaining subscripts simultaneously. 3599 for (unsigned SJ : Mivs.set_bits()) { 3600 if (Pair[SJ].Classification == Subscript::MIV) { 3601 DEBUG(dbgs() << "MIV test\n"); 3602 if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Result)) 3603 return nullptr; 3604 } 3605 else 3606 llvm_unreachable("expected only MIV subscripts at this point"); 3607 } 3608 3609 // update Result.DV from constraint vector 3610 DEBUG(dbgs() << " updating\n"); 3611 for (unsigned SJ : ConstrainedLevels.set_bits()) { 3612 if (SJ > CommonLevels) 3613 break; 3614 updateDirection(Result.DV[SJ - 1], Constraints[SJ]); 3615 if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE) 3616 return nullptr; 3617 } 3618 } 3619 } 3620 3621 // Make sure the Scalar flags are set correctly. 3622 SmallBitVector CompleteLoops(MaxLevels + 1); 3623 for (unsigned SI = 0; SI < Pairs; ++SI) 3624 CompleteLoops |= Pair[SI].Loops; 3625 for (unsigned II = 1; II <= CommonLevels; ++II) 3626 if (CompleteLoops[II]) 3627 Result.DV[II - 1].Scalar = false; 3628 3629 if (PossiblyLoopIndependent) { 3630 // Make sure the LoopIndependent flag is set correctly. 3631 // All directions must include equal, otherwise no 3632 // loop-independent dependence is possible. 3633 for (unsigned II = 1; II <= CommonLevels; ++II) { 3634 if (!(Result.getDirection(II) & Dependence::DVEntry::EQ)) { 3635 Result.LoopIndependent = false; 3636 break; 3637 } 3638 } 3639 } 3640 else { 3641 // On the other hand, if all directions are equal and there's no 3642 // loop-independent dependence possible, then no dependence exists. 3643 bool AllEqual = true; 3644 for (unsigned II = 1; II <= CommonLevels; ++II) { 3645 if (Result.getDirection(II) != Dependence::DVEntry::EQ) { 3646 AllEqual = false; 3647 break; 3648 } 3649 } 3650 if (AllEqual) 3651 return nullptr; 3652 } 3653 3654 return make_unique<FullDependence>(std::move(Result)); 3655 } 3656 3657 3658 3659 //===----------------------------------------------------------------------===// 3660 // getSplitIteration - 3661 // Rather than spend rarely-used space recording the splitting iteration 3662 // during the Weak-Crossing SIV test, we re-compute it on demand. 3663 // The re-computation is basically a repeat of the entire dependence test, 3664 // though simplified since we know that the dependence exists. 3665 // It's tedious, since we must go through all propagations, etc. 3666 // 3667 // Care is required to keep this code up to date with respect to the routine 3668 // above, depends(). 3669 // 3670 // Generally, the dependence analyzer will be used to build 3671 // a dependence graph for a function (basically a map from instructions 3672 // to dependences). Looking for cycles in the graph shows us loops 3673 // that cannot be trivially vectorized/parallelized. 3674 // 3675 // We can try to improve the situation by examining all the dependences 3676 // that make up the cycle, looking for ones we can break. 3677 // Sometimes, peeling the first or last iteration of a loop will break 3678 // dependences, and we've got flags for those possibilities. 3679 // Sometimes, splitting a loop at some other iteration will do the trick, 3680 // and we've got a flag for that case. Rather than waste the space to 3681 // record the exact iteration (since we rarely know), we provide 3682 // a method that calculates the iteration. It's a drag that it must work 3683 // from scratch, but wonderful in that it's possible. 3684 // 3685 // Here's an example: 3686 // 3687 // for (i = 0; i < 10; i++) 3688 // A[i] = ... 3689 // ... = A[11 - i] 3690 // 3691 // There's a loop-carried flow dependence from the store to the load, 3692 // found by the weak-crossing SIV test. The dependence will have a flag, 3693 // indicating that the dependence can be broken by splitting the loop. 3694 // Calling getSplitIteration will return 5. 3695 // Splitting the loop breaks the dependence, like so: 3696 // 3697 // for (i = 0; i <= 5; i++) 3698 // A[i] = ... 3699 // ... = A[11 - i] 3700 // for (i = 6; i < 10; i++) 3701 // A[i] = ... 3702 // ... = A[11 - i] 3703 // 3704 // breaks the dependence and allows us to vectorize/parallelize 3705 // both loops. 3706 const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep, 3707 unsigned SplitLevel) { 3708 assert(Dep.isSplitable(SplitLevel) && 3709 "Dep should be splitable at SplitLevel"); 3710 Instruction *Src = Dep.getSrc(); 3711 Instruction *Dst = Dep.getDst(); 3712 assert(Src->mayReadFromMemory() || Src->mayWriteToMemory()); 3713 assert(Dst->mayReadFromMemory() || Dst->mayWriteToMemory()); 3714 assert(isLoadOrStore(Src)); 3715 assert(isLoadOrStore(Dst)); 3716 Value *SrcPtr = getLoadStorePointerOperand(Src); 3717 Value *DstPtr = getLoadStorePointerOperand(Dst); 3718 assert(underlyingObjectsAlias(AA, F->getParent()->getDataLayout(), DstPtr, 3719 SrcPtr) == MustAlias); 3720 3721 // establish loop nesting levels 3722 establishNestingLevels(Src, Dst); 3723 3724 FullDependence Result(Src, Dst, false, CommonLevels); 3725 3726 unsigned Pairs = 1; 3727 SmallVector<Subscript, 2> Pair(Pairs); 3728 const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); 3729 const SCEV *DstSCEV = SE->getSCEV(DstPtr); 3730 Pair[0].Src = SrcSCEV; 3731 Pair[0].Dst = DstSCEV; 3732 3733 if (Delinearize) { 3734 if (tryDelinearize(Src, Dst, Pair)) { 3735 DEBUG(dbgs() << " delinearized\n"); 3736 Pairs = Pair.size(); 3737 } 3738 } 3739 3740 for (unsigned P = 0; P < Pairs; ++P) { 3741 Pair[P].Loops.resize(MaxLevels + 1); 3742 Pair[P].GroupLoops.resize(MaxLevels + 1); 3743 Pair[P].Group.resize(Pairs); 3744 removeMatchingExtensions(&Pair[P]); 3745 Pair[P].Classification = 3746 classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), 3747 Pair[P].Dst, LI->getLoopFor(Dst->getParent()), 3748 Pair[P].Loops); 3749 Pair[P].GroupLoops = Pair[P].Loops; 3750 Pair[P].Group.set(P); 3751 } 3752 3753 SmallBitVector Separable(Pairs); 3754 SmallBitVector Coupled(Pairs); 3755 3756 // partition subscripts into separable and minimally-coupled groups 3757 for (unsigned SI = 0; SI < Pairs; ++SI) { 3758 if (Pair[SI].Classification == Subscript::NonLinear) { 3759 // ignore these, but collect loops for later 3760 collectCommonLoops(Pair[SI].Src, 3761 LI->getLoopFor(Src->getParent()), 3762 Pair[SI].Loops); 3763 collectCommonLoops(Pair[SI].Dst, 3764 LI->getLoopFor(Dst->getParent()), 3765 Pair[SI].Loops); 3766 Result.Consistent = false; 3767 } 3768 else if (Pair[SI].Classification == Subscript::ZIV) 3769 Separable.set(SI); 3770 else { 3771 // SIV, RDIV, or MIV, so check for coupled group 3772 bool Done = true; 3773 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { 3774 SmallBitVector Intersection = Pair[SI].GroupLoops; 3775 Intersection &= Pair[SJ].GroupLoops; 3776 if (Intersection.any()) { 3777 // accumulate set of all the loops in group 3778 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; 3779 // accumulate set of all subscripts in group 3780 Pair[SJ].Group |= Pair[SI].Group; 3781 Done = false; 3782 } 3783 } 3784 if (Done) { 3785 if (Pair[SI].Group.count() == 1) 3786 Separable.set(SI); 3787 else 3788 Coupled.set(SI); 3789 } 3790 } 3791 } 3792 3793 Constraint NewConstraint; 3794 NewConstraint.setAny(SE); 3795 3796 // test separable subscripts 3797 for (unsigned SI : Separable.set_bits()) { 3798 switch (Pair[SI].Classification) { 3799 case Subscript::SIV: { 3800 unsigned Level; 3801 const SCEV *SplitIter = nullptr; 3802 (void) testSIV(Pair[SI].Src, Pair[SI].Dst, Level, 3803 Result, NewConstraint, SplitIter); 3804 if (Level == SplitLevel) { 3805 assert(SplitIter != nullptr); 3806 return SplitIter; 3807 } 3808 break; 3809 } 3810 case Subscript::ZIV: 3811 case Subscript::RDIV: 3812 case Subscript::MIV: 3813 break; 3814 default: 3815 llvm_unreachable("subscript has unexpected classification"); 3816 } 3817 } 3818 3819 if (Coupled.count()) { 3820 // test coupled subscript groups 3821 SmallVector<Constraint, 4> Constraints(MaxLevels + 1); 3822 for (unsigned II = 0; II <= MaxLevels; ++II) 3823 Constraints[II].setAny(SE); 3824 for (unsigned SI : Coupled.set_bits()) { 3825 SmallBitVector Group(Pair[SI].Group); 3826 SmallBitVector Sivs(Pairs); 3827 SmallBitVector Mivs(Pairs); 3828 SmallBitVector ConstrainedLevels(MaxLevels + 1); 3829 for (unsigned SJ : Group.set_bits()) { 3830 if (Pair[SJ].Classification == Subscript::SIV) 3831 Sivs.set(SJ); 3832 else 3833 Mivs.set(SJ); 3834 } 3835 while (Sivs.any()) { 3836 bool Changed = false; 3837 for (unsigned SJ : Sivs.set_bits()) { 3838 // SJ is an SIV subscript that's part of the current coupled group 3839 unsigned Level; 3840 const SCEV *SplitIter = nullptr; 3841 (void) testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, 3842 Result, NewConstraint, SplitIter); 3843 if (Level == SplitLevel && SplitIter) 3844 return SplitIter; 3845 ConstrainedLevels.set(Level); 3846 if (intersectConstraints(&Constraints[Level], &NewConstraint)) 3847 Changed = true; 3848 Sivs.reset(SJ); 3849 } 3850 if (Changed) { 3851 // propagate, possibly creating new SIVs and ZIVs 3852 for (unsigned SJ : Mivs.set_bits()) { 3853 // SJ is an MIV subscript that's part of the current coupled group 3854 if (propagate(Pair[SJ].Src, Pair[SJ].Dst, 3855 Pair[SJ].Loops, Constraints, Result.Consistent)) { 3856 Pair[SJ].Classification = 3857 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), 3858 Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), 3859 Pair[SJ].Loops); 3860 switch (Pair[SJ].Classification) { 3861 case Subscript::ZIV: 3862 Mivs.reset(SJ); 3863 break; 3864 case Subscript::SIV: 3865 Sivs.set(SJ); 3866 Mivs.reset(SJ); 3867 break; 3868 case Subscript::RDIV: 3869 case Subscript::MIV: 3870 break; 3871 default: 3872 llvm_unreachable("bad subscript classification"); 3873 } 3874 } 3875 } 3876 } 3877 } 3878 } 3879 } 3880 llvm_unreachable("somehow reached end of routine"); 3881 return nullptr; 3882 } 3883