1 //===-- DependenceAnalysis.cpp - DA Implementation --------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // DependenceAnalysis is an LLVM pass that analyses dependences between memory 10 // accesses. Currently, it is an (incomplete) implementation of the approach 11 // described in 12 // 13 // Practical Dependence Testing 14 // Goff, Kennedy, Tseng 15 // PLDI 1991 16 // 17 // There's a single entry point that analyzes the dependence between a pair 18 // of memory references in a function, returning either NULL, for no dependence, 19 // or a more-or-less detailed description of the dependence between them. 20 // 21 // Currently, the implementation cannot propagate constraints between 22 // coupled RDIV subscripts and lacks a multi-subscript MIV test. 23 // Both of these are conservative weaknesses; 24 // that is, not a source of correctness problems. 25 // 26 // Since Clang linearizes some array subscripts, the dependence 27 // analysis is using SCEV->delinearize to recover the representation of multiple 28 // subscripts, and thus avoid the more expensive and less precise MIV tests. The 29 // delinearization is controlled by the flag -da-delinearize. 30 // 31 // We should pay some careful attention to the possibility of integer overflow 32 // in the implementation of the various tests. This could happen with Add, 33 // Subtract, or Multiply, with both APInt's and SCEV's. 34 // 35 // Some non-linear subscript pairs can be handled by the GCD test 36 // (and perhaps other tests). 37 // Should explore how often these things occur. 38 // 39 // Finally, it seems like certain test cases expose weaknesses in the SCEV 40 // simplification, especially in the handling of sign and zero extensions. 41 // It could be useful to spend time exploring these. 42 // 43 // Please note that this is work in progress and the interface is subject to 44 // change. 45 // 46 //===----------------------------------------------------------------------===// 47 // // 48 // In memory of Ken Kennedy, 1945 - 2007 // 49 // // 50 //===----------------------------------------------------------------------===// 51 52 #include "llvm/Analysis/DependenceAnalysis.h" 53 #include "llvm/ADT/STLExtras.h" 54 #include "llvm/ADT/Statistic.h" 55 #include "llvm/Analysis/AliasAnalysis.h" 56 #include "llvm/Analysis/LoopInfo.h" 57 #include "llvm/Analysis/ScalarEvolution.h" 58 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 59 #include "llvm/Analysis/ValueTracking.h" 60 #include "llvm/Config/llvm-config.h" 61 #include "llvm/IR/InstIterator.h" 62 #include "llvm/IR/Module.h" 63 #include "llvm/IR/Operator.h" 64 #include "llvm/Support/CommandLine.h" 65 #include "llvm/Support/Debug.h" 66 #include "llvm/Support/ErrorHandling.h" 67 #include "llvm/Support/raw_ostream.h" 68 69 using namespace llvm; 70 71 #define DEBUG_TYPE "da" 72 73 //===----------------------------------------------------------------------===// 74 // statistics 75 76 STATISTIC(TotalArrayPairs, "Array pairs tested"); 77 STATISTIC(SeparableSubscriptPairs, "Separable subscript pairs"); 78 STATISTIC(CoupledSubscriptPairs, "Coupled subscript pairs"); 79 STATISTIC(NonlinearSubscriptPairs, "Nonlinear subscript pairs"); 80 STATISTIC(ZIVapplications, "ZIV applications"); 81 STATISTIC(ZIVindependence, "ZIV independence"); 82 STATISTIC(StrongSIVapplications, "Strong SIV applications"); 83 STATISTIC(StrongSIVsuccesses, "Strong SIV successes"); 84 STATISTIC(StrongSIVindependence, "Strong SIV independence"); 85 STATISTIC(WeakCrossingSIVapplications, "Weak-Crossing SIV applications"); 86 STATISTIC(WeakCrossingSIVsuccesses, "Weak-Crossing SIV successes"); 87 STATISTIC(WeakCrossingSIVindependence, "Weak-Crossing SIV independence"); 88 STATISTIC(ExactSIVapplications, "Exact SIV applications"); 89 STATISTIC(ExactSIVsuccesses, "Exact SIV successes"); 90 STATISTIC(ExactSIVindependence, "Exact SIV independence"); 91 STATISTIC(WeakZeroSIVapplications, "Weak-Zero SIV applications"); 92 STATISTIC(WeakZeroSIVsuccesses, "Weak-Zero SIV successes"); 93 STATISTIC(WeakZeroSIVindependence, "Weak-Zero SIV independence"); 94 STATISTIC(ExactRDIVapplications, "Exact RDIV applications"); 95 STATISTIC(ExactRDIVindependence, "Exact RDIV independence"); 96 STATISTIC(SymbolicRDIVapplications, "Symbolic RDIV applications"); 97 STATISTIC(SymbolicRDIVindependence, "Symbolic RDIV independence"); 98 STATISTIC(DeltaApplications, "Delta applications"); 99 STATISTIC(DeltaSuccesses, "Delta successes"); 100 STATISTIC(DeltaIndependence, "Delta independence"); 101 STATISTIC(DeltaPropagations, "Delta propagations"); 102 STATISTIC(GCDapplications, "GCD applications"); 103 STATISTIC(GCDsuccesses, "GCD successes"); 104 STATISTIC(GCDindependence, "GCD independence"); 105 STATISTIC(BanerjeeApplications, "Banerjee applications"); 106 STATISTIC(BanerjeeIndependence, "Banerjee independence"); 107 STATISTIC(BanerjeeSuccesses, "Banerjee successes"); 108 109 static cl::opt<bool> 110 Delinearize("da-delinearize", cl::init(true), cl::Hidden, cl::ZeroOrMore, 111 cl::desc("Try to delinearize array references.")); 112 113 //===----------------------------------------------------------------------===// 114 // basics 115 116 DependenceAnalysis::Result 117 DependenceAnalysis::run(Function &F, FunctionAnalysisManager &FAM) { 118 auto &AA = FAM.getResult<AAManager>(F); 119 auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F); 120 auto &LI = FAM.getResult<LoopAnalysis>(F); 121 return DependenceInfo(&F, &AA, &SE, &LI); 122 } 123 124 AnalysisKey DependenceAnalysis::Key; 125 126 INITIALIZE_PASS_BEGIN(DependenceAnalysisWrapperPass, "da", 127 "Dependence Analysis", true, true) 128 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 129 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) 130 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 131 INITIALIZE_PASS_END(DependenceAnalysisWrapperPass, "da", "Dependence Analysis", 132 true, true) 133 134 char DependenceAnalysisWrapperPass::ID = 0; 135 136 FunctionPass *llvm::createDependenceAnalysisWrapperPass() { 137 return new DependenceAnalysisWrapperPass(); 138 } 139 140 bool DependenceAnalysisWrapperPass::runOnFunction(Function &F) { 141 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); 142 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 143 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 144 info.reset(new DependenceInfo(&F, &AA, &SE, &LI)); 145 return false; 146 } 147 148 DependenceInfo &DependenceAnalysisWrapperPass::getDI() const { return *info; } 149 150 void DependenceAnalysisWrapperPass::releaseMemory() { info.reset(); } 151 152 void DependenceAnalysisWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { 153 AU.setPreservesAll(); 154 AU.addRequiredTransitive<AAResultsWrapperPass>(); 155 AU.addRequiredTransitive<ScalarEvolutionWrapperPass>(); 156 AU.addRequiredTransitive<LoopInfoWrapperPass>(); 157 } 158 159 160 // Used to test the dependence analyzer. 161 // Looks through the function, noting loads and stores. 162 // Calls depends() on every possible pair and prints out the result. 163 // Ignores all other instructions. 164 static void dumpExampleDependence(raw_ostream &OS, DependenceInfo *DA) { 165 auto *F = DA->getFunction(); 166 for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F); SrcI != SrcE; 167 ++SrcI) { 168 if (isa<StoreInst>(*SrcI) || isa<LoadInst>(*SrcI)) { 169 for (inst_iterator DstI = SrcI, DstE = inst_end(F); 170 DstI != DstE; ++DstI) { 171 if (isa<StoreInst>(*DstI) || isa<LoadInst>(*DstI)) { 172 OS << "da analyze - "; 173 if (auto D = DA->depends(&*SrcI, &*DstI, true)) { 174 D->dump(OS); 175 for (unsigned Level = 1; Level <= D->getLevels(); Level++) { 176 if (D->isSplitable(Level)) { 177 OS << "da analyze - split level = " << Level; 178 OS << ", iteration = " << *DA->getSplitIteration(*D, Level); 179 OS << "!\n"; 180 } 181 } 182 } 183 else 184 OS << "none!\n"; 185 } 186 } 187 } 188 } 189 } 190 191 void DependenceAnalysisWrapperPass::print(raw_ostream &OS, 192 const Module *) const { 193 dumpExampleDependence(OS, info.get()); 194 } 195 196 PreservedAnalyses 197 DependenceAnalysisPrinterPass::run(Function &F, FunctionAnalysisManager &FAM) { 198 OS << "'Dependence Analysis' for function '" << F.getName() << "':\n"; 199 dumpExampleDependence(OS, &FAM.getResult<DependenceAnalysis>(F)); 200 return PreservedAnalyses::all(); 201 } 202 203 //===----------------------------------------------------------------------===// 204 // Dependence methods 205 206 // Returns true if this is an input dependence. 207 bool Dependence::isInput() const { 208 return Src->mayReadFromMemory() && Dst->mayReadFromMemory(); 209 } 210 211 212 // Returns true if this is an output dependence. 213 bool Dependence::isOutput() const { 214 return Src->mayWriteToMemory() && Dst->mayWriteToMemory(); 215 } 216 217 218 // Returns true if this is an flow (aka true) dependence. 219 bool Dependence::isFlow() const { 220 return Src->mayWriteToMemory() && Dst->mayReadFromMemory(); 221 } 222 223 224 // Returns true if this is an anti dependence. 225 bool Dependence::isAnti() const { 226 return Src->mayReadFromMemory() && Dst->mayWriteToMemory(); 227 } 228 229 230 // Returns true if a particular level is scalar; that is, 231 // if no subscript in the source or destination mention the induction 232 // variable associated with the loop at this level. 233 // Leave this out of line, so it will serve as a virtual method anchor 234 bool Dependence::isScalar(unsigned level) const { 235 return false; 236 } 237 238 239 //===----------------------------------------------------------------------===// 240 // FullDependence methods 241 242 FullDependence::FullDependence(Instruction *Source, Instruction *Destination, 243 bool PossiblyLoopIndependent, 244 unsigned CommonLevels) 245 : Dependence(Source, Destination), Levels(CommonLevels), 246 LoopIndependent(PossiblyLoopIndependent) { 247 Consistent = true; 248 if (CommonLevels) 249 DV = make_unique<DVEntry[]>(CommonLevels); 250 } 251 252 // The rest are simple getters that hide the implementation. 253 254 // getDirection - Returns the direction associated with a particular level. 255 unsigned FullDependence::getDirection(unsigned Level) const { 256 assert(0 < Level && Level <= Levels && "Level out of range"); 257 return DV[Level - 1].Direction; 258 } 259 260 261 // Returns the distance (or NULL) associated with a particular level. 262 const SCEV *FullDependence::getDistance(unsigned Level) const { 263 assert(0 < Level && Level <= Levels && "Level out of range"); 264 return DV[Level - 1].Distance; 265 } 266 267 268 // Returns true if a particular level is scalar; that is, 269 // if no subscript in the source or destination mention the induction 270 // variable associated with the loop at this level. 271 bool FullDependence::isScalar(unsigned Level) const { 272 assert(0 < Level && Level <= Levels && "Level out of range"); 273 return DV[Level - 1].Scalar; 274 } 275 276 277 // Returns true if peeling the first iteration from this loop 278 // will break this dependence. 279 bool FullDependence::isPeelFirst(unsigned Level) const { 280 assert(0 < Level && Level <= Levels && "Level out of range"); 281 return DV[Level - 1].PeelFirst; 282 } 283 284 285 // Returns true if peeling the last iteration from this loop 286 // will break this dependence. 287 bool FullDependence::isPeelLast(unsigned Level) const { 288 assert(0 < Level && Level <= Levels && "Level out of range"); 289 return DV[Level - 1].PeelLast; 290 } 291 292 293 // Returns true if splitting this loop will break the dependence. 294 bool FullDependence::isSplitable(unsigned Level) const { 295 assert(0 < Level && Level <= Levels && "Level out of range"); 296 return DV[Level - 1].Splitable; 297 } 298 299 300 //===----------------------------------------------------------------------===// 301 // DependenceInfo::Constraint methods 302 303 // If constraint is a point <X, Y>, returns X. 304 // Otherwise assert. 305 const SCEV *DependenceInfo::Constraint::getX() const { 306 assert(Kind == Point && "Kind should be Point"); 307 return A; 308 } 309 310 311 // If constraint is a point <X, Y>, returns Y. 312 // Otherwise assert. 313 const SCEV *DependenceInfo::Constraint::getY() const { 314 assert(Kind == Point && "Kind should be Point"); 315 return B; 316 } 317 318 319 // If constraint is a line AX + BY = C, returns A. 320 // Otherwise assert. 321 const SCEV *DependenceInfo::Constraint::getA() const { 322 assert((Kind == Line || Kind == Distance) && 323 "Kind should be Line (or Distance)"); 324 return A; 325 } 326 327 328 // If constraint is a line AX + BY = C, returns B. 329 // Otherwise assert. 330 const SCEV *DependenceInfo::Constraint::getB() const { 331 assert((Kind == Line || Kind == Distance) && 332 "Kind should be Line (or Distance)"); 333 return B; 334 } 335 336 337 // If constraint is a line AX + BY = C, returns C. 338 // Otherwise assert. 339 const SCEV *DependenceInfo::Constraint::getC() const { 340 assert((Kind == Line || Kind == Distance) && 341 "Kind should be Line (or Distance)"); 342 return C; 343 } 344 345 346 // If constraint is a distance, returns D. 347 // Otherwise assert. 348 const SCEV *DependenceInfo::Constraint::getD() const { 349 assert(Kind == Distance && "Kind should be Distance"); 350 return SE->getNegativeSCEV(C); 351 } 352 353 354 // Returns the loop associated with this constraint. 355 const Loop *DependenceInfo::Constraint::getAssociatedLoop() const { 356 assert((Kind == Distance || Kind == Line || Kind == Point) && 357 "Kind should be Distance, Line, or Point"); 358 return AssociatedLoop; 359 } 360 361 void DependenceInfo::Constraint::setPoint(const SCEV *X, const SCEV *Y, 362 const Loop *CurLoop) { 363 Kind = Point; 364 A = X; 365 B = Y; 366 AssociatedLoop = CurLoop; 367 } 368 369 void DependenceInfo::Constraint::setLine(const SCEV *AA, const SCEV *BB, 370 const SCEV *CC, const Loop *CurLoop) { 371 Kind = Line; 372 A = AA; 373 B = BB; 374 C = CC; 375 AssociatedLoop = CurLoop; 376 } 377 378 void DependenceInfo::Constraint::setDistance(const SCEV *D, 379 const Loop *CurLoop) { 380 Kind = Distance; 381 A = SE->getOne(D->getType()); 382 B = SE->getNegativeSCEV(A); 383 C = SE->getNegativeSCEV(D); 384 AssociatedLoop = CurLoop; 385 } 386 387 void DependenceInfo::Constraint::setEmpty() { Kind = Empty; } 388 389 void DependenceInfo::Constraint::setAny(ScalarEvolution *NewSE) { 390 SE = NewSE; 391 Kind = Any; 392 } 393 394 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 395 // For debugging purposes. Dumps the constraint out to OS. 396 LLVM_DUMP_METHOD void DependenceInfo::Constraint::dump(raw_ostream &OS) const { 397 if (isEmpty()) 398 OS << " Empty\n"; 399 else if (isAny()) 400 OS << " Any\n"; 401 else if (isPoint()) 402 OS << " Point is <" << *getX() << ", " << *getY() << ">\n"; 403 else if (isDistance()) 404 OS << " Distance is " << *getD() << 405 " (" << *getA() << "*X + " << *getB() << "*Y = " << *getC() << ")\n"; 406 else if (isLine()) 407 OS << " Line is " << *getA() << "*X + " << 408 *getB() << "*Y = " << *getC() << "\n"; 409 else 410 llvm_unreachable("unknown constraint type in Constraint::dump"); 411 } 412 #endif 413 414 415 // Updates X with the intersection 416 // of the Constraints X and Y. Returns true if X has changed. 417 // Corresponds to Figure 4 from the paper 418 // 419 // Practical Dependence Testing 420 // Goff, Kennedy, Tseng 421 // PLDI 1991 422 bool DependenceInfo::intersectConstraints(Constraint *X, const Constraint *Y) { 423 ++DeltaApplications; 424 LLVM_DEBUG(dbgs() << "\tintersect constraints\n"); 425 LLVM_DEBUG(dbgs() << "\t X ="; X->dump(dbgs())); 426 LLVM_DEBUG(dbgs() << "\t Y ="; Y->dump(dbgs())); 427 assert(!Y->isPoint() && "Y must not be a Point"); 428 if (X->isAny()) { 429 if (Y->isAny()) 430 return false; 431 *X = *Y; 432 return true; 433 } 434 if (X->isEmpty()) 435 return false; 436 if (Y->isEmpty()) { 437 X->setEmpty(); 438 return true; 439 } 440 441 if (X->isDistance() && Y->isDistance()) { 442 LLVM_DEBUG(dbgs() << "\t intersect 2 distances\n"); 443 if (isKnownPredicate(CmpInst::ICMP_EQ, X->getD(), Y->getD())) 444 return false; 445 if (isKnownPredicate(CmpInst::ICMP_NE, X->getD(), Y->getD())) { 446 X->setEmpty(); 447 ++DeltaSuccesses; 448 return true; 449 } 450 // Hmmm, interesting situation. 451 // I guess if either is constant, keep it and ignore the other. 452 if (isa<SCEVConstant>(Y->getD())) { 453 *X = *Y; 454 return true; 455 } 456 return false; 457 } 458 459 // At this point, the pseudo-code in Figure 4 of the paper 460 // checks if (X->isPoint() && Y->isPoint()). 461 // This case can't occur in our implementation, 462 // since a Point can only arise as the result of intersecting 463 // two Line constraints, and the right-hand value, Y, is never 464 // the result of an intersection. 465 assert(!(X->isPoint() && Y->isPoint()) && 466 "We shouldn't ever see X->isPoint() && Y->isPoint()"); 467 468 if (X->isLine() && Y->isLine()) { 469 LLVM_DEBUG(dbgs() << "\t intersect 2 lines\n"); 470 const SCEV *Prod1 = SE->getMulExpr(X->getA(), Y->getB()); 471 const SCEV *Prod2 = SE->getMulExpr(X->getB(), Y->getA()); 472 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) { 473 // slopes are equal, so lines are parallel 474 LLVM_DEBUG(dbgs() << "\t\tsame slope\n"); 475 Prod1 = SE->getMulExpr(X->getC(), Y->getB()); 476 Prod2 = SE->getMulExpr(X->getB(), Y->getC()); 477 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) 478 return false; 479 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { 480 X->setEmpty(); 481 ++DeltaSuccesses; 482 return true; 483 } 484 return false; 485 } 486 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { 487 // slopes differ, so lines intersect 488 LLVM_DEBUG(dbgs() << "\t\tdifferent slopes\n"); 489 const SCEV *C1B2 = SE->getMulExpr(X->getC(), Y->getB()); 490 const SCEV *C1A2 = SE->getMulExpr(X->getC(), Y->getA()); 491 const SCEV *C2B1 = SE->getMulExpr(Y->getC(), X->getB()); 492 const SCEV *C2A1 = SE->getMulExpr(Y->getC(), X->getA()); 493 const SCEV *A1B2 = SE->getMulExpr(X->getA(), Y->getB()); 494 const SCEV *A2B1 = SE->getMulExpr(Y->getA(), X->getB()); 495 const SCEVConstant *C1A2_C2A1 = 496 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1A2, C2A1)); 497 const SCEVConstant *C1B2_C2B1 = 498 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1B2, C2B1)); 499 const SCEVConstant *A1B2_A2B1 = 500 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A1B2, A2B1)); 501 const SCEVConstant *A2B1_A1B2 = 502 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A2B1, A1B2)); 503 if (!C1B2_C2B1 || !C1A2_C2A1 || 504 !A1B2_A2B1 || !A2B1_A1B2) 505 return false; 506 APInt Xtop = C1B2_C2B1->getAPInt(); 507 APInt Xbot = A1B2_A2B1->getAPInt(); 508 APInt Ytop = C1A2_C2A1->getAPInt(); 509 APInt Ybot = A2B1_A1B2->getAPInt(); 510 LLVM_DEBUG(dbgs() << "\t\tXtop = " << Xtop << "\n"); 511 LLVM_DEBUG(dbgs() << "\t\tXbot = " << Xbot << "\n"); 512 LLVM_DEBUG(dbgs() << "\t\tYtop = " << Ytop << "\n"); 513 LLVM_DEBUG(dbgs() << "\t\tYbot = " << Ybot << "\n"); 514 APInt Xq = Xtop; // these need to be initialized, even 515 APInt Xr = Xtop; // though they're just going to be overwritten 516 APInt::sdivrem(Xtop, Xbot, Xq, Xr); 517 APInt Yq = Ytop; 518 APInt Yr = Ytop; 519 APInt::sdivrem(Ytop, Ybot, Yq, Yr); 520 if (Xr != 0 || Yr != 0) { 521 X->setEmpty(); 522 ++DeltaSuccesses; 523 return true; 524 } 525 LLVM_DEBUG(dbgs() << "\t\tX = " << Xq << ", Y = " << Yq << "\n"); 526 if (Xq.slt(0) || Yq.slt(0)) { 527 X->setEmpty(); 528 ++DeltaSuccesses; 529 return true; 530 } 531 if (const SCEVConstant *CUB = 532 collectConstantUpperBound(X->getAssociatedLoop(), Prod1->getType())) { 533 const APInt &UpperBound = CUB->getAPInt(); 534 LLVM_DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n"); 535 if (Xq.sgt(UpperBound) || Yq.sgt(UpperBound)) { 536 X->setEmpty(); 537 ++DeltaSuccesses; 538 return true; 539 } 540 } 541 X->setPoint(SE->getConstant(Xq), 542 SE->getConstant(Yq), 543 X->getAssociatedLoop()); 544 ++DeltaSuccesses; 545 return true; 546 } 547 return false; 548 } 549 550 // if (X->isLine() && Y->isPoint()) This case can't occur. 551 assert(!(X->isLine() && Y->isPoint()) && "This case should never occur"); 552 553 if (X->isPoint() && Y->isLine()) { 554 LLVM_DEBUG(dbgs() << "\t intersect Point and Line\n"); 555 const SCEV *A1X1 = SE->getMulExpr(Y->getA(), X->getX()); 556 const SCEV *B1Y1 = SE->getMulExpr(Y->getB(), X->getY()); 557 const SCEV *Sum = SE->getAddExpr(A1X1, B1Y1); 558 if (isKnownPredicate(CmpInst::ICMP_EQ, Sum, Y->getC())) 559 return false; 560 if (isKnownPredicate(CmpInst::ICMP_NE, Sum, Y->getC())) { 561 X->setEmpty(); 562 ++DeltaSuccesses; 563 return true; 564 } 565 return false; 566 } 567 568 llvm_unreachable("shouldn't reach the end of Constraint intersection"); 569 return false; 570 } 571 572 573 //===----------------------------------------------------------------------===// 574 // DependenceInfo methods 575 576 // For debugging purposes. Dumps a dependence to OS. 577 void Dependence::dump(raw_ostream &OS) const { 578 bool Splitable = false; 579 if (isConfused()) 580 OS << "confused"; 581 else { 582 if (isConsistent()) 583 OS << "consistent "; 584 if (isFlow()) 585 OS << "flow"; 586 else if (isOutput()) 587 OS << "output"; 588 else if (isAnti()) 589 OS << "anti"; 590 else if (isInput()) 591 OS << "input"; 592 unsigned Levels = getLevels(); 593 OS << " ["; 594 for (unsigned II = 1; II <= Levels; ++II) { 595 if (isSplitable(II)) 596 Splitable = true; 597 if (isPeelFirst(II)) 598 OS << 'p'; 599 const SCEV *Distance = getDistance(II); 600 if (Distance) 601 OS << *Distance; 602 else if (isScalar(II)) 603 OS << "S"; 604 else { 605 unsigned Direction = getDirection(II); 606 if (Direction == DVEntry::ALL) 607 OS << "*"; 608 else { 609 if (Direction & DVEntry::LT) 610 OS << "<"; 611 if (Direction & DVEntry::EQ) 612 OS << "="; 613 if (Direction & DVEntry::GT) 614 OS << ">"; 615 } 616 } 617 if (isPeelLast(II)) 618 OS << 'p'; 619 if (II < Levels) 620 OS << " "; 621 } 622 if (isLoopIndependent()) 623 OS << "|<"; 624 OS << "]"; 625 if (Splitable) 626 OS << " splitable"; 627 } 628 OS << "!\n"; 629 } 630 631 // Returns NoAlias/MayAliass/MustAlias for two memory locations based upon their 632 // underlaying objects. If LocA and LocB are known to not alias (for any reason: 633 // tbaa, non-overlapping regions etc), then it is known there is no dependecy. 634 // Otherwise the underlying objects are checked to see if they point to 635 // different identifiable objects. 636 static AliasResult underlyingObjectsAlias(AliasAnalysis *AA, 637 const DataLayout &DL, 638 const MemoryLocation &LocA, 639 const MemoryLocation &LocB) { 640 // Check the original locations (minus size) for noalias, which can happen for 641 // tbaa, incompatible underlying object locations, etc. 642 MemoryLocation LocAS(LocA.Ptr, LocationSize::unknown(), LocA.AATags); 643 MemoryLocation LocBS(LocB.Ptr, LocationSize::unknown(), LocB.AATags); 644 if (AA->alias(LocAS, LocBS) == NoAlias) 645 return NoAlias; 646 647 // Check the underlying objects are the same 648 const Value *AObj = GetUnderlyingObject(LocA.Ptr, DL); 649 const Value *BObj = GetUnderlyingObject(LocB.Ptr, DL); 650 651 // If the underlying objects are the same, they must alias 652 if (AObj == BObj) 653 return MustAlias; 654 655 // We may have hit the recursion limit for underlying objects, or have 656 // underlying objects where we don't know they will alias. 657 if (!isIdentifiedObject(AObj) || !isIdentifiedObject(BObj)) 658 return MayAlias; 659 660 // Otherwise we know the objects are different and both identified objects so 661 // must not alias. 662 return NoAlias; 663 } 664 665 666 // Returns true if the load or store can be analyzed. Atomic and volatile 667 // operations have properties which this analysis does not understand. 668 static 669 bool isLoadOrStore(const Instruction *I) { 670 if (const LoadInst *LI = dyn_cast<LoadInst>(I)) 671 return LI->isUnordered(); 672 else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) 673 return SI->isUnordered(); 674 return false; 675 } 676 677 678 // Examines the loop nesting of the Src and Dst 679 // instructions and establishes their shared loops. Sets the variables 680 // CommonLevels, SrcLevels, and MaxLevels. 681 // The source and destination instructions needn't be contained in the same 682 // loop. The routine establishNestingLevels finds the level of most deeply 683 // nested loop that contains them both, CommonLevels. An instruction that's 684 // not contained in a loop is at level = 0. MaxLevels is equal to the level 685 // of the source plus the level of the destination, minus CommonLevels. 686 // This lets us allocate vectors MaxLevels in length, with room for every 687 // distinct loop referenced in both the source and destination subscripts. 688 // The variable SrcLevels is the nesting depth of the source instruction. 689 // It's used to help calculate distinct loops referenced by the destination. 690 // Here's the map from loops to levels: 691 // 0 - unused 692 // 1 - outermost common loop 693 // ... - other common loops 694 // CommonLevels - innermost common loop 695 // ... - loops containing Src but not Dst 696 // SrcLevels - innermost loop containing Src but not Dst 697 // ... - loops containing Dst but not Src 698 // MaxLevels - innermost loops containing Dst but not Src 699 // Consider the follow code fragment: 700 // for (a = ...) { 701 // for (b = ...) { 702 // for (c = ...) { 703 // for (d = ...) { 704 // A[] = ...; 705 // } 706 // } 707 // for (e = ...) { 708 // for (f = ...) { 709 // for (g = ...) { 710 // ... = A[]; 711 // } 712 // } 713 // } 714 // } 715 // } 716 // If we're looking at the possibility of a dependence between the store 717 // to A (the Src) and the load from A (the Dst), we'll note that they 718 // have 2 loops in common, so CommonLevels will equal 2 and the direction 719 // vector for Result will have 2 entries. SrcLevels = 4 and MaxLevels = 7. 720 // A map from loop names to loop numbers would look like 721 // a - 1 722 // b - 2 = CommonLevels 723 // c - 3 724 // d - 4 = SrcLevels 725 // e - 5 726 // f - 6 727 // g - 7 = MaxLevels 728 void DependenceInfo::establishNestingLevels(const Instruction *Src, 729 const Instruction *Dst) { 730 const BasicBlock *SrcBlock = Src->getParent(); 731 const BasicBlock *DstBlock = Dst->getParent(); 732 unsigned SrcLevel = LI->getLoopDepth(SrcBlock); 733 unsigned DstLevel = LI->getLoopDepth(DstBlock); 734 const Loop *SrcLoop = LI->getLoopFor(SrcBlock); 735 const Loop *DstLoop = LI->getLoopFor(DstBlock); 736 SrcLevels = SrcLevel; 737 MaxLevels = SrcLevel + DstLevel; 738 while (SrcLevel > DstLevel) { 739 SrcLoop = SrcLoop->getParentLoop(); 740 SrcLevel--; 741 } 742 while (DstLevel > SrcLevel) { 743 DstLoop = DstLoop->getParentLoop(); 744 DstLevel--; 745 } 746 while (SrcLoop != DstLoop) { 747 SrcLoop = SrcLoop->getParentLoop(); 748 DstLoop = DstLoop->getParentLoop(); 749 SrcLevel--; 750 } 751 CommonLevels = SrcLevel; 752 MaxLevels -= CommonLevels; 753 } 754 755 756 // Given one of the loops containing the source, return 757 // its level index in our numbering scheme. 758 unsigned DependenceInfo::mapSrcLoop(const Loop *SrcLoop) const { 759 return SrcLoop->getLoopDepth(); 760 } 761 762 763 // Given one of the loops containing the destination, 764 // return its level index in our numbering scheme. 765 unsigned DependenceInfo::mapDstLoop(const Loop *DstLoop) const { 766 unsigned D = DstLoop->getLoopDepth(); 767 if (D > CommonLevels) 768 return D - CommonLevels + SrcLevels; 769 else 770 return D; 771 } 772 773 774 // Returns true if Expression is loop invariant in LoopNest. 775 bool DependenceInfo::isLoopInvariant(const SCEV *Expression, 776 const Loop *LoopNest) const { 777 if (!LoopNest) 778 return true; 779 return SE->isLoopInvariant(Expression, LoopNest) && 780 isLoopInvariant(Expression, LoopNest->getParentLoop()); 781 } 782 783 784 785 // Finds the set of loops from the LoopNest that 786 // have a level <= CommonLevels and are referred to by the SCEV Expression. 787 void DependenceInfo::collectCommonLoops(const SCEV *Expression, 788 const Loop *LoopNest, 789 SmallBitVector &Loops) const { 790 while (LoopNest) { 791 unsigned Level = LoopNest->getLoopDepth(); 792 if (Level <= CommonLevels && !SE->isLoopInvariant(Expression, LoopNest)) 793 Loops.set(Level); 794 LoopNest = LoopNest->getParentLoop(); 795 } 796 } 797 798 void DependenceInfo::unifySubscriptType(ArrayRef<Subscript *> Pairs) { 799 800 unsigned widestWidthSeen = 0; 801 Type *widestType; 802 803 // Go through each pair and find the widest bit to which we need 804 // to extend all of them. 805 for (Subscript *Pair : Pairs) { 806 const SCEV *Src = Pair->Src; 807 const SCEV *Dst = Pair->Dst; 808 IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType()); 809 IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType()); 810 if (SrcTy == nullptr || DstTy == nullptr) { 811 assert(SrcTy == DstTy && "This function only unify integer types and " 812 "expect Src and Dst share the same type " 813 "otherwise."); 814 continue; 815 } 816 if (SrcTy->getBitWidth() > widestWidthSeen) { 817 widestWidthSeen = SrcTy->getBitWidth(); 818 widestType = SrcTy; 819 } 820 if (DstTy->getBitWidth() > widestWidthSeen) { 821 widestWidthSeen = DstTy->getBitWidth(); 822 widestType = DstTy; 823 } 824 } 825 826 827 assert(widestWidthSeen > 0); 828 829 // Now extend each pair to the widest seen. 830 for (Subscript *Pair : Pairs) { 831 const SCEV *Src = Pair->Src; 832 const SCEV *Dst = Pair->Dst; 833 IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType()); 834 IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType()); 835 if (SrcTy == nullptr || DstTy == nullptr) { 836 assert(SrcTy == DstTy && "This function only unify integer types and " 837 "expect Src and Dst share the same type " 838 "otherwise."); 839 continue; 840 } 841 if (SrcTy->getBitWidth() < widestWidthSeen) 842 // Sign-extend Src to widestType 843 Pair->Src = SE->getSignExtendExpr(Src, widestType); 844 if (DstTy->getBitWidth() < widestWidthSeen) { 845 // Sign-extend Dst to widestType 846 Pair->Dst = SE->getSignExtendExpr(Dst, widestType); 847 } 848 } 849 } 850 851 // removeMatchingExtensions - Examines a subscript pair. 852 // If the source and destination are identically sign (or zero) 853 // extended, it strips off the extension in an effect to simplify 854 // the actual analysis. 855 void DependenceInfo::removeMatchingExtensions(Subscript *Pair) { 856 const SCEV *Src = Pair->Src; 857 const SCEV *Dst = Pair->Dst; 858 if ((isa<SCEVZeroExtendExpr>(Src) && isa<SCEVZeroExtendExpr>(Dst)) || 859 (isa<SCEVSignExtendExpr>(Src) && isa<SCEVSignExtendExpr>(Dst))) { 860 const SCEVCastExpr *SrcCast = cast<SCEVCastExpr>(Src); 861 const SCEVCastExpr *DstCast = cast<SCEVCastExpr>(Dst); 862 const SCEV *SrcCastOp = SrcCast->getOperand(); 863 const SCEV *DstCastOp = DstCast->getOperand(); 864 if (SrcCastOp->getType() == DstCastOp->getType()) { 865 Pair->Src = SrcCastOp; 866 Pair->Dst = DstCastOp; 867 } 868 } 869 } 870 871 872 // Examine the scev and return true iff it's linear. 873 // Collect any loops mentioned in the set of "Loops". 874 bool DependenceInfo::checkSrcSubscript(const SCEV *Src, const Loop *LoopNest, 875 SmallBitVector &Loops) { 876 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Src); 877 if (!AddRec) 878 return isLoopInvariant(Src, LoopNest); 879 const SCEV *Start = AddRec->getStart(); 880 const SCEV *Step = AddRec->getStepRecurrence(*SE); 881 const SCEV *UB = SE->getBackedgeTakenCount(AddRec->getLoop()); 882 if (!isa<SCEVCouldNotCompute>(UB)) { 883 if (SE->getTypeSizeInBits(Start->getType()) < 884 SE->getTypeSizeInBits(UB->getType())) { 885 if (!AddRec->getNoWrapFlags()) 886 return false; 887 } 888 } 889 if (!isLoopInvariant(Step, LoopNest)) 890 return false; 891 Loops.set(mapSrcLoop(AddRec->getLoop())); 892 return checkSrcSubscript(Start, LoopNest, Loops); 893 } 894 895 896 897 // Examine the scev and return true iff it's linear. 898 // Collect any loops mentioned in the set of "Loops". 899 bool DependenceInfo::checkDstSubscript(const SCEV *Dst, const Loop *LoopNest, 900 SmallBitVector &Loops) { 901 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Dst); 902 if (!AddRec) 903 return isLoopInvariant(Dst, LoopNest); 904 const SCEV *Start = AddRec->getStart(); 905 const SCEV *Step = AddRec->getStepRecurrence(*SE); 906 const SCEV *UB = SE->getBackedgeTakenCount(AddRec->getLoop()); 907 if (!isa<SCEVCouldNotCompute>(UB)) { 908 if (SE->getTypeSizeInBits(Start->getType()) < 909 SE->getTypeSizeInBits(UB->getType())) { 910 if (!AddRec->getNoWrapFlags()) 911 return false; 912 } 913 } 914 if (!isLoopInvariant(Step, LoopNest)) 915 return false; 916 Loops.set(mapDstLoop(AddRec->getLoop())); 917 return checkDstSubscript(Start, LoopNest, Loops); 918 } 919 920 921 // Examines the subscript pair (the Src and Dst SCEVs) 922 // and classifies it as either ZIV, SIV, RDIV, MIV, or Nonlinear. 923 // Collects the associated loops in a set. 924 DependenceInfo::Subscript::ClassificationKind 925 DependenceInfo::classifyPair(const SCEV *Src, const Loop *SrcLoopNest, 926 const SCEV *Dst, const Loop *DstLoopNest, 927 SmallBitVector &Loops) { 928 SmallBitVector SrcLoops(MaxLevels + 1); 929 SmallBitVector DstLoops(MaxLevels + 1); 930 if (!checkSrcSubscript(Src, SrcLoopNest, SrcLoops)) 931 return Subscript::NonLinear; 932 if (!checkDstSubscript(Dst, DstLoopNest, DstLoops)) 933 return Subscript::NonLinear; 934 Loops = SrcLoops; 935 Loops |= DstLoops; 936 unsigned N = Loops.count(); 937 if (N == 0) 938 return Subscript::ZIV; 939 if (N == 1) 940 return Subscript::SIV; 941 if (N == 2 && (SrcLoops.count() == 0 || 942 DstLoops.count() == 0 || 943 (SrcLoops.count() == 1 && DstLoops.count() == 1))) 944 return Subscript::RDIV; 945 return Subscript::MIV; 946 } 947 948 949 // A wrapper around SCEV::isKnownPredicate. 950 // Looks for cases where we're interested in comparing for equality. 951 // If both X and Y have been identically sign or zero extended, 952 // it strips off the (confusing) extensions before invoking 953 // SCEV::isKnownPredicate. Perhaps, someday, the ScalarEvolution package 954 // will be similarly updated. 955 // 956 // If SCEV::isKnownPredicate can't prove the predicate, 957 // we try simple subtraction, which seems to help in some cases 958 // involving symbolics. 959 bool DependenceInfo::isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *X, 960 const SCEV *Y) const { 961 if (Pred == CmpInst::ICMP_EQ || 962 Pred == CmpInst::ICMP_NE) { 963 if ((isa<SCEVSignExtendExpr>(X) && 964 isa<SCEVSignExtendExpr>(Y)) || 965 (isa<SCEVZeroExtendExpr>(X) && 966 isa<SCEVZeroExtendExpr>(Y))) { 967 const SCEVCastExpr *CX = cast<SCEVCastExpr>(X); 968 const SCEVCastExpr *CY = cast<SCEVCastExpr>(Y); 969 const SCEV *Xop = CX->getOperand(); 970 const SCEV *Yop = CY->getOperand(); 971 if (Xop->getType() == Yop->getType()) { 972 X = Xop; 973 Y = Yop; 974 } 975 } 976 } 977 if (SE->isKnownPredicate(Pred, X, Y)) 978 return true; 979 // If SE->isKnownPredicate can't prove the condition, 980 // we try the brute-force approach of subtracting 981 // and testing the difference. 982 // By testing with SE->isKnownPredicate first, we avoid 983 // the possibility of overflow when the arguments are constants. 984 const SCEV *Delta = SE->getMinusSCEV(X, Y); 985 switch (Pred) { 986 case CmpInst::ICMP_EQ: 987 return Delta->isZero(); 988 case CmpInst::ICMP_NE: 989 return SE->isKnownNonZero(Delta); 990 case CmpInst::ICMP_SGE: 991 return SE->isKnownNonNegative(Delta); 992 case CmpInst::ICMP_SLE: 993 return SE->isKnownNonPositive(Delta); 994 case CmpInst::ICMP_SGT: 995 return SE->isKnownPositive(Delta); 996 case CmpInst::ICMP_SLT: 997 return SE->isKnownNegative(Delta); 998 default: 999 llvm_unreachable("unexpected predicate in isKnownPredicate"); 1000 } 1001 } 1002 1003 /// Compare to see if S is less than Size, using isKnownNegative(S - max(Size, 1)) 1004 /// with some extra checking if S is an AddRec and we can prove less-than using 1005 /// the loop bounds. 1006 bool DependenceInfo::isKnownLessThan(const SCEV *S, const SCEV *Size) const { 1007 // First unify to the same type 1008 auto *SType = dyn_cast<IntegerType>(S->getType()); 1009 auto *SizeType = dyn_cast<IntegerType>(Size->getType()); 1010 if (!SType || !SizeType) 1011 return false; 1012 Type *MaxType = 1013 (SType->getBitWidth() >= SizeType->getBitWidth()) ? SType : SizeType; 1014 S = SE->getTruncateOrZeroExtend(S, MaxType); 1015 Size = SE->getTruncateOrZeroExtend(Size, MaxType); 1016 1017 // Special check for addrecs using BE taken count 1018 const SCEV *Bound = SE->getMinusSCEV(S, Size); 1019 if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Bound)) { 1020 if (AddRec->isAffine()) { 1021 const SCEV *BECount = SE->getBackedgeTakenCount(AddRec->getLoop()); 1022 if (!isa<SCEVCouldNotCompute>(BECount)) { 1023 const SCEV *Limit = AddRec->evaluateAtIteration(BECount, *SE); 1024 if (SE->isKnownNegative(Limit)) 1025 return true; 1026 } 1027 } 1028 } 1029 1030 // Check using normal isKnownNegative 1031 const SCEV *LimitedBound = 1032 SE->getMinusSCEV(S, SE->getSMaxExpr(Size, SE->getOne(Size->getType()))); 1033 return SE->isKnownNegative(LimitedBound); 1034 } 1035 1036 bool DependenceInfo::isKnownNonNegative(const SCEV *S, const Value *Ptr) const { 1037 bool Inbounds = false; 1038 if (auto *SrcGEP = dyn_cast<GetElementPtrInst>(Ptr)) 1039 Inbounds = SrcGEP->isInBounds(); 1040 if (Inbounds) { 1041 if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) { 1042 if (AddRec->isAffine()) { 1043 // We know S is for Ptr, the operand on a load/store, so doesn't wrap. 1044 // If both parts are NonNegative, the end result will be NonNegative 1045 if (SE->isKnownNonNegative(AddRec->getStart()) && 1046 SE->isKnownNonNegative(AddRec->getOperand(1))) 1047 return true; 1048 } 1049 } 1050 } 1051 1052 return SE->isKnownNonNegative(S); 1053 } 1054 1055 // All subscripts are all the same type. 1056 // Loop bound may be smaller (e.g., a char). 1057 // Should zero extend loop bound, since it's always >= 0. 1058 // This routine collects upper bound and extends or truncates if needed. 1059 // Truncating is safe when subscripts are known not to wrap. Cases without 1060 // nowrap flags should have been rejected earlier. 1061 // Return null if no bound available. 1062 const SCEV *DependenceInfo::collectUpperBound(const Loop *L, Type *T) const { 1063 if (SE->hasLoopInvariantBackedgeTakenCount(L)) { 1064 const SCEV *UB = SE->getBackedgeTakenCount(L); 1065 return SE->getTruncateOrZeroExtend(UB, T); 1066 } 1067 return nullptr; 1068 } 1069 1070 1071 // Calls collectUpperBound(), then attempts to cast it to SCEVConstant. 1072 // If the cast fails, returns NULL. 1073 const SCEVConstant *DependenceInfo::collectConstantUpperBound(const Loop *L, 1074 Type *T) const { 1075 if (const SCEV *UB = collectUpperBound(L, T)) 1076 return dyn_cast<SCEVConstant>(UB); 1077 return nullptr; 1078 } 1079 1080 1081 // testZIV - 1082 // When we have a pair of subscripts of the form [c1] and [c2], 1083 // where c1 and c2 are both loop invariant, we attack it using 1084 // the ZIV test. Basically, we test by comparing the two values, 1085 // but there are actually three possible results: 1086 // 1) the values are equal, so there's a dependence 1087 // 2) the values are different, so there's no dependence 1088 // 3) the values might be equal, so we have to assume a dependence. 1089 // 1090 // Return true if dependence disproved. 1091 bool DependenceInfo::testZIV(const SCEV *Src, const SCEV *Dst, 1092 FullDependence &Result) const { 1093 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n"); 1094 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n"); 1095 ++ZIVapplications; 1096 if (isKnownPredicate(CmpInst::ICMP_EQ, Src, Dst)) { 1097 LLVM_DEBUG(dbgs() << " provably dependent\n"); 1098 return false; // provably dependent 1099 } 1100 if (isKnownPredicate(CmpInst::ICMP_NE, Src, Dst)) { 1101 LLVM_DEBUG(dbgs() << " provably independent\n"); 1102 ++ZIVindependence; 1103 return true; // provably independent 1104 } 1105 LLVM_DEBUG(dbgs() << " possibly dependent\n"); 1106 Result.Consistent = false; 1107 return false; // possibly dependent 1108 } 1109 1110 1111 // strongSIVtest - 1112 // From the paper, Practical Dependence Testing, Section 4.2.1 1113 // 1114 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 + a*i], 1115 // where i is an induction variable, c1 and c2 are loop invariant, 1116 // and a is a constant, we can solve it exactly using the Strong SIV test. 1117 // 1118 // Can prove independence. Failing that, can compute distance (and direction). 1119 // In the presence of symbolic terms, we can sometimes make progress. 1120 // 1121 // If there's a dependence, 1122 // 1123 // c1 + a*i = c2 + a*i' 1124 // 1125 // The dependence distance is 1126 // 1127 // d = i' - i = (c1 - c2)/a 1128 // 1129 // A dependence only exists if d is an integer and abs(d) <= U, where U is the 1130 // loop's upper bound. If a dependence exists, the dependence direction is 1131 // defined as 1132 // 1133 // { < if d > 0 1134 // direction = { = if d = 0 1135 // { > if d < 0 1136 // 1137 // Return true if dependence disproved. 1138 bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst, 1139 const SCEV *DstConst, const Loop *CurLoop, 1140 unsigned Level, FullDependence &Result, 1141 Constraint &NewConstraint) const { 1142 LLVM_DEBUG(dbgs() << "\tStrong SIV test\n"); 1143 LLVM_DEBUG(dbgs() << "\t Coeff = " << *Coeff); 1144 LLVM_DEBUG(dbgs() << ", " << *Coeff->getType() << "\n"); 1145 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst); 1146 LLVM_DEBUG(dbgs() << ", " << *SrcConst->getType() << "\n"); 1147 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst); 1148 LLVM_DEBUG(dbgs() << ", " << *DstConst->getType() << "\n"); 1149 ++StrongSIVapplications; 1150 assert(0 < Level && Level <= CommonLevels && "level out of range"); 1151 Level--; 1152 1153 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); 1154 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta); 1155 LLVM_DEBUG(dbgs() << ", " << *Delta->getType() << "\n"); 1156 1157 // check that |Delta| < iteration count 1158 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1159 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound); 1160 LLVM_DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n"); 1161 const SCEV *AbsDelta = 1162 SE->isKnownNonNegative(Delta) ? Delta : SE->getNegativeSCEV(Delta); 1163 const SCEV *AbsCoeff = 1164 SE->isKnownNonNegative(Coeff) ? Coeff : SE->getNegativeSCEV(Coeff); 1165 const SCEV *Product = SE->getMulExpr(UpperBound, AbsCoeff); 1166 if (isKnownPredicate(CmpInst::ICMP_SGT, AbsDelta, Product)) { 1167 // Distance greater than trip count - no dependence 1168 ++StrongSIVindependence; 1169 ++StrongSIVsuccesses; 1170 return true; 1171 } 1172 } 1173 1174 // Can we compute distance? 1175 if (isa<SCEVConstant>(Delta) && isa<SCEVConstant>(Coeff)) { 1176 APInt ConstDelta = cast<SCEVConstant>(Delta)->getAPInt(); 1177 APInt ConstCoeff = cast<SCEVConstant>(Coeff)->getAPInt(); 1178 APInt Distance = ConstDelta; // these need to be initialized 1179 APInt Remainder = ConstDelta; 1180 APInt::sdivrem(ConstDelta, ConstCoeff, Distance, Remainder); 1181 LLVM_DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); 1182 LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1183 // Make sure Coeff divides Delta exactly 1184 if (Remainder != 0) { 1185 // Coeff doesn't divide Distance, no dependence 1186 ++StrongSIVindependence; 1187 ++StrongSIVsuccesses; 1188 return true; 1189 } 1190 Result.DV[Level].Distance = SE->getConstant(Distance); 1191 NewConstraint.setDistance(SE->getConstant(Distance), CurLoop); 1192 if (Distance.sgt(0)) 1193 Result.DV[Level].Direction &= Dependence::DVEntry::LT; 1194 else if (Distance.slt(0)) 1195 Result.DV[Level].Direction &= Dependence::DVEntry::GT; 1196 else 1197 Result.DV[Level].Direction &= Dependence::DVEntry::EQ; 1198 ++StrongSIVsuccesses; 1199 } 1200 else if (Delta->isZero()) { 1201 // since 0/X == 0 1202 Result.DV[Level].Distance = Delta; 1203 NewConstraint.setDistance(Delta, CurLoop); 1204 Result.DV[Level].Direction &= Dependence::DVEntry::EQ; 1205 ++StrongSIVsuccesses; 1206 } 1207 else { 1208 if (Coeff->isOne()) { 1209 LLVM_DEBUG(dbgs() << "\t Distance = " << *Delta << "\n"); 1210 Result.DV[Level].Distance = Delta; // since X/1 == X 1211 NewConstraint.setDistance(Delta, CurLoop); 1212 } 1213 else { 1214 Result.Consistent = false; 1215 NewConstraint.setLine(Coeff, 1216 SE->getNegativeSCEV(Coeff), 1217 SE->getNegativeSCEV(Delta), CurLoop); 1218 } 1219 1220 // maybe we can get a useful direction 1221 bool DeltaMaybeZero = !SE->isKnownNonZero(Delta); 1222 bool DeltaMaybePositive = !SE->isKnownNonPositive(Delta); 1223 bool DeltaMaybeNegative = !SE->isKnownNonNegative(Delta); 1224 bool CoeffMaybePositive = !SE->isKnownNonPositive(Coeff); 1225 bool CoeffMaybeNegative = !SE->isKnownNonNegative(Coeff); 1226 // The double negatives above are confusing. 1227 // It helps to read !SE->isKnownNonZero(Delta) 1228 // as "Delta might be Zero" 1229 unsigned NewDirection = Dependence::DVEntry::NONE; 1230 if ((DeltaMaybePositive && CoeffMaybePositive) || 1231 (DeltaMaybeNegative && CoeffMaybeNegative)) 1232 NewDirection = Dependence::DVEntry::LT; 1233 if (DeltaMaybeZero) 1234 NewDirection |= Dependence::DVEntry::EQ; 1235 if ((DeltaMaybeNegative && CoeffMaybePositive) || 1236 (DeltaMaybePositive && CoeffMaybeNegative)) 1237 NewDirection |= Dependence::DVEntry::GT; 1238 if (NewDirection < Result.DV[Level].Direction) 1239 ++StrongSIVsuccesses; 1240 Result.DV[Level].Direction &= NewDirection; 1241 } 1242 return false; 1243 } 1244 1245 1246 // weakCrossingSIVtest - 1247 // From the paper, Practical Dependence Testing, Section 4.2.2 1248 // 1249 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 - a*i], 1250 // where i is an induction variable, c1 and c2 are loop invariant, 1251 // and a is a constant, we can solve it exactly using the 1252 // Weak-Crossing SIV test. 1253 // 1254 // Given c1 + a*i = c2 - a*i', we can look for the intersection of 1255 // the two lines, where i = i', yielding 1256 // 1257 // c1 + a*i = c2 - a*i 1258 // 2a*i = c2 - c1 1259 // i = (c2 - c1)/2a 1260 // 1261 // If i < 0, there is no dependence. 1262 // If i > upperbound, there is no dependence. 1263 // If i = 0 (i.e., if c1 = c2), there's a dependence with distance = 0. 1264 // If i = upperbound, there's a dependence with distance = 0. 1265 // If i is integral, there's a dependence (all directions). 1266 // If the non-integer part = 1/2, there's a dependence (<> directions). 1267 // Otherwise, there's no dependence. 1268 // 1269 // Can prove independence. Failing that, 1270 // can sometimes refine the directions. 1271 // Can determine iteration for splitting. 1272 // 1273 // Return true if dependence disproved. 1274 bool DependenceInfo::weakCrossingSIVtest( 1275 const SCEV *Coeff, const SCEV *SrcConst, const SCEV *DstConst, 1276 const Loop *CurLoop, unsigned Level, FullDependence &Result, 1277 Constraint &NewConstraint, const SCEV *&SplitIter) const { 1278 LLVM_DEBUG(dbgs() << "\tWeak-Crossing SIV test\n"); 1279 LLVM_DEBUG(dbgs() << "\t Coeff = " << *Coeff << "\n"); 1280 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1281 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1282 ++WeakCrossingSIVapplications; 1283 assert(0 < Level && Level <= CommonLevels && "Level out of range"); 1284 Level--; 1285 Result.Consistent = false; 1286 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1287 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1288 NewConstraint.setLine(Coeff, Coeff, Delta, CurLoop); 1289 if (Delta->isZero()) { 1290 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT); 1291 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT); 1292 ++WeakCrossingSIVsuccesses; 1293 if (!Result.DV[Level].Direction) { 1294 ++WeakCrossingSIVindependence; 1295 return true; 1296 } 1297 Result.DV[Level].Distance = Delta; // = 0 1298 return false; 1299 } 1300 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Coeff); 1301 if (!ConstCoeff) 1302 return false; 1303 1304 Result.DV[Level].Splitable = true; 1305 if (SE->isKnownNegative(ConstCoeff)) { 1306 ConstCoeff = dyn_cast<SCEVConstant>(SE->getNegativeSCEV(ConstCoeff)); 1307 assert(ConstCoeff && 1308 "dynamic cast of negative of ConstCoeff should yield constant"); 1309 Delta = SE->getNegativeSCEV(Delta); 1310 } 1311 assert(SE->isKnownPositive(ConstCoeff) && "ConstCoeff should be positive"); 1312 1313 // compute SplitIter for use by DependenceInfo::getSplitIteration() 1314 SplitIter = SE->getUDivExpr( 1315 SE->getSMaxExpr(SE->getZero(Delta->getType()), Delta), 1316 SE->getMulExpr(SE->getConstant(Delta->getType(), 2), ConstCoeff)); 1317 LLVM_DEBUG(dbgs() << "\t Split iter = " << *SplitIter << "\n"); 1318 1319 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1320 if (!ConstDelta) 1321 return false; 1322 1323 // We're certain that ConstCoeff > 0; therefore, 1324 // if Delta < 0, then no dependence. 1325 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1326 LLVM_DEBUG(dbgs() << "\t ConstCoeff = " << *ConstCoeff << "\n"); 1327 if (SE->isKnownNegative(Delta)) { 1328 // No dependence, Delta < 0 1329 ++WeakCrossingSIVindependence; 1330 ++WeakCrossingSIVsuccesses; 1331 return true; 1332 } 1333 1334 // We're certain that Delta > 0 and ConstCoeff > 0. 1335 // Check Delta/(2*ConstCoeff) against upper loop bound 1336 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1337 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1338 const SCEV *ConstantTwo = SE->getConstant(UpperBound->getType(), 2); 1339 const SCEV *ML = SE->getMulExpr(SE->getMulExpr(ConstCoeff, UpperBound), 1340 ConstantTwo); 1341 LLVM_DEBUG(dbgs() << "\t ML = " << *ML << "\n"); 1342 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, ML)) { 1343 // Delta too big, no dependence 1344 ++WeakCrossingSIVindependence; 1345 ++WeakCrossingSIVsuccesses; 1346 return true; 1347 } 1348 if (isKnownPredicate(CmpInst::ICMP_EQ, Delta, ML)) { 1349 // i = i' = UB 1350 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT); 1351 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT); 1352 ++WeakCrossingSIVsuccesses; 1353 if (!Result.DV[Level].Direction) { 1354 ++WeakCrossingSIVindependence; 1355 return true; 1356 } 1357 Result.DV[Level].Splitable = false; 1358 Result.DV[Level].Distance = SE->getZero(Delta->getType()); 1359 return false; 1360 } 1361 } 1362 1363 // check that Coeff divides Delta 1364 APInt APDelta = ConstDelta->getAPInt(); 1365 APInt APCoeff = ConstCoeff->getAPInt(); 1366 APInt Distance = APDelta; // these need to be initialzed 1367 APInt Remainder = APDelta; 1368 APInt::sdivrem(APDelta, APCoeff, Distance, Remainder); 1369 LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1370 if (Remainder != 0) { 1371 // Coeff doesn't divide Delta, no dependence 1372 ++WeakCrossingSIVindependence; 1373 ++WeakCrossingSIVsuccesses; 1374 return true; 1375 } 1376 LLVM_DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); 1377 1378 // if 2*Coeff doesn't divide Delta, then the equal direction isn't possible 1379 APInt Two = APInt(Distance.getBitWidth(), 2, true); 1380 Remainder = Distance.srem(Two); 1381 LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1382 if (Remainder != 0) { 1383 // Equal direction isn't possible 1384 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::EQ); 1385 ++WeakCrossingSIVsuccesses; 1386 } 1387 return false; 1388 } 1389 1390 1391 // Kirch's algorithm, from 1392 // 1393 // Optimizing Supercompilers for Supercomputers 1394 // Michael Wolfe 1395 // MIT Press, 1989 1396 // 1397 // Program 2.1, page 29. 1398 // Computes the GCD of AM and BM. 1399 // Also finds a solution to the equation ax - by = gcd(a, b). 1400 // Returns true if dependence disproved; i.e., gcd does not divide Delta. 1401 static bool findGCD(unsigned Bits, const APInt &AM, const APInt &BM, 1402 const APInt &Delta, APInt &G, APInt &X, APInt &Y) { 1403 APInt A0(Bits, 1, true), A1(Bits, 0, true); 1404 APInt B0(Bits, 0, true), B1(Bits, 1, true); 1405 APInt G0 = AM.abs(); 1406 APInt G1 = BM.abs(); 1407 APInt Q = G0; // these need to be initialized 1408 APInt R = G0; 1409 APInt::sdivrem(G0, G1, Q, R); 1410 while (R != 0) { 1411 APInt A2 = A0 - Q*A1; A0 = A1; A1 = A2; 1412 APInt B2 = B0 - Q*B1; B0 = B1; B1 = B2; 1413 G0 = G1; G1 = R; 1414 APInt::sdivrem(G0, G1, Q, R); 1415 } 1416 G = G1; 1417 LLVM_DEBUG(dbgs() << "\t GCD = " << G << "\n"); 1418 X = AM.slt(0) ? -A1 : A1; 1419 Y = BM.slt(0) ? B1 : -B1; 1420 1421 // make sure gcd divides Delta 1422 R = Delta.srem(G); 1423 if (R != 0) 1424 return true; // gcd doesn't divide Delta, no dependence 1425 Q = Delta.sdiv(G); 1426 X *= Q; 1427 Y *= Q; 1428 return false; 1429 } 1430 1431 static APInt floorOfQuotient(const APInt &A, const APInt &B) { 1432 APInt Q = A; // these need to be initialized 1433 APInt R = A; 1434 APInt::sdivrem(A, B, Q, R); 1435 if (R == 0) 1436 return Q; 1437 if ((A.sgt(0) && B.sgt(0)) || 1438 (A.slt(0) && B.slt(0))) 1439 return Q; 1440 else 1441 return Q - 1; 1442 } 1443 1444 static APInt ceilingOfQuotient(const APInt &A, const APInt &B) { 1445 APInt Q = A; // these need to be initialized 1446 APInt R = A; 1447 APInt::sdivrem(A, B, Q, R); 1448 if (R == 0) 1449 return Q; 1450 if ((A.sgt(0) && B.sgt(0)) || 1451 (A.slt(0) && B.slt(0))) 1452 return Q + 1; 1453 else 1454 return Q; 1455 } 1456 1457 1458 static 1459 APInt maxAPInt(APInt A, APInt B) { 1460 return A.sgt(B) ? A : B; 1461 } 1462 1463 1464 static 1465 APInt minAPInt(APInt A, APInt B) { 1466 return A.slt(B) ? A : B; 1467 } 1468 1469 1470 // exactSIVtest - 1471 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i], 1472 // where i is an induction variable, c1 and c2 are loop invariant, and a1 1473 // and a2 are constant, we can solve it exactly using an algorithm developed 1474 // by Banerjee and Wolfe. See Section 2.5.3 in 1475 // 1476 // Optimizing Supercompilers for Supercomputers 1477 // Michael Wolfe 1478 // MIT Press, 1989 1479 // 1480 // It's slower than the specialized tests (strong SIV, weak-zero SIV, etc), 1481 // so use them if possible. They're also a bit better with symbolics and, 1482 // in the case of the strong SIV test, can compute Distances. 1483 // 1484 // Return true if dependence disproved. 1485 bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, 1486 const SCEV *SrcConst, const SCEV *DstConst, 1487 const Loop *CurLoop, unsigned Level, 1488 FullDependence &Result, 1489 Constraint &NewConstraint) const { 1490 LLVM_DEBUG(dbgs() << "\tExact SIV test\n"); 1491 LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); 1492 LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); 1493 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1494 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1495 ++ExactSIVapplications; 1496 assert(0 < Level && Level <= CommonLevels && "Level out of range"); 1497 Level--; 1498 Result.Consistent = false; 1499 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1500 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1501 NewConstraint.setLine(SrcCoeff, SE->getNegativeSCEV(DstCoeff), 1502 Delta, CurLoop); 1503 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1504 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1505 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1506 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) 1507 return false; 1508 1509 // find gcd 1510 APInt G, X, Y; 1511 APInt AM = ConstSrcCoeff->getAPInt(); 1512 APInt BM = ConstDstCoeff->getAPInt(); 1513 unsigned Bits = AM.getBitWidth(); 1514 if (findGCD(Bits, AM, BM, ConstDelta->getAPInt(), G, X, Y)) { 1515 // gcd doesn't divide Delta, no dependence 1516 ++ExactSIVindependence; 1517 ++ExactSIVsuccesses; 1518 return true; 1519 } 1520 1521 LLVM_DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); 1522 1523 // since SCEV construction normalizes, LM = 0 1524 APInt UM(Bits, 1, true); 1525 bool UMvalid = false; 1526 // UM is perhaps unavailable, let's check 1527 if (const SCEVConstant *CUB = 1528 collectConstantUpperBound(CurLoop, Delta->getType())) { 1529 UM = CUB->getAPInt(); 1530 LLVM_DEBUG(dbgs() << "\t UM = " << UM << "\n"); 1531 UMvalid = true; 1532 } 1533 1534 APInt TU(APInt::getSignedMaxValue(Bits)); 1535 APInt TL(APInt::getSignedMinValue(Bits)); 1536 1537 // test(BM/G, LM-X) and test(-BM/G, X-UM) 1538 APInt TMUL = BM.sdiv(G); 1539 if (TMUL.sgt(0)) { 1540 TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL)); 1541 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1542 if (UMvalid) { 1543 TU = minAPInt(TU, floorOfQuotient(UM - X, TMUL)); 1544 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1545 } 1546 } 1547 else { 1548 TU = minAPInt(TU, floorOfQuotient(-X, TMUL)); 1549 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1550 if (UMvalid) { 1551 TL = maxAPInt(TL, ceilingOfQuotient(UM - X, TMUL)); 1552 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1553 } 1554 } 1555 1556 // test(AM/G, LM-Y) and test(-AM/G, Y-UM) 1557 TMUL = AM.sdiv(G); 1558 if (TMUL.sgt(0)) { 1559 TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL)); 1560 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1561 if (UMvalid) { 1562 TU = minAPInt(TU, floorOfQuotient(UM - Y, TMUL)); 1563 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1564 } 1565 } 1566 else { 1567 TU = minAPInt(TU, floorOfQuotient(-Y, TMUL)); 1568 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1569 if (UMvalid) { 1570 TL = maxAPInt(TL, ceilingOfQuotient(UM - Y, TMUL)); 1571 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1572 } 1573 } 1574 if (TL.sgt(TU)) { 1575 ++ExactSIVindependence; 1576 ++ExactSIVsuccesses; 1577 return true; 1578 } 1579 1580 // explore directions 1581 unsigned NewDirection = Dependence::DVEntry::NONE; 1582 1583 // less than 1584 APInt SaveTU(TU); // save these 1585 APInt SaveTL(TL); 1586 LLVM_DEBUG(dbgs() << "\t exploring LT direction\n"); 1587 TMUL = AM - BM; 1588 if (TMUL.sgt(0)) { 1589 TL = maxAPInt(TL, ceilingOfQuotient(X - Y + 1, TMUL)); 1590 LLVM_DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1591 } 1592 else { 1593 TU = minAPInt(TU, floorOfQuotient(X - Y + 1, TMUL)); 1594 LLVM_DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1595 } 1596 if (TL.sle(TU)) { 1597 NewDirection |= Dependence::DVEntry::LT; 1598 ++ExactSIVsuccesses; 1599 } 1600 1601 // equal 1602 TU = SaveTU; // restore 1603 TL = SaveTL; 1604 LLVM_DEBUG(dbgs() << "\t exploring EQ direction\n"); 1605 if (TMUL.sgt(0)) { 1606 TL = maxAPInt(TL, ceilingOfQuotient(X - Y, TMUL)); 1607 LLVM_DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1608 } 1609 else { 1610 TU = minAPInt(TU, floorOfQuotient(X - Y, TMUL)); 1611 LLVM_DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1612 } 1613 TMUL = BM - AM; 1614 if (TMUL.sgt(0)) { 1615 TL = maxAPInt(TL, ceilingOfQuotient(Y - X, TMUL)); 1616 LLVM_DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1617 } 1618 else { 1619 TU = minAPInt(TU, floorOfQuotient(Y - X, TMUL)); 1620 LLVM_DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1621 } 1622 if (TL.sle(TU)) { 1623 NewDirection |= Dependence::DVEntry::EQ; 1624 ++ExactSIVsuccesses; 1625 } 1626 1627 // greater than 1628 TU = SaveTU; // restore 1629 TL = SaveTL; 1630 LLVM_DEBUG(dbgs() << "\t exploring GT direction\n"); 1631 if (TMUL.sgt(0)) { 1632 TL = maxAPInt(TL, ceilingOfQuotient(Y - X + 1, TMUL)); 1633 LLVM_DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1634 } 1635 else { 1636 TU = minAPInt(TU, floorOfQuotient(Y - X + 1, TMUL)); 1637 LLVM_DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1638 } 1639 if (TL.sle(TU)) { 1640 NewDirection |= Dependence::DVEntry::GT; 1641 ++ExactSIVsuccesses; 1642 } 1643 1644 // finished 1645 Result.DV[Level].Direction &= NewDirection; 1646 if (Result.DV[Level].Direction == Dependence::DVEntry::NONE) 1647 ++ExactSIVindependence; 1648 return Result.DV[Level].Direction == Dependence::DVEntry::NONE; 1649 } 1650 1651 1652 1653 // Return true if the divisor evenly divides the dividend. 1654 static 1655 bool isRemainderZero(const SCEVConstant *Dividend, 1656 const SCEVConstant *Divisor) { 1657 const APInt &ConstDividend = Dividend->getAPInt(); 1658 const APInt &ConstDivisor = Divisor->getAPInt(); 1659 return ConstDividend.srem(ConstDivisor) == 0; 1660 } 1661 1662 1663 // weakZeroSrcSIVtest - 1664 // From the paper, Practical Dependence Testing, Section 4.2.2 1665 // 1666 // When we have a pair of subscripts of the form [c1] and [c2 + a*i], 1667 // where i is an induction variable, c1 and c2 are loop invariant, 1668 // and a is a constant, we can solve it exactly using the 1669 // Weak-Zero SIV test. 1670 // 1671 // Given 1672 // 1673 // c1 = c2 + a*i 1674 // 1675 // we get 1676 // 1677 // (c1 - c2)/a = i 1678 // 1679 // If i is not an integer, there's no dependence. 1680 // If i < 0 or > UB, there's no dependence. 1681 // If i = 0, the direction is >= and peeling the 1682 // 1st iteration will break the dependence. 1683 // If i = UB, the direction is <= and peeling the 1684 // last iteration will break the dependence. 1685 // Otherwise, the direction is *. 1686 // 1687 // Can prove independence. Failing that, we can sometimes refine 1688 // the directions. Can sometimes show that first or last 1689 // iteration carries all the dependences (so worth peeling). 1690 // 1691 // (see also weakZeroDstSIVtest) 1692 // 1693 // Return true if dependence disproved. 1694 bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *DstCoeff, 1695 const SCEV *SrcConst, 1696 const SCEV *DstConst, 1697 const Loop *CurLoop, unsigned Level, 1698 FullDependence &Result, 1699 Constraint &NewConstraint) const { 1700 // For the WeakSIV test, it's possible the loop isn't common to 1701 // the Src and Dst loops. If it isn't, then there's no need to 1702 // record a direction. 1703 LLVM_DEBUG(dbgs() << "\tWeak-Zero (src) SIV test\n"); 1704 LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << "\n"); 1705 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1706 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1707 ++WeakZeroSIVapplications; 1708 assert(0 < Level && Level <= MaxLevels && "Level out of range"); 1709 Level--; 1710 Result.Consistent = false; 1711 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); 1712 NewConstraint.setLine(SE->getZero(Delta->getType()), DstCoeff, Delta, 1713 CurLoop); 1714 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1715 if (isKnownPredicate(CmpInst::ICMP_EQ, SrcConst, DstConst)) { 1716 if (Level < CommonLevels) { 1717 Result.DV[Level].Direction &= Dependence::DVEntry::GE; 1718 Result.DV[Level].PeelFirst = true; 1719 ++WeakZeroSIVsuccesses; 1720 } 1721 return false; // dependences caused by first iteration 1722 } 1723 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1724 if (!ConstCoeff) 1725 return false; 1726 const SCEV *AbsCoeff = 1727 SE->isKnownNegative(ConstCoeff) ? 1728 SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; 1729 const SCEV *NewDelta = 1730 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta; 1731 1732 // check that Delta/SrcCoeff < iteration count 1733 // really check NewDelta < count*AbsCoeff 1734 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1735 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1736 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); 1737 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { 1738 ++WeakZeroSIVindependence; 1739 ++WeakZeroSIVsuccesses; 1740 return true; 1741 } 1742 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { 1743 // dependences caused by last iteration 1744 if (Level < CommonLevels) { 1745 Result.DV[Level].Direction &= Dependence::DVEntry::LE; 1746 Result.DV[Level].PeelLast = true; 1747 ++WeakZeroSIVsuccesses; 1748 } 1749 return false; 1750 } 1751 } 1752 1753 // check that Delta/SrcCoeff >= 0 1754 // really check that NewDelta >= 0 1755 if (SE->isKnownNegative(NewDelta)) { 1756 // No dependence, newDelta < 0 1757 ++WeakZeroSIVindependence; 1758 ++WeakZeroSIVsuccesses; 1759 return true; 1760 } 1761 1762 // if SrcCoeff doesn't divide Delta, then no dependence 1763 if (isa<SCEVConstant>(Delta) && 1764 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { 1765 ++WeakZeroSIVindependence; 1766 ++WeakZeroSIVsuccesses; 1767 return true; 1768 } 1769 return false; 1770 } 1771 1772 1773 // weakZeroDstSIVtest - 1774 // From the paper, Practical Dependence Testing, Section 4.2.2 1775 // 1776 // When we have a pair of subscripts of the form [c1 + a*i] and [c2], 1777 // where i is an induction variable, c1 and c2 are loop invariant, 1778 // and a is a constant, we can solve it exactly using the 1779 // Weak-Zero SIV test. 1780 // 1781 // Given 1782 // 1783 // c1 + a*i = c2 1784 // 1785 // we get 1786 // 1787 // i = (c2 - c1)/a 1788 // 1789 // If i is not an integer, there's no dependence. 1790 // If i < 0 or > UB, there's no dependence. 1791 // If i = 0, the direction is <= and peeling the 1792 // 1st iteration will break the dependence. 1793 // If i = UB, the direction is >= and peeling the 1794 // last iteration will break the dependence. 1795 // Otherwise, the direction is *. 1796 // 1797 // Can prove independence. Failing that, we can sometimes refine 1798 // the directions. Can sometimes show that first or last 1799 // iteration carries all the dependences (so worth peeling). 1800 // 1801 // (see also weakZeroSrcSIVtest) 1802 // 1803 // Return true if dependence disproved. 1804 bool DependenceInfo::weakZeroDstSIVtest(const SCEV *SrcCoeff, 1805 const SCEV *SrcConst, 1806 const SCEV *DstConst, 1807 const Loop *CurLoop, unsigned Level, 1808 FullDependence &Result, 1809 Constraint &NewConstraint) const { 1810 // For the WeakSIV test, it's possible the loop isn't common to the 1811 // Src and Dst loops. If it isn't, then there's no need to record a direction. 1812 LLVM_DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n"); 1813 LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << "\n"); 1814 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1815 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1816 ++WeakZeroSIVapplications; 1817 assert(0 < Level && Level <= SrcLevels && "Level out of range"); 1818 Level--; 1819 Result.Consistent = false; 1820 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1821 NewConstraint.setLine(SrcCoeff, SE->getZero(Delta->getType()), Delta, 1822 CurLoop); 1823 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1824 if (isKnownPredicate(CmpInst::ICMP_EQ, DstConst, SrcConst)) { 1825 if (Level < CommonLevels) { 1826 Result.DV[Level].Direction &= Dependence::DVEntry::LE; 1827 Result.DV[Level].PeelFirst = true; 1828 ++WeakZeroSIVsuccesses; 1829 } 1830 return false; // dependences caused by first iteration 1831 } 1832 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1833 if (!ConstCoeff) 1834 return false; 1835 const SCEV *AbsCoeff = 1836 SE->isKnownNegative(ConstCoeff) ? 1837 SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; 1838 const SCEV *NewDelta = 1839 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta; 1840 1841 // check that Delta/SrcCoeff < iteration count 1842 // really check NewDelta < count*AbsCoeff 1843 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1844 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1845 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); 1846 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { 1847 ++WeakZeroSIVindependence; 1848 ++WeakZeroSIVsuccesses; 1849 return true; 1850 } 1851 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { 1852 // dependences caused by last iteration 1853 if (Level < CommonLevels) { 1854 Result.DV[Level].Direction &= Dependence::DVEntry::GE; 1855 Result.DV[Level].PeelLast = true; 1856 ++WeakZeroSIVsuccesses; 1857 } 1858 return false; 1859 } 1860 } 1861 1862 // check that Delta/SrcCoeff >= 0 1863 // really check that NewDelta >= 0 1864 if (SE->isKnownNegative(NewDelta)) { 1865 // No dependence, newDelta < 0 1866 ++WeakZeroSIVindependence; 1867 ++WeakZeroSIVsuccesses; 1868 return true; 1869 } 1870 1871 // if SrcCoeff doesn't divide Delta, then no dependence 1872 if (isa<SCEVConstant>(Delta) && 1873 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { 1874 ++WeakZeroSIVindependence; 1875 ++WeakZeroSIVsuccesses; 1876 return true; 1877 } 1878 return false; 1879 } 1880 1881 1882 // exactRDIVtest - Tests the RDIV subscript pair for dependence. 1883 // Things of the form [c1 + a*i] and [c2 + b*j], 1884 // where i and j are induction variable, c1 and c2 are loop invariant, 1885 // and a and b are constants. 1886 // Returns true if any possible dependence is disproved. 1887 // Marks the result as inconsistent. 1888 // Works in some cases that symbolicRDIVtest doesn't, and vice versa. 1889 bool DependenceInfo::exactRDIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, 1890 const SCEV *SrcConst, const SCEV *DstConst, 1891 const Loop *SrcLoop, const Loop *DstLoop, 1892 FullDependence &Result) const { 1893 LLVM_DEBUG(dbgs() << "\tExact RDIV test\n"); 1894 LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); 1895 LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); 1896 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1897 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1898 ++ExactRDIVapplications; 1899 Result.Consistent = false; 1900 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1901 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1902 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1903 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1904 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1905 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) 1906 return false; 1907 1908 // find gcd 1909 APInt G, X, Y; 1910 APInt AM = ConstSrcCoeff->getAPInt(); 1911 APInt BM = ConstDstCoeff->getAPInt(); 1912 unsigned Bits = AM.getBitWidth(); 1913 if (findGCD(Bits, AM, BM, ConstDelta->getAPInt(), G, X, Y)) { 1914 // gcd doesn't divide Delta, no dependence 1915 ++ExactRDIVindependence; 1916 return true; 1917 } 1918 1919 LLVM_DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); 1920 1921 // since SCEV construction seems to normalize, LM = 0 1922 APInt SrcUM(Bits, 1, true); 1923 bool SrcUMvalid = false; 1924 // SrcUM is perhaps unavailable, let's check 1925 if (const SCEVConstant *UpperBound = 1926 collectConstantUpperBound(SrcLoop, Delta->getType())) { 1927 SrcUM = UpperBound->getAPInt(); 1928 LLVM_DEBUG(dbgs() << "\t SrcUM = " << SrcUM << "\n"); 1929 SrcUMvalid = true; 1930 } 1931 1932 APInt DstUM(Bits, 1, true); 1933 bool DstUMvalid = false; 1934 // UM is perhaps unavailable, let's check 1935 if (const SCEVConstant *UpperBound = 1936 collectConstantUpperBound(DstLoop, Delta->getType())) { 1937 DstUM = UpperBound->getAPInt(); 1938 LLVM_DEBUG(dbgs() << "\t DstUM = " << DstUM << "\n"); 1939 DstUMvalid = true; 1940 } 1941 1942 APInt TU(APInt::getSignedMaxValue(Bits)); 1943 APInt TL(APInt::getSignedMinValue(Bits)); 1944 1945 // test(BM/G, LM-X) and test(-BM/G, X-UM) 1946 APInt TMUL = BM.sdiv(G); 1947 if (TMUL.sgt(0)) { 1948 TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL)); 1949 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1950 if (SrcUMvalid) { 1951 TU = minAPInt(TU, floorOfQuotient(SrcUM - X, TMUL)); 1952 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1953 } 1954 } 1955 else { 1956 TU = minAPInt(TU, floorOfQuotient(-X, TMUL)); 1957 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1958 if (SrcUMvalid) { 1959 TL = maxAPInt(TL, ceilingOfQuotient(SrcUM - X, TMUL)); 1960 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1961 } 1962 } 1963 1964 // test(AM/G, LM-Y) and test(-AM/G, Y-UM) 1965 TMUL = AM.sdiv(G); 1966 if (TMUL.sgt(0)) { 1967 TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL)); 1968 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1969 if (DstUMvalid) { 1970 TU = minAPInt(TU, floorOfQuotient(DstUM - Y, TMUL)); 1971 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1972 } 1973 } 1974 else { 1975 TU = minAPInt(TU, floorOfQuotient(-Y, TMUL)); 1976 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1977 if (DstUMvalid) { 1978 TL = maxAPInt(TL, ceilingOfQuotient(DstUM - Y, TMUL)); 1979 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1980 } 1981 } 1982 if (TL.sgt(TU)) 1983 ++ExactRDIVindependence; 1984 return TL.sgt(TU); 1985 } 1986 1987 1988 // symbolicRDIVtest - 1989 // In Section 4.5 of the Practical Dependence Testing paper,the authors 1990 // introduce a special case of Banerjee's Inequalities (also called the 1991 // Extreme-Value Test) that can handle some of the SIV and RDIV cases, 1992 // particularly cases with symbolics. Since it's only able to disprove 1993 // dependence (not compute distances or directions), we'll use it as a 1994 // fall back for the other tests. 1995 // 1996 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] 1997 // where i and j are induction variables and c1 and c2 are loop invariants, 1998 // we can use the symbolic tests to disprove some dependences, serving as a 1999 // backup for the RDIV test. Note that i and j can be the same variable, 2000 // letting this test serve as a backup for the various SIV tests. 2001 // 2002 // For a dependence to exist, c1 + a1*i must equal c2 + a2*j for some 2003 // 0 <= i <= N1 and some 0 <= j <= N2, where N1 and N2 are the (normalized) 2004 // loop bounds for the i and j loops, respectively. So, ... 2005 // 2006 // c1 + a1*i = c2 + a2*j 2007 // a1*i - a2*j = c2 - c1 2008 // 2009 // To test for a dependence, we compute c2 - c1 and make sure it's in the 2010 // range of the maximum and minimum possible values of a1*i - a2*j. 2011 // Considering the signs of a1 and a2, we have 4 possible cases: 2012 // 2013 // 1) If a1 >= 0 and a2 >= 0, then 2014 // a1*0 - a2*N2 <= c2 - c1 <= a1*N1 - a2*0 2015 // -a2*N2 <= c2 - c1 <= a1*N1 2016 // 2017 // 2) If a1 >= 0 and a2 <= 0, then 2018 // a1*0 - a2*0 <= c2 - c1 <= a1*N1 - a2*N2 2019 // 0 <= c2 - c1 <= a1*N1 - a2*N2 2020 // 2021 // 3) If a1 <= 0 and a2 >= 0, then 2022 // a1*N1 - a2*N2 <= c2 - c1 <= a1*0 - a2*0 2023 // a1*N1 - a2*N2 <= c2 - c1 <= 0 2024 // 2025 // 4) If a1 <= 0 and a2 <= 0, then 2026 // a1*N1 - a2*0 <= c2 - c1 <= a1*0 - a2*N2 2027 // a1*N1 <= c2 - c1 <= -a2*N2 2028 // 2029 // return true if dependence disproved 2030 bool DependenceInfo::symbolicRDIVtest(const SCEV *A1, const SCEV *A2, 2031 const SCEV *C1, const SCEV *C2, 2032 const Loop *Loop1, 2033 const Loop *Loop2) const { 2034 ++SymbolicRDIVapplications; 2035 LLVM_DEBUG(dbgs() << "\ttry symbolic RDIV test\n"); 2036 LLVM_DEBUG(dbgs() << "\t A1 = " << *A1); 2037 LLVM_DEBUG(dbgs() << ", type = " << *A1->getType() << "\n"); 2038 LLVM_DEBUG(dbgs() << "\t A2 = " << *A2 << "\n"); 2039 LLVM_DEBUG(dbgs() << "\t C1 = " << *C1 << "\n"); 2040 LLVM_DEBUG(dbgs() << "\t C2 = " << *C2 << "\n"); 2041 const SCEV *N1 = collectUpperBound(Loop1, A1->getType()); 2042 const SCEV *N2 = collectUpperBound(Loop2, A1->getType()); 2043 LLVM_DEBUG(if (N1) dbgs() << "\t N1 = " << *N1 << "\n"); 2044 LLVM_DEBUG(if (N2) dbgs() << "\t N2 = " << *N2 << "\n"); 2045 const SCEV *C2_C1 = SE->getMinusSCEV(C2, C1); 2046 const SCEV *C1_C2 = SE->getMinusSCEV(C1, C2); 2047 LLVM_DEBUG(dbgs() << "\t C2 - C1 = " << *C2_C1 << "\n"); 2048 LLVM_DEBUG(dbgs() << "\t C1 - C2 = " << *C1_C2 << "\n"); 2049 if (SE->isKnownNonNegative(A1)) { 2050 if (SE->isKnownNonNegative(A2)) { 2051 // A1 >= 0 && A2 >= 0 2052 if (N1) { 2053 // make sure that c2 - c1 <= a1*N1 2054 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2055 LLVM_DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); 2056 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1)) { 2057 ++SymbolicRDIVindependence; 2058 return true; 2059 } 2060 } 2061 if (N2) { 2062 // make sure that -a2*N2 <= c2 - c1, or a2*N2 >= c1 - c2 2063 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2064 LLVM_DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); 2065 if (isKnownPredicate(CmpInst::ICMP_SLT, A2N2, C1_C2)) { 2066 ++SymbolicRDIVindependence; 2067 return true; 2068 } 2069 } 2070 } 2071 else if (SE->isKnownNonPositive(A2)) { 2072 // a1 >= 0 && a2 <= 0 2073 if (N1 && N2) { 2074 // make sure that c2 - c1 <= a1*N1 - a2*N2 2075 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2076 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2077 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); 2078 LLVM_DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); 2079 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1_A2N2)) { 2080 ++SymbolicRDIVindependence; 2081 return true; 2082 } 2083 } 2084 // make sure that 0 <= c2 - c1 2085 if (SE->isKnownNegative(C2_C1)) { 2086 ++SymbolicRDIVindependence; 2087 return true; 2088 } 2089 } 2090 } 2091 else if (SE->isKnownNonPositive(A1)) { 2092 if (SE->isKnownNonNegative(A2)) { 2093 // a1 <= 0 && a2 >= 0 2094 if (N1 && N2) { 2095 // make sure that a1*N1 - a2*N2 <= c2 - c1 2096 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2097 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2098 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); 2099 LLVM_DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); 2100 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1_A2N2, C2_C1)) { 2101 ++SymbolicRDIVindependence; 2102 return true; 2103 } 2104 } 2105 // make sure that c2 - c1 <= 0 2106 if (SE->isKnownPositive(C2_C1)) { 2107 ++SymbolicRDIVindependence; 2108 return true; 2109 } 2110 } 2111 else if (SE->isKnownNonPositive(A2)) { 2112 // a1 <= 0 && a2 <= 0 2113 if (N1) { 2114 // make sure that a1*N1 <= c2 - c1 2115 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2116 LLVM_DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); 2117 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1, C2_C1)) { 2118 ++SymbolicRDIVindependence; 2119 return true; 2120 } 2121 } 2122 if (N2) { 2123 // make sure that c2 - c1 <= -a2*N2, or c1 - c2 >= a2*N2 2124 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2125 LLVM_DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); 2126 if (isKnownPredicate(CmpInst::ICMP_SLT, C1_C2, A2N2)) { 2127 ++SymbolicRDIVindependence; 2128 return true; 2129 } 2130 } 2131 } 2132 } 2133 return false; 2134 } 2135 2136 2137 // testSIV - 2138 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 - a2*i] 2139 // where i is an induction variable, c1 and c2 are loop invariant, and a1 and 2140 // a2 are constant, we attack it with an SIV test. While they can all be 2141 // solved with the Exact SIV test, it's worthwhile to use simpler tests when 2142 // they apply; they're cheaper and sometimes more precise. 2143 // 2144 // Return true if dependence disproved. 2145 bool DependenceInfo::testSIV(const SCEV *Src, const SCEV *Dst, unsigned &Level, 2146 FullDependence &Result, Constraint &NewConstraint, 2147 const SCEV *&SplitIter) const { 2148 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n"); 2149 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2150 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); 2151 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); 2152 if (SrcAddRec && DstAddRec) { 2153 const SCEV *SrcConst = SrcAddRec->getStart(); 2154 const SCEV *DstConst = DstAddRec->getStart(); 2155 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2156 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); 2157 const Loop *CurLoop = SrcAddRec->getLoop(); 2158 assert(CurLoop == DstAddRec->getLoop() && 2159 "both loops in SIV should be same"); 2160 Level = mapSrcLoop(CurLoop); 2161 bool disproven; 2162 if (SrcCoeff == DstCoeff) 2163 disproven = strongSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2164 Level, Result, NewConstraint); 2165 else if (SrcCoeff == SE->getNegativeSCEV(DstCoeff)) 2166 disproven = weakCrossingSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2167 Level, Result, NewConstraint, SplitIter); 2168 else 2169 disproven = exactSIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, 2170 Level, Result, NewConstraint); 2171 return disproven || 2172 gcdMIVtest(Src, Dst, Result) || 2173 symbolicRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, CurLoop); 2174 } 2175 if (SrcAddRec) { 2176 const SCEV *SrcConst = SrcAddRec->getStart(); 2177 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2178 const SCEV *DstConst = Dst; 2179 const Loop *CurLoop = SrcAddRec->getLoop(); 2180 Level = mapSrcLoop(CurLoop); 2181 return weakZeroDstSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2182 Level, Result, NewConstraint) || 2183 gcdMIVtest(Src, Dst, Result); 2184 } 2185 if (DstAddRec) { 2186 const SCEV *DstConst = DstAddRec->getStart(); 2187 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); 2188 const SCEV *SrcConst = Src; 2189 const Loop *CurLoop = DstAddRec->getLoop(); 2190 Level = mapDstLoop(CurLoop); 2191 return weakZeroSrcSIVtest(DstCoeff, SrcConst, DstConst, 2192 CurLoop, Level, Result, NewConstraint) || 2193 gcdMIVtest(Src, Dst, Result); 2194 } 2195 llvm_unreachable("SIV test expected at least one AddRec"); 2196 return false; 2197 } 2198 2199 2200 // testRDIV - 2201 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] 2202 // where i and j are induction variables, c1 and c2 are loop invariant, 2203 // and a1 and a2 are constant, we can solve it exactly with an easy adaptation 2204 // of the Exact SIV test, the Restricted Double Index Variable (RDIV) test. 2205 // It doesn't make sense to talk about distance or direction in this case, 2206 // so there's no point in making special versions of the Strong SIV test or 2207 // the Weak-crossing SIV test. 2208 // 2209 // With minor algebra, this test can also be used for things like 2210 // [c1 + a1*i + a2*j][c2]. 2211 // 2212 // Return true if dependence disproved. 2213 bool DependenceInfo::testRDIV(const SCEV *Src, const SCEV *Dst, 2214 FullDependence &Result) const { 2215 // we have 3 possible situations here: 2216 // 1) [a*i + b] and [c*j + d] 2217 // 2) [a*i + c*j + b] and [d] 2218 // 3) [b] and [a*i + c*j + d] 2219 // We need to find what we've got and get organized 2220 2221 const SCEV *SrcConst, *DstConst; 2222 const SCEV *SrcCoeff, *DstCoeff; 2223 const Loop *SrcLoop, *DstLoop; 2224 2225 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n"); 2226 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2227 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); 2228 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); 2229 if (SrcAddRec && DstAddRec) { 2230 SrcConst = SrcAddRec->getStart(); 2231 SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2232 SrcLoop = SrcAddRec->getLoop(); 2233 DstConst = DstAddRec->getStart(); 2234 DstCoeff = DstAddRec->getStepRecurrence(*SE); 2235 DstLoop = DstAddRec->getLoop(); 2236 } 2237 else if (SrcAddRec) { 2238 if (const SCEVAddRecExpr *tmpAddRec = 2239 dyn_cast<SCEVAddRecExpr>(SrcAddRec->getStart())) { 2240 SrcConst = tmpAddRec->getStart(); 2241 SrcCoeff = tmpAddRec->getStepRecurrence(*SE); 2242 SrcLoop = tmpAddRec->getLoop(); 2243 DstConst = Dst; 2244 DstCoeff = SE->getNegativeSCEV(SrcAddRec->getStepRecurrence(*SE)); 2245 DstLoop = SrcAddRec->getLoop(); 2246 } 2247 else 2248 llvm_unreachable("RDIV reached by surprising SCEVs"); 2249 } 2250 else if (DstAddRec) { 2251 if (const SCEVAddRecExpr *tmpAddRec = 2252 dyn_cast<SCEVAddRecExpr>(DstAddRec->getStart())) { 2253 DstConst = tmpAddRec->getStart(); 2254 DstCoeff = tmpAddRec->getStepRecurrence(*SE); 2255 DstLoop = tmpAddRec->getLoop(); 2256 SrcConst = Src; 2257 SrcCoeff = SE->getNegativeSCEV(DstAddRec->getStepRecurrence(*SE)); 2258 SrcLoop = DstAddRec->getLoop(); 2259 } 2260 else 2261 llvm_unreachable("RDIV reached by surprising SCEVs"); 2262 } 2263 else 2264 llvm_unreachable("RDIV expected at least one AddRec"); 2265 return exactRDIVtest(SrcCoeff, DstCoeff, 2266 SrcConst, DstConst, 2267 SrcLoop, DstLoop, 2268 Result) || 2269 gcdMIVtest(Src, Dst, Result) || 2270 symbolicRDIVtest(SrcCoeff, DstCoeff, 2271 SrcConst, DstConst, 2272 SrcLoop, DstLoop); 2273 } 2274 2275 2276 // Tests the single-subscript MIV pair (Src and Dst) for dependence. 2277 // Return true if dependence disproved. 2278 // Can sometimes refine direction vectors. 2279 bool DependenceInfo::testMIV(const SCEV *Src, const SCEV *Dst, 2280 const SmallBitVector &Loops, 2281 FullDependence &Result) const { 2282 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n"); 2283 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2284 Result.Consistent = false; 2285 return gcdMIVtest(Src, Dst, Result) || 2286 banerjeeMIVtest(Src, Dst, Loops, Result); 2287 } 2288 2289 2290 // Given a product, e.g., 10*X*Y, returns the first constant operand, 2291 // in this case 10. If there is no constant part, returns NULL. 2292 static 2293 const SCEVConstant *getConstantPart(const SCEV *Expr) { 2294 if (const auto *Constant = dyn_cast<SCEVConstant>(Expr)) 2295 return Constant; 2296 else if (const auto *Product = dyn_cast<SCEVMulExpr>(Expr)) 2297 if (const auto *Constant = dyn_cast<SCEVConstant>(Product->getOperand(0))) 2298 return Constant; 2299 return nullptr; 2300 } 2301 2302 2303 //===----------------------------------------------------------------------===// 2304 // gcdMIVtest - 2305 // Tests an MIV subscript pair for dependence. 2306 // Returns true if any possible dependence is disproved. 2307 // Marks the result as inconsistent. 2308 // Can sometimes disprove the equal direction for 1 or more loops, 2309 // as discussed in Michael Wolfe's book, 2310 // High Performance Compilers for Parallel Computing, page 235. 2311 // 2312 // We spend some effort (code!) to handle cases like 2313 // [10*i + 5*N*j + 15*M + 6], where i and j are induction variables, 2314 // but M and N are just loop-invariant variables. 2315 // This should help us handle linearized subscripts; 2316 // also makes this test a useful backup to the various SIV tests. 2317 // 2318 // It occurs to me that the presence of loop-invariant variables 2319 // changes the nature of the test from "greatest common divisor" 2320 // to "a common divisor". 2321 bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst, 2322 FullDependence &Result) const { 2323 LLVM_DEBUG(dbgs() << "starting gcd\n"); 2324 ++GCDapplications; 2325 unsigned BitWidth = SE->getTypeSizeInBits(Src->getType()); 2326 APInt RunningGCD = APInt::getNullValue(BitWidth); 2327 2328 // Examine Src coefficients. 2329 // Compute running GCD and record source constant. 2330 // Because we're looking for the constant at the end of the chain, 2331 // we can't quit the loop just because the GCD == 1. 2332 const SCEV *Coefficients = Src; 2333 while (const SCEVAddRecExpr *AddRec = 2334 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2335 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2336 // If the coefficient is the product of a constant and other stuff, 2337 // we can use the constant in the GCD computation. 2338 const auto *Constant = getConstantPart(Coeff); 2339 if (!Constant) 2340 return false; 2341 APInt ConstCoeff = Constant->getAPInt(); 2342 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2343 Coefficients = AddRec->getStart(); 2344 } 2345 const SCEV *SrcConst = Coefficients; 2346 2347 // Examine Dst coefficients. 2348 // Compute running GCD and record destination constant. 2349 // Because we're looking for the constant at the end of the chain, 2350 // we can't quit the loop just because the GCD == 1. 2351 Coefficients = Dst; 2352 while (const SCEVAddRecExpr *AddRec = 2353 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2354 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2355 // If the coefficient is the product of a constant and other stuff, 2356 // we can use the constant in the GCD computation. 2357 const auto *Constant = getConstantPart(Coeff); 2358 if (!Constant) 2359 return false; 2360 APInt ConstCoeff = Constant->getAPInt(); 2361 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2362 Coefficients = AddRec->getStart(); 2363 } 2364 const SCEV *DstConst = Coefficients; 2365 2366 APInt ExtraGCD = APInt::getNullValue(BitWidth); 2367 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 2368 LLVM_DEBUG(dbgs() << " Delta = " << *Delta << "\n"); 2369 const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Delta); 2370 if (const SCEVAddExpr *Sum = dyn_cast<SCEVAddExpr>(Delta)) { 2371 // If Delta is a sum of products, we may be able to make further progress. 2372 for (unsigned Op = 0, Ops = Sum->getNumOperands(); Op < Ops; Op++) { 2373 const SCEV *Operand = Sum->getOperand(Op); 2374 if (isa<SCEVConstant>(Operand)) { 2375 assert(!Constant && "Surprised to find multiple constants"); 2376 Constant = cast<SCEVConstant>(Operand); 2377 } 2378 else if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Operand)) { 2379 // Search for constant operand to participate in GCD; 2380 // If none found; return false. 2381 const SCEVConstant *ConstOp = getConstantPart(Product); 2382 if (!ConstOp) 2383 return false; 2384 APInt ConstOpValue = ConstOp->getAPInt(); 2385 ExtraGCD = APIntOps::GreatestCommonDivisor(ExtraGCD, 2386 ConstOpValue.abs()); 2387 } 2388 else 2389 return false; 2390 } 2391 } 2392 if (!Constant) 2393 return false; 2394 APInt ConstDelta = cast<SCEVConstant>(Constant)->getAPInt(); 2395 LLVM_DEBUG(dbgs() << " ConstDelta = " << ConstDelta << "\n"); 2396 if (ConstDelta == 0) 2397 return false; 2398 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ExtraGCD); 2399 LLVM_DEBUG(dbgs() << " RunningGCD = " << RunningGCD << "\n"); 2400 APInt Remainder = ConstDelta.srem(RunningGCD); 2401 if (Remainder != 0) { 2402 ++GCDindependence; 2403 return true; 2404 } 2405 2406 // Try to disprove equal directions. 2407 // For example, given a subscript pair [3*i + 2*j] and [i' + 2*j' - 1], 2408 // the code above can't disprove the dependence because the GCD = 1. 2409 // So we consider what happen if i = i' and what happens if j = j'. 2410 // If i = i', we can simplify the subscript to [2*i + 2*j] and [2*j' - 1], 2411 // which is infeasible, so we can disallow the = direction for the i level. 2412 // Setting j = j' doesn't help matters, so we end up with a direction vector 2413 // of [<>, *] 2414 // 2415 // Given A[5*i + 10*j*M + 9*M*N] and A[15*i + 20*j*M - 21*N*M + 5], 2416 // we need to remember that the constant part is 5 and the RunningGCD should 2417 // be initialized to ExtraGCD = 30. 2418 LLVM_DEBUG(dbgs() << " ExtraGCD = " << ExtraGCD << '\n'); 2419 2420 bool Improved = false; 2421 Coefficients = Src; 2422 while (const SCEVAddRecExpr *AddRec = 2423 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2424 Coefficients = AddRec->getStart(); 2425 const Loop *CurLoop = AddRec->getLoop(); 2426 RunningGCD = ExtraGCD; 2427 const SCEV *SrcCoeff = AddRec->getStepRecurrence(*SE); 2428 const SCEV *DstCoeff = SE->getMinusSCEV(SrcCoeff, SrcCoeff); 2429 const SCEV *Inner = Src; 2430 while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { 2431 AddRec = cast<SCEVAddRecExpr>(Inner); 2432 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2433 if (CurLoop == AddRec->getLoop()) 2434 ; // SrcCoeff == Coeff 2435 else { 2436 // If the coefficient is the product of a constant and other stuff, 2437 // we can use the constant in the GCD computation. 2438 Constant = getConstantPart(Coeff); 2439 if (!Constant) 2440 return false; 2441 APInt ConstCoeff = Constant->getAPInt(); 2442 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2443 } 2444 Inner = AddRec->getStart(); 2445 } 2446 Inner = Dst; 2447 while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { 2448 AddRec = cast<SCEVAddRecExpr>(Inner); 2449 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2450 if (CurLoop == AddRec->getLoop()) 2451 DstCoeff = Coeff; 2452 else { 2453 // If the coefficient is the product of a constant and other stuff, 2454 // we can use the constant in the GCD computation. 2455 Constant = getConstantPart(Coeff); 2456 if (!Constant) 2457 return false; 2458 APInt ConstCoeff = Constant->getAPInt(); 2459 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2460 } 2461 Inner = AddRec->getStart(); 2462 } 2463 Delta = SE->getMinusSCEV(SrcCoeff, DstCoeff); 2464 // If the coefficient is the product of a constant and other stuff, 2465 // we can use the constant in the GCD computation. 2466 Constant = getConstantPart(Delta); 2467 if (!Constant) 2468 // The difference of the two coefficients might not be a product 2469 // or constant, in which case we give up on this direction. 2470 continue; 2471 APInt ConstCoeff = Constant->getAPInt(); 2472 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2473 LLVM_DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n"); 2474 if (RunningGCD != 0) { 2475 Remainder = ConstDelta.srem(RunningGCD); 2476 LLVM_DEBUG(dbgs() << "\tRemainder = " << Remainder << "\n"); 2477 if (Remainder != 0) { 2478 unsigned Level = mapSrcLoop(CurLoop); 2479 Result.DV[Level - 1].Direction &= unsigned(~Dependence::DVEntry::EQ); 2480 Improved = true; 2481 } 2482 } 2483 } 2484 if (Improved) 2485 ++GCDsuccesses; 2486 LLVM_DEBUG(dbgs() << "all done\n"); 2487 return false; 2488 } 2489 2490 2491 //===----------------------------------------------------------------------===// 2492 // banerjeeMIVtest - 2493 // Use Banerjee's Inequalities to test an MIV subscript pair. 2494 // (Wolfe, in the race-car book, calls this the Extreme Value Test.) 2495 // Generally follows the discussion in Section 2.5.2 of 2496 // 2497 // Optimizing Supercompilers for Supercomputers 2498 // Michael Wolfe 2499 // 2500 // The inequalities given on page 25 are simplified in that loops are 2501 // normalized so that the lower bound is always 0 and the stride is always 1. 2502 // For example, Wolfe gives 2503 // 2504 // LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2505 // 2506 // where A_k is the coefficient of the kth index in the source subscript, 2507 // B_k is the coefficient of the kth index in the destination subscript, 2508 // U_k is the upper bound of the kth index, L_k is the lower bound of the Kth 2509 // index, and N_k is the stride of the kth index. Since all loops are normalized 2510 // by the SCEV package, N_k = 1 and L_k = 0, allowing us to simplify the 2511 // equation to 2512 // 2513 // LB^<_k = (A^-_k - B_k)^- (U_k - 0 - 1) + (A_k - B_k)0 - B_k 1 2514 // = (A^-_k - B_k)^- (U_k - 1) - B_k 2515 // 2516 // Similar simplifications are possible for the other equations. 2517 // 2518 // When we can't determine the number of iterations for a loop, 2519 // we use NULL as an indicator for the worst case, infinity. 2520 // When computing the upper bound, NULL denotes +inf; 2521 // for the lower bound, NULL denotes -inf. 2522 // 2523 // Return true if dependence disproved. 2524 bool DependenceInfo::banerjeeMIVtest(const SCEV *Src, const SCEV *Dst, 2525 const SmallBitVector &Loops, 2526 FullDependence &Result) const { 2527 LLVM_DEBUG(dbgs() << "starting Banerjee\n"); 2528 ++BanerjeeApplications; 2529 LLVM_DEBUG(dbgs() << " Src = " << *Src << '\n'); 2530 const SCEV *A0; 2531 CoefficientInfo *A = collectCoeffInfo(Src, true, A0); 2532 LLVM_DEBUG(dbgs() << " Dst = " << *Dst << '\n'); 2533 const SCEV *B0; 2534 CoefficientInfo *B = collectCoeffInfo(Dst, false, B0); 2535 BoundInfo *Bound = new BoundInfo[MaxLevels + 1]; 2536 const SCEV *Delta = SE->getMinusSCEV(B0, A0); 2537 LLVM_DEBUG(dbgs() << "\tDelta = " << *Delta << '\n'); 2538 2539 // Compute bounds for all the * directions. 2540 LLVM_DEBUG(dbgs() << "\tBounds[*]\n"); 2541 for (unsigned K = 1; K <= MaxLevels; ++K) { 2542 Bound[K].Iterations = A[K].Iterations ? A[K].Iterations : B[K].Iterations; 2543 Bound[K].Direction = Dependence::DVEntry::ALL; 2544 Bound[K].DirSet = Dependence::DVEntry::NONE; 2545 findBoundsALL(A, B, Bound, K); 2546 #ifndef NDEBUG 2547 LLVM_DEBUG(dbgs() << "\t " << K << '\t'); 2548 if (Bound[K].Lower[Dependence::DVEntry::ALL]) 2549 LLVM_DEBUG(dbgs() << *Bound[K].Lower[Dependence::DVEntry::ALL] << '\t'); 2550 else 2551 LLVM_DEBUG(dbgs() << "-inf\t"); 2552 if (Bound[K].Upper[Dependence::DVEntry::ALL]) 2553 LLVM_DEBUG(dbgs() << *Bound[K].Upper[Dependence::DVEntry::ALL] << '\n'); 2554 else 2555 LLVM_DEBUG(dbgs() << "+inf\n"); 2556 #endif 2557 } 2558 2559 // Test the *, *, *, ... case. 2560 bool Disproved = false; 2561 if (testBounds(Dependence::DVEntry::ALL, 0, Bound, Delta)) { 2562 // Explore the direction vector hierarchy. 2563 unsigned DepthExpanded = 0; 2564 unsigned NewDeps = exploreDirections(1, A, B, Bound, 2565 Loops, DepthExpanded, Delta); 2566 if (NewDeps > 0) { 2567 bool Improved = false; 2568 for (unsigned K = 1; K <= CommonLevels; ++K) { 2569 if (Loops[K]) { 2570 unsigned Old = Result.DV[K - 1].Direction; 2571 Result.DV[K - 1].Direction = Old & Bound[K].DirSet; 2572 Improved |= Old != Result.DV[K - 1].Direction; 2573 if (!Result.DV[K - 1].Direction) { 2574 Improved = false; 2575 Disproved = true; 2576 break; 2577 } 2578 } 2579 } 2580 if (Improved) 2581 ++BanerjeeSuccesses; 2582 } 2583 else { 2584 ++BanerjeeIndependence; 2585 Disproved = true; 2586 } 2587 } 2588 else { 2589 ++BanerjeeIndependence; 2590 Disproved = true; 2591 } 2592 delete [] Bound; 2593 delete [] A; 2594 delete [] B; 2595 return Disproved; 2596 } 2597 2598 2599 // Hierarchically expands the direction vector 2600 // search space, combining the directions of discovered dependences 2601 // in the DirSet field of Bound. Returns the number of distinct 2602 // dependences discovered. If the dependence is disproved, 2603 // it will return 0. 2604 unsigned DependenceInfo::exploreDirections(unsigned Level, CoefficientInfo *A, 2605 CoefficientInfo *B, BoundInfo *Bound, 2606 const SmallBitVector &Loops, 2607 unsigned &DepthExpanded, 2608 const SCEV *Delta) const { 2609 if (Level > CommonLevels) { 2610 // record result 2611 LLVM_DEBUG(dbgs() << "\t["); 2612 for (unsigned K = 1; K <= CommonLevels; ++K) { 2613 if (Loops[K]) { 2614 Bound[K].DirSet |= Bound[K].Direction; 2615 #ifndef NDEBUG 2616 switch (Bound[K].Direction) { 2617 case Dependence::DVEntry::LT: 2618 LLVM_DEBUG(dbgs() << " <"); 2619 break; 2620 case Dependence::DVEntry::EQ: 2621 LLVM_DEBUG(dbgs() << " ="); 2622 break; 2623 case Dependence::DVEntry::GT: 2624 LLVM_DEBUG(dbgs() << " >"); 2625 break; 2626 case Dependence::DVEntry::ALL: 2627 LLVM_DEBUG(dbgs() << " *"); 2628 break; 2629 default: 2630 llvm_unreachable("unexpected Bound[K].Direction"); 2631 } 2632 #endif 2633 } 2634 } 2635 LLVM_DEBUG(dbgs() << " ]\n"); 2636 return 1; 2637 } 2638 if (Loops[Level]) { 2639 if (Level > DepthExpanded) { 2640 DepthExpanded = Level; 2641 // compute bounds for <, =, > at current level 2642 findBoundsLT(A, B, Bound, Level); 2643 findBoundsGT(A, B, Bound, Level); 2644 findBoundsEQ(A, B, Bound, Level); 2645 #ifndef NDEBUG 2646 LLVM_DEBUG(dbgs() << "\tBound for level = " << Level << '\n'); 2647 LLVM_DEBUG(dbgs() << "\t <\t"); 2648 if (Bound[Level].Lower[Dependence::DVEntry::LT]) 2649 LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::LT] 2650 << '\t'); 2651 else 2652 LLVM_DEBUG(dbgs() << "-inf\t"); 2653 if (Bound[Level].Upper[Dependence::DVEntry::LT]) 2654 LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::LT] 2655 << '\n'); 2656 else 2657 LLVM_DEBUG(dbgs() << "+inf\n"); 2658 LLVM_DEBUG(dbgs() << "\t =\t"); 2659 if (Bound[Level].Lower[Dependence::DVEntry::EQ]) 2660 LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::EQ] 2661 << '\t'); 2662 else 2663 LLVM_DEBUG(dbgs() << "-inf\t"); 2664 if (Bound[Level].Upper[Dependence::DVEntry::EQ]) 2665 LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::EQ] 2666 << '\n'); 2667 else 2668 LLVM_DEBUG(dbgs() << "+inf\n"); 2669 LLVM_DEBUG(dbgs() << "\t >\t"); 2670 if (Bound[Level].Lower[Dependence::DVEntry::GT]) 2671 LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::GT] 2672 << '\t'); 2673 else 2674 LLVM_DEBUG(dbgs() << "-inf\t"); 2675 if (Bound[Level].Upper[Dependence::DVEntry::GT]) 2676 LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::GT] 2677 << '\n'); 2678 else 2679 LLVM_DEBUG(dbgs() << "+inf\n"); 2680 #endif 2681 } 2682 2683 unsigned NewDeps = 0; 2684 2685 // test bounds for <, *, *, ... 2686 if (testBounds(Dependence::DVEntry::LT, Level, Bound, Delta)) 2687 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2688 Loops, DepthExpanded, Delta); 2689 2690 // Test bounds for =, *, *, ... 2691 if (testBounds(Dependence::DVEntry::EQ, Level, Bound, Delta)) 2692 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2693 Loops, DepthExpanded, Delta); 2694 2695 // test bounds for >, *, *, ... 2696 if (testBounds(Dependence::DVEntry::GT, Level, Bound, Delta)) 2697 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2698 Loops, DepthExpanded, Delta); 2699 2700 Bound[Level].Direction = Dependence::DVEntry::ALL; 2701 return NewDeps; 2702 } 2703 else 2704 return exploreDirections(Level + 1, A, B, Bound, Loops, DepthExpanded, Delta); 2705 } 2706 2707 2708 // Returns true iff the current bounds are plausible. 2709 bool DependenceInfo::testBounds(unsigned char DirKind, unsigned Level, 2710 BoundInfo *Bound, const SCEV *Delta) const { 2711 Bound[Level].Direction = DirKind; 2712 if (const SCEV *LowerBound = getLowerBound(Bound)) 2713 if (isKnownPredicate(CmpInst::ICMP_SGT, LowerBound, Delta)) 2714 return false; 2715 if (const SCEV *UpperBound = getUpperBound(Bound)) 2716 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, UpperBound)) 2717 return false; 2718 return true; 2719 } 2720 2721 2722 // Computes the upper and lower bounds for level K 2723 // using the * direction. Records them in Bound. 2724 // Wolfe gives the equations 2725 // 2726 // LB^*_k = (A^-_k - B^+_k)(U_k - L_k) + (A_k - B_k)L_k 2727 // UB^*_k = (A^+_k - B^-_k)(U_k - L_k) + (A_k - B_k)L_k 2728 // 2729 // Since we normalize loops, we can simplify these equations to 2730 // 2731 // LB^*_k = (A^-_k - B^+_k)U_k 2732 // UB^*_k = (A^+_k - B^-_k)U_k 2733 // 2734 // We must be careful to handle the case where the upper bound is unknown. 2735 // Note that the lower bound is always <= 0 2736 // and the upper bound is always >= 0. 2737 void DependenceInfo::findBoundsALL(CoefficientInfo *A, CoefficientInfo *B, 2738 BoundInfo *Bound, unsigned K) const { 2739 Bound[K].Lower[Dependence::DVEntry::ALL] = nullptr; // Default value = -infinity. 2740 Bound[K].Upper[Dependence::DVEntry::ALL] = nullptr; // Default value = +infinity. 2741 if (Bound[K].Iterations) { 2742 Bound[K].Lower[Dependence::DVEntry::ALL] = 2743 SE->getMulExpr(SE->getMinusSCEV(A[K].NegPart, B[K].PosPart), 2744 Bound[K].Iterations); 2745 Bound[K].Upper[Dependence::DVEntry::ALL] = 2746 SE->getMulExpr(SE->getMinusSCEV(A[K].PosPart, B[K].NegPart), 2747 Bound[K].Iterations); 2748 } 2749 else { 2750 // If the difference is 0, we won't need to know the number of iterations. 2751 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].NegPart, B[K].PosPart)) 2752 Bound[K].Lower[Dependence::DVEntry::ALL] = 2753 SE->getZero(A[K].Coeff->getType()); 2754 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].PosPart, B[K].NegPart)) 2755 Bound[K].Upper[Dependence::DVEntry::ALL] = 2756 SE->getZero(A[K].Coeff->getType()); 2757 } 2758 } 2759 2760 2761 // Computes the upper and lower bounds for level K 2762 // using the = direction. Records them in Bound. 2763 // Wolfe gives the equations 2764 // 2765 // LB^=_k = (A_k - B_k)^- (U_k - L_k) + (A_k - B_k)L_k 2766 // UB^=_k = (A_k - B_k)^+ (U_k - L_k) + (A_k - B_k)L_k 2767 // 2768 // Since we normalize loops, we can simplify these equations to 2769 // 2770 // LB^=_k = (A_k - B_k)^- U_k 2771 // UB^=_k = (A_k - B_k)^+ U_k 2772 // 2773 // We must be careful to handle the case where the upper bound is unknown. 2774 // Note that the lower bound is always <= 0 2775 // and the upper bound is always >= 0. 2776 void DependenceInfo::findBoundsEQ(CoefficientInfo *A, CoefficientInfo *B, 2777 BoundInfo *Bound, unsigned K) const { 2778 Bound[K].Lower[Dependence::DVEntry::EQ] = nullptr; // Default value = -infinity. 2779 Bound[K].Upper[Dependence::DVEntry::EQ] = nullptr; // Default value = +infinity. 2780 if (Bound[K].Iterations) { 2781 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); 2782 const SCEV *NegativePart = getNegativePart(Delta); 2783 Bound[K].Lower[Dependence::DVEntry::EQ] = 2784 SE->getMulExpr(NegativePart, Bound[K].Iterations); 2785 const SCEV *PositivePart = getPositivePart(Delta); 2786 Bound[K].Upper[Dependence::DVEntry::EQ] = 2787 SE->getMulExpr(PositivePart, Bound[K].Iterations); 2788 } 2789 else { 2790 // If the positive/negative part of the difference is 0, 2791 // we won't need to know the number of iterations. 2792 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); 2793 const SCEV *NegativePart = getNegativePart(Delta); 2794 if (NegativePart->isZero()) 2795 Bound[K].Lower[Dependence::DVEntry::EQ] = NegativePart; // Zero 2796 const SCEV *PositivePart = getPositivePart(Delta); 2797 if (PositivePart->isZero()) 2798 Bound[K].Upper[Dependence::DVEntry::EQ] = PositivePart; // Zero 2799 } 2800 } 2801 2802 2803 // Computes the upper and lower bounds for level K 2804 // using the < direction. Records them in Bound. 2805 // Wolfe gives the equations 2806 // 2807 // LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2808 // UB^<_k = (A^+_k - B_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2809 // 2810 // Since we normalize loops, we can simplify these equations to 2811 // 2812 // LB^<_k = (A^-_k - B_k)^- (U_k - 1) - B_k 2813 // UB^<_k = (A^+_k - B_k)^+ (U_k - 1) - B_k 2814 // 2815 // We must be careful to handle the case where the upper bound is unknown. 2816 void DependenceInfo::findBoundsLT(CoefficientInfo *A, CoefficientInfo *B, 2817 BoundInfo *Bound, unsigned K) const { 2818 Bound[K].Lower[Dependence::DVEntry::LT] = nullptr; // Default value = -infinity. 2819 Bound[K].Upper[Dependence::DVEntry::LT] = nullptr; // Default value = +infinity. 2820 if (Bound[K].Iterations) { 2821 const SCEV *Iter_1 = SE->getMinusSCEV( 2822 Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType())); 2823 const SCEV *NegPart = 2824 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); 2825 Bound[K].Lower[Dependence::DVEntry::LT] = 2826 SE->getMinusSCEV(SE->getMulExpr(NegPart, Iter_1), B[K].Coeff); 2827 const SCEV *PosPart = 2828 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); 2829 Bound[K].Upper[Dependence::DVEntry::LT] = 2830 SE->getMinusSCEV(SE->getMulExpr(PosPart, Iter_1), B[K].Coeff); 2831 } 2832 else { 2833 // If the positive/negative part of the difference is 0, 2834 // we won't need to know the number of iterations. 2835 const SCEV *NegPart = 2836 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); 2837 if (NegPart->isZero()) 2838 Bound[K].Lower[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); 2839 const SCEV *PosPart = 2840 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); 2841 if (PosPart->isZero()) 2842 Bound[K].Upper[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); 2843 } 2844 } 2845 2846 2847 // Computes the upper and lower bounds for level K 2848 // using the > direction. Records them in Bound. 2849 // Wolfe gives the equations 2850 // 2851 // LB^>_k = (A_k - B^+_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k 2852 // UB^>_k = (A_k - B^-_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k 2853 // 2854 // Since we normalize loops, we can simplify these equations to 2855 // 2856 // LB^>_k = (A_k - B^+_k)^- (U_k - 1) + A_k 2857 // UB^>_k = (A_k - B^-_k)^+ (U_k - 1) + A_k 2858 // 2859 // We must be careful to handle the case where the upper bound is unknown. 2860 void DependenceInfo::findBoundsGT(CoefficientInfo *A, CoefficientInfo *B, 2861 BoundInfo *Bound, unsigned K) const { 2862 Bound[K].Lower[Dependence::DVEntry::GT] = nullptr; // Default value = -infinity. 2863 Bound[K].Upper[Dependence::DVEntry::GT] = nullptr; // Default value = +infinity. 2864 if (Bound[K].Iterations) { 2865 const SCEV *Iter_1 = SE->getMinusSCEV( 2866 Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType())); 2867 const SCEV *NegPart = 2868 getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); 2869 Bound[K].Lower[Dependence::DVEntry::GT] = 2870 SE->getAddExpr(SE->getMulExpr(NegPart, Iter_1), A[K].Coeff); 2871 const SCEV *PosPart = 2872 getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); 2873 Bound[K].Upper[Dependence::DVEntry::GT] = 2874 SE->getAddExpr(SE->getMulExpr(PosPart, Iter_1), A[K].Coeff); 2875 } 2876 else { 2877 // If the positive/negative part of the difference is 0, 2878 // we won't need to know the number of iterations. 2879 const SCEV *NegPart = getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); 2880 if (NegPart->isZero()) 2881 Bound[K].Lower[Dependence::DVEntry::GT] = A[K].Coeff; 2882 const SCEV *PosPart = getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); 2883 if (PosPart->isZero()) 2884 Bound[K].Upper[Dependence::DVEntry::GT] = A[K].Coeff; 2885 } 2886 } 2887 2888 2889 // X^+ = max(X, 0) 2890 const SCEV *DependenceInfo::getPositivePart(const SCEV *X) const { 2891 return SE->getSMaxExpr(X, SE->getZero(X->getType())); 2892 } 2893 2894 2895 // X^- = min(X, 0) 2896 const SCEV *DependenceInfo::getNegativePart(const SCEV *X) const { 2897 return SE->getSMinExpr(X, SE->getZero(X->getType())); 2898 } 2899 2900 2901 // Walks through the subscript, 2902 // collecting each coefficient, the associated loop bounds, 2903 // and recording its positive and negative parts for later use. 2904 DependenceInfo::CoefficientInfo * 2905 DependenceInfo::collectCoeffInfo(const SCEV *Subscript, bool SrcFlag, 2906 const SCEV *&Constant) const { 2907 const SCEV *Zero = SE->getZero(Subscript->getType()); 2908 CoefficientInfo *CI = new CoefficientInfo[MaxLevels + 1]; 2909 for (unsigned K = 1; K <= MaxLevels; ++K) { 2910 CI[K].Coeff = Zero; 2911 CI[K].PosPart = Zero; 2912 CI[K].NegPart = Zero; 2913 CI[K].Iterations = nullptr; 2914 } 2915 while (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Subscript)) { 2916 const Loop *L = AddRec->getLoop(); 2917 unsigned K = SrcFlag ? mapSrcLoop(L) : mapDstLoop(L); 2918 CI[K].Coeff = AddRec->getStepRecurrence(*SE); 2919 CI[K].PosPart = getPositivePart(CI[K].Coeff); 2920 CI[K].NegPart = getNegativePart(CI[K].Coeff); 2921 CI[K].Iterations = collectUpperBound(L, Subscript->getType()); 2922 Subscript = AddRec->getStart(); 2923 } 2924 Constant = Subscript; 2925 #ifndef NDEBUG 2926 LLVM_DEBUG(dbgs() << "\tCoefficient Info\n"); 2927 for (unsigned K = 1; K <= MaxLevels; ++K) { 2928 LLVM_DEBUG(dbgs() << "\t " << K << "\t" << *CI[K].Coeff); 2929 LLVM_DEBUG(dbgs() << "\tPos Part = "); 2930 LLVM_DEBUG(dbgs() << *CI[K].PosPart); 2931 LLVM_DEBUG(dbgs() << "\tNeg Part = "); 2932 LLVM_DEBUG(dbgs() << *CI[K].NegPart); 2933 LLVM_DEBUG(dbgs() << "\tUpper Bound = "); 2934 if (CI[K].Iterations) 2935 LLVM_DEBUG(dbgs() << *CI[K].Iterations); 2936 else 2937 LLVM_DEBUG(dbgs() << "+inf"); 2938 LLVM_DEBUG(dbgs() << '\n'); 2939 } 2940 LLVM_DEBUG(dbgs() << "\t Constant = " << *Subscript << '\n'); 2941 #endif 2942 return CI; 2943 } 2944 2945 2946 // Looks through all the bounds info and 2947 // computes the lower bound given the current direction settings 2948 // at each level. If the lower bound for any level is -inf, 2949 // the result is -inf. 2950 const SCEV *DependenceInfo::getLowerBound(BoundInfo *Bound) const { 2951 const SCEV *Sum = Bound[1].Lower[Bound[1].Direction]; 2952 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { 2953 if (Bound[K].Lower[Bound[K].Direction]) 2954 Sum = SE->getAddExpr(Sum, Bound[K].Lower[Bound[K].Direction]); 2955 else 2956 Sum = nullptr; 2957 } 2958 return Sum; 2959 } 2960 2961 2962 // Looks through all the bounds info and 2963 // computes the upper bound given the current direction settings 2964 // at each level. If the upper bound at any level is +inf, 2965 // the result is +inf. 2966 const SCEV *DependenceInfo::getUpperBound(BoundInfo *Bound) const { 2967 const SCEV *Sum = Bound[1].Upper[Bound[1].Direction]; 2968 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { 2969 if (Bound[K].Upper[Bound[K].Direction]) 2970 Sum = SE->getAddExpr(Sum, Bound[K].Upper[Bound[K].Direction]); 2971 else 2972 Sum = nullptr; 2973 } 2974 return Sum; 2975 } 2976 2977 2978 //===----------------------------------------------------------------------===// 2979 // Constraint manipulation for Delta test. 2980 2981 // Given a linear SCEV, 2982 // return the coefficient (the step) 2983 // corresponding to the specified loop. 2984 // If there isn't one, return 0. 2985 // For example, given a*i + b*j + c*k, finding the coefficient 2986 // corresponding to the j loop would yield b. 2987 const SCEV *DependenceInfo::findCoefficient(const SCEV *Expr, 2988 const Loop *TargetLoop) const { 2989 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 2990 if (!AddRec) 2991 return SE->getZero(Expr->getType()); 2992 if (AddRec->getLoop() == TargetLoop) 2993 return AddRec->getStepRecurrence(*SE); 2994 return findCoefficient(AddRec->getStart(), TargetLoop); 2995 } 2996 2997 2998 // Given a linear SCEV, 2999 // return the SCEV given by zeroing out the coefficient 3000 // corresponding to the specified loop. 3001 // For example, given a*i + b*j + c*k, zeroing the coefficient 3002 // corresponding to the j loop would yield a*i + c*k. 3003 const SCEV *DependenceInfo::zeroCoefficient(const SCEV *Expr, 3004 const Loop *TargetLoop) const { 3005 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 3006 if (!AddRec) 3007 return Expr; // ignore 3008 if (AddRec->getLoop() == TargetLoop) 3009 return AddRec->getStart(); 3010 return SE->getAddRecExpr(zeroCoefficient(AddRec->getStart(), TargetLoop), 3011 AddRec->getStepRecurrence(*SE), 3012 AddRec->getLoop(), 3013 AddRec->getNoWrapFlags()); 3014 } 3015 3016 3017 // Given a linear SCEV Expr, 3018 // return the SCEV given by adding some Value to the 3019 // coefficient corresponding to the specified TargetLoop. 3020 // For example, given a*i + b*j + c*k, adding 1 to the coefficient 3021 // corresponding to the j loop would yield a*i + (b+1)*j + c*k. 3022 const SCEV *DependenceInfo::addToCoefficient(const SCEV *Expr, 3023 const Loop *TargetLoop, 3024 const SCEV *Value) const { 3025 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 3026 if (!AddRec) // create a new addRec 3027 return SE->getAddRecExpr(Expr, 3028 Value, 3029 TargetLoop, 3030 SCEV::FlagAnyWrap); // Worst case, with no info. 3031 if (AddRec->getLoop() == TargetLoop) { 3032 const SCEV *Sum = SE->getAddExpr(AddRec->getStepRecurrence(*SE), Value); 3033 if (Sum->isZero()) 3034 return AddRec->getStart(); 3035 return SE->getAddRecExpr(AddRec->getStart(), 3036 Sum, 3037 AddRec->getLoop(), 3038 AddRec->getNoWrapFlags()); 3039 } 3040 if (SE->isLoopInvariant(AddRec, TargetLoop)) 3041 return SE->getAddRecExpr(AddRec, Value, TargetLoop, SCEV::FlagAnyWrap); 3042 return SE->getAddRecExpr( 3043 addToCoefficient(AddRec->getStart(), TargetLoop, Value), 3044 AddRec->getStepRecurrence(*SE), AddRec->getLoop(), 3045 AddRec->getNoWrapFlags()); 3046 } 3047 3048 3049 // Review the constraints, looking for opportunities 3050 // to simplify a subscript pair (Src and Dst). 3051 // Return true if some simplification occurs. 3052 // If the simplification isn't exact (that is, if it is conservative 3053 // in terms of dependence), set consistent to false. 3054 // Corresponds to Figure 5 from the paper 3055 // 3056 // Practical Dependence Testing 3057 // Goff, Kennedy, Tseng 3058 // PLDI 1991 3059 bool DependenceInfo::propagate(const SCEV *&Src, const SCEV *&Dst, 3060 SmallBitVector &Loops, 3061 SmallVectorImpl<Constraint> &Constraints, 3062 bool &Consistent) { 3063 bool Result = false; 3064 for (unsigned LI : Loops.set_bits()) { 3065 LLVM_DEBUG(dbgs() << "\t Constraint[" << LI << "] is"); 3066 LLVM_DEBUG(Constraints[LI].dump(dbgs())); 3067 if (Constraints[LI].isDistance()) 3068 Result |= propagateDistance(Src, Dst, Constraints[LI], Consistent); 3069 else if (Constraints[LI].isLine()) 3070 Result |= propagateLine(Src, Dst, Constraints[LI], Consistent); 3071 else if (Constraints[LI].isPoint()) 3072 Result |= propagatePoint(Src, Dst, Constraints[LI]); 3073 } 3074 return Result; 3075 } 3076 3077 3078 // Attempt to propagate a distance 3079 // constraint into a subscript pair (Src and Dst). 3080 // Return true if some simplification occurs. 3081 // If the simplification isn't exact (that is, if it is conservative 3082 // in terms of dependence), set consistent to false. 3083 bool DependenceInfo::propagateDistance(const SCEV *&Src, const SCEV *&Dst, 3084 Constraint &CurConstraint, 3085 bool &Consistent) { 3086 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3087 LLVM_DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); 3088 const SCEV *A_K = findCoefficient(Src, CurLoop); 3089 if (A_K->isZero()) 3090 return false; 3091 const SCEV *DA_K = SE->getMulExpr(A_K, CurConstraint.getD()); 3092 Src = SE->getMinusSCEV(Src, DA_K); 3093 Src = zeroCoefficient(Src, CurLoop); 3094 LLVM_DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); 3095 LLVM_DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); 3096 Dst = addToCoefficient(Dst, CurLoop, SE->getNegativeSCEV(A_K)); 3097 LLVM_DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); 3098 if (!findCoefficient(Dst, CurLoop)->isZero()) 3099 Consistent = false; 3100 return true; 3101 } 3102 3103 3104 // Attempt to propagate a line 3105 // constraint into a subscript pair (Src and Dst). 3106 // Return true if some simplification occurs. 3107 // If the simplification isn't exact (that is, if it is conservative 3108 // in terms of dependence), set consistent to false. 3109 bool DependenceInfo::propagateLine(const SCEV *&Src, const SCEV *&Dst, 3110 Constraint &CurConstraint, 3111 bool &Consistent) { 3112 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3113 const SCEV *A = CurConstraint.getA(); 3114 const SCEV *B = CurConstraint.getB(); 3115 const SCEV *C = CurConstraint.getC(); 3116 LLVM_DEBUG(dbgs() << "\t\tA = " << *A << ", B = " << *B << ", C = " << *C 3117 << "\n"); 3118 LLVM_DEBUG(dbgs() << "\t\tSrc = " << *Src << "\n"); 3119 LLVM_DEBUG(dbgs() << "\t\tDst = " << *Dst << "\n"); 3120 if (A->isZero()) { 3121 const SCEVConstant *Bconst = dyn_cast<SCEVConstant>(B); 3122 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3123 if (!Bconst || !Cconst) return false; 3124 APInt Beta = Bconst->getAPInt(); 3125 APInt Charlie = Cconst->getAPInt(); 3126 APInt CdivB = Charlie.sdiv(Beta); 3127 assert(Charlie.srem(Beta) == 0 && "C should be evenly divisible by B"); 3128 const SCEV *AP_K = findCoefficient(Dst, CurLoop); 3129 // Src = SE->getAddExpr(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); 3130 Src = SE->getMinusSCEV(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); 3131 Dst = zeroCoefficient(Dst, CurLoop); 3132 if (!findCoefficient(Src, CurLoop)->isZero()) 3133 Consistent = false; 3134 } 3135 else if (B->isZero()) { 3136 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); 3137 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3138 if (!Aconst || !Cconst) return false; 3139 APInt Alpha = Aconst->getAPInt(); 3140 APInt Charlie = Cconst->getAPInt(); 3141 APInt CdivA = Charlie.sdiv(Alpha); 3142 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); 3143 const SCEV *A_K = findCoefficient(Src, CurLoop); 3144 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); 3145 Src = zeroCoefficient(Src, CurLoop); 3146 if (!findCoefficient(Dst, CurLoop)->isZero()) 3147 Consistent = false; 3148 } 3149 else if (isKnownPredicate(CmpInst::ICMP_EQ, A, B)) { 3150 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); 3151 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3152 if (!Aconst || !Cconst) return false; 3153 APInt Alpha = Aconst->getAPInt(); 3154 APInt Charlie = Cconst->getAPInt(); 3155 APInt CdivA = Charlie.sdiv(Alpha); 3156 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); 3157 const SCEV *A_K = findCoefficient(Src, CurLoop); 3158 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); 3159 Src = zeroCoefficient(Src, CurLoop); 3160 Dst = addToCoefficient(Dst, CurLoop, A_K); 3161 if (!findCoefficient(Dst, CurLoop)->isZero()) 3162 Consistent = false; 3163 } 3164 else { 3165 // paper is incorrect here, or perhaps just misleading 3166 const SCEV *A_K = findCoefficient(Src, CurLoop); 3167 Src = SE->getMulExpr(Src, A); 3168 Dst = SE->getMulExpr(Dst, A); 3169 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, C)); 3170 Src = zeroCoefficient(Src, CurLoop); 3171 Dst = addToCoefficient(Dst, CurLoop, SE->getMulExpr(A_K, B)); 3172 if (!findCoefficient(Dst, CurLoop)->isZero()) 3173 Consistent = false; 3174 } 3175 LLVM_DEBUG(dbgs() << "\t\tnew Src = " << *Src << "\n"); 3176 LLVM_DEBUG(dbgs() << "\t\tnew Dst = " << *Dst << "\n"); 3177 return true; 3178 } 3179 3180 3181 // Attempt to propagate a point 3182 // constraint into a subscript pair (Src and Dst). 3183 // Return true if some simplification occurs. 3184 bool DependenceInfo::propagatePoint(const SCEV *&Src, const SCEV *&Dst, 3185 Constraint &CurConstraint) { 3186 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3187 const SCEV *A_K = findCoefficient(Src, CurLoop); 3188 const SCEV *AP_K = findCoefficient(Dst, CurLoop); 3189 const SCEV *XA_K = SE->getMulExpr(A_K, CurConstraint.getX()); 3190 const SCEV *YAP_K = SE->getMulExpr(AP_K, CurConstraint.getY()); 3191 LLVM_DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); 3192 Src = SE->getAddExpr(Src, SE->getMinusSCEV(XA_K, YAP_K)); 3193 Src = zeroCoefficient(Src, CurLoop); 3194 LLVM_DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); 3195 LLVM_DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); 3196 Dst = zeroCoefficient(Dst, CurLoop); 3197 LLVM_DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); 3198 return true; 3199 } 3200 3201 3202 // Update direction vector entry based on the current constraint. 3203 void DependenceInfo::updateDirection(Dependence::DVEntry &Level, 3204 const Constraint &CurConstraint) const { 3205 LLVM_DEBUG(dbgs() << "\tUpdate direction, constraint ="); 3206 LLVM_DEBUG(CurConstraint.dump(dbgs())); 3207 if (CurConstraint.isAny()) 3208 ; // use defaults 3209 else if (CurConstraint.isDistance()) { 3210 // this one is consistent, the others aren't 3211 Level.Scalar = false; 3212 Level.Distance = CurConstraint.getD(); 3213 unsigned NewDirection = Dependence::DVEntry::NONE; 3214 if (!SE->isKnownNonZero(Level.Distance)) // if may be zero 3215 NewDirection = Dependence::DVEntry::EQ; 3216 if (!SE->isKnownNonPositive(Level.Distance)) // if may be positive 3217 NewDirection |= Dependence::DVEntry::LT; 3218 if (!SE->isKnownNonNegative(Level.Distance)) // if may be negative 3219 NewDirection |= Dependence::DVEntry::GT; 3220 Level.Direction &= NewDirection; 3221 } 3222 else if (CurConstraint.isLine()) { 3223 Level.Scalar = false; 3224 Level.Distance = nullptr; 3225 // direction should be accurate 3226 } 3227 else if (CurConstraint.isPoint()) { 3228 Level.Scalar = false; 3229 Level.Distance = nullptr; 3230 unsigned NewDirection = Dependence::DVEntry::NONE; 3231 if (!isKnownPredicate(CmpInst::ICMP_NE, 3232 CurConstraint.getY(), 3233 CurConstraint.getX())) 3234 // if X may be = Y 3235 NewDirection |= Dependence::DVEntry::EQ; 3236 if (!isKnownPredicate(CmpInst::ICMP_SLE, 3237 CurConstraint.getY(), 3238 CurConstraint.getX())) 3239 // if Y may be > X 3240 NewDirection |= Dependence::DVEntry::LT; 3241 if (!isKnownPredicate(CmpInst::ICMP_SGE, 3242 CurConstraint.getY(), 3243 CurConstraint.getX())) 3244 // if Y may be < X 3245 NewDirection |= Dependence::DVEntry::GT; 3246 Level.Direction &= NewDirection; 3247 } 3248 else 3249 llvm_unreachable("constraint has unexpected kind"); 3250 } 3251 3252 /// Check if we can delinearize the subscripts. If the SCEVs representing the 3253 /// source and destination array references are recurrences on a nested loop, 3254 /// this function flattens the nested recurrences into separate recurrences 3255 /// for each loop level. 3256 bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst, 3257 SmallVectorImpl<Subscript> &Pair) { 3258 assert(isLoadOrStore(Src) && "instruction is not load or store"); 3259 assert(isLoadOrStore(Dst) && "instruction is not load or store"); 3260 Value *SrcPtr = getLoadStorePointerOperand(Src); 3261 Value *DstPtr = getLoadStorePointerOperand(Dst); 3262 3263 Loop *SrcLoop = LI->getLoopFor(Src->getParent()); 3264 Loop *DstLoop = LI->getLoopFor(Dst->getParent()); 3265 3266 // Below code mimics the code in Delinearization.cpp 3267 const SCEV *SrcAccessFn = 3268 SE->getSCEVAtScope(SrcPtr, SrcLoop); 3269 const SCEV *DstAccessFn = 3270 SE->getSCEVAtScope(DstPtr, DstLoop); 3271 3272 const SCEVUnknown *SrcBase = 3273 dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn)); 3274 const SCEVUnknown *DstBase = 3275 dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn)); 3276 3277 if (!SrcBase || !DstBase || SrcBase != DstBase) 3278 return false; 3279 3280 const SCEV *ElementSize = SE->getElementSize(Src); 3281 if (ElementSize != SE->getElementSize(Dst)) 3282 return false; 3283 3284 const SCEV *SrcSCEV = SE->getMinusSCEV(SrcAccessFn, SrcBase); 3285 const SCEV *DstSCEV = SE->getMinusSCEV(DstAccessFn, DstBase); 3286 3287 const SCEVAddRecExpr *SrcAR = dyn_cast<SCEVAddRecExpr>(SrcSCEV); 3288 const SCEVAddRecExpr *DstAR = dyn_cast<SCEVAddRecExpr>(DstSCEV); 3289 if (!SrcAR || !DstAR || !SrcAR->isAffine() || !DstAR->isAffine()) 3290 return false; 3291 3292 // First step: collect parametric terms in both array references. 3293 SmallVector<const SCEV *, 4> Terms; 3294 SE->collectParametricTerms(SrcAR, Terms); 3295 SE->collectParametricTerms(DstAR, Terms); 3296 3297 // Second step: find subscript sizes. 3298 SmallVector<const SCEV *, 4> Sizes; 3299 SE->findArrayDimensions(Terms, Sizes, ElementSize); 3300 3301 // Third step: compute the access functions for each subscript. 3302 SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts; 3303 SE->computeAccessFunctions(SrcAR, SrcSubscripts, Sizes); 3304 SE->computeAccessFunctions(DstAR, DstSubscripts, Sizes); 3305 3306 // Fail when there is only a subscript: that's a linearized access function. 3307 if (SrcSubscripts.size() < 2 || DstSubscripts.size() < 2 || 3308 SrcSubscripts.size() != DstSubscripts.size()) 3309 return false; 3310 3311 int size = SrcSubscripts.size(); 3312 3313 // Statically check that the array bounds are in-range. The first subscript we 3314 // don't have a size for and it cannot overflow into another subscript, so is 3315 // always safe. The others need to be 0 <= subscript[i] < bound, for both src 3316 // and dst. 3317 // FIXME: It may be better to record these sizes and add them as constraints 3318 // to the dependency checks. 3319 for (int i = 1; i < size; ++i) { 3320 if (!isKnownNonNegative(SrcSubscripts[i], SrcPtr)) 3321 return false; 3322 3323 if (!isKnownLessThan(SrcSubscripts[i], Sizes[i - 1])) 3324 return false; 3325 3326 if (!isKnownNonNegative(DstSubscripts[i], DstPtr)) 3327 return false; 3328 3329 if (!isKnownLessThan(DstSubscripts[i], Sizes[i - 1])) 3330 return false; 3331 } 3332 3333 LLVM_DEBUG({ 3334 dbgs() << "\nSrcSubscripts: "; 3335 for (int i = 0; i < size; i++) 3336 dbgs() << *SrcSubscripts[i]; 3337 dbgs() << "\nDstSubscripts: "; 3338 for (int i = 0; i < size; i++) 3339 dbgs() << *DstSubscripts[i]; 3340 }); 3341 3342 // The delinearization transforms a single-subscript MIV dependence test into 3343 // a multi-subscript SIV dependence test that is easier to compute. So we 3344 // resize Pair to contain as many pairs of subscripts as the delinearization 3345 // has found, and then initialize the pairs following the delinearization. 3346 Pair.resize(size); 3347 for (int i = 0; i < size; ++i) { 3348 Pair[i].Src = SrcSubscripts[i]; 3349 Pair[i].Dst = DstSubscripts[i]; 3350 unifySubscriptType(&Pair[i]); 3351 } 3352 3353 return true; 3354 } 3355 3356 //===----------------------------------------------------------------------===// 3357 3358 #ifndef NDEBUG 3359 // For debugging purposes, dump a small bit vector to dbgs(). 3360 static void dumpSmallBitVector(SmallBitVector &BV) { 3361 dbgs() << "{"; 3362 for (unsigned VI : BV.set_bits()) { 3363 dbgs() << VI; 3364 if (BV.find_next(VI) >= 0) 3365 dbgs() << ' '; 3366 } 3367 dbgs() << "}\n"; 3368 } 3369 #endif 3370 3371 bool DependenceInfo::invalidate(Function &F, const PreservedAnalyses &PA, 3372 FunctionAnalysisManager::Invalidator &Inv) { 3373 // Check if the analysis itself has been invalidated. 3374 auto PAC = PA.getChecker<DependenceAnalysis>(); 3375 if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>()) 3376 return true; 3377 3378 // Check transitive dependencies. 3379 return Inv.invalidate<AAManager>(F, PA) || 3380 Inv.invalidate<ScalarEvolutionAnalysis>(F, PA) || 3381 Inv.invalidate<LoopAnalysis>(F, PA); 3382 } 3383 3384 // depends - 3385 // Returns NULL if there is no dependence. 3386 // Otherwise, return a Dependence with as many details as possible. 3387 // Corresponds to Section 3.1 in the paper 3388 // 3389 // Practical Dependence Testing 3390 // Goff, Kennedy, Tseng 3391 // PLDI 1991 3392 // 3393 // Care is required to keep the routine below, getSplitIteration(), 3394 // up to date with respect to this routine. 3395 std::unique_ptr<Dependence> 3396 DependenceInfo::depends(Instruction *Src, Instruction *Dst, 3397 bool PossiblyLoopIndependent) { 3398 if (Src == Dst) 3399 PossiblyLoopIndependent = false; 3400 3401 if ((!Src->mayReadFromMemory() && !Src->mayWriteToMemory()) || 3402 (!Dst->mayReadFromMemory() && !Dst->mayWriteToMemory())) 3403 // if both instructions don't reference memory, there's no dependence 3404 return nullptr; 3405 3406 if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) { 3407 // can only analyze simple loads and stores, i.e., no calls, invokes, etc. 3408 LLVM_DEBUG(dbgs() << "can only handle simple loads and stores\n"); 3409 return make_unique<Dependence>(Src, Dst); 3410 } 3411 3412 assert(isLoadOrStore(Src) && "instruction is not load or store"); 3413 assert(isLoadOrStore(Dst) && "instruction is not load or store"); 3414 Value *SrcPtr = getLoadStorePointerOperand(Src); 3415 Value *DstPtr = getLoadStorePointerOperand(Dst); 3416 3417 switch (underlyingObjectsAlias(AA, F->getParent()->getDataLayout(), 3418 MemoryLocation::get(Dst), 3419 MemoryLocation::get(Src))) { 3420 case MayAlias: 3421 case PartialAlias: 3422 // cannot analyse objects if we don't understand their aliasing. 3423 LLVM_DEBUG(dbgs() << "can't analyze may or partial alias\n"); 3424 return make_unique<Dependence>(Src, Dst); 3425 case NoAlias: 3426 // If the objects noalias, they are distinct, accesses are independent. 3427 LLVM_DEBUG(dbgs() << "no alias\n"); 3428 return nullptr; 3429 case MustAlias: 3430 break; // The underlying objects alias; test accesses for dependence. 3431 } 3432 3433 // establish loop nesting levels 3434 establishNestingLevels(Src, Dst); 3435 LLVM_DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n"); 3436 LLVM_DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n"); 3437 3438 FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels); 3439 ++TotalArrayPairs; 3440 3441 unsigned Pairs = 1; 3442 SmallVector<Subscript, 2> Pair(Pairs); 3443 const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); 3444 const SCEV *DstSCEV = SE->getSCEV(DstPtr); 3445 LLVM_DEBUG(dbgs() << " SrcSCEV = " << *SrcSCEV << "\n"); 3446 LLVM_DEBUG(dbgs() << " DstSCEV = " << *DstSCEV << "\n"); 3447 Pair[0].Src = SrcSCEV; 3448 Pair[0].Dst = DstSCEV; 3449 3450 if (Delinearize) { 3451 if (tryDelinearize(Src, Dst, Pair)) { 3452 LLVM_DEBUG(dbgs() << " delinearized\n"); 3453 Pairs = Pair.size(); 3454 } 3455 } 3456 3457 for (unsigned P = 0; P < Pairs; ++P) { 3458 Pair[P].Loops.resize(MaxLevels + 1); 3459 Pair[P].GroupLoops.resize(MaxLevels + 1); 3460 Pair[P].Group.resize(Pairs); 3461 removeMatchingExtensions(&Pair[P]); 3462 Pair[P].Classification = 3463 classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), 3464 Pair[P].Dst, LI->getLoopFor(Dst->getParent()), 3465 Pair[P].Loops); 3466 Pair[P].GroupLoops = Pair[P].Loops; 3467 Pair[P].Group.set(P); 3468 LLVM_DEBUG(dbgs() << " subscript " << P << "\n"); 3469 LLVM_DEBUG(dbgs() << "\tsrc = " << *Pair[P].Src << "\n"); 3470 LLVM_DEBUG(dbgs() << "\tdst = " << *Pair[P].Dst << "\n"); 3471 LLVM_DEBUG(dbgs() << "\tclass = " << Pair[P].Classification << "\n"); 3472 LLVM_DEBUG(dbgs() << "\tloops = "); 3473 LLVM_DEBUG(dumpSmallBitVector(Pair[P].Loops)); 3474 } 3475 3476 SmallBitVector Separable(Pairs); 3477 SmallBitVector Coupled(Pairs); 3478 3479 // Partition subscripts into separable and minimally-coupled groups 3480 // Algorithm in paper is algorithmically better; 3481 // this may be faster in practice. Check someday. 3482 // 3483 // Here's an example of how it works. Consider this code: 3484 // 3485 // for (i = ...) { 3486 // for (j = ...) { 3487 // for (k = ...) { 3488 // for (l = ...) { 3489 // for (m = ...) { 3490 // A[i][j][k][m] = ...; 3491 // ... = A[0][j][l][i + j]; 3492 // } 3493 // } 3494 // } 3495 // } 3496 // } 3497 // 3498 // There are 4 subscripts here: 3499 // 0 [i] and [0] 3500 // 1 [j] and [j] 3501 // 2 [k] and [l] 3502 // 3 [m] and [i + j] 3503 // 3504 // We've already classified each subscript pair as ZIV, SIV, etc., 3505 // and collected all the loops mentioned by pair P in Pair[P].Loops. 3506 // In addition, we've initialized Pair[P].GroupLoops to Pair[P].Loops 3507 // and set Pair[P].Group = {P}. 3508 // 3509 // Src Dst Classification Loops GroupLoops Group 3510 // 0 [i] [0] SIV {1} {1} {0} 3511 // 1 [j] [j] SIV {2} {2} {1} 3512 // 2 [k] [l] RDIV {3,4} {3,4} {2} 3513 // 3 [m] [i + j] MIV {1,2,5} {1,2,5} {3} 3514 // 3515 // For each subscript SI 0 .. 3, we consider each remaining subscript, SJ. 3516 // So, 0 is compared against 1, 2, and 3; 1 is compared against 2 and 3, etc. 3517 // 3518 // We begin by comparing 0 and 1. The intersection of the GroupLoops is empty. 3519 // Next, 0 and 2. Again, the intersection of their GroupLoops is empty. 3520 // Next 0 and 3. The intersection of their GroupLoop = {1}, not empty, 3521 // so Pair[3].Group = {0,3} and Done = false (that is, 0 will not be added 3522 // to either Separable or Coupled). 3523 // 3524 // Next, we consider 1 and 2. The intersection of the GroupLoops is empty. 3525 // Next, 1 and 3. The intersection of their GroupLoops = {2}, not empty, 3526 // so Pair[3].Group = {0, 1, 3} and Done = false. 3527 // 3528 // Next, we compare 2 against 3. The intersection of the GroupLoops is empty. 3529 // Since Done remains true, we add 2 to the set of Separable pairs. 3530 // 3531 // Finally, we consider 3. There's nothing to compare it with, 3532 // so Done remains true and we add it to the Coupled set. 3533 // Pair[3].Group = {0, 1, 3} and GroupLoops = {1, 2, 5}. 3534 // 3535 // In the end, we've got 1 separable subscript and 1 coupled group. 3536 for (unsigned SI = 0; SI < Pairs; ++SI) { 3537 if (Pair[SI].Classification == Subscript::NonLinear) { 3538 // ignore these, but collect loops for later 3539 ++NonlinearSubscriptPairs; 3540 collectCommonLoops(Pair[SI].Src, 3541 LI->getLoopFor(Src->getParent()), 3542 Pair[SI].Loops); 3543 collectCommonLoops(Pair[SI].Dst, 3544 LI->getLoopFor(Dst->getParent()), 3545 Pair[SI].Loops); 3546 Result.Consistent = false; 3547 } else if (Pair[SI].Classification == Subscript::ZIV) { 3548 // always separable 3549 Separable.set(SI); 3550 } 3551 else { 3552 // SIV, RDIV, or MIV, so check for coupled group 3553 bool Done = true; 3554 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { 3555 SmallBitVector Intersection = Pair[SI].GroupLoops; 3556 Intersection &= Pair[SJ].GroupLoops; 3557 if (Intersection.any()) { 3558 // accumulate set of all the loops in group 3559 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; 3560 // accumulate set of all subscripts in group 3561 Pair[SJ].Group |= Pair[SI].Group; 3562 Done = false; 3563 } 3564 } 3565 if (Done) { 3566 if (Pair[SI].Group.count() == 1) { 3567 Separable.set(SI); 3568 ++SeparableSubscriptPairs; 3569 } 3570 else { 3571 Coupled.set(SI); 3572 ++CoupledSubscriptPairs; 3573 } 3574 } 3575 } 3576 } 3577 3578 LLVM_DEBUG(dbgs() << " Separable = "); 3579 LLVM_DEBUG(dumpSmallBitVector(Separable)); 3580 LLVM_DEBUG(dbgs() << " Coupled = "); 3581 LLVM_DEBUG(dumpSmallBitVector(Coupled)); 3582 3583 Constraint NewConstraint; 3584 NewConstraint.setAny(SE); 3585 3586 // test separable subscripts 3587 for (unsigned SI : Separable.set_bits()) { 3588 LLVM_DEBUG(dbgs() << "testing subscript " << SI); 3589 switch (Pair[SI].Classification) { 3590 case Subscript::ZIV: 3591 LLVM_DEBUG(dbgs() << ", ZIV\n"); 3592 if (testZIV(Pair[SI].Src, Pair[SI].Dst, Result)) 3593 return nullptr; 3594 break; 3595 case Subscript::SIV: { 3596 LLVM_DEBUG(dbgs() << ", SIV\n"); 3597 unsigned Level; 3598 const SCEV *SplitIter = nullptr; 3599 if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level, Result, NewConstraint, 3600 SplitIter)) 3601 return nullptr; 3602 break; 3603 } 3604 case Subscript::RDIV: 3605 LLVM_DEBUG(dbgs() << ", RDIV\n"); 3606 if (testRDIV(Pair[SI].Src, Pair[SI].Dst, Result)) 3607 return nullptr; 3608 break; 3609 case Subscript::MIV: 3610 LLVM_DEBUG(dbgs() << ", MIV\n"); 3611 if (testMIV(Pair[SI].Src, Pair[SI].Dst, Pair[SI].Loops, Result)) 3612 return nullptr; 3613 break; 3614 default: 3615 llvm_unreachable("subscript has unexpected classification"); 3616 } 3617 } 3618 3619 if (Coupled.count()) { 3620 // test coupled subscript groups 3621 LLVM_DEBUG(dbgs() << "starting on coupled subscripts\n"); 3622 LLVM_DEBUG(dbgs() << "MaxLevels + 1 = " << MaxLevels + 1 << "\n"); 3623 SmallVector<Constraint, 4> Constraints(MaxLevels + 1); 3624 for (unsigned II = 0; II <= MaxLevels; ++II) 3625 Constraints[II].setAny(SE); 3626 for (unsigned SI : Coupled.set_bits()) { 3627 LLVM_DEBUG(dbgs() << "testing subscript group " << SI << " { "); 3628 SmallBitVector Group(Pair[SI].Group); 3629 SmallBitVector Sivs(Pairs); 3630 SmallBitVector Mivs(Pairs); 3631 SmallBitVector ConstrainedLevels(MaxLevels + 1); 3632 SmallVector<Subscript *, 4> PairsInGroup; 3633 for (unsigned SJ : Group.set_bits()) { 3634 LLVM_DEBUG(dbgs() << SJ << " "); 3635 if (Pair[SJ].Classification == Subscript::SIV) 3636 Sivs.set(SJ); 3637 else 3638 Mivs.set(SJ); 3639 PairsInGroup.push_back(&Pair[SJ]); 3640 } 3641 unifySubscriptType(PairsInGroup); 3642 LLVM_DEBUG(dbgs() << "}\n"); 3643 while (Sivs.any()) { 3644 bool Changed = false; 3645 for (unsigned SJ : Sivs.set_bits()) { 3646 LLVM_DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n"); 3647 // SJ is an SIV subscript that's part of the current coupled group 3648 unsigned Level; 3649 const SCEV *SplitIter = nullptr; 3650 LLVM_DEBUG(dbgs() << "SIV\n"); 3651 if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, Result, NewConstraint, 3652 SplitIter)) 3653 return nullptr; 3654 ConstrainedLevels.set(Level); 3655 if (intersectConstraints(&Constraints[Level], &NewConstraint)) { 3656 if (Constraints[Level].isEmpty()) { 3657 ++DeltaIndependence; 3658 return nullptr; 3659 } 3660 Changed = true; 3661 } 3662 Sivs.reset(SJ); 3663 } 3664 if (Changed) { 3665 // propagate, possibly creating new SIVs and ZIVs 3666 LLVM_DEBUG(dbgs() << " propagating\n"); 3667 LLVM_DEBUG(dbgs() << "\tMivs = "); 3668 LLVM_DEBUG(dumpSmallBitVector(Mivs)); 3669 for (unsigned SJ : Mivs.set_bits()) { 3670 // SJ is an MIV subscript that's part of the current coupled group 3671 LLVM_DEBUG(dbgs() << "\tSJ = " << SJ << "\n"); 3672 if (propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, 3673 Constraints, Result.Consistent)) { 3674 LLVM_DEBUG(dbgs() << "\t Changed\n"); 3675 ++DeltaPropagations; 3676 Pair[SJ].Classification = 3677 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), 3678 Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), 3679 Pair[SJ].Loops); 3680 switch (Pair[SJ].Classification) { 3681 case Subscript::ZIV: 3682 LLVM_DEBUG(dbgs() << "ZIV\n"); 3683 if (testZIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) 3684 return nullptr; 3685 Mivs.reset(SJ); 3686 break; 3687 case Subscript::SIV: 3688 Sivs.set(SJ); 3689 Mivs.reset(SJ); 3690 break; 3691 case Subscript::RDIV: 3692 case Subscript::MIV: 3693 break; 3694 default: 3695 llvm_unreachable("bad subscript classification"); 3696 } 3697 } 3698 } 3699 } 3700 } 3701 3702 // test & propagate remaining RDIVs 3703 for (unsigned SJ : Mivs.set_bits()) { 3704 if (Pair[SJ].Classification == Subscript::RDIV) { 3705 LLVM_DEBUG(dbgs() << "RDIV test\n"); 3706 if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) 3707 return nullptr; 3708 // I don't yet understand how to propagate RDIV results 3709 Mivs.reset(SJ); 3710 } 3711 } 3712 3713 // test remaining MIVs 3714 // This code is temporary. 3715 // Better to somehow test all remaining subscripts simultaneously. 3716 for (unsigned SJ : Mivs.set_bits()) { 3717 if (Pair[SJ].Classification == Subscript::MIV) { 3718 LLVM_DEBUG(dbgs() << "MIV test\n"); 3719 if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Result)) 3720 return nullptr; 3721 } 3722 else 3723 llvm_unreachable("expected only MIV subscripts at this point"); 3724 } 3725 3726 // update Result.DV from constraint vector 3727 LLVM_DEBUG(dbgs() << " updating\n"); 3728 for (unsigned SJ : ConstrainedLevels.set_bits()) { 3729 if (SJ > CommonLevels) 3730 break; 3731 updateDirection(Result.DV[SJ - 1], Constraints[SJ]); 3732 if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE) 3733 return nullptr; 3734 } 3735 } 3736 } 3737 3738 // Make sure the Scalar flags are set correctly. 3739 SmallBitVector CompleteLoops(MaxLevels + 1); 3740 for (unsigned SI = 0; SI < Pairs; ++SI) 3741 CompleteLoops |= Pair[SI].Loops; 3742 for (unsigned II = 1; II <= CommonLevels; ++II) 3743 if (CompleteLoops[II]) 3744 Result.DV[II - 1].Scalar = false; 3745 3746 if (PossiblyLoopIndependent) { 3747 // Make sure the LoopIndependent flag is set correctly. 3748 // All directions must include equal, otherwise no 3749 // loop-independent dependence is possible. 3750 for (unsigned II = 1; II <= CommonLevels; ++II) { 3751 if (!(Result.getDirection(II) & Dependence::DVEntry::EQ)) { 3752 Result.LoopIndependent = false; 3753 break; 3754 } 3755 } 3756 } 3757 else { 3758 // On the other hand, if all directions are equal and there's no 3759 // loop-independent dependence possible, then no dependence exists. 3760 bool AllEqual = true; 3761 for (unsigned II = 1; II <= CommonLevels; ++II) { 3762 if (Result.getDirection(II) != Dependence::DVEntry::EQ) { 3763 AllEqual = false; 3764 break; 3765 } 3766 } 3767 if (AllEqual) 3768 return nullptr; 3769 } 3770 3771 return make_unique<FullDependence>(std::move(Result)); 3772 } 3773 3774 3775 3776 //===----------------------------------------------------------------------===// 3777 // getSplitIteration - 3778 // Rather than spend rarely-used space recording the splitting iteration 3779 // during the Weak-Crossing SIV test, we re-compute it on demand. 3780 // The re-computation is basically a repeat of the entire dependence test, 3781 // though simplified since we know that the dependence exists. 3782 // It's tedious, since we must go through all propagations, etc. 3783 // 3784 // Care is required to keep this code up to date with respect to the routine 3785 // above, depends(). 3786 // 3787 // Generally, the dependence analyzer will be used to build 3788 // a dependence graph for a function (basically a map from instructions 3789 // to dependences). Looking for cycles in the graph shows us loops 3790 // that cannot be trivially vectorized/parallelized. 3791 // 3792 // We can try to improve the situation by examining all the dependences 3793 // that make up the cycle, looking for ones we can break. 3794 // Sometimes, peeling the first or last iteration of a loop will break 3795 // dependences, and we've got flags for those possibilities. 3796 // Sometimes, splitting a loop at some other iteration will do the trick, 3797 // and we've got a flag for that case. Rather than waste the space to 3798 // record the exact iteration (since we rarely know), we provide 3799 // a method that calculates the iteration. It's a drag that it must work 3800 // from scratch, but wonderful in that it's possible. 3801 // 3802 // Here's an example: 3803 // 3804 // for (i = 0; i < 10; i++) 3805 // A[i] = ... 3806 // ... = A[11 - i] 3807 // 3808 // There's a loop-carried flow dependence from the store to the load, 3809 // found by the weak-crossing SIV test. The dependence will have a flag, 3810 // indicating that the dependence can be broken by splitting the loop. 3811 // Calling getSplitIteration will return 5. 3812 // Splitting the loop breaks the dependence, like so: 3813 // 3814 // for (i = 0; i <= 5; i++) 3815 // A[i] = ... 3816 // ... = A[11 - i] 3817 // for (i = 6; i < 10; i++) 3818 // A[i] = ... 3819 // ... = A[11 - i] 3820 // 3821 // breaks the dependence and allows us to vectorize/parallelize 3822 // both loops. 3823 const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep, 3824 unsigned SplitLevel) { 3825 assert(Dep.isSplitable(SplitLevel) && 3826 "Dep should be splitable at SplitLevel"); 3827 Instruction *Src = Dep.getSrc(); 3828 Instruction *Dst = Dep.getDst(); 3829 assert(Src->mayReadFromMemory() || Src->mayWriteToMemory()); 3830 assert(Dst->mayReadFromMemory() || Dst->mayWriteToMemory()); 3831 assert(isLoadOrStore(Src)); 3832 assert(isLoadOrStore(Dst)); 3833 Value *SrcPtr = getLoadStorePointerOperand(Src); 3834 Value *DstPtr = getLoadStorePointerOperand(Dst); 3835 assert(underlyingObjectsAlias(AA, F->getParent()->getDataLayout(), 3836 MemoryLocation::get(Dst), 3837 MemoryLocation::get(Src)) == MustAlias); 3838 3839 // establish loop nesting levels 3840 establishNestingLevels(Src, Dst); 3841 3842 FullDependence Result(Src, Dst, false, CommonLevels); 3843 3844 unsigned Pairs = 1; 3845 SmallVector<Subscript, 2> Pair(Pairs); 3846 const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); 3847 const SCEV *DstSCEV = SE->getSCEV(DstPtr); 3848 Pair[0].Src = SrcSCEV; 3849 Pair[0].Dst = DstSCEV; 3850 3851 if (Delinearize) { 3852 if (tryDelinearize(Src, Dst, Pair)) { 3853 LLVM_DEBUG(dbgs() << " delinearized\n"); 3854 Pairs = Pair.size(); 3855 } 3856 } 3857 3858 for (unsigned P = 0; P < Pairs; ++P) { 3859 Pair[P].Loops.resize(MaxLevels + 1); 3860 Pair[P].GroupLoops.resize(MaxLevels + 1); 3861 Pair[P].Group.resize(Pairs); 3862 removeMatchingExtensions(&Pair[P]); 3863 Pair[P].Classification = 3864 classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), 3865 Pair[P].Dst, LI->getLoopFor(Dst->getParent()), 3866 Pair[P].Loops); 3867 Pair[P].GroupLoops = Pair[P].Loops; 3868 Pair[P].Group.set(P); 3869 } 3870 3871 SmallBitVector Separable(Pairs); 3872 SmallBitVector Coupled(Pairs); 3873 3874 // partition subscripts into separable and minimally-coupled groups 3875 for (unsigned SI = 0; SI < Pairs; ++SI) { 3876 if (Pair[SI].Classification == Subscript::NonLinear) { 3877 // ignore these, but collect loops for later 3878 collectCommonLoops(Pair[SI].Src, 3879 LI->getLoopFor(Src->getParent()), 3880 Pair[SI].Loops); 3881 collectCommonLoops(Pair[SI].Dst, 3882 LI->getLoopFor(Dst->getParent()), 3883 Pair[SI].Loops); 3884 Result.Consistent = false; 3885 } 3886 else if (Pair[SI].Classification == Subscript::ZIV) 3887 Separable.set(SI); 3888 else { 3889 // SIV, RDIV, or MIV, so check for coupled group 3890 bool Done = true; 3891 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { 3892 SmallBitVector Intersection = Pair[SI].GroupLoops; 3893 Intersection &= Pair[SJ].GroupLoops; 3894 if (Intersection.any()) { 3895 // accumulate set of all the loops in group 3896 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; 3897 // accumulate set of all subscripts in group 3898 Pair[SJ].Group |= Pair[SI].Group; 3899 Done = false; 3900 } 3901 } 3902 if (Done) { 3903 if (Pair[SI].Group.count() == 1) 3904 Separable.set(SI); 3905 else 3906 Coupled.set(SI); 3907 } 3908 } 3909 } 3910 3911 Constraint NewConstraint; 3912 NewConstraint.setAny(SE); 3913 3914 // test separable subscripts 3915 for (unsigned SI : Separable.set_bits()) { 3916 switch (Pair[SI].Classification) { 3917 case Subscript::SIV: { 3918 unsigned Level; 3919 const SCEV *SplitIter = nullptr; 3920 (void) testSIV(Pair[SI].Src, Pair[SI].Dst, Level, 3921 Result, NewConstraint, SplitIter); 3922 if (Level == SplitLevel) { 3923 assert(SplitIter != nullptr); 3924 return SplitIter; 3925 } 3926 break; 3927 } 3928 case Subscript::ZIV: 3929 case Subscript::RDIV: 3930 case Subscript::MIV: 3931 break; 3932 default: 3933 llvm_unreachable("subscript has unexpected classification"); 3934 } 3935 } 3936 3937 if (Coupled.count()) { 3938 // test coupled subscript groups 3939 SmallVector<Constraint, 4> Constraints(MaxLevels + 1); 3940 for (unsigned II = 0; II <= MaxLevels; ++II) 3941 Constraints[II].setAny(SE); 3942 for (unsigned SI : Coupled.set_bits()) { 3943 SmallBitVector Group(Pair[SI].Group); 3944 SmallBitVector Sivs(Pairs); 3945 SmallBitVector Mivs(Pairs); 3946 SmallBitVector ConstrainedLevels(MaxLevels + 1); 3947 for (unsigned SJ : Group.set_bits()) { 3948 if (Pair[SJ].Classification == Subscript::SIV) 3949 Sivs.set(SJ); 3950 else 3951 Mivs.set(SJ); 3952 } 3953 while (Sivs.any()) { 3954 bool Changed = false; 3955 for (unsigned SJ : Sivs.set_bits()) { 3956 // SJ is an SIV subscript that's part of the current coupled group 3957 unsigned Level; 3958 const SCEV *SplitIter = nullptr; 3959 (void) testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, 3960 Result, NewConstraint, SplitIter); 3961 if (Level == SplitLevel && SplitIter) 3962 return SplitIter; 3963 ConstrainedLevels.set(Level); 3964 if (intersectConstraints(&Constraints[Level], &NewConstraint)) 3965 Changed = true; 3966 Sivs.reset(SJ); 3967 } 3968 if (Changed) { 3969 // propagate, possibly creating new SIVs and ZIVs 3970 for (unsigned SJ : Mivs.set_bits()) { 3971 // SJ is an MIV subscript that's part of the current coupled group 3972 if (propagate(Pair[SJ].Src, Pair[SJ].Dst, 3973 Pair[SJ].Loops, Constraints, Result.Consistent)) { 3974 Pair[SJ].Classification = 3975 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), 3976 Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), 3977 Pair[SJ].Loops); 3978 switch (Pair[SJ].Classification) { 3979 case Subscript::ZIV: 3980 Mivs.reset(SJ); 3981 break; 3982 case Subscript::SIV: 3983 Sivs.set(SJ); 3984 Mivs.reset(SJ); 3985 break; 3986 case Subscript::RDIV: 3987 case Subscript::MIV: 3988 break; 3989 default: 3990 llvm_unreachable("bad subscript classification"); 3991 } 3992 } 3993 } 3994 } 3995 } 3996 } 3997 } 3998 llvm_unreachable("somehow reached end of routine"); 3999 return nullptr; 4000 } 4001