1 //===-- DependenceAnalysis.cpp - DA Implementation --------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // DependenceAnalysis is an LLVM pass that analyses dependences between memory 10 // accesses. Currently, it is an (incomplete) implementation of the approach 11 // described in 12 // 13 // Practical Dependence Testing 14 // Goff, Kennedy, Tseng 15 // PLDI 1991 16 // 17 // There's a single entry point that analyzes the dependence between a pair 18 // of memory references in a function, returning either NULL, for no dependence, 19 // or a more-or-less detailed description of the dependence between them. 20 // 21 // Currently, the implementation cannot propagate constraints between 22 // coupled RDIV subscripts and lacks a multi-subscript MIV test. 23 // Both of these are conservative weaknesses; 24 // that is, not a source of correctness problems. 25 // 26 // Since Clang linearizes some array subscripts, the dependence 27 // analysis is using SCEV->delinearize to recover the representation of multiple 28 // subscripts, and thus avoid the more expensive and less precise MIV tests. The 29 // delinearization is controlled by the flag -da-delinearize. 30 // 31 // We should pay some careful attention to the possibility of integer overflow 32 // in the implementation of the various tests. This could happen with Add, 33 // Subtract, or Multiply, with both APInt's and SCEV's. 34 // 35 // Some non-linear subscript pairs can be handled by the GCD test 36 // (and perhaps other tests). 37 // Should explore how often these things occur. 38 // 39 // Finally, it seems like certain test cases expose weaknesses in the SCEV 40 // simplification, especially in the handling of sign and zero extensions. 41 // It could be useful to spend time exploring these. 42 // 43 // Please note that this is work in progress and the interface is subject to 44 // change. 45 // 46 //===----------------------------------------------------------------------===// 47 // // 48 // In memory of Ken Kennedy, 1945 - 2007 // 49 // // 50 //===----------------------------------------------------------------------===// 51 52 #include "llvm/Analysis/DependenceAnalysis.h" 53 #include "llvm/ADT/STLExtras.h" 54 #include "llvm/ADT/Statistic.h" 55 #include "llvm/Analysis/AliasAnalysis.h" 56 #include "llvm/Analysis/LoopInfo.h" 57 #include "llvm/Analysis/ScalarEvolution.h" 58 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 59 #include "llvm/Analysis/ValueTracking.h" 60 #include "llvm/Config/llvm-config.h" 61 #include "llvm/IR/InstIterator.h" 62 #include "llvm/IR/Module.h" 63 #include "llvm/IR/Operator.h" 64 #include "llvm/InitializePasses.h" 65 #include "llvm/Support/CommandLine.h" 66 #include "llvm/Support/Debug.h" 67 #include "llvm/Support/ErrorHandling.h" 68 #include "llvm/Support/raw_ostream.h" 69 70 using namespace llvm; 71 72 #define DEBUG_TYPE "da" 73 74 //===----------------------------------------------------------------------===// 75 // statistics 76 77 STATISTIC(TotalArrayPairs, "Array pairs tested"); 78 STATISTIC(SeparableSubscriptPairs, "Separable subscript pairs"); 79 STATISTIC(CoupledSubscriptPairs, "Coupled subscript pairs"); 80 STATISTIC(NonlinearSubscriptPairs, "Nonlinear subscript pairs"); 81 STATISTIC(ZIVapplications, "ZIV applications"); 82 STATISTIC(ZIVindependence, "ZIV independence"); 83 STATISTIC(StrongSIVapplications, "Strong SIV applications"); 84 STATISTIC(StrongSIVsuccesses, "Strong SIV successes"); 85 STATISTIC(StrongSIVindependence, "Strong SIV independence"); 86 STATISTIC(WeakCrossingSIVapplications, "Weak-Crossing SIV applications"); 87 STATISTIC(WeakCrossingSIVsuccesses, "Weak-Crossing SIV successes"); 88 STATISTIC(WeakCrossingSIVindependence, "Weak-Crossing SIV independence"); 89 STATISTIC(ExactSIVapplications, "Exact SIV applications"); 90 STATISTIC(ExactSIVsuccesses, "Exact SIV successes"); 91 STATISTIC(ExactSIVindependence, "Exact SIV independence"); 92 STATISTIC(WeakZeroSIVapplications, "Weak-Zero SIV applications"); 93 STATISTIC(WeakZeroSIVsuccesses, "Weak-Zero SIV successes"); 94 STATISTIC(WeakZeroSIVindependence, "Weak-Zero SIV independence"); 95 STATISTIC(ExactRDIVapplications, "Exact RDIV applications"); 96 STATISTIC(ExactRDIVindependence, "Exact RDIV independence"); 97 STATISTIC(SymbolicRDIVapplications, "Symbolic RDIV applications"); 98 STATISTIC(SymbolicRDIVindependence, "Symbolic RDIV independence"); 99 STATISTIC(DeltaApplications, "Delta applications"); 100 STATISTIC(DeltaSuccesses, "Delta successes"); 101 STATISTIC(DeltaIndependence, "Delta independence"); 102 STATISTIC(DeltaPropagations, "Delta propagations"); 103 STATISTIC(GCDapplications, "GCD applications"); 104 STATISTIC(GCDsuccesses, "GCD successes"); 105 STATISTIC(GCDindependence, "GCD independence"); 106 STATISTIC(BanerjeeApplications, "Banerjee applications"); 107 STATISTIC(BanerjeeIndependence, "Banerjee independence"); 108 STATISTIC(BanerjeeSuccesses, "Banerjee successes"); 109 110 static cl::opt<bool> 111 Delinearize("da-delinearize", cl::init(true), cl::Hidden, cl::ZeroOrMore, 112 cl::desc("Try to delinearize array references.")); 113 static cl::opt<bool> DisableDelinearizationChecks( 114 "da-disable-delinearization-checks", cl::init(false), cl::Hidden, 115 cl::ZeroOrMore, 116 cl::desc( 117 "Disable checks that try to statically verify validity of " 118 "delinearized subscripts. Enabling this option may result in incorrect " 119 "dependence vectors for languages that allow the subscript of one " 120 "dimension to underflow or overflow into another dimension.")); 121 122 //===----------------------------------------------------------------------===// 123 // basics 124 125 DependenceAnalysis::Result 126 DependenceAnalysis::run(Function &F, FunctionAnalysisManager &FAM) { 127 auto &AA = FAM.getResult<AAManager>(F); 128 auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F); 129 auto &LI = FAM.getResult<LoopAnalysis>(F); 130 return DependenceInfo(&F, &AA, &SE, &LI); 131 } 132 133 AnalysisKey DependenceAnalysis::Key; 134 135 INITIALIZE_PASS_BEGIN(DependenceAnalysisWrapperPass, "da", 136 "Dependence Analysis", true, true) 137 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 138 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) 139 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 140 INITIALIZE_PASS_END(DependenceAnalysisWrapperPass, "da", "Dependence Analysis", 141 true, true) 142 143 char DependenceAnalysisWrapperPass::ID = 0; 144 145 DependenceAnalysisWrapperPass::DependenceAnalysisWrapperPass() 146 : FunctionPass(ID) { 147 initializeDependenceAnalysisWrapperPassPass(*PassRegistry::getPassRegistry()); 148 } 149 150 FunctionPass *llvm::createDependenceAnalysisWrapperPass() { 151 return new DependenceAnalysisWrapperPass(); 152 } 153 154 bool DependenceAnalysisWrapperPass::runOnFunction(Function &F) { 155 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); 156 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 157 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 158 info.reset(new DependenceInfo(&F, &AA, &SE, &LI)); 159 return false; 160 } 161 162 DependenceInfo &DependenceAnalysisWrapperPass::getDI() const { return *info; } 163 164 void DependenceAnalysisWrapperPass::releaseMemory() { info.reset(); } 165 166 void DependenceAnalysisWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { 167 AU.setPreservesAll(); 168 AU.addRequiredTransitive<AAResultsWrapperPass>(); 169 AU.addRequiredTransitive<ScalarEvolutionWrapperPass>(); 170 AU.addRequiredTransitive<LoopInfoWrapperPass>(); 171 } 172 173 174 // Used to test the dependence analyzer. 175 // Looks through the function, noting loads and stores. 176 // Calls depends() on every possible pair and prints out the result. 177 // Ignores all other instructions. 178 static void dumpExampleDependence(raw_ostream &OS, DependenceInfo *DA) { 179 auto *F = DA->getFunction(); 180 for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F); SrcI != SrcE; 181 ++SrcI) { 182 if (isa<StoreInst>(*SrcI) || isa<LoadInst>(*SrcI)) { 183 for (inst_iterator DstI = SrcI, DstE = inst_end(F); 184 DstI != DstE; ++DstI) { 185 if (isa<StoreInst>(*DstI) || isa<LoadInst>(*DstI)) { 186 OS << "da analyze - "; 187 if (auto D = DA->depends(&*SrcI, &*DstI, true)) { 188 D->dump(OS); 189 for (unsigned Level = 1; Level <= D->getLevels(); Level++) { 190 if (D->isSplitable(Level)) { 191 OS << "da analyze - split level = " << Level; 192 OS << ", iteration = " << *DA->getSplitIteration(*D, Level); 193 OS << "!\n"; 194 } 195 } 196 } 197 else 198 OS << "none!\n"; 199 } 200 } 201 } 202 } 203 } 204 205 void DependenceAnalysisWrapperPass::print(raw_ostream &OS, 206 const Module *) const { 207 dumpExampleDependence(OS, info.get()); 208 } 209 210 PreservedAnalyses 211 DependenceAnalysisPrinterPass::run(Function &F, FunctionAnalysisManager &FAM) { 212 OS << "'Dependence Analysis' for function '" << F.getName() << "':\n"; 213 dumpExampleDependence(OS, &FAM.getResult<DependenceAnalysis>(F)); 214 return PreservedAnalyses::all(); 215 } 216 217 //===----------------------------------------------------------------------===// 218 // Dependence methods 219 220 // Returns true if this is an input dependence. 221 bool Dependence::isInput() const { 222 return Src->mayReadFromMemory() && Dst->mayReadFromMemory(); 223 } 224 225 226 // Returns true if this is an output dependence. 227 bool Dependence::isOutput() const { 228 return Src->mayWriteToMemory() && Dst->mayWriteToMemory(); 229 } 230 231 232 // Returns true if this is an flow (aka true) dependence. 233 bool Dependence::isFlow() const { 234 return Src->mayWriteToMemory() && Dst->mayReadFromMemory(); 235 } 236 237 238 // Returns true if this is an anti dependence. 239 bool Dependence::isAnti() const { 240 return Src->mayReadFromMemory() && Dst->mayWriteToMemory(); 241 } 242 243 244 // Returns true if a particular level is scalar; that is, 245 // if no subscript in the source or destination mention the induction 246 // variable associated with the loop at this level. 247 // Leave this out of line, so it will serve as a virtual method anchor 248 bool Dependence::isScalar(unsigned level) const { 249 return false; 250 } 251 252 253 //===----------------------------------------------------------------------===// 254 // FullDependence methods 255 256 FullDependence::FullDependence(Instruction *Source, Instruction *Destination, 257 bool PossiblyLoopIndependent, 258 unsigned CommonLevels) 259 : Dependence(Source, Destination), Levels(CommonLevels), 260 LoopIndependent(PossiblyLoopIndependent) { 261 Consistent = true; 262 if (CommonLevels) 263 DV = std::make_unique<DVEntry[]>(CommonLevels); 264 } 265 266 // The rest are simple getters that hide the implementation. 267 268 // getDirection - Returns the direction associated with a particular level. 269 unsigned FullDependence::getDirection(unsigned Level) const { 270 assert(0 < Level && Level <= Levels && "Level out of range"); 271 return DV[Level - 1].Direction; 272 } 273 274 275 // Returns the distance (or NULL) associated with a particular level. 276 const SCEV *FullDependence::getDistance(unsigned Level) const { 277 assert(0 < Level && Level <= Levels && "Level out of range"); 278 return DV[Level - 1].Distance; 279 } 280 281 282 // Returns true if a particular level is scalar; that is, 283 // if no subscript in the source or destination mention the induction 284 // variable associated with the loop at this level. 285 bool FullDependence::isScalar(unsigned Level) const { 286 assert(0 < Level && Level <= Levels && "Level out of range"); 287 return DV[Level - 1].Scalar; 288 } 289 290 291 // Returns true if peeling the first iteration from this loop 292 // will break this dependence. 293 bool FullDependence::isPeelFirst(unsigned Level) const { 294 assert(0 < Level && Level <= Levels && "Level out of range"); 295 return DV[Level - 1].PeelFirst; 296 } 297 298 299 // Returns true if peeling the last iteration from this loop 300 // will break this dependence. 301 bool FullDependence::isPeelLast(unsigned Level) const { 302 assert(0 < Level && Level <= Levels && "Level out of range"); 303 return DV[Level - 1].PeelLast; 304 } 305 306 307 // Returns true if splitting this loop will break the dependence. 308 bool FullDependence::isSplitable(unsigned Level) const { 309 assert(0 < Level && Level <= Levels && "Level out of range"); 310 return DV[Level - 1].Splitable; 311 } 312 313 314 //===----------------------------------------------------------------------===// 315 // DependenceInfo::Constraint methods 316 317 // If constraint is a point <X, Y>, returns X. 318 // Otherwise assert. 319 const SCEV *DependenceInfo::Constraint::getX() const { 320 assert(Kind == Point && "Kind should be Point"); 321 return A; 322 } 323 324 325 // If constraint is a point <X, Y>, returns Y. 326 // Otherwise assert. 327 const SCEV *DependenceInfo::Constraint::getY() const { 328 assert(Kind == Point && "Kind should be Point"); 329 return B; 330 } 331 332 333 // If constraint is a line AX + BY = C, returns A. 334 // Otherwise assert. 335 const SCEV *DependenceInfo::Constraint::getA() const { 336 assert((Kind == Line || Kind == Distance) && 337 "Kind should be Line (or Distance)"); 338 return A; 339 } 340 341 342 // If constraint is a line AX + BY = C, returns B. 343 // Otherwise assert. 344 const SCEV *DependenceInfo::Constraint::getB() const { 345 assert((Kind == Line || Kind == Distance) && 346 "Kind should be Line (or Distance)"); 347 return B; 348 } 349 350 351 // If constraint is a line AX + BY = C, returns C. 352 // Otherwise assert. 353 const SCEV *DependenceInfo::Constraint::getC() const { 354 assert((Kind == Line || Kind == Distance) && 355 "Kind should be Line (or Distance)"); 356 return C; 357 } 358 359 360 // If constraint is a distance, returns D. 361 // Otherwise assert. 362 const SCEV *DependenceInfo::Constraint::getD() const { 363 assert(Kind == Distance && "Kind should be Distance"); 364 return SE->getNegativeSCEV(C); 365 } 366 367 368 // Returns the loop associated with this constraint. 369 const Loop *DependenceInfo::Constraint::getAssociatedLoop() const { 370 assert((Kind == Distance || Kind == Line || Kind == Point) && 371 "Kind should be Distance, Line, or Point"); 372 return AssociatedLoop; 373 } 374 375 void DependenceInfo::Constraint::setPoint(const SCEV *X, const SCEV *Y, 376 const Loop *CurLoop) { 377 Kind = Point; 378 A = X; 379 B = Y; 380 AssociatedLoop = CurLoop; 381 } 382 383 void DependenceInfo::Constraint::setLine(const SCEV *AA, const SCEV *BB, 384 const SCEV *CC, const Loop *CurLoop) { 385 Kind = Line; 386 A = AA; 387 B = BB; 388 C = CC; 389 AssociatedLoop = CurLoop; 390 } 391 392 void DependenceInfo::Constraint::setDistance(const SCEV *D, 393 const Loop *CurLoop) { 394 Kind = Distance; 395 A = SE->getOne(D->getType()); 396 B = SE->getNegativeSCEV(A); 397 C = SE->getNegativeSCEV(D); 398 AssociatedLoop = CurLoop; 399 } 400 401 void DependenceInfo::Constraint::setEmpty() { Kind = Empty; } 402 403 void DependenceInfo::Constraint::setAny(ScalarEvolution *NewSE) { 404 SE = NewSE; 405 Kind = Any; 406 } 407 408 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 409 // For debugging purposes. Dumps the constraint out to OS. 410 LLVM_DUMP_METHOD void DependenceInfo::Constraint::dump(raw_ostream &OS) const { 411 if (isEmpty()) 412 OS << " Empty\n"; 413 else if (isAny()) 414 OS << " Any\n"; 415 else if (isPoint()) 416 OS << " Point is <" << *getX() << ", " << *getY() << ">\n"; 417 else if (isDistance()) 418 OS << " Distance is " << *getD() << 419 " (" << *getA() << "*X + " << *getB() << "*Y = " << *getC() << ")\n"; 420 else if (isLine()) 421 OS << " Line is " << *getA() << "*X + " << 422 *getB() << "*Y = " << *getC() << "\n"; 423 else 424 llvm_unreachable("unknown constraint type in Constraint::dump"); 425 } 426 #endif 427 428 429 // Updates X with the intersection 430 // of the Constraints X and Y. Returns true if X has changed. 431 // Corresponds to Figure 4 from the paper 432 // 433 // Practical Dependence Testing 434 // Goff, Kennedy, Tseng 435 // PLDI 1991 436 bool DependenceInfo::intersectConstraints(Constraint *X, const Constraint *Y) { 437 ++DeltaApplications; 438 LLVM_DEBUG(dbgs() << "\tintersect constraints\n"); 439 LLVM_DEBUG(dbgs() << "\t X ="; X->dump(dbgs())); 440 LLVM_DEBUG(dbgs() << "\t Y ="; Y->dump(dbgs())); 441 assert(!Y->isPoint() && "Y must not be a Point"); 442 if (X->isAny()) { 443 if (Y->isAny()) 444 return false; 445 *X = *Y; 446 return true; 447 } 448 if (X->isEmpty()) 449 return false; 450 if (Y->isEmpty()) { 451 X->setEmpty(); 452 return true; 453 } 454 455 if (X->isDistance() && Y->isDistance()) { 456 LLVM_DEBUG(dbgs() << "\t intersect 2 distances\n"); 457 if (isKnownPredicate(CmpInst::ICMP_EQ, X->getD(), Y->getD())) 458 return false; 459 if (isKnownPredicate(CmpInst::ICMP_NE, X->getD(), Y->getD())) { 460 X->setEmpty(); 461 ++DeltaSuccesses; 462 return true; 463 } 464 // Hmmm, interesting situation. 465 // I guess if either is constant, keep it and ignore the other. 466 if (isa<SCEVConstant>(Y->getD())) { 467 *X = *Y; 468 return true; 469 } 470 return false; 471 } 472 473 // At this point, the pseudo-code in Figure 4 of the paper 474 // checks if (X->isPoint() && Y->isPoint()). 475 // This case can't occur in our implementation, 476 // since a Point can only arise as the result of intersecting 477 // two Line constraints, and the right-hand value, Y, is never 478 // the result of an intersection. 479 assert(!(X->isPoint() && Y->isPoint()) && 480 "We shouldn't ever see X->isPoint() && Y->isPoint()"); 481 482 if (X->isLine() && Y->isLine()) { 483 LLVM_DEBUG(dbgs() << "\t intersect 2 lines\n"); 484 const SCEV *Prod1 = SE->getMulExpr(X->getA(), Y->getB()); 485 const SCEV *Prod2 = SE->getMulExpr(X->getB(), Y->getA()); 486 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) { 487 // slopes are equal, so lines are parallel 488 LLVM_DEBUG(dbgs() << "\t\tsame slope\n"); 489 Prod1 = SE->getMulExpr(X->getC(), Y->getB()); 490 Prod2 = SE->getMulExpr(X->getB(), Y->getC()); 491 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) 492 return false; 493 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { 494 X->setEmpty(); 495 ++DeltaSuccesses; 496 return true; 497 } 498 return false; 499 } 500 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { 501 // slopes differ, so lines intersect 502 LLVM_DEBUG(dbgs() << "\t\tdifferent slopes\n"); 503 const SCEV *C1B2 = SE->getMulExpr(X->getC(), Y->getB()); 504 const SCEV *C1A2 = SE->getMulExpr(X->getC(), Y->getA()); 505 const SCEV *C2B1 = SE->getMulExpr(Y->getC(), X->getB()); 506 const SCEV *C2A1 = SE->getMulExpr(Y->getC(), X->getA()); 507 const SCEV *A1B2 = SE->getMulExpr(X->getA(), Y->getB()); 508 const SCEV *A2B1 = SE->getMulExpr(Y->getA(), X->getB()); 509 const SCEVConstant *C1A2_C2A1 = 510 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1A2, C2A1)); 511 const SCEVConstant *C1B2_C2B1 = 512 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1B2, C2B1)); 513 const SCEVConstant *A1B2_A2B1 = 514 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A1B2, A2B1)); 515 const SCEVConstant *A2B1_A1B2 = 516 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A2B1, A1B2)); 517 if (!C1B2_C2B1 || !C1A2_C2A1 || 518 !A1B2_A2B1 || !A2B1_A1B2) 519 return false; 520 APInt Xtop = C1B2_C2B1->getAPInt(); 521 APInt Xbot = A1B2_A2B1->getAPInt(); 522 APInt Ytop = C1A2_C2A1->getAPInt(); 523 APInt Ybot = A2B1_A1B2->getAPInt(); 524 LLVM_DEBUG(dbgs() << "\t\tXtop = " << Xtop << "\n"); 525 LLVM_DEBUG(dbgs() << "\t\tXbot = " << Xbot << "\n"); 526 LLVM_DEBUG(dbgs() << "\t\tYtop = " << Ytop << "\n"); 527 LLVM_DEBUG(dbgs() << "\t\tYbot = " << Ybot << "\n"); 528 APInt Xq = Xtop; // these need to be initialized, even 529 APInt Xr = Xtop; // though they're just going to be overwritten 530 APInt::sdivrem(Xtop, Xbot, Xq, Xr); 531 APInt Yq = Ytop; 532 APInt Yr = Ytop; 533 APInt::sdivrem(Ytop, Ybot, Yq, Yr); 534 if (Xr != 0 || Yr != 0) { 535 X->setEmpty(); 536 ++DeltaSuccesses; 537 return true; 538 } 539 LLVM_DEBUG(dbgs() << "\t\tX = " << Xq << ", Y = " << Yq << "\n"); 540 if (Xq.slt(0) || Yq.slt(0)) { 541 X->setEmpty(); 542 ++DeltaSuccesses; 543 return true; 544 } 545 if (const SCEVConstant *CUB = 546 collectConstantUpperBound(X->getAssociatedLoop(), Prod1->getType())) { 547 const APInt &UpperBound = CUB->getAPInt(); 548 LLVM_DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n"); 549 if (Xq.sgt(UpperBound) || Yq.sgt(UpperBound)) { 550 X->setEmpty(); 551 ++DeltaSuccesses; 552 return true; 553 } 554 } 555 X->setPoint(SE->getConstant(Xq), 556 SE->getConstant(Yq), 557 X->getAssociatedLoop()); 558 ++DeltaSuccesses; 559 return true; 560 } 561 return false; 562 } 563 564 // if (X->isLine() && Y->isPoint()) This case can't occur. 565 assert(!(X->isLine() && Y->isPoint()) && "This case should never occur"); 566 567 if (X->isPoint() && Y->isLine()) { 568 LLVM_DEBUG(dbgs() << "\t intersect Point and Line\n"); 569 const SCEV *A1X1 = SE->getMulExpr(Y->getA(), X->getX()); 570 const SCEV *B1Y1 = SE->getMulExpr(Y->getB(), X->getY()); 571 const SCEV *Sum = SE->getAddExpr(A1X1, B1Y1); 572 if (isKnownPredicate(CmpInst::ICMP_EQ, Sum, Y->getC())) 573 return false; 574 if (isKnownPredicate(CmpInst::ICMP_NE, Sum, Y->getC())) { 575 X->setEmpty(); 576 ++DeltaSuccesses; 577 return true; 578 } 579 return false; 580 } 581 582 llvm_unreachable("shouldn't reach the end of Constraint intersection"); 583 return false; 584 } 585 586 587 //===----------------------------------------------------------------------===// 588 // DependenceInfo methods 589 590 // For debugging purposes. Dumps a dependence to OS. 591 void Dependence::dump(raw_ostream &OS) const { 592 bool Splitable = false; 593 if (isConfused()) 594 OS << "confused"; 595 else { 596 if (isConsistent()) 597 OS << "consistent "; 598 if (isFlow()) 599 OS << "flow"; 600 else if (isOutput()) 601 OS << "output"; 602 else if (isAnti()) 603 OS << "anti"; 604 else if (isInput()) 605 OS << "input"; 606 unsigned Levels = getLevels(); 607 OS << " ["; 608 for (unsigned II = 1; II <= Levels; ++II) { 609 if (isSplitable(II)) 610 Splitable = true; 611 if (isPeelFirst(II)) 612 OS << 'p'; 613 const SCEV *Distance = getDistance(II); 614 if (Distance) 615 OS << *Distance; 616 else if (isScalar(II)) 617 OS << "S"; 618 else { 619 unsigned Direction = getDirection(II); 620 if (Direction == DVEntry::ALL) 621 OS << "*"; 622 else { 623 if (Direction & DVEntry::LT) 624 OS << "<"; 625 if (Direction & DVEntry::EQ) 626 OS << "="; 627 if (Direction & DVEntry::GT) 628 OS << ">"; 629 } 630 } 631 if (isPeelLast(II)) 632 OS << 'p'; 633 if (II < Levels) 634 OS << " "; 635 } 636 if (isLoopIndependent()) 637 OS << "|<"; 638 OS << "]"; 639 if (Splitable) 640 OS << " splitable"; 641 } 642 OS << "!\n"; 643 } 644 645 // Returns NoAlias/MayAliass/MustAlias for two memory locations based upon their 646 // underlaying objects. If LocA and LocB are known to not alias (for any reason: 647 // tbaa, non-overlapping regions etc), then it is known there is no dependecy. 648 // Otherwise the underlying objects are checked to see if they point to 649 // different identifiable objects. 650 static AliasResult underlyingObjectsAlias(AliasAnalysis *AA, 651 const DataLayout &DL, 652 const MemoryLocation &LocA, 653 const MemoryLocation &LocB) { 654 // Check the original locations (minus size) for noalias, which can happen for 655 // tbaa, incompatible underlying object locations, etc. 656 MemoryLocation LocAS(LocA.Ptr, LocationSize::unknown(), LocA.AATags); 657 MemoryLocation LocBS(LocB.Ptr, LocationSize::unknown(), LocB.AATags); 658 if (AA->alias(LocAS, LocBS) == NoAlias) 659 return NoAlias; 660 661 // Check the underlying objects are the same 662 const Value *AObj = GetUnderlyingObject(LocA.Ptr, DL); 663 const Value *BObj = GetUnderlyingObject(LocB.Ptr, DL); 664 665 // If the underlying objects are the same, they must alias 666 if (AObj == BObj) 667 return MustAlias; 668 669 // We may have hit the recursion limit for underlying objects, or have 670 // underlying objects where we don't know they will alias. 671 if (!isIdentifiedObject(AObj) || !isIdentifiedObject(BObj)) 672 return MayAlias; 673 674 // Otherwise we know the objects are different and both identified objects so 675 // must not alias. 676 return NoAlias; 677 } 678 679 680 // Returns true if the load or store can be analyzed. Atomic and volatile 681 // operations have properties which this analysis does not understand. 682 static 683 bool isLoadOrStore(const Instruction *I) { 684 if (const LoadInst *LI = dyn_cast<LoadInst>(I)) 685 return LI->isUnordered(); 686 else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) 687 return SI->isUnordered(); 688 return false; 689 } 690 691 692 // Examines the loop nesting of the Src and Dst 693 // instructions and establishes their shared loops. Sets the variables 694 // CommonLevels, SrcLevels, and MaxLevels. 695 // The source and destination instructions needn't be contained in the same 696 // loop. The routine establishNestingLevels finds the level of most deeply 697 // nested loop that contains them both, CommonLevels. An instruction that's 698 // not contained in a loop is at level = 0. MaxLevels is equal to the level 699 // of the source plus the level of the destination, minus CommonLevels. 700 // This lets us allocate vectors MaxLevels in length, with room for every 701 // distinct loop referenced in both the source and destination subscripts. 702 // The variable SrcLevels is the nesting depth of the source instruction. 703 // It's used to help calculate distinct loops referenced by the destination. 704 // Here's the map from loops to levels: 705 // 0 - unused 706 // 1 - outermost common loop 707 // ... - other common loops 708 // CommonLevels - innermost common loop 709 // ... - loops containing Src but not Dst 710 // SrcLevels - innermost loop containing Src but not Dst 711 // ... - loops containing Dst but not Src 712 // MaxLevels - innermost loops containing Dst but not Src 713 // Consider the follow code fragment: 714 // for (a = ...) { 715 // for (b = ...) { 716 // for (c = ...) { 717 // for (d = ...) { 718 // A[] = ...; 719 // } 720 // } 721 // for (e = ...) { 722 // for (f = ...) { 723 // for (g = ...) { 724 // ... = A[]; 725 // } 726 // } 727 // } 728 // } 729 // } 730 // If we're looking at the possibility of a dependence between the store 731 // to A (the Src) and the load from A (the Dst), we'll note that they 732 // have 2 loops in common, so CommonLevels will equal 2 and the direction 733 // vector for Result will have 2 entries. SrcLevels = 4 and MaxLevels = 7. 734 // A map from loop names to loop numbers would look like 735 // a - 1 736 // b - 2 = CommonLevels 737 // c - 3 738 // d - 4 = SrcLevels 739 // e - 5 740 // f - 6 741 // g - 7 = MaxLevels 742 void DependenceInfo::establishNestingLevels(const Instruction *Src, 743 const Instruction *Dst) { 744 const BasicBlock *SrcBlock = Src->getParent(); 745 const BasicBlock *DstBlock = Dst->getParent(); 746 unsigned SrcLevel = LI->getLoopDepth(SrcBlock); 747 unsigned DstLevel = LI->getLoopDepth(DstBlock); 748 const Loop *SrcLoop = LI->getLoopFor(SrcBlock); 749 const Loop *DstLoop = LI->getLoopFor(DstBlock); 750 SrcLevels = SrcLevel; 751 MaxLevels = SrcLevel + DstLevel; 752 while (SrcLevel > DstLevel) { 753 SrcLoop = SrcLoop->getParentLoop(); 754 SrcLevel--; 755 } 756 while (DstLevel > SrcLevel) { 757 DstLoop = DstLoop->getParentLoop(); 758 DstLevel--; 759 } 760 while (SrcLoop != DstLoop) { 761 SrcLoop = SrcLoop->getParentLoop(); 762 DstLoop = DstLoop->getParentLoop(); 763 SrcLevel--; 764 } 765 CommonLevels = SrcLevel; 766 MaxLevels -= CommonLevels; 767 } 768 769 770 // Given one of the loops containing the source, return 771 // its level index in our numbering scheme. 772 unsigned DependenceInfo::mapSrcLoop(const Loop *SrcLoop) const { 773 return SrcLoop->getLoopDepth(); 774 } 775 776 777 // Given one of the loops containing the destination, 778 // return its level index in our numbering scheme. 779 unsigned DependenceInfo::mapDstLoop(const Loop *DstLoop) const { 780 unsigned D = DstLoop->getLoopDepth(); 781 if (D > CommonLevels) 782 return D - CommonLevels + SrcLevels; 783 else 784 return D; 785 } 786 787 788 // Returns true if Expression is loop invariant in LoopNest. 789 bool DependenceInfo::isLoopInvariant(const SCEV *Expression, 790 const Loop *LoopNest) const { 791 if (!LoopNest) 792 return true; 793 return SE->isLoopInvariant(Expression, LoopNest) && 794 isLoopInvariant(Expression, LoopNest->getParentLoop()); 795 } 796 797 798 799 // Finds the set of loops from the LoopNest that 800 // have a level <= CommonLevels and are referred to by the SCEV Expression. 801 void DependenceInfo::collectCommonLoops(const SCEV *Expression, 802 const Loop *LoopNest, 803 SmallBitVector &Loops) const { 804 while (LoopNest) { 805 unsigned Level = LoopNest->getLoopDepth(); 806 if (Level <= CommonLevels && !SE->isLoopInvariant(Expression, LoopNest)) 807 Loops.set(Level); 808 LoopNest = LoopNest->getParentLoop(); 809 } 810 } 811 812 void DependenceInfo::unifySubscriptType(ArrayRef<Subscript *> Pairs) { 813 814 unsigned widestWidthSeen = 0; 815 Type *widestType; 816 817 // Go through each pair and find the widest bit to which we need 818 // to extend all of them. 819 for (Subscript *Pair : Pairs) { 820 const SCEV *Src = Pair->Src; 821 const SCEV *Dst = Pair->Dst; 822 IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType()); 823 IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType()); 824 if (SrcTy == nullptr || DstTy == nullptr) { 825 assert(SrcTy == DstTy && "This function only unify integer types and " 826 "expect Src and Dst share the same type " 827 "otherwise."); 828 continue; 829 } 830 if (SrcTy->getBitWidth() > widestWidthSeen) { 831 widestWidthSeen = SrcTy->getBitWidth(); 832 widestType = SrcTy; 833 } 834 if (DstTy->getBitWidth() > widestWidthSeen) { 835 widestWidthSeen = DstTy->getBitWidth(); 836 widestType = DstTy; 837 } 838 } 839 840 841 assert(widestWidthSeen > 0); 842 843 // Now extend each pair to the widest seen. 844 for (Subscript *Pair : Pairs) { 845 const SCEV *Src = Pair->Src; 846 const SCEV *Dst = Pair->Dst; 847 IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType()); 848 IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType()); 849 if (SrcTy == nullptr || DstTy == nullptr) { 850 assert(SrcTy == DstTy && "This function only unify integer types and " 851 "expect Src and Dst share the same type " 852 "otherwise."); 853 continue; 854 } 855 if (SrcTy->getBitWidth() < widestWidthSeen) 856 // Sign-extend Src to widestType 857 Pair->Src = SE->getSignExtendExpr(Src, widestType); 858 if (DstTy->getBitWidth() < widestWidthSeen) { 859 // Sign-extend Dst to widestType 860 Pair->Dst = SE->getSignExtendExpr(Dst, widestType); 861 } 862 } 863 } 864 865 // removeMatchingExtensions - Examines a subscript pair. 866 // If the source and destination are identically sign (or zero) 867 // extended, it strips off the extension in an effect to simplify 868 // the actual analysis. 869 void DependenceInfo::removeMatchingExtensions(Subscript *Pair) { 870 const SCEV *Src = Pair->Src; 871 const SCEV *Dst = Pair->Dst; 872 if ((isa<SCEVZeroExtendExpr>(Src) && isa<SCEVZeroExtendExpr>(Dst)) || 873 (isa<SCEVSignExtendExpr>(Src) && isa<SCEVSignExtendExpr>(Dst))) { 874 const SCEVCastExpr *SrcCast = cast<SCEVCastExpr>(Src); 875 const SCEVCastExpr *DstCast = cast<SCEVCastExpr>(Dst); 876 const SCEV *SrcCastOp = SrcCast->getOperand(); 877 const SCEV *DstCastOp = DstCast->getOperand(); 878 if (SrcCastOp->getType() == DstCastOp->getType()) { 879 Pair->Src = SrcCastOp; 880 Pair->Dst = DstCastOp; 881 } 882 } 883 } 884 885 886 // Examine the scev and return true iff it's linear. 887 // Collect any loops mentioned in the set of "Loops". 888 bool DependenceInfo::checkSrcSubscript(const SCEV *Src, const Loop *LoopNest, 889 SmallBitVector &Loops) { 890 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Src); 891 if (!AddRec) 892 return isLoopInvariant(Src, LoopNest); 893 const SCEV *Start = AddRec->getStart(); 894 const SCEV *Step = AddRec->getStepRecurrence(*SE); 895 const SCEV *UB = SE->getBackedgeTakenCount(AddRec->getLoop()); 896 if (!isa<SCEVCouldNotCompute>(UB)) { 897 if (SE->getTypeSizeInBits(Start->getType()) < 898 SE->getTypeSizeInBits(UB->getType())) { 899 if (!AddRec->getNoWrapFlags()) 900 return false; 901 } 902 } 903 if (!isLoopInvariant(Step, LoopNest)) 904 return false; 905 Loops.set(mapSrcLoop(AddRec->getLoop())); 906 return checkSrcSubscript(Start, LoopNest, Loops); 907 } 908 909 910 911 // Examine the scev and return true iff it's linear. 912 // Collect any loops mentioned in the set of "Loops". 913 bool DependenceInfo::checkDstSubscript(const SCEV *Dst, const Loop *LoopNest, 914 SmallBitVector &Loops) { 915 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Dst); 916 if (!AddRec) 917 return isLoopInvariant(Dst, LoopNest); 918 const SCEV *Start = AddRec->getStart(); 919 const SCEV *Step = AddRec->getStepRecurrence(*SE); 920 const SCEV *UB = SE->getBackedgeTakenCount(AddRec->getLoop()); 921 if (!isa<SCEVCouldNotCompute>(UB)) { 922 if (SE->getTypeSizeInBits(Start->getType()) < 923 SE->getTypeSizeInBits(UB->getType())) { 924 if (!AddRec->getNoWrapFlags()) 925 return false; 926 } 927 } 928 if (!isLoopInvariant(Step, LoopNest)) 929 return false; 930 Loops.set(mapDstLoop(AddRec->getLoop())); 931 return checkDstSubscript(Start, LoopNest, Loops); 932 } 933 934 935 // Examines the subscript pair (the Src and Dst SCEVs) 936 // and classifies it as either ZIV, SIV, RDIV, MIV, or Nonlinear. 937 // Collects the associated loops in a set. 938 DependenceInfo::Subscript::ClassificationKind 939 DependenceInfo::classifyPair(const SCEV *Src, const Loop *SrcLoopNest, 940 const SCEV *Dst, const Loop *DstLoopNest, 941 SmallBitVector &Loops) { 942 SmallBitVector SrcLoops(MaxLevels + 1); 943 SmallBitVector DstLoops(MaxLevels + 1); 944 if (!checkSrcSubscript(Src, SrcLoopNest, SrcLoops)) 945 return Subscript::NonLinear; 946 if (!checkDstSubscript(Dst, DstLoopNest, DstLoops)) 947 return Subscript::NonLinear; 948 Loops = SrcLoops; 949 Loops |= DstLoops; 950 unsigned N = Loops.count(); 951 if (N == 0) 952 return Subscript::ZIV; 953 if (N == 1) 954 return Subscript::SIV; 955 if (N == 2 && (SrcLoops.count() == 0 || 956 DstLoops.count() == 0 || 957 (SrcLoops.count() == 1 && DstLoops.count() == 1))) 958 return Subscript::RDIV; 959 return Subscript::MIV; 960 } 961 962 963 // A wrapper around SCEV::isKnownPredicate. 964 // Looks for cases where we're interested in comparing for equality. 965 // If both X and Y have been identically sign or zero extended, 966 // it strips off the (confusing) extensions before invoking 967 // SCEV::isKnownPredicate. Perhaps, someday, the ScalarEvolution package 968 // will be similarly updated. 969 // 970 // If SCEV::isKnownPredicate can't prove the predicate, 971 // we try simple subtraction, which seems to help in some cases 972 // involving symbolics. 973 bool DependenceInfo::isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *X, 974 const SCEV *Y) const { 975 if (Pred == CmpInst::ICMP_EQ || 976 Pred == CmpInst::ICMP_NE) { 977 if ((isa<SCEVSignExtendExpr>(X) && 978 isa<SCEVSignExtendExpr>(Y)) || 979 (isa<SCEVZeroExtendExpr>(X) && 980 isa<SCEVZeroExtendExpr>(Y))) { 981 const SCEVCastExpr *CX = cast<SCEVCastExpr>(X); 982 const SCEVCastExpr *CY = cast<SCEVCastExpr>(Y); 983 const SCEV *Xop = CX->getOperand(); 984 const SCEV *Yop = CY->getOperand(); 985 if (Xop->getType() == Yop->getType()) { 986 X = Xop; 987 Y = Yop; 988 } 989 } 990 } 991 if (SE->isKnownPredicate(Pred, X, Y)) 992 return true; 993 // If SE->isKnownPredicate can't prove the condition, 994 // we try the brute-force approach of subtracting 995 // and testing the difference. 996 // By testing with SE->isKnownPredicate first, we avoid 997 // the possibility of overflow when the arguments are constants. 998 const SCEV *Delta = SE->getMinusSCEV(X, Y); 999 switch (Pred) { 1000 case CmpInst::ICMP_EQ: 1001 return Delta->isZero(); 1002 case CmpInst::ICMP_NE: 1003 return SE->isKnownNonZero(Delta); 1004 case CmpInst::ICMP_SGE: 1005 return SE->isKnownNonNegative(Delta); 1006 case CmpInst::ICMP_SLE: 1007 return SE->isKnownNonPositive(Delta); 1008 case CmpInst::ICMP_SGT: 1009 return SE->isKnownPositive(Delta); 1010 case CmpInst::ICMP_SLT: 1011 return SE->isKnownNegative(Delta); 1012 default: 1013 llvm_unreachable("unexpected predicate in isKnownPredicate"); 1014 } 1015 } 1016 1017 /// Compare to see if S is less than Size, using isKnownNegative(S - max(Size, 1)) 1018 /// with some extra checking if S is an AddRec and we can prove less-than using 1019 /// the loop bounds. 1020 bool DependenceInfo::isKnownLessThan(const SCEV *S, const SCEV *Size) const { 1021 // First unify to the same type 1022 auto *SType = dyn_cast<IntegerType>(S->getType()); 1023 auto *SizeType = dyn_cast<IntegerType>(Size->getType()); 1024 if (!SType || !SizeType) 1025 return false; 1026 Type *MaxType = 1027 (SType->getBitWidth() >= SizeType->getBitWidth()) ? SType : SizeType; 1028 S = SE->getTruncateOrZeroExtend(S, MaxType); 1029 Size = SE->getTruncateOrZeroExtend(Size, MaxType); 1030 1031 // Special check for addrecs using BE taken count 1032 const SCEV *Bound = SE->getMinusSCEV(S, Size); 1033 if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Bound)) { 1034 if (AddRec->isAffine()) { 1035 const SCEV *BECount = SE->getBackedgeTakenCount(AddRec->getLoop()); 1036 if (!isa<SCEVCouldNotCompute>(BECount)) { 1037 const SCEV *Limit = AddRec->evaluateAtIteration(BECount, *SE); 1038 if (SE->isKnownNegative(Limit)) 1039 return true; 1040 } 1041 } 1042 } 1043 1044 // Check using normal isKnownNegative 1045 const SCEV *LimitedBound = 1046 SE->getMinusSCEV(S, SE->getSMaxExpr(Size, SE->getOne(Size->getType()))); 1047 return SE->isKnownNegative(LimitedBound); 1048 } 1049 1050 bool DependenceInfo::isKnownNonNegative(const SCEV *S, const Value *Ptr) const { 1051 bool Inbounds = false; 1052 if (auto *SrcGEP = dyn_cast<GetElementPtrInst>(Ptr)) 1053 Inbounds = SrcGEP->isInBounds(); 1054 if (Inbounds) { 1055 if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) { 1056 if (AddRec->isAffine()) { 1057 // We know S is for Ptr, the operand on a load/store, so doesn't wrap. 1058 // If both parts are NonNegative, the end result will be NonNegative 1059 if (SE->isKnownNonNegative(AddRec->getStart()) && 1060 SE->isKnownNonNegative(AddRec->getOperand(1))) 1061 return true; 1062 } 1063 } 1064 } 1065 1066 return SE->isKnownNonNegative(S); 1067 } 1068 1069 // All subscripts are all the same type. 1070 // Loop bound may be smaller (e.g., a char). 1071 // Should zero extend loop bound, since it's always >= 0. 1072 // This routine collects upper bound and extends or truncates if needed. 1073 // Truncating is safe when subscripts are known not to wrap. Cases without 1074 // nowrap flags should have been rejected earlier. 1075 // Return null if no bound available. 1076 const SCEV *DependenceInfo::collectUpperBound(const Loop *L, Type *T) const { 1077 if (SE->hasLoopInvariantBackedgeTakenCount(L)) { 1078 const SCEV *UB = SE->getBackedgeTakenCount(L); 1079 return SE->getTruncateOrZeroExtend(UB, T); 1080 } 1081 return nullptr; 1082 } 1083 1084 1085 // Calls collectUpperBound(), then attempts to cast it to SCEVConstant. 1086 // If the cast fails, returns NULL. 1087 const SCEVConstant *DependenceInfo::collectConstantUpperBound(const Loop *L, 1088 Type *T) const { 1089 if (const SCEV *UB = collectUpperBound(L, T)) 1090 return dyn_cast<SCEVConstant>(UB); 1091 return nullptr; 1092 } 1093 1094 1095 // testZIV - 1096 // When we have a pair of subscripts of the form [c1] and [c2], 1097 // where c1 and c2 are both loop invariant, we attack it using 1098 // the ZIV test. Basically, we test by comparing the two values, 1099 // but there are actually three possible results: 1100 // 1) the values are equal, so there's a dependence 1101 // 2) the values are different, so there's no dependence 1102 // 3) the values might be equal, so we have to assume a dependence. 1103 // 1104 // Return true if dependence disproved. 1105 bool DependenceInfo::testZIV(const SCEV *Src, const SCEV *Dst, 1106 FullDependence &Result) const { 1107 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n"); 1108 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n"); 1109 ++ZIVapplications; 1110 if (isKnownPredicate(CmpInst::ICMP_EQ, Src, Dst)) { 1111 LLVM_DEBUG(dbgs() << " provably dependent\n"); 1112 return false; // provably dependent 1113 } 1114 if (isKnownPredicate(CmpInst::ICMP_NE, Src, Dst)) { 1115 LLVM_DEBUG(dbgs() << " provably independent\n"); 1116 ++ZIVindependence; 1117 return true; // provably independent 1118 } 1119 LLVM_DEBUG(dbgs() << " possibly dependent\n"); 1120 Result.Consistent = false; 1121 return false; // possibly dependent 1122 } 1123 1124 1125 // strongSIVtest - 1126 // From the paper, Practical Dependence Testing, Section 4.2.1 1127 // 1128 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 + a*i], 1129 // where i is an induction variable, c1 and c2 are loop invariant, 1130 // and a is a constant, we can solve it exactly using the Strong SIV test. 1131 // 1132 // Can prove independence. Failing that, can compute distance (and direction). 1133 // In the presence of symbolic terms, we can sometimes make progress. 1134 // 1135 // If there's a dependence, 1136 // 1137 // c1 + a*i = c2 + a*i' 1138 // 1139 // The dependence distance is 1140 // 1141 // d = i' - i = (c1 - c2)/a 1142 // 1143 // A dependence only exists if d is an integer and abs(d) <= U, where U is the 1144 // loop's upper bound. If a dependence exists, the dependence direction is 1145 // defined as 1146 // 1147 // { < if d > 0 1148 // direction = { = if d = 0 1149 // { > if d < 0 1150 // 1151 // Return true if dependence disproved. 1152 bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst, 1153 const SCEV *DstConst, const Loop *CurLoop, 1154 unsigned Level, FullDependence &Result, 1155 Constraint &NewConstraint) const { 1156 LLVM_DEBUG(dbgs() << "\tStrong SIV test\n"); 1157 LLVM_DEBUG(dbgs() << "\t Coeff = " << *Coeff); 1158 LLVM_DEBUG(dbgs() << ", " << *Coeff->getType() << "\n"); 1159 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst); 1160 LLVM_DEBUG(dbgs() << ", " << *SrcConst->getType() << "\n"); 1161 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst); 1162 LLVM_DEBUG(dbgs() << ", " << *DstConst->getType() << "\n"); 1163 ++StrongSIVapplications; 1164 assert(0 < Level && Level <= CommonLevels && "level out of range"); 1165 Level--; 1166 1167 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); 1168 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta); 1169 LLVM_DEBUG(dbgs() << ", " << *Delta->getType() << "\n"); 1170 1171 // check that |Delta| < iteration count 1172 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1173 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound); 1174 LLVM_DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n"); 1175 const SCEV *AbsDelta = 1176 SE->isKnownNonNegative(Delta) ? Delta : SE->getNegativeSCEV(Delta); 1177 const SCEV *AbsCoeff = 1178 SE->isKnownNonNegative(Coeff) ? Coeff : SE->getNegativeSCEV(Coeff); 1179 const SCEV *Product = SE->getMulExpr(UpperBound, AbsCoeff); 1180 if (isKnownPredicate(CmpInst::ICMP_SGT, AbsDelta, Product)) { 1181 // Distance greater than trip count - no dependence 1182 ++StrongSIVindependence; 1183 ++StrongSIVsuccesses; 1184 return true; 1185 } 1186 } 1187 1188 // Can we compute distance? 1189 if (isa<SCEVConstant>(Delta) && isa<SCEVConstant>(Coeff)) { 1190 APInt ConstDelta = cast<SCEVConstant>(Delta)->getAPInt(); 1191 APInt ConstCoeff = cast<SCEVConstant>(Coeff)->getAPInt(); 1192 APInt Distance = ConstDelta; // these need to be initialized 1193 APInt Remainder = ConstDelta; 1194 APInt::sdivrem(ConstDelta, ConstCoeff, Distance, Remainder); 1195 LLVM_DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); 1196 LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1197 // Make sure Coeff divides Delta exactly 1198 if (Remainder != 0) { 1199 // Coeff doesn't divide Distance, no dependence 1200 ++StrongSIVindependence; 1201 ++StrongSIVsuccesses; 1202 return true; 1203 } 1204 Result.DV[Level].Distance = SE->getConstant(Distance); 1205 NewConstraint.setDistance(SE->getConstant(Distance), CurLoop); 1206 if (Distance.sgt(0)) 1207 Result.DV[Level].Direction &= Dependence::DVEntry::LT; 1208 else if (Distance.slt(0)) 1209 Result.DV[Level].Direction &= Dependence::DVEntry::GT; 1210 else 1211 Result.DV[Level].Direction &= Dependence::DVEntry::EQ; 1212 ++StrongSIVsuccesses; 1213 } 1214 else if (Delta->isZero()) { 1215 // since 0/X == 0 1216 Result.DV[Level].Distance = Delta; 1217 NewConstraint.setDistance(Delta, CurLoop); 1218 Result.DV[Level].Direction &= Dependence::DVEntry::EQ; 1219 ++StrongSIVsuccesses; 1220 } 1221 else { 1222 if (Coeff->isOne()) { 1223 LLVM_DEBUG(dbgs() << "\t Distance = " << *Delta << "\n"); 1224 Result.DV[Level].Distance = Delta; // since X/1 == X 1225 NewConstraint.setDistance(Delta, CurLoop); 1226 } 1227 else { 1228 Result.Consistent = false; 1229 NewConstraint.setLine(Coeff, 1230 SE->getNegativeSCEV(Coeff), 1231 SE->getNegativeSCEV(Delta), CurLoop); 1232 } 1233 1234 // maybe we can get a useful direction 1235 bool DeltaMaybeZero = !SE->isKnownNonZero(Delta); 1236 bool DeltaMaybePositive = !SE->isKnownNonPositive(Delta); 1237 bool DeltaMaybeNegative = !SE->isKnownNonNegative(Delta); 1238 bool CoeffMaybePositive = !SE->isKnownNonPositive(Coeff); 1239 bool CoeffMaybeNegative = !SE->isKnownNonNegative(Coeff); 1240 // The double negatives above are confusing. 1241 // It helps to read !SE->isKnownNonZero(Delta) 1242 // as "Delta might be Zero" 1243 unsigned NewDirection = Dependence::DVEntry::NONE; 1244 if ((DeltaMaybePositive && CoeffMaybePositive) || 1245 (DeltaMaybeNegative && CoeffMaybeNegative)) 1246 NewDirection = Dependence::DVEntry::LT; 1247 if (DeltaMaybeZero) 1248 NewDirection |= Dependence::DVEntry::EQ; 1249 if ((DeltaMaybeNegative && CoeffMaybePositive) || 1250 (DeltaMaybePositive && CoeffMaybeNegative)) 1251 NewDirection |= Dependence::DVEntry::GT; 1252 if (NewDirection < Result.DV[Level].Direction) 1253 ++StrongSIVsuccesses; 1254 Result.DV[Level].Direction &= NewDirection; 1255 } 1256 return false; 1257 } 1258 1259 1260 // weakCrossingSIVtest - 1261 // From the paper, Practical Dependence Testing, Section 4.2.2 1262 // 1263 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 - a*i], 1264 // where i is an induction variable, c1 and c2 are loop invariant, 1265 // and a is a constant, we can solve it exactly using the 1266 // Weak-Crossing SIV test. 1267 // 1268 // Given c1 + a*i = c2 - a*i', we can look for the intersection of 1269 // the two lines, where i = i', yielding 1270 // 1271 // c1 + a*i = c2 - a*i 1272 // 2a*i = c2 - c1 1273 // i = (c2 - c1)/2a 1274 // 1275 // If i < 0, there is no dependence. 1276 // If i > upperbound, there is no dependence. 1277 // If i = 0 (i.e., if c1 = c2), there's a dependence with distance = 0. 1278 // If i = upperbound, there's a dependence with distance = 0. 1279 // If i is integral, there's a dependence (all directions). 1280 // If the non-integer part = 1/2, there's a dependence (<> directions). 1281 // Otherwise, there's no dependence. 1282 // 1283 // Can prove independence. Failing that, 1284 // can sometimes refine the directions. 1285 // Can determine iteration for splitting. 1286 // 1287 // Return true if dependence disproved. 1288 bool DependenceInfo::weakCrossingSIVtest( 1289 const SCEV *Coeff, const SCEV *SrcConst, const SCEV *DstConst, 1290 const Loop *CurLoop, unsigned Level, FullDependence &Result, 1291 Constraint &NewConstraint, const SCEV *&SplitIter) const { 1292 LLVM_DEBUG(dbgs() << "\tWeak-Crossing SIV test\n"); 1293 LLVM_DEBUG(dbgs() << "\t Coeff = " << *Coeff << "\n"); 1294 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1295 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1296 ++WeakCrossingSIVapplications; 1297 assert(0 < Level && Level <= CommonLevels && "Level out of range"); 1298 Level--; 1299 Result.Consistent = false; 1300 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1301 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1302 NewConstraint.setLine(Coeff, Coeff, Delta, CurLoop); 1303 if (Delta->isZero()) { 1304 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT); 1305 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT); 1306 ++WeakCrossingSIVsuccesses; 1307 if (!Result.DV[Level].Direction) { 1308 ++WeakCrossingSIVindependence; 1309 return true; 1310 } 1311 Result.DV[Level].Distance = Delta; // = 0 1312 return false; 1313 } 1314 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Coeff); 1315 if (!ConstCoeff) 1316 return false; 1317 1318 Result.DV[Level].Splitable = true; 1319 if (SE->isKnownNegative(ConstCoeff)) { 1320 ConstCoeff = dyn_cast<SCEVConstant>(SE->getNegativeSCEV(ConstCoeff)); 1321 assert(ConstCoeff && 1322 "dynamic cast of negative of ConstCoeff should yield constant"); 1323 Delta = SE->getNegativeSCEV(Delta); 1324 } 1325 assert(SE->isKnownPositive(ConstCoeff) && "ConstCoeff should be positive"); 1326 1327 // compute SplitIter for use by DependenceInfo::getSplitIteration() 1328 SplitIter = SE->getUDivExpr( 1329 SE->getSMaxExpr(SE->getZero(Delta->getType()), Delta), 1330 SE->getMulExpr(SE->getConstant(Delta->getType(), 2), ConstCoeff)); 1331 LLVM_DEBUG(dbgs() << "\t Split iter = " << *SplitIter << "\n"); 1332 1333 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1334 if (!ConstDelta) 1335 return false; 1336 1337 // We're certain that ConstCoeff > 0; therefore, 1338 // if Delta < 0, then no dependence. 1339 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1340 LLVM_DEBUG(dbgs() << "\t ConstCoeff = " << *ConstCoeff << "\n"); 1341 if (SE->isKnownNegative(Delta)) { 1342 // No dependence, Delta < 0 1343 ++WeakCrossingSIVindependence; 1344 ++WeakCrossingSIVsuccesses; 1345 return true; 1346 } 1347 1348 // We're certain that Delta > 0 and ConstCoeff > 0. 1349 // Check Delta/(2*ConstCoeff) against upper loop bound 1350 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1351 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1352 const SCEV *ConstantTwo = SE->getConstant(UpperBound->getType(), 2); 1353 const SCEV *ML = SE->getMulExpr(SE->getMulExpr(ConstCoeff, UpperBound), 1354 ConstantTwo); 1355 LLVM_DEBUG(dbgs() << "\t ML = " << *ML << "\n"); 1356 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, ML)) { 1357 // Delta too big, no dependence 1358 ++WeakCrossingSIVindependence; 1359 ++WeakCrossingSIVsuccesses; 1360 return true; 1361 } 1362 if (isKnownPredicate(CmpInst::ICMP_EQ, Delta, ML)) { 1363 // i = i' = UB 1364 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT); 1365 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT); 1366 ++WeakCrossingSIVsuccesses; 1367 if (!Result.DV[Level].Direction) { 1368 ++WeakCrossingSIVindependence; 1369 return true; 1370 } 1371 Result.DV[Level].Splitable = false; 1372 Result.DV[Level].Distance = SE->getZero(Delta->getType()); 1373 return false; 1374 } 1375 } 1376 1377 // check that Coeff divides Delta 1378 APInt APDelta = ConstDelta->getAPInt(); 1379 APInt APCoeff = ConstCoeff->getAPInt(); 1380 APInt Distance = APDelta; // these need to be initialzed 1381 APInt Remainder = APDelta; 1382 APInt::sdivrem(APDelta, APCoeff, Distance, Remainder); 1383 LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1384 if (Remainder != 0) { 1385 // Coeff doesn't divide Delta, no dependence 1386 ++WeakCrossingSIVindependence; 1387 ++WeakCrossingSIVsuccesses; 1388 return true; 1389 } 1390 LLVM_DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); 1391 1392 // if 2*Coeff doesn't divide Delta, then the equal direction isn't possible 1393 APInt Two = APInt(Distance.getBitWidth(), 2, true); 1394 Remainder = Distance.srem(Two); 1395 LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1396 if (Remainder != 0) { 1397 // Equal direction isn't possible 1398 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::EQ); 1399 ++WeakCrossingSIVsuccesses; 1400 } 1401 return false; 1402 } 1403 1404 1405 // Kirch's algorithm, from 1406 // 1407 // Optimizing Supercompilers for Supercomputers 1408 // Michael Wolfe 1409 // MIT Press, 1989 1410 // 1411 // Program 2.1, page 29. 1412 // Computes the GCD of AM and BM. 1413 // Also finds a solution to the equation ax - by = gcd(a, b). 1414 // Returns true if dependence disproved; i.e., gcd does not divide Delta. 1415 static bool findGCD(unsigned Bits, const APInt &AM, const APInt &BM, 1416 const APInt &Delta, APInt &G, APInt &X, APInt &Y) { 1417 APInt A0(Bits, 1, true), A1(Bits, 0, true); 1418 APInt B0(Bits, 0, true), B1(Bits, 1, true); 1419 APInt G0 = AM.abs(); 1420 APInt G1 = BM.abs(); 1421 APInt Q = G0; // these need to be initialized 1422 APInt R = G0; 1423 APInt::sdivrem(G0, G1, Q, R); 1424 while (R != 0) { 1425 APInt A2 = A0 - Q*A1; A0 = A1; A1 = A2; 1426 APInt B2 = B0 - Q*B1; B0 = B1; B1 = B2; 1427 G0 = G1; G1 = R; 1428 APInt::sdivrem(G0, G1, Q, R); 1429 } 1430 G = G1; 1431 LLVM_DEBUG(dbgs() << "\t GCD = " << G << "\n"); 1432 X = AM.slt(0) ? -A1 : A1; 1433 Y = BM.slt(0) ? B1 : -B1; 1434 1435 // make sure gcd divides Delta 1436 R = Delta.srem(G); 1437 if (R != 0) 1438 return true; // gcd doesn't divide Delta, no dependence 1439 Q = Delta.sdiv(G); 1440 X *= Q; 1441 Y *= Q; 1442 return false; 1443 } 1444 1445 static APInt floorOfQuotient(const APInt &A, const APInt &B) { 1446 APInt Q = A; // these need to be initialized 1447 APInt R = A; 1448 APInt::sdivrem(A, B, Q, R); 1449 if (R == 0) 1450 return Q; 1451 if ((A.sgt(0) && B.sgt(0)) || 1452 (A.slt(0) && B.slt(0))) 1453 return Q; 1454 else 1455 return Q - 1; 1456 } 1457 1458 static APInt ceilingOfQuotient(const APInt &A, const APInt &B) { 1459 APInt Q = A; // these need to be initialized 1460 APInt R = A; 1461 APInt::sdivrem(A, B, Q, R); 1462 if (R == 0) 1463 return Q; 1464 if ((A.sgt(0) && B.sgt(0)) || 1465 (A.slt(0) && B.slt(0))) 1466 return Q + 1; 1467 else 1468 return Q; 1469 } 1470 1471 1472 static 1473 APInt maxAPInt(APInt A, APInt B) { 1474 return A.sgt(B) ? A : B; 1475 } 1476 1477 1478 static 1479 APInt minAPInt(APInt A, APInt B) { 1480 return A.slt(B) ? A : B; 1481 } 1482 1483 1484 // exactSIVtest - 1485 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i], 1486 // where i is an induction variable, c1 and c2 are loop invariant, and a1 1487 // and a2 are constant, we can solve it exactly using an algorithm developed 1488 // by Banerjee and Wolfe. See Section 2.5.3 in 1489 // 1490 // Optimizing Supercompilers for Supercomputers 1491 // Michael Wolfe 1492 // MIT Press, 1989 1493 // 1494 // It's slower than the specialized tests (strong SIV, weak-zero SIV, etc), 1495 // so use them if possible. They're also a bit better with symbolics and, 1496 // in the case of the strong SIV test, can compute Distances. 1497 // 1498 // Return true if dependence disproved. 1499 bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, 1500 const SCEV *SrcConst, const SCEV *DstConst, 1501 const Loop *CurLoop, unsigned Level, 1502 FullDependence &Result, 1503 Constraint &NewConstraint) const { 1504 LLVM_DEBUG(dbgs() << "\tExact SIV test\n"); 1505 LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); 1506 LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); 1507 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1508 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1509 ++ExactSIVapplications; 1510 assert(0 < Level && Level <= CommonLevels && "Level out of range"); 1511 Level--; 1512 Result.Consistent = false; 1513 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1514 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1515 NewConstraint.setLine(SrcCoeff, SE->getNegativeSCEV(DstCoeff), 1516 Delta, CurLoop); 1517 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1518 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1519 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1520 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) 1521 return false; 1522 1523 // find gcd 1524 APInt G, X, Y; 1525 APInt AM = ConstSrcCoeff->getAPInt(); 1526 APInt BM = ConstDstCoeff->getAPInt(); 1527 unsigned Bits = AM.getBitWidth(); 1528 if (findGCD(Bits, AM, BM, ConstDelta->getAPInt(), G, X, Y)) { 1529 // gcd doesn't divide Delta, no dependence 1530 ++ExactSIVindependence; 1531 ++ExactSIVsuccesses; 1532 return true; 1533 } 1534 1535 LLVM_DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); 1536 1537 // since SCEV construction normalizes, LM = 0 1538 APInt UM(Bits, 1, true); 1539 bool UMvalid = false; 1540 // UM is perhaps unavailable, let's check 1541 if (const SCEVConstant *CUB = 1542 collectConstantUpperBound(CurLoop, Delta->getType())) { 1543 UM = CUB->getAPInt(); 1544 LLVM_DEBUG(dbgs() << "\t UM = " << UM << "\n"); 1545 UMvalid = true; 1546 } 1547 1548 APInt TU(APInt::getSignedMaxValue(Bits)); 1549 APInt TL(APInt::getSignedMinValue(Bits)); 1550 1551 // test(BM/G, LM-X) and test(-BM/G, X-UM) 1552 APInt TMUL = BM.sdiv(G); 1553 if (TMUL.sgt(0)) { 1554 TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL)); 1555 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1556 if (UMvalid) { 1557 TU = minAPInt(TU, floorOfQuotient(UM - X, TMUL)); 1558 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1559 } 1560 } 1561 else { 1562 TU = minAPInt(TU, floorOfQuotient(-X, TMUL)); 1563 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1564 if (UMvalid) { 1565 TL = maxAPInt(TL, ceilingOfQuotient(UM - X, TMUL)); 1566 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1567 } 1568 } 1569 1570 // test(AM/G, LM-Y) and test(-AM/G, Y-UM) 1571 TMUL = AM.sdiv(G); 1572 if (TMUL.sgt(0)) { 1573 TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL)); 1574 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1575 if (UMvalid) { 1576 TU = minAPInt(TU, floorOfQuotient(UM - Y, TMUL)); 1577 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1578 } 1579 } 1580 else { 1581 TU = minAPInt(TU, floorOfQuotient(-Y, TMUL)); 1582 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1583 if (UMvalid) { 1584 TL = maxAPInt(TL, ceilingOfQuotient(UM - Y, TMUL)); 1585 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1586 } 1587 } 1588 if (TL.sgt(TU)) { 1589 ++ExactSIVindependence; 1590 ++ExactSIVsuccesses; 1591 return true; 1592 } 1593 1594 // explore directions 1595 unsigned NewDirection = Dependence::DVEntry::NONE; 1596 1597 // less than 1598 APInt SaveTU(TU); // save these 1599 APInt SaveTL(TL); 1600 LLVM_DEBUG(dbgs() << "\t exploring LT direction\n"); 1601 TMUL = AM - BM; 1602 if (TMUL.sgt(0)) { 1603 TL = maxAPInt(TL, ceilingOfQuotient(X - Y + 1, TMUL)); 1604 LLVM_DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1605 } 1606 else { 1607 TU = minAPInt(TU, floorOfQuotient(X - Y + 1, TMUL)); 1608 LLVM_DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1609 } 1610 if (TL.sle(TU)) { 1611 NewDirection |= Dependence::DVEntry::LT; 1612 ++ExactSIVsuccesses; 1613 } 1614 1615 // equal 1616 TU = SaveTU; // restore 1617 TL = SaveTL; 1618 LLVM_DEBUG(dbgs() << "\t exploring EQ direction\n"); 1619 if (TMUL.sgt(0)) { 1620 TL = maxAPInt(TL, ceilingOfQuotient(X - Y, TMUL)); 1621 LLVM_DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1622 } 1623 else { 1624 TU = minAPInt(TU, floorOfQuotient(X - Y, TMUL)); 1625 LLVM_DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1626 } 1627 TMUL = BM - AM; 1628 if (TMUL.sgt(0)) { 1629 TL = maxAPInt(TL, ceilingOfQuotient(Y - X, TMUL)); 1630 LLVM_DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1631 } 1632 else { 1633 TU = minAPInt(TU, floorOfQuotient(Y - X, TMUL)); 1634 LLVM_DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1635 } 1636 if (TL.sle(TU)) { 1637 NewDirection |= Dependence::DVEntry::EQ; 1638 ++ExactSIVsuccesses; 1639 } 1640 1641 // greater than 1642 TU = SaveTU; // restore 1643 TL = SaveTL; 1644 LLVM_DEBUG(dbgs() << "\t exploring GT direction\n"); 1645 if (TMUL.sgt(0)) { 1646 TL = maxAPInt(TL, ceilingOfQuotient(Y - X + 1, TMUL)); 1647 LLVM_DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1648 } 1649 else { 1650 TU = minAPInt(TU, floorOfQuotient(Y - X + 1, TMUL)); 1651 LLVM_DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1652 } 1653 if (TL.sle(TU)) { 1654 NewDirection |= Dependence::DVEntry::GT; 1655 ++ExactSIVsuccesses; 1656 } 1657 1658 // finished 1659 Result.DV[Level].Direction &= NewDirection; 1660 if (Result.DV[Level].Direction == Dependence::DVEntry::NONE) 1661 ++ExactSIVindependence; 1662 return Result.DV[Level].Direction == Dependence::DVEntry::NONE; 1663 } 1664 1665 1666 1667 // Return true if the divisor evenly divides the dividend. 1668 static 1669 bool isRemainderZero(const SCEVConstant *Dividend, 1670 const SCEVConstant *Divisor) { 1671 const APInt &ConstDividend = Dividend->getAPInt(); 1672 const APInt &ConstDivisor = Divisor->getAPInt(); 1673 return ConstDividend.srem(ConstDivisor) == 0; 1674 } 1675 1676 1677 // weakZeroSrcSIVtest - 1678 // From the paper, Practical Dependence Testing, Section 4.2.2 1679 // 1680 // When we have a pair of subscripts of the form [c1] and [c2 + a*i], 1681 // where i is an induction variable, c1 and c2 are loop invariant, 1682 // and a is a constant, we can solve it exactly using the 1683 // Weak-Zero SIV test. 1684 // 1685 // Given 1686 // 1687 // c1 = c2 + a*i 1688 // 1689 // we get 1690 // 1691 // (c1 - c2)/a = i 1692 // 1693 // If i is not an integer, there's no dependence. 1694 // If i < 0 or > UB, there's no dependence. 1695 // If i = 0, the direction is >= and peeling the 1696 // 1st iteration will break the dependence. 1697 // If i = UB, the direction is <= and peeling the 1698 // last iteration will break the dependence. 1699 // Otherwise, the direction is *. 1700 // 1701 // Can prove independence. Failing that, we can sometimes refine 1702 // the directions. Can sometimes show that first or last 1703 // iteration carries all the dependences (so worth peeling). 1704 // 1705 // (see also weakZeroDstSIVtest) 1706 // 1707 // Return true if dependence disproved. 1708 bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *DstCoeff, 1709 const SCEV *SrcConst, 1710 const SCEV *DstConst, 1711 const Loop *CurLoop, unsigned Level, 1712 FullDependence &Result, 1713 Constraint &NewConstraint) const { 1714 // For the WeakSIV test, it's possible the loop isn't common to 1715 // the Src and Dst loops. If it isn't, then there's no need to 1716 // record a direction. 1717 LLVM_DEBUG(dbgs() << "\tWeak-Zero (src) SIV test\n"); 1718 LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << "\n"); 1719 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1720 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1721 ++WeakZeroSIVapplications; 1722 assert(0 < Level && Level <= MaxLevels && "Level out of range"); 1723 Level--; 1724 Result.Consistent = false; 1725 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); 1726 NewConstraint.setLine(SE->getZero(Delta->getType()), DstCoeff, Delta, 1727 CurLoop); 1728 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1729 if (isKnownPredicate(CmpInst::ICMP_EQ, SrcConst, DstConst)) { 1730 if (Level < CommonLevels) { 1731 Result.DV[Level].Direction &= Dependence::DVEntry::GE; 1732 Result.DV[Level].PeelFirst = true; 1733 ++WeakZeroSIVsuccesses; 1734 } 1735 return false; // dependences caused by first iteration 1736 } 1737 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1738 if (!ConstCoeff) 1739 return false; 1740 const SCEV *AbsCoeff = 1741 SE->isKnownNegative(ConstCoeff) ? 1742 SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; 1743 const SCEV *NewDelta = 1744 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta; 1745 1746 // check that Delta/SrcCoeff < iteration count 1747 // really check NewDelta < count*AbsCoeff 1748 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1749 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1750 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); 1751 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { 1752 ++WeakZeroSIVindependence; 1753 ++WeakZeroSIVsuccesses; 1754 return true; 1755 } 1756 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { 1757 // dependences caused by last iteration 1758 if (Level < CommonLevels) { 1759 Result.DV[Level].Direction &= Dependence::DVEntry::LE; 1760 Result.DV[Level].PeelLast = true; 1761 ++WeakZeroSIVsuccesses; 1762 } 1763 return false; 1764 } 1765 } 1766 1767 // check that Delta/SrcCoeff >= 0 1768 // really check that NewDelta >= 0 1769 if (SE->isKnownNegative(NewDelta)) { 1770 // No dependence, newDelta < 0 1771 ++WeakZeroSIVindependence; 1772 ++WeakZeroSIVsuccesses; 1773 return true; 1774 } 1775 1776 // if SrcCoeff doesn't divide Delta, then no dependence 1777 if (isa<SCEVConstant>(Delta) && 1778 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { 1779 ++WeakZeroSIVindependence; 1780 ++WeakZeroSIVsuccesses; 1781 return true; 1782 } 1783 return false; 1784 } 1785 1786 1787 // weakZeroDstSIVtest - 1788 // From the paper, Practical Dependence Testing, Section 4.2.2 1789 // 1790 // When we have a pair of subscripts of the form [c1 + a*i] and [c2], 1791 // where i is an induction variable, c1 and c2 are loop invariant, 1792 // and a is a constant, we can solve it exactly using the 1793 // Weak-Zero SIV test. 1794 // 1795 // Given 1796 // 1797 // c1 + a*i = c2 1798 // 1799 // we get 1800 // 1801 // i = (c2 - c1)/a 1802 // 1803 // If i is not an integer, there's no dependence. 1804 // If i < 0 or > UB, there's no dependence. 1805 // If i = 0, the direction is <= and peeling the 1806 // 1st iteration will break the dependence. 1807 // If i = UB, the direction is >= and peeling the 1808 // last iteration will break the dependence. 1809 // Otherwise, the direction is *. 1810 // 1811 // Can prove independence. Failing that, we can sometimes refine 1812 // the directions. Can sometimes show that first or last 1813 // iteration carries all the dependences (so worth peeling). 1814 // 1815 // (see also weakZeroSrcSIVtest) 1816 // 1817 // Return true if dependence disproved. 1818 bool DependenceInfo::weakZeroDstSIVtest(const SCEV *SrcCoeff, 1819 const SCEV *SrcConst, 1820 const SCEV *DstConst, 1821 const Loop *CurLoop, unsigned Level, 1822 FullDependence &Result, 1823 Constraint &NewConstraint) const { 1824 // For the WeakSIV test, it's possible the loop isn't common to the 1825 // Src and Dst loops. If it isn't, then there's no need to record a direction. 1826 LLVM_DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n"); 1827 LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << "\n"); 1828 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1829 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1830 ++WeakZeroSIVapplications; 1831 assert(0 < Level && Level <= SrcLevels && "Level out of range"); 1832 Level--; 1833 Result.Consistent = false; 1834 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1835 NewConstraint.setLine(SrcCoeff, SE->getZero(Delta->getType()), Delta, 1836 CurLoop); 1837 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1838 if (isKnownPredicate(CmpInst::ICMP_EQ, DstConst, SrcConst)) { 1839 if (Level < CommonLevels) { 1840 Result.DV[Level].Direction &= Dependence::DVEntry::LE; 1841 Result.DV[Level].PeelFirst = true; 1842 ++WeakZeroSIVsuccesses; 1843 } 1844 return false; // dependences caused by first iteration 1845 } 1846 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1847 if (!ConstCoeff) 1848 return false; 1849 const SCEV *AbsCoeff = 1850 SE->isKnownNegative(ConstCoeff) ? 1851 SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; 1852 const SCEV *NewDelta = 1853 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta; 1854 1855 // check that Delta/SrcCoeff < iteration count 1856 // really check NewDelta < count*AbsCoeff 1857 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1858 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1859 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); 1860 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { 1861 ++WeakZeroSIVindependence; 1862 ++WeakZeroSIVsuccesses; 1863 return true; 1864 } 1865 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { 1866 // dependences caused by last iteration 1867 if (Level < CommonLevels) { 1868 Result.DV[Level].Direction &= Dependence::DVEntry::GE; 1869 Result.DV[Level].PeelLast = true; 1870 ++WeakZeroSIVsuccesses; 1871 } 1872 return false; 1873 } 1874 } 1875 1876 // check that Delta/SrcCoeff >= 0 1877 // really check that NewDelta >= 0 1878 if (SE->isKnownNegative(NewDelta)) { 1879 // No dependence, newDelta < 0 1880 ++WeakZeroSIVindependence; 1881 ++WeakZeroSIVsuccesses; 1882 return true; 1883 } 1884 1885 // if SrcCoeff doesn't divide Delta, then no dependence 1886 if (isa<SCEVConstant>(Delta) && 1887 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { 1888 ++WeakZeroSIVindependence; 1889 ++WeakZeroSIVsuccesses; 1890 return true; 1891 } 1892 return false; 1893 } 1894 1895 1896 // exactRDIVtest - Tests the RDIV subscript pair for dependence. 1897 // Things of the form [c1 + a*i] and [c2 + b*j], 1898 // where i and j are induction variable, c1 and c2 are loop invariant, 1899 // and a and b are constants. 1900 // Returns true if any possible dependence is disproved. 1901 // Marks the result as inconsistent. 1902 // Works in some cases that symbolicRDIVtest doesn't, and vice versa. 1903 bool DependenceInfo::exactRDIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, 1904 const SCEV *SrcConst, const SCEV *DstConst, 1905 const Loop *SrcLoop, const Loop *DstLoop, 1906 FullDependence &Result) const { 1907 LLVM_DEBUG(dbgs() << "\tExact RDIV test\n"); 1908 LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); 1909 LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); 1910 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1911 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1912 ++ExactRDIVapplications; 1913 Result.Consistent = false; 1914 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1915 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1916 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1917 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1918 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1919 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) 1920 return false; 1921 1922 // find gcd 1923 APInt G, X, Y; 1924 APInt AM = ConstSrcCoeff->getAPInt(); 1925 APInt BM = ConstDstCoeff->getAPInt(); 1926 unsigned Bits = AM.getBitWidth(); 1927 if (findGCD(Bits, AM, BM, ConstDelta->getAPInt(), G, X, Y)) { 1928 // gcd doesn't divide Delta, no dependence 1929 ++ExactRDIVindependence; 1930 return true; 1931 } 1932 1933 LLVM_DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); 1934 1935 // since SCEV construction seems to normalize, LM = 0 1936 APInt SrcUM(Bits, 1, true); 1937 bool SrcUMvalid = false; 1938 // SrcUM is perhaps unavailable, let's check 1939 if (const SCEVConstant *UpperBound = 1940 collectConstantUpperBound(SrcLoop, Delta->getType())) { 1941 SrcUM = UpperBound->getAPInt(); 1942 LLVM_DEBUG(dbgs() << "\t SrcUM = " << SrcUM << "\n"); 1943 SrcUMvalid = true; 1944 } 1945 1946 APInt DstUM(Bits, 1, true); 1947 bool DstUMvalid = false; 1948 // UM is perhaps unavailable, let's check 1949 if (const SCEVConstant *UpperBound = 1950 collectConstantUpperBound(DstLoop, Delta->getType())) { 1951 DstUM = UpperBound->getAPInt(); 1952 LLVM_DEBUG(dbgs() << "\t DstUM = " << DstUM << "\n"); 1953 DstUMvalid = true; 1954 } 1955 1956 APInt TU(APInt::getSignedMaxValue(Bits)); 1957 APInt TL(APInt::getSignedMinValue(Bits)); 1958 1959 // test(BM/G, LM-X) and test(-BM/G, X-UM) 1960 APInt TMUL = BM.sdiv(G); 1961 if (TMUL.sgt(0)) { 1962 TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL)); 1963 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1964 if (SrcUMvalid) { 1965 TU = minAPInt(TU, floorOfQuotient(SrcUM - X, TMUL)); 1966 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1967 } 1968 } 1969 else { 1970 TU = minAPInt(TU, floorOfQuotient(-X, TMUL)); 1971 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1972 if (SrcUMvalid) { 1973 TL = maxAPInt(TL, ceilingOfQuotient(SrcUM - X, TMUL)); 1974 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1975 } 1976 } 1977 1978 // test(AM/G, LM-Y) and test(-AM/G, Y-UM) 1979 TMUL = AM.sdiv(G); 1980 if (TMUL.sgt(0)) { 1981 TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL)); 1982 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1983 if (DstUMvalid) { 1984 TU = minAPInt(TU, floorOfQuotient(DstUM - Y, TMUL)); 1985 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1986 } 1987 } 1988 else { 1989 TU = minAPInt(TU, floorOfQuotient(-Y, TMUL)); 1990 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1991 if (DstUMvalid) { 1992 TL = maxAPInt(TL, ceilingOfQuotient(DstUM - Y, TMUL)); 1993 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1994 } 1995 } 1996 if (TL.sgt(TU)) 1997 ++ExactRDIVindependence; 1998 return TL.sgt(TU); 1999 } 2000 2001 2002 // symbolicRDIVtest - 2003 // In Section 4.5 of the Practical Dependence Testing paper,the authors 2004 // introduce a special case of Banerjee's Inequalities (also called the 2005 // Extreme-Value Test) that can handle some of the SIV and RDIV cases, 2006 // particularly cases with symbolics. Since it's only able to disprove 2007 // dependence (not compute distances or directions), we'll use it as a 2008 // fall back for the other tests. 2009 // 2010 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] 2011 // where i and j are induction variables and c1 and c2 are loop invariants, 2012 // we can use the symbolic tests to disprove some dependences, serving as a 2013 // backup for the RDIV test. Note that i and j can be the same variable, 2014 // letting this test serve as a backup for the various SIV tests. 2015 // 2016 // For a dependence to exist, c1 + a1*i must equal c2 + a2*j for some 2017 // 0 <= i <= N1 and some 0 <= j <= N2, where N1 and N2 are the (normalized) 2018 // loop bounds for the i and j loops, respectively. So, ... 2019 // 2020 // c1 + a1*i = c2 + a2*j 2021 // a1*i - a2*j = c2 - c1 2022 // 2023 // To test for a dependence, we compute c2 - c1 and make sure it's in the 2024 // range of the maximum and minimum possible values of a1*i - a2*j. 2025 // Considering the signs of a1 and a2, we have 4 possible cases: 2026 // 2027 // 1) If a1 >= 0 and a2 >= 0, then 2028 // a1*0 - a2*N2 <= c2 - c1 <= a1*N1 - a2*0 2029 // -a2*N2 <= c2 - c1 <= a1*N1 2030 // 2031 // 2) If a1 >= 0 and a2 <= 0, then 2032 // a1*0 - a2*0 <= c2 - c1 <= a1*N1 - a2*N2 2033 // 0 <= c2 - c1 <= a1*N1 - a2*N2 2034 // 2035 // 3) If a1 <= 0 and a2 >= 0, then 2036 // a1*N1 - a2*N2 <= c2 - c1 <= a1*0 - a2*0 2037 // a1*N1 - a2*N2 <= c2 - c1 <= 0 2038 // 2039 // 4) If a1 <= 0 and a2 <= 0, then 2040 // a1*N1 - a2*0 <= c2 - c1 <= a1*0 - a2*N2 2041 // a1*N1 <= c2 - c1 <= -a2*N2 2042 // 2043 // return true if dependence disproved 2044 bool DependenceInfo::symbolicRDIVtest(const SCEV *A1, const SCEV *A2, 2045 const SCEV *C1, const SCEV *C2, 2046 const Loop *Loop1, 2047 const Loop *Loop2) const { 2048 ++SymbolicRDIVapplications; 2049 LLVM_DEBUG(dbgs() << "\ttry symbolic RDIV test\n"); 2050 LLVM_DEBUG(dbgs() << "\t A1 = " << *A1); 2051 LLVM_DEBUG(dbgs() << ", type = " << *A1->getType() << "\n"); 2052 LLVM_DEBUG(dbgs() << "\t A2 = " << *A2 << "\n"); 2053 LLVM_DEBUG(dbgs() << "\t C1 = " << *C1 << "\n"); 2054 LLVM_DEBUG(dbgs() << "\t C2 = " << *C2 << "\n"); 2055 const SCEV *N1 = collectUpperBound(Loop1, A1->getType()); 2056 const SCEV *N2 = collectUpperBound(Loop2, A1->getType()); 2057 LLVM_DEBUG(if (N1) dbgs() << "\t N1 = " << *N1 << "\n"); 2058 LLVM_DEBUG(if (N2) dbgs() << "\t N2 = " << *N2 << "\n"); 2059 const SCEV *C2_C1 = SE->getMinusSCEV(C2, C1); 2060 const SCEV *C1_C2 = SE->getMinusSCEV(C1, C2); 2061 LLVM_DEBUG(dbgs() << "\t C2 - C1 = " << *C2_C1 << "\n"); 2062 LLVM_DEBUG(dbgs() << "\t C1 - C2 = " << *C1_C2 << "\n"); 2063 if (SE->isKnownNonNegative(A1)) { 2064 if (SE->isKnownNonNegative(A2)) { 2065 // A1 >= 0 && A2 >= 0 2066 if (N1) { 2067 // make sure that c2 - c1 <= a1*N1 2068 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2069 LLVM_DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); 2070 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1)) { 2071 ++SymbolicRDIVindependence; 2072 return true; 2073 } 2074 } 2075 if (N2) { 2076 // make sure that -a2*N2 <= c2 - c1, or a2*N2 >= c1 - c2 2077 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2078 LLVM_DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); 2079 if (isKnownPredicate(CmpInst::ICMP_SLT, A2N2, C1_C2)) { 2080 ++SymbolicRDIVindependence; 2081 return true; 2082 } 2083 } 2084 } 2085 else if (SE->isKnownNonPositive(A2)) { 2086 // a1 >= 0 && a2 <= 0 2087 if (N1 && N2) { 2088 // make sure that c2 - c1 <= a1*N1 - a2*N2 2089 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2090 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2091 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); 2092 LLVM_DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); 2093 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1_A2N2)) { 2094 ++SymbolicRDIVindependence; 2095 return true; 2096 } 2097 } 2098 // make sure that 0 <= c2 - c1 2099 if (SE->isKnownNegative(C2_C1)) { 2100 ++SymbolicRDIVindependence; 2101 return true; 2102 } 2103 } 2104 } 2105 else if (SE->isKnownNonPositive(A1)) { 2106 if (SE->isKnownNonNegative(A2)) { 2107 // a1 <= 0 && a2 >= 0 2108 if (N1 && N2) { 2109 // make sure that a1*N1 - a2*N2 <= c2 - c1 2110 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2111 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2112 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); 2113 LLVM_DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); 2114 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1_A2N2, C2_C1)) { 2115 ++SymbolicRDIVindependence; 2116 return true; 2117 } 2118 } 2119 // make sure that c2 - c1 <= 0 2120 if (SE->isKnownPositive(C2_C1)) { 2121 ++SymbolicRDIVindependence; 2122 return true; 2123 } 2124 } 2125 else if (SE->isKnownNonPositive(A2)) { 2126 // a1 <= 0 && a2 <= 0 2127 if (N1) { 2128 // make sure that a1*N1 <= c2 - c1 2129 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2130 LLVM_DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); 2131 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1, C2_C1)) { 2132 ++SymbolicRDIVindependence; 2133 return true; 2134 } 2135 } 2136 if (N2) { 2137 // make sure that c2 - c1 <= -a2*N2, or c1 - c2 >= a2*N2 2138 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2139 LLVM_DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); 2140 if (isKnownPredicate(CmpInst::ICMP_SLT, C1_C2, A2N2)) { 2141 ++SymbolicRDIVindependence; 2142 return true; 2143 } 2144 } 2145 } 2146 } 2147 return false; 2148 } 2149 2150 2151 // testSIV - 2152 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 - a2*i] 2153 // where i is an induction variable, c1 and c2 are loop invariant, and a1 and 2154 // a2 are constant, we attack it with an SIV test. While they can all be 2155 // solved with the Exact SIV test, it's worthwhile to use simpler tests when 2156 // they apply; they're cheaper and sometimes more precise. 2157 // 2158 // Return true if dependence disproved. 2159 bool DependenceInfo::testSIV(const SCEV *Src, const SCEV *Dst, unsigned &Level, 2160 FullDependence &Result, Constraint &NewConstraint, 2161 const SCEV *&SplitIter) const { 2162 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n"); 2163 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2164 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); 2165 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); 2166 if (SrcAddRec && DstAddRec) { 2167 const SCEV *SrcConst = SrcAddRec->getStart(); 2168 const SCEV *DstConst = DstAddRec->getStart(); 2169 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2170 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); 2171 const Loop *CurLoop = SrcAddRec->getLoop(); 2172 assert(CurLoop == DstAddRec->getLoop() && 2173 "both loops in SIV should be same"); 2174 Level = mapSrcLoop(CurLoop); 2175 bool disproven; 2176 if (SrcCoeff == DstCoeff) 2177 disproven = strongSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2178 Level, Result, NewConstraint); 2179 else if (SrcCoeff == SE->getNegativeSCEV(DstCoeff)) 2180 disproven = weakCrossingSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2181 Level, Result, NewConstraint, SplitIter); 2182 else 2183 disproven = exactSIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, 2184 Level, Result, NewConstraint); 2185 return disproven || 2186 gcdMIVtest(Src, Dst, Result) || 2187 symbolicRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, CurLoop); 2188 } 2189 if (SrcAddRec) { 2190 const SCEV *SrcConst = SrcAddRec->getStart(); 2191 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2192 const SCEV *DstConst = Dst; 2193 const Loop *CurLoop = SrcAddRec->getLoop(); 2194 Level = mapSrcLoop(CurLoop); 2195 return weakZeroDstSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2196 Level, Result, NewConstraint) || 2197 gcdMIVtest(Src, Dst, Result); 2198 } 2199 if (DstAddRec) { 2200 const SCEV *DstConst = DstAddRec->getStart(); 2201 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); 2202 const SCEV *SrcConst = Src; 2203 const Loop *CurLoop = DstAddRec->getLoop(); 2204 Level = mapDstLoop(CurLoop); 2205 return weakZeroSrcSIVtest(DstCoeff, SrcConst, DstConst, 2206 CurLoop, Level, Result, NewConstraint) || 2207 gcdMIVtest(Src, Dst, Result); 2208 } 2209 llvm_unreachable("SIV test expected at least one AddRec"); 2210 return false; 2211 } 2212 2213 2214 // testRDIV - 2215 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] 2216 // where i and j are induction variables, c1 and c2 are loop invariant, 2217 // and a1 and a2 are constant, we can solve it exactly with an easy adaptation 2218 // of the Exact SIV test, the Restricted Double Index Variable (RDIV) test. 2219 // It doesn't make sense to talk about distance or direction in this case, 2220 // so there's no point in making special versions of the Strong SIV test or 2221 // the Weak-crossing SIV test. 2222 // 2223 // With minor algebra, this test can also be used for things like 2224 // [c1 + a1*i + a2*j][c2]. 2225 // 2226 // Return true if dependence disproved. 2227 bool DependenceInfo::testRDIV(const SCEV *Src, const SCEV *Dst, 2228 FullDependence &Result) const { 2229 // we have 3 possible situations here: 2230 // 1) [a*i + b] and [c*j + d] 2231 // 2) [a*i + c*j + b] and [d] 2232 // 3) [b] and [a*i + c*j + d] 2233 // We need to find what we've got and get organized 2234 2235 const SCEV *SrcConst, *DstConst; 2236 const SCEV *SrcCoeff, *DstCoeff; 2237 const Loop *SrcLoop, *DstLoop; 2238 2239 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n"); 2240 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2241 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); 2242 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); 2243 if (SrcAddRec && DstAddRec) { 2244 SrcConst = SrcAddRec->getStart(); 2245 SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2246 SrcLoop = SrcAddRec->getLoop(); 2247 DstConst = DstAddRec->getStart(); 2248 DstCoeff = DstAddRec->getStepRecurrence(*SE); 2249 DstLoop = DstAddRec->getLoop(); 2250 } 2251 else if (SrcAddRec) { 2252 if (const SCEVAddRecExpr *tmpAddRec = 2253 dyn_cast<SCEVAddRecExpr>(SrcAddRec->getStart())) { 2254 SrcConst = tmpAddRec->getStart(); 2255 SrcCoeff = tmpAddRec->getStepRecurrence(*SE); 2256 SrcLoop = tmpAddRec->getLoop(); 2257 DstConst = Dst; 2258 DstCoeff = SE->getNegativeSCEV(SrcAddRec->getStepRecurrence(*SE)); 2259 DstLoop = SrcAddRec->getLoop(); 2260 } 2261 else 2262 llvm_unreachable("RDIV reached by surprising SCEVs"); 2263 } 2264 else if (DstAddRec) { 2265 if (const SCEVAddRecExpr *tmpAddRec = 2266 dyn_cast<SCEVAddRecExpr>(DstAddRec->getStart())) { 2267 DstConst = tmpAddRec->getStart(); 2268 DstCoeff = tmpAddRec->getStepRecurrence(*SE); 2269 DstLoop = tmpAddRec->getLoop(); 2270 SrcConst = Src; 2271 SrcCoeff = SE->getNegativeSCEV(DstAddRec->getStepRecurrence(*SE)); 2272 SrcLoop = DstAddRec->getLoop(); 2273 } 2274 else 2275 llvm_unreachable("RDIV reached by surprising SCEVs"); 2276 } 2277 else 2278 llvm_unreachable("RDIV expected at least one AddRec"); 2279 return exactRDIVtest(SrcCoeff, DstCoeff, 2280 SrcConst, DstConst, 2281 SrcLoop, DstLoop, 2282 Result) || 2283 gcdMIVtest(Src, Dst, Result) || 2284 symbolicRDIVtest(SrcCoeff, DstCoeff, 2285 SrcConst, DstConst, 2286 SrcLoop, DstLoop); 2287 } 2288 2289 2290 // Tests the single-subscript MIV pair (Src and Dst) for dependence. 2291 // Return true if dependence disproved. 2292 // Can sometimes refine direction vectors. 2293 bool DependenceInfo::testMIV(const SCEV *Src, const SCEV *Dst, 2294 const SmallBitVector &Loops, 2295 FullDependence &Result) const { 2296 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n"); 2297 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2298 Result.Consistent = false; 2299 return gcdMIVtest(Src, Dst, Result) || 2300 banerjeeMIVtest(Src, Dst, Loops, Result); 2301 } 2302 2303 2304 // Given a product, e.g., 10*X*Y, returns the first constant operand, 2305 // in this case 10. If there is no constant part, returns NULL. 2306 static 2307 const SCEVConstant *getConstantPart(const SCEV *Expr) { 2308 if (const auto *Constant = dyn_cast<SCEVConstant>(Expr)) 2309 return Constant; 2310 else if (const auto *Product = dyn_cast<SCEVMulExpr>(Expr)) 2311 if (const auto *Constant = dyn_cast<SCEVConstant>(Product->getOperand(0))) 2312 return Constant; 2313 return nullptr; 2314 } 2315 2316 2317 //===----------------------------------------------------------------------===// 2318 // gcdMIVtest - 2319 // Tests an MIV subscript pair for dependence. 2320 // Returns true if any possible dependence is disproved. 2321 // Marks the result as inconsistent. 2322 // Can sometimes disprove the equal direction for 1 or more loops, 2323 // as discussed in Michael Wolfe's book, 2324 // High Performance Compilers for Parallel Computing, page 235. 2325 // 2326 // We spend some effort (code!) to handle cases like 2327 // [10*i + 5*N*j + 15*M + 6], where i and j are induction variables, 2328 // but M and N are just loop-invariant variables. 2329 // This should help us handle linearized subscripts; 2330 // also makes this test a useful backup to the various SIV tests. 2331 // 2332 // It occurs to me that the presence of loop-invariant variables 2333 // changes the nature of the test from "greatest common divisor" 2334 // to "a common divisor". 2335 bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst, 2336 FullDependence &Result) const { 2337 LLVM_DEBUG(dbgs() << "starting gcd\n"); 2338 ++GCDapplications; 2339 unsigned BitWidth = SE->getTypeSizeInBits(Src->getType()); 2340 APInt RunningGCD = APInt::getNullValue(BitWidth); 2341 2342 // Examine Src coefficients. 2343 // Compute running GCD and record source constant. 2344 // Because we're looking for the constant at the end of the chain, 2345 // we can't quit the loop just because the GCD == 1. 2346 const SCEV *Coefficients = Src; 2347 while (const SCEVAddRecExpr *AddRec = 2348 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2349 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2350 // If the coefficient is the product of a constant and other stuff, 2351 // we can use the constant in the GCD computation. 2352 const auto *Constant = getConstantPart(Coeff); 2353 if (!Constant) 2354 return false; 2355 APInt ConstCoeff = Constant->getAPInt(); 2356 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2357 Coefficients = AddRec->getStart(); 2358 } 2359 const SCEV *SrcConst = Coefficients; 2360 2361 // Examine Dst coefficients. 2362 // Compute running GCD and record destination constant. 2363 // Because we're looking for the constant at the end of the chain, 2364 // we can't quit the loop just because the GCD == 1. 2365 Coefficients = Dst; 2366 while (const SCEVAddRecExpr *AddRec = 2367 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2368 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2369 // If the coefficient is the product of a constant and other stuff, 2370 // we can use the constant in the GCD computation. 2371 const auto *Constant = getConstantPart(Coeff); 2372 if (!Constant) 2373 return false; 2374 APInt ConstCoeff = Constant->getAPInt(); 2375 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2376 Coefficients = AddRec->getStart(); 2377 } 2378 const SCEV *DstConst = Coefficients; 2379 2380 APInt ExtraGCD = APInt::getNullValue(BitWidth); 2381 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 2382 LLVM_DEBUG(dbgs() << " Delta = " << *Delta << "\n"); 2383 const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Delta); 2384 if (const SCEVAddExpr *Sum = dyn_cast<SCEVAddExpr>(Delta)) { 2385 // If Delta is a sum of products, we may be able to make further progress. 2386 for (unsigned Op = 0, Ops = Sum->getNumOperands(); Op < Ops; Op++) { 2387 const SCEV *Operand = Sum->getOperand(Op); 2388 if (isa<SCEVConstant>(Operand)) { 2389 assert(!Constant && "Surprised to find multiple constants"); 2390 Constant = cast<SCEVConstant>(Operand); 2391 } 2392 else if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Operand)) { 2393 // Search for constant operand to participate in GCD; 2394 // If none found; return false. 2395 const SCEVConstant *ConstOp = getConstantPart(Product); 2396 if (!ConstOp) 2397 return false; 2398 APInt ConstOpValue = ConstOp->getAPInt(); 2399 ExtraGCD = APIntOps::GreatestCommonDivisor(ExtraGCD, 2400 ConstOpValue.abs()); 2401 } 2402 else 2403 return false; 2404 } 2405 } 2406 if (!Constant) 2407 return false; 2408 APInt ConstDelta = cast<SCEVConstant>(Constant)->getAPInt(); 2409 LLVM_DEBUG(dbgs() << " ConstDelta = " << ConstDelta << "\n"); 2410 if (ConstDelta == 0) 2411 return false; 2412 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ExtraGCD); 2413 LLVM_DEBUG(dbgs() << " RunningGCD = " << RunningGCD << "\n"); 2414 APInt Remainder = ConstDelta.srem(RunningGCD); 2415 if (Remainder != 0) { 2416 ++GCDindependence; 2417 return true; 2418 } 2419 2420 // Try to disprove equal directions. 2421 // For example, given a subscript pair [3*i + 2*j] and [i' + 2*j' - 1], 2422 // the code above can't disprove the dependence because the GCD = 1. 2423 // So we consider what happen if i = i' and what happens if j = j'. 2424 // If i = i', we can simplify the subscript to [2*i + 2*j] and [2*j' - 1], 2425 // which is infeasible, so we can disallow the = direction for the i level. 2426 // Setting j = j' doesn't help matters, so we end up with a direction vector 2427 // of [<>, *] 2428 // 2429 // Given A[5*i + 10*j*M + 9*M*N] and A[15*i + 20*j*M - 21*N*M + 5], 2430 // we need to remember that the constant part is 5 and the RunningGCD should 2431 // be initialized to ExtraGCD = 30. 2432 LLVM_DEBUG(dbgs() << " ExtraGCD = " << ExtraGCD << '\n'); 2433 2434 bool Improved = false; 2435 Coefficients = Src; 2436 while (const SCEVAddRecExpr *AddRec = 2437 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2438 Coefficients = AddRec->getStart(); 2439 const Loop *CurLoop = AddRec->getLoop(); 2440 RunningGCD = ExtraGCD; 2441 const SCEV *SrcCoeff = AddRec->getStepRecurrence(*SE); 2442 const SCEV *DstCoeff = SE->getMinusSCEV(SrcCoeff, SrcCoeff); 2443 const SCEV *Inner = Src; 2444 while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { 2445 AddRec = cast<SCEVAddRecExpr>(Inner); 2446 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2447 if (CurLoop == AddRec->getLoop()) 2448 ; // SrcCoeff == Coeff 2449 else { 2450 // If the coefficient is the product of a constant and other stuff, 2451 // we can use the constant in the GCD computation. 2452 Constant = getConstantPart(Coeff); 2453 if (!Constant) 2454 return false; 2455 APInt ConstCoeff = Constant->getAPInt(); 2456 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2457 } 2458 Inner = AddRec->getStart(); 2459 } 2460 Inner = Dst; 2461 while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { 2462 AddRec = cast<SCEVAddRecExpr>(Inner); 2463 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2464 if (CurLoop == AddRec->getLoop()) 2465 DstCoeff = Coeff; 2466 else { 2467 // If the coefficient is the product of a constant and other stuff, 2468 // we can use the constant in the GCD computation. 2469 Constant = getConstantPart(Coeff); 2470 if (!Constant) 2471 return false; 2472 APInt ConstCoeff = Constant->getAPInt(); 2473 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2474 } 2475 Inner = AddRec->getStart(); 2476 } 2477 Delta = SE->getMinusSCEV(SrcCoeff, DstCoeff); 2478 // If the coefficient is the product of a constant and other stuff, 2479 // we can use the constant in the GCD computation. 2480 Constant = getConstantPart(Delta); 2481 if (!Constant) 2482 // The difference of the two coefficients might not be a product 2483 // or constant, in which case we give up on this direction. 2484 continue; 2485 APInt ConstCoeff = Constant->getAPInt(); 2486 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2487 LLVM_DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n"); 2488 if (RunningGCD != 0) { 2489 Remainder = ConstDelta.srem(RunningGCD); 2490 LLVM_DEBUG(dbgs() << "\tRemainder = " << Remainder << "\n"); 2491 if (Remainder != 0) { 2492 unsigned Level = mapSrcLoop(CurLoop); 2493 Result.DV[Level - 1].Direction &= unsigned(~Dependence::DVEntry::EQ); 2494 Improved = true; 2495 } 2496 } 2497 } 2498 if (Improved) 2499 ++GCDsuccesses; 2500 LLVM_DEBUG(dbgs() << "all done\n"); 2501 return false; 2502 } 2503 2504 2505 //===----------------------------------------------------------------------===// 2506 // banerjeeMIVtest - 2507 // Use Banerjee's Inequalities to test an MIV subscript pair. 2508 // (Wolfe, in the race-car book, calls this the Extreme Value Test.) 2509 // Generally follows the discussion in Section 2.5.2 of 2510 // 2511 // Optimizing Supercompilers for Supercomputers 2512 // Michael Wolfe 2513 // 2514 // The inequalities given on page 25 are simplified in that loops are 2515 // normalized so that the lower bound is always 0 and the stride is always 1. 2516 // For example, Wolfe gives 2517 // 2518 // LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2519 // 2520 // where A_k is the coefficient of the kth index in the source subscript, 2521 // B_k is the coefficient of the kth index in the destination subscript, 2522 // U_k is the upper bound of the kth index, L_k is the lower bound of the Kth 2523 // index, and N_k is the stride of the kth index. Since all loops are normalized 2524 // by the SCEV package, N_k = 1 and L_k = 0, allowing us to simplify the 2525 // equation to 2526 // 2527 // LB^<_k = (A^-_k - B_k)^- (U_k - 0 - 1) + (A_k - B_k)0 - B_k 1 2528 // = (A^-_k - B_k)^- (U_k - 1) - B_k 2529 // 2530 // Similar simplifications are possible for the other equations. 2531 // 2532 // When we can't determine the number of iterations for a loop, 2533 // we use NULL as an indicator for the worst case, infinity. 2534 // When computing the upper bound, NULL denotes +inf; 2535 // for the lower bound, NULL denotes -inf. 2536 // 2537 // Return true if dependence disproved. 2538 bool DependenceInfo::banerjeeMIVtest(const SCEV *Src, const SCEV *Dst, 2539 const SmallBitVector &Loops, 2540 FullDependence &Result) const { 2541 LLVM_DEBUG(dbgs() << "starting Banerjee\n"); 2542 ++BanerjeeApplications; 2543 LLVM_DEBUG(dbgs() << " Src = " << *Src << '\n'); 2544 const SCEV *A0; 2545 CoefficientInfo *A = collectCoeffInfo(Src, true, A0); 2546 LLVM_DEBUG(dbgs() << " Dst = " << *Dst << '\n'); 2547 const SCEV *B0; 2548 CoefficientInfo *B = collectCoeffInfo(Dst, false, B0); 2549 BoundInfo *Bound = new BoundInfo[MaxLevels + 1]; 2550 const SCEV *Delta = SE->getMinusSCEV(B0, A0); 2551 LLVM_DEBUG(dbgs() << "\tDelta = " << *Delta << '\n'); 2552 2553 // Compute bounds for all the * directions. 2554 LLVM_DEBUG(dbgs() << "\tBounds[*]\n"); 2555 for (unsigned K = 1; K <= MaxLevels; ++K) { 2556 Bound[K].Iterations = A[K].Iterations ? A[K].Iterations : B[K].Iterations; 2557 Bound[K].Direction = Dependence::DVEntry::ALL; 2558 Bound[K].DirSet = Dependence::DVEntry::NONE; 2559 findBoundsALL(A, B, Bound, K); 2560 #ifndef NDEBUG 2561 LLVM_DEBUG(dbgs() << "\t " << K << '\t'); 2562 if (Bound[K].Lower[Dependence::DVEntry::ALL]) 2563 LLVM_DEBUG(dbgs() << *Bound[K].Lower[Dependence::DVEntry::ALL] << '\t'); 2564 else 2565 LLVM_DEBUG(dbgs() << "-inf\t"); 2566 if (Bound[K].Upper[Dependence::DVEntry::ALL]) 2567 LLVM_DEBUG(dbgs() << *Bound[K].Upper[Dependence::DVEntry::ALL] << '\n'); 2568 else 2569 LLVM_DEBUG(dbgs() << "+inf\n"); 2570 #endif 2571 } 2572 2573 // Test the *, *, *, ... case. 2574 bool Disproved = false; 2575 if (testBounds(Dependence::DVEntry::ALL, 0, Bound, Delta)) { 2576 // Explore the direction vector hierarchy. 2577 unsigned DepthExpanded = 0; 2578 unsigned NewDeps = exploreDirections(1, A, B, Bound, 2579 Loops, DepthExpanded, Delta); 2580 if (NewDeps > 0) { 2581 bool Improved = false; 2582 for (unsigned K = 1; K <= CommonLevels; ++K) { 2583 if (Loops[K]) { 2584 unsigned Old = Result.DV[K - 1].Direction; 2585 Result.DV[K - 1].Direction = Old & Bound[K].DirSet; 2586 Improved |= Old != Result.DV[K - 1].Direction; 2587 if (!Result.DV[K - 1].Direction) { 2588 Improved = false; 2589 Disproved = true; 2590 break; 2591 } 2592 } 2593 } 2594 if (Improved) 2595 ++BanerjeeSuccesses; 2596 } 2597 else { 2598 ++BanerjeeIndependence; 2599 Disproved = true; 2600 } 2601 } 2602 else { 2603 ++BanerjeeIndependence; 2604 Disproved = true; 2605 } 2606 delete [] Bound; 2607 delete [] A; 2608 delete [] B; 2609 return Disproved; 2610 } 2611 2612 2613 // Hierarchically expands the direction vector 2614 // search space, combining the directions of discovered dependences 2615 // in the DirSet field of Bound. Returns the number of distinct 2616 // dependences discovered. If the dependence is disproved, 2617 // it will return 0. 2618 unsigned DependenceInfo::exploreDirections(unsigned Level, CoefficientInfo *A, 2619 CoefficientInfo *B, BoundInfo *Bound, 2620 const SmallBitVector &Loops, 2621 unsigned &DepthExpanded, 2622 const SCEV *Delta) const { 2623 if (Level > CommonLevels) { 2624 // record result 2625 LLVM_DEBUG(dbgs() << "\t["); 2626 for (unsigned K = 1; K <= CommonLevels; ++K) { 2627 if (Loops[K]) { 2628 Bound[K].DirSet |= Bound[K].Direction; 2629 #ifndef NDEBUG 2630 switch (Bound[K].Direction) { 2631 case Dependence::DVEntry::LT: 2632 LLVM_DEBUG(dbgs() << " <"); 2633 break; 2634 case Dependence::DVEntry::EQ: 2635 LLVM_DEBUG(dbgs() << " ="); 2636 break; 2637 case Dependence::DVEntry::GT: 2638 LLVM_DEBUG(dbgs() << " >"); 2639 break; 2640 case Dependence::DVEntry::ALL: 2641 LLVM_DEBUG(dbgs() << " *"); 2642 break; 2643 default: 2644 llvm_unreachable("unexpected Bound[K].Direction"); 2645 } 2646 #endif 2647 } 2648 } 2649 LLVM_DEBUG(dbgs() << " ]\n"); 2650 return 1; 2651 } 2652 if (Loops[Level]) { 2653 if (Level > DepthExpanded) { 2654 DepthExpanded = Level; 2655 // compute bounds for <, =, > at current level 2656 findBoundsLT(A, B, Bound, Level); 2657 findBoundsGT(A, B, Bound, Level); 2658 findBoundsEQ(A, B, Bound, Level); 2659 #ifndef NDEBUG 2660 LLVM_DEBUG(dbgs() << "\tBound for level = " << Level << '\n'); 2661 LLVM_DEBUG(dbgs() << "\t <\t"); 2662 if (Bound[Level].Lower[Dependence::DVEntry::LT]) 2663 LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::LT] 2664 << '\t'); 2665 else 2666 LLVM_DEBUG(dbgs() << "-inf\t"); 2667 if (Bound[Level].Upper[Dependence::DVEntry::LT]) 2668 LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::LT] 2669 << '\n'); 2670 else 2671 LLVM_DEBUG(dbgs() << "+inf\n"); 2672 LLVM_DEBUG(dbgs() << "\t =\t"); 2673 if (Bound[Level].Lower[Dependence::DVEntry::EQ]) 2674 LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::EQ] 2675 << '\t'); 2676 else 2677 LLVM_DEBUG(dbgs() << "-inf\t"); 2678 if (Bound[Level].Upper[Dependence::DVEntry::EQ]) 2679 LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::EQ] 2680 << '\n'); 2681 else 2682 LLVM_DEBUG(dbgs() << "+inf\n"); 2683 LLVM_DEBUG(dbgs() << "\t >\t"); 2684 if (Bound[Level].Lower[Dependence::DVEntry::GT]) 2685 LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::GT] 2686 << '\t'); 2687 else 2688 LLVM_DEBUG(dbgs() << "-inf\t"); 2689 if (Bound[Level].Upper[Dependence::DVEntry::GT]) 2690 LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::GT] 2691 << '\n'); 2692 else 2693 LLVM_DEBUG(dbgs() << "+inf\n"); 2694 #endif 2695 } 2696 2697 unsigned NewDeps = 0; 2698 2699 // test bounds for <, *, *, ... 2700 if (testBounds(Dependence::DVEntry::LT, Level, Bound, Delta)) 2701 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2702 Loops, DepthExpanded, Delta); 2703 2704 // Test bounds for =, *, *, ... 2705 if (testBounds(Dependence::DVEntry::EQ, Level, Bound, Delta)) 2706 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2707 Loops, DepthExpanded, Delta); 2708 2709 // test bounds for >, *, *, ... 2710 if (testBounds(Dependence::DVEntry::GT, Level, Bound, Delta)) 2711 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2712 Loops, DepthExpanded, Delta); 2713 2714 Bound[Level].Direction = Dependence::DVEntry::ALL; 2715 return NewDeps; 2716 } 2717 else 2718 return exploreDirections(Level + 1, A, B, Bound, Loops, DepthExpanded, Delta); 2719 } 2720 2721 2722 // Returns true iff the current bounds are plausible. 2723 bool DependenceInfo::testBounds(unsigned char DirKind, unsigned Level, 2724 BoundInfo *Bound, const SCEV *Delta) const { 2725 Bound[Level].Direction = DirKind; 2726 if (const SCEV *LowerBound = getLowerBound(Bound)) 2727 if (isKnownPredicate(CmpInst::ICMP_SGT, LowerBound, Delta)) 2728 return false; 2729 if (const SCEV *UpperBound = getUpperBound(Bound)) 2730 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, UpperBound)) 2731 return false; 2732 return true; 2733 } 2734 2735 2736 // Computes the upper and lower bounds for level K 2737 // using the * direction. Records them in Bound. 2738 // Wolfe gives the equations 2739 // 2740 // LB^*_k = (A^-_k - B^+_k)(U_k - L_k) + (A_k - B_k)L_k 2741 // UB^*_k = (A^+_k - B^-_k)(U_k - L_k) + (A_k - B_k)L_k 2742 // 2743 // Since we normalize loops, we can simplify these equations to 2744 // 2745 // LB^*_k = (A^-_k - B^+_k)U_k 2746 // UB^*_k = (A^+_k - B^-_k)U_k 2747 // 2748 // We must be careful to handle the case where the upper bound is unknown. 2749 // Note that the lower bound is always <= 0 2750 // and the upper bound is always >= 0. 2751 void DependenceInfo::findBoundsALL(CoefficientInfo *A, CoefficientInfo *B, 2752 BoundInfo *Bound, unsigned K) const { 2753 Bound[K].Lower[Dependence::DVEntry::ALL] = nullptr; // Default value = -infinity. 2754 Bound[K].Upper[Dependence::DVEntry::ALL] = nullptr; // Default value = +infinity. 2755 if (Bound[K].Iterations) { 2756 Bound[K].Lower[Dependence::DVEntry::ALL] = 2757 SE->getMulExpr(SE->getMinusSCEV(A[K].NegPart, B[K].PosPart), 2758 Bound[K].Iterations); 2759 Bound[K].Upper[Dependence::DVEntry::ALL] = 2760 SE->getMulExpr(SE->getMinusSCEV(A[K].PosPart, B[K].NegPart), 2761 Bound[K].Iterations); 2762 } 2763 else { 2764 // If the difference is 0, we won't need to know the number of iterations. 2765 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].NegPart, B[K].PosPart)) 2766 Bound[K].Lower[Dependence::DVEntry::ALL] = 2767 SE->getZero(A[K].Coeff->getType()); 2768 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].PosPart, B[K].NegPart)) 2769 Bound[K].Upper[Dependence::DVEntry::ALL] = 2770 SE->getZero(A[K].Coeff->getType()); 2771 } 2772 } 2773 2774 2775 // Computes the upper and lower bounds for level K 2776 // using the = direction. Records them in Bound. 2777 // Wolfe gives the equations 2778 // 2779 // LB^=_k = (A_k - B_k)^- (U_k - L_k) + (A_k - B_k)L_k 2780 // UB^=_k = (A_k - B_k)^+ (U_k - L_k) + (A_k - B_k)L_k 2781 // 2782 // Since we normalize loops, we can simplify these equations to 2783 // 2784 // LB^=_k = (A_k - B_k)^- U_k 2785 // UB^=_k = (A_k - B_k)^+ U_k 2786 // 2787 // We must be careful to handle the case where the upper bound is unknown. 2788 // Note that the lower bound is always <= 0 2789 // and the upper bound is always >= 0. 2790 void DependenceInfo::findBoundsEQ(CoefficientInfo *A, CoefficientInfo *B, 2791 BoundInfo *Bound, unsigned K) const { 2792 Bound[K].Lower[Dependence::DVEntry::EQ] = nullptr; // Default value = -infinity. 2793 Bound[K].Upper[Dependence::DVEntry::EQ] = nullptr; // Default value = +infinity. 2794 if (Bound[K].Iterations) { 2795 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); 2796 const SCEV *NegativePart = getNegativePart(Delta); 2797 Bound[K].Lower[Dependence::DVEntry::EQ] = 2798 SE->getMulExpr(NegativePart, Bound[K].Iterations); 2799 const SCEV *PositivePart = getPositivePart(Delta); 2800 Bound[K].Upper[Dependence::DVEntry::EQ] = 2801 SE->getMulExpr(PositivePart, Bound[K].Iterations); 2802 } 2803 else { 2804 // If the positive/negative part of the difference is 0, 2805 // we won't need to know the number of iterations. 2806 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); 2807 const SCEV *NegativePart = getNegativePart(Delta); 2808 if (NegativePart->isZero()) 2809 Bound[K].Lower[Dependence::DVEntry::EQ] = NegativePart; // Zero 2810 const SCEV *PositivePart = getPositivePart(Delta); 2811 if (PositivePart->isZero()) 2812 Bound[K].Upper[Dependence::DVEntry::EQ] = PositivePart; // Zero 2813 } 2814 } 2815 2816 2817 // Computes the upper and lower bounds for level K 2818 // using the < direction. Records them in Bound. 2819 // Wolfe gives the equations 2820 // 2821 // LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2822 // UB^<_k = (A^+_k - B_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2823 // 2824 // Since we normalize loops, we can simplify these equations to 2825 // 2826 // LB^<_k = (A^-_k - B_k)^- (U_k - 1) - B_k 2827 // UB^<_k = (A^+_k - B_k)^+ (U_k - 1) - B_k 2828 // 2829 // We must be careful to handle the case where the upper bound is unknown. 2830 void DependenceInfo::findBoundsLT(CoefficientInfo *A, CoefficientInfo *B, 2831 BoundInfo *Bound, unsigned K) const { 2832 Bound[K].Lower[Dependence::DVEntry::LT] = nullptr; // Default value = -infinity. 2833 Bound[K].Upper[Dependence::DVEntry::LT] = nullptr; // Default value = +infinity. 2834 if (Bound[K].Iterations) { 2835 const SCEV *Iter_1 = SE->getMinusSCEV( 2836 Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType())); 2837 const SCEV *NegPart = 2838 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); 2839 Bound[K].Lower[Dependence::DVEntry::LT] = 2840 SE->getMinusSCEV(SE->getMulExpr(NegPart, Iter_1), B[K].Coeff); 2841 const SCEV *PosPart = 2842 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); 2843 Bound[K].Upper[Dependence::DVEntry::LT] = 2844 SE->getMinusSCEV(SE->getMulExpr(PosPart, Iter_1), B[K].Coeff); 2845 } 2846 else { 2847 // If the positive/negative part of the difference is 0, 2848 // we won't need to know the number of iterations. 2849 const SCEV *NegPart = 2850 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); 2851 if (NegPart->isZero()) 2852 Bound[K].Lower[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); 2853 const SCEV *PosPart = 2854 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); 2855 if (PosPart->isZero()) 2856 Bound[K].Upper[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); 2857 } 2858 } 2859 2860 2861 // Computes the upper and lower bounds for level K 2862 // using the > direction. Records them in Bound. 2863 // Wolfe gives the equations 2864 // 2865 // LB^>_k = (A_k - B^+_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k 2866 // UB^>_k = (A_k - B^-_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k 2867 // 2868 // Since we normalize loops, we can simplify these equations to 2869 // 2870 // LB^>_k = (A_k - B^+_k)^- (U_k - 1) + A_k 2871 // UB^>_k = (A_k - B^-_k)^+ (U_k - 1) + A_k 2872 // 2873 // We must be careful to handle the case where the upper bound is unknown. 2874 void DependenceInfo::findBoundsGT(CoefficientInfo *A, CoefficientInfo *B, 2875 BoundInfo *Bound, unsigned K) const { 2876 Bound[K].Lower[Dependence::DVEntry::GT] = nullptr; // Default value = -infinity. 2877 Bound[K].Upper[Dependence::DVEntry::GT] = nullptr; // Default value = +infinity. 2878 if (Bound[K].Iterations) { 2879 const SCEV *Iter_1 = SE->getMinusSCEV( 2880 Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType())); 2881 const SCEV *NegPart = 2882 getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); 2883 Bound[K].Lower[Dependence::DVEntry::GT] = 2884 SE->getAddExpr(SE->getMulExpr(NegPart, Iter_1), A[K].Coeff); 2885 const SCEV *PosPart = 2886 getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); 2887 Bound[K].Upper[Dependence::DVEntry::GT] = 2888 SE->getAddExpr(SE->getMulExpr(PosPart, Iter_1), A[K].Coeff); 2889 } 2890 else { 2891 // If the positive/negative part of the difference is 0, 2892 // we won't need to know the number of iterations. 2893 const SCEV *NegPart = getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); 2894 if (NegPart->isZero()) 2895 Bound[K].Lower[Dependence::DVEntry::GT] = A[K].Coeff; 2896 const SCEV *PosPart = getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); 2897 if (PosPart->isZero()) 2898 Bound[K].Upper[Dependence::DVEntry::GT] = A[K].Coeff; 2899 } 2900 } 2901 2902 2903 // X^+ = max(X, 0) 2904 const SCEV *DependenceInfo::getPositivePart(const SCEV *X) const { 2905 return SE->getSMaxExpr(X, SE->getZero(X->getType())); 2906 } 2907 2908 2909 // X^- = min(X, 0) 2910 const SCEV *DependenceInfo::getNegativePart(const SCEV *X) const { 2911 return SE->getSMinExpr(X, SE->getZero(X->getType())); 2912 } 2913 2914 2915 // Walks through the subscript, 2916 // collecting each coefficient, the associated loop bounds, 2917 // and recording its positive and negative parts for later use. 2918 DependenceInfo::CoefficientInfo * 2919 DependenceInfo::collectCoeffInfo(const SCEV *Subscript, bool SrcFlag, 2920 const SCEV *&Constant) const { 2921 const SCEV *Zero = SE->getZero(Subscript->getType()); 2922 CoefficientInfo *CI = new CoefficientInfo[MaxLevels + 1]; 2923 for (unsigned K = 1; K <= MaxLevels; ++K) { 2924 CI[K].Coeff = Zero; 2925 CI[K].PosPart = Zero; 2926 CI[K].NegPart = Zero; 2927 CI[K].Iterations = nullptr; 2928 } 2929 while (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Subscript)) { 2930 const Loop *L = AddRec->getLoop(); 2931 unsigned K = SrcFlag ? mapSrcLoop(L) : mapDstLoop(L); 2932 CI[K].Coeff = AddRec->getStepRecurrence(*SE); 2933 CI[K].PosPart = getPositivePart(CI[K].Coeff); 2934 CI[K].NegPart = getNegativePart(CI[K].Coeff); 2935 CI[K].Iterations = collectUpperBound(L, Subscript->getType()); 2936 Subscript = AddRec->getStart(); 2937 } 2938 Constant = Subscript; 2939 #ifndef NDEBUG 2940 LLVM_DEBUG(dbgs() << "\tCoefficient Info\n"); 2941 for (unsigned K = 1; K <= MaxLevels; ++K) { 2942 LLVM_DEBUG(dbgs() << "\t " << K << "\t" << *CI[K].Coeff); 2943 LLVM_DEBUG(dbgs() << "\tPos Part = "); 2944 LLVM_DEBUG(dbgs() << *CI[K].PosPart); 2945 LLVM_DEBUG(dbgs() << "\tNeg Part = "); 2946 LLVM_DEBUG(dbgs() << *CI[K].NegPart); 2947 LLVM_DEBUG(dbgs() << "\tUpper Bound = "); 2948 if (CI[K].Iterations) 2949 LLVM_DEBUG(dbgs() << *CI[K].Iterations); 2950 else 2951 LLVM_DEBUG(dbgs() << "+inf"); 2952 LLVM_DEBUG(dbgs() << '\n'); 2953 } 2954 LLVM_DEBUG(dbgs() << "\t Constant = " << *Subscript << '\n'); 2955 #endif 2956 return CI; 2957 } 2958 2959 2960 // Looks through all the bounds info and 2961 // computes the lower bound given the current direction settings 2962 // at each level. If the lower bound for any level is -inf, 2963 // the result is -inf. 2964 const SCEV *DependenceInfo::getLowerBound(BoundInfo *Bound) const { 2965 const SCEV *Sum = Bound[1].Lower[Bound[1].Direction]; 2966 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { 2967 if (Bound[K].Lower[Bound[K].Direction]) 2968 Sum = SE->getAddExpr(Sum, Bound[K].Lower[Bound[K].Direction]); 2969 else 2970 Sum = nullptr; 2971 } 2972 return Sum; 2973 } 2974 2975 2976 // Looks through all the bounds info and 2977 // computes the upper bound given the current direction settings 2978 // at each level. If the upper bound at any level is +inf, 2979 // the result is +inf. 2980 const SCEV *DependenceInfo::getUpperBound(BoundInfo *Bound) const { 2981 const SCEV *Sum = Bound[1].Upper[Bound[1].Direction]; 2982 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { 2983 if (Bound[K].Upper[Bound[K].Direction]) 2984 Sum = SE->getAddExpr(Sum, Bound[K].Upper[Bound[K].Direction]); 2985 else 2986 Sum = nullptr; 2987 } 2988 return Sum; 2989 } 2990 2991 2992 //===----------------------------------------------------------------------===// 2993 // Constraint manipulation for Delta test. 2994 2995 // Given a linear SCEV, 2996 // return the coefficient (the step) 2997 // corresponding to the specified loop. 2998 // If there isn't one, return 0. 2999 // For example, given a*i + b*j + c*k, finding the coefficient 3000 // corresponding to the j loop would yield b. 3001 const SCEV *DependenceInfo::findCoefficient(const SCEV *Expr, 3002 const Loop *TargetLoop) const { 3003 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 3004 if (!AddRec) 3005 return SE->getZero(Expr->getType()); 3006 if (AddRec->getLoop() == TargetLoop) 3007 return AddRec->getStepRecurrence(*SE); 3008 return findCoefficient(AddRec->getStart(), TargetLoop); 3009 } 3010 3011 3012 // Given a linear SCEV, 3013 // return the SCEV given by zeroing out the coefficient 3014 // corresponding to the specified loop. 3015 // For example, given a*i + b*j + c*k, zeroing the coefficient 3016 // corresponding to the j loop would yield a*i + c*k. 3017 const SCEV *DependenceInfo::zeroCoefficient(const SCEV *Expr, 3018 const Loop *TargetLoop) const { 3019 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 3020 if (!AddRec) 3021 return Expr; // ignore 3022 if (AddRec->getLoop() == TargetLoop) 3023 return AddRec->getStart(); 3024 return SE->getAddRecExpr(zeroCoefficient(AddRec->getStart(), TargetLoop), 3025 AddRec->getStepRecurrence(*SE), 3026 AddRec->getLoop(), 3027 AddRec->getNoWrapFlags()); 3028 } 3029 3030 3031 // Given a linear SCEV Expr, 3032 // return the SCEV given by adding some Value to the 3033 // coefficient corresponding to the specified TargetLoop. 3034 // For example, given a*i + b*j + c*k, adding 1 to the coefficient 3035 // corresponding to the j loop would yield a*i + (b+1)*j + c*k. 3036 const SCEV *DependenceInfo::addToCoefficient(const SCEV *Expr, 3037 const Loop *TargetLoop, 3038 const SCEV *Value) const { 3039 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 3040 if (!AddRec) // create a new addRec 3041 return SE->getAddRecExpr(Expr, 3042 Value, 3043 TargetLoop, 3044 SCEV::FlagAnyWrap); // Worst case, with no info. 3045 if (AddRec->getLoop() == TargetLoop) { 3046 const SCEV *Sum = SE->getAddExpr(AddRec->getStepRecurrence(*SE), Value); 3047 if (Sum->isZero()) 3048 return AddRec->getStart(); 3049 return SE->getAddRecExpr(AddRec->getStart(), 3050 Sum, 3051 AddRec->getLoop(), 3052 AddRec->getNoWrapFlags()); 3053 } 3054 if (SE->isLoopInvariant(AddRec, TargetLoop)) 3055 return SE->getAddRecExpr(AddRec, Value, TargetLoop, SCEV::FlagAnyWrap); 3056 return SE->getAddRecExpr( 3057 addToCoefficient(AddRec->getStart(), TargetLoop, Value), 3058 AddRec->getStepRecurrence(*SE), AddRec->getLoop(), 3059 AddRec->getNoWrapFlags()); 3060 } 3061 3062 3063 // Review the constraints, looking for opportunities 3064 // to simplify a subscript pair (Src and Dst). 3065 // Return true if some simplification occurs. 3066 // If the simplification isn't exact (that is, if it is conservative 3067 // in terms of dependence), set consistent to false. 3068 // Corresponds to Figure 5 from the paper 3069 // 3070 // Practical Dependence Testing 3071 // Goff, Kennedy, Tseng 3072 // PLDI 1991 3073 bool DependenceInfo::propagate(const SCEV *&Src, const SCEV *&Dst, 3074 SmallBitVector &Loops, 3075 SmallVectorImpl<Constraint> &Constraints, 3076 bool &Consistent) { 3077 bool Result = false; 3078 for (unsigned LI : Loops.set_bits()) { 3079 LLVM_DEBUG(dbgs() << "\t Constraint[" << LI << "] is"); 3080 LLVM_DEBUG(Constraints[LI].dump(dbgs())); 3081 if (Constraints[LI].isDistance()) 3082 Result |= propagateDistance(Src, Dst, Constraints[LI], Consistent); 3083 else if (Constraints[LI].isLine()) 3084 Result |= propagateLine(Src, Dst, Constraints[LI], Consistent); 3085 else if (Constraints[LI].isPoint()) 3086 Result |= propagatePoint(Src, Dst, Constraints[LI]); 3087 } 3088 return Result; 3089 } 3090 3091 3092 // Attempt to propagate a distance 3093 // constraint into a subscript pair (Src and Dst). 3094 // Return true if some simplification occurs. 3095 // If the simplification isn't exact (that is, if it is conservative 3096 // in terms of dependence), set consistent to false. 3097 bool DependenceInfo::propagateDistance(const SCEV *&Src, const SCEV *&Dst, 3098 Constraint &CurConstraint, 3099 bool &Consistent) { 3100 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3101 LLVM_DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); 3102 const SCEV *A_K = findCoefficient(Src, CurLoop); 3103 if (A_K->isZero()) 3104 return false; 3105 const SCEV *DA_K = SE->getMulExpr(A_K, CurConstraint.getD()); 3106 Src = SE->getMinusSCEV(Src, DA_K); 3107 Src = zeroCoefficient(Src, CurLoop); 3108 LLVM_DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); 3109 LLVM_DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); 3110 Dst = addToCoefficient(Dst, CurLoop, SE->getNegativeSCEV(A_K)); 3111 LLVM_DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); 3112 if (!findCoefficient(Dst, CurLoop)->isZero()) 3113 Consistent = false; 3114 return true; 3115 } 3116 3117 3118 // Attempt to propagate a line 3119 // constraint into a subscript pair (Src and Dst). 3120 // Return true if some simplification occurs. 3121 // If the simplification isn't exact (that is, if it is conservative 3122 // in terms of dependence), set consistent to false. 3123 bool DependenceInfo::propagateLine(const SCEV *&Src, const SCEV *&Dst, 3124 Constraint &CurConstraint, 3125 bool &Consistent) { 3126 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3127 const SCEV *A = CurConstraint.getA(); 3128 const SCEV *B = CurConstraint.getB(); 3129 const SCEV *C = CurConstraint.getC(); 3130 LLVM_DEBUG(dbgs() << "\t\tA = " << *A << ", B = " << *B << ", C = " << *C 3131 << "\n"); 3132 LLVM_DEBUG(dbgs() << "\t\tSrc = " << *Src << "\n"); 3133 LLVM_DEBUG(dbgs() << "\t\tDst = " << *Dst << "\n"); 3134 if (A->isZero()) { 3135 const SCEVConstant *Bconst = dyn_cast<SCEVConstant>(B); 3136 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3137 if (!Bconst || !Cconst) return false; 3138 APInt Beta = Bconst->getAPInt(); 3139 APInt Charlie = Cconst->getAPInt(); 3140 APInt CdivB = Charlie.sdiv(Beta); 3141 assert(Charlie.srem(Beta) == 0 && "C should be evenly divisible by B"); 3142 const SCEV *AP_K = findCoefficient(Dst, CurLoop); 3143 // Src = SE->getAddExpr(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); 3144 Src = SE->getMinusSCEV(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); 3145 Dst = zeroCoefficient(Dst, CurLoop); 3146 if (!findCoefficient(Src, CurLoop)->isZero()) 3147 Consistent = false; 3148 } 3149 else if (B->isZero()) { 3150 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); 3151 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3152 if (!Aconst || !Cconst) return false; 3153 APInt Alpha = Aconst->getAPInt(); 3154 APInt Charlie = Cconst->getAPInt(); 3155 APInt CdivA = Charlie.sdiv(Alpha); 3156 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); 3157 const SCEV *A_K = findCoefficient(Src, CurLoop); 3158 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); 3159 Src = zeroCoefficient(Src, CurLoop); 3160 if (!findCoefficient(Dst, CurLoop)->isZero()) 3161 Consistent = false; 3162 } 3163 else if (isKnownPredicate(CmpInst::ICMP_EQ, A, B)) { 3164 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); 3165 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3166 if (!Aconst || !Cconst) return false; 3167 APInt Alpha = Aconst->getAPInt(); 3168 APInt Charlie = Cconst->getAPInt(); 3169 APInt CdivA = Charlie.sdiv(Alpha); 3170 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); 3171 const SCEV *A_K = findCoefficient(Src, CurLoop); 3172 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); 3173 Src = zeroCoefficient(Src, CurLoop); 3174 Dst = addToCoefficient(Dst, CurLoop, A_K); 3175 if (!findCoefficient(Dst, CurLoop)->isZero()) 3176 Consistent = false; 3177 } 3178 else { 3179 // paper is incorrect here, or perhaps just misleading 3180 const SCEV *A_K = findCoefficient(Src, CurLoop); 3181 Src = SE->getMulExpr(Src, A); 3182 Dst = SE->getMulExpr(Dst, A); 3183 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, C)); 3184 Src = zeroCoefficient(Src, CurLoop); 3185 Dst = addToCoefficient(Dst, CurLoop, SE->getMulExpr(A_K, B)); 3186 if (!findCoefficient(Dst, CurLoop)->isZero()) 3187 Consistent = false; 3188 } 3189 LLVM_DEBUG(dbgs() << "\t\tnew Src = " << *Src << "\n"); 3190 LLVM_DEBUG(dbgs() << "\t\tnew Dst = " << *Dst << "\n"); 3191 return true; 3192 } 3193 3194 3195 // Attempt to propagate a point 3196 // constraint into a subscript pair (Src and Dst). 3197 // Return true if some simplification occurs. 3198 bool DependenceInfo::propagatePoint(const SCEV *&Src, const SCEV *&Dst, 3199 Constraint &CurConstraint) { 3200 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3201 const SCEV *A_K = findCoefficient(Src, CurLoop); 3202 const SCEV *AP_K = findCoefficient(Dst, CurLoop); 3203 const SCEV *XA_K = SE->getMulExpr(A_K, CurConstraint.getX()); 3204 const SCEV *YAP_K = SE->getMulExpr(AP_K, CurConstraint.getY()); 3205 LLVM_DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); 3206 Src = SE->getAddExpr(Src, SE->getMinusSCEV(XA_K, YAP_K)); 3207 Src = zeroCoefficient(Src, CurLoop); 3208 LLVM_DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); 3209 LLVM_DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); 3210 Dst = zeroCoefficient(Dst, CurLoop); 3211 LLVM_DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); 3212 return true; 3213 } 3214 3215 3216 // Update direction vector entry based on the current constraint. 3217 void DependenceInfo::updateDirection(Dependence::DVEntry &Level, 3218 const Constraint &CurConstraint) const { 3219 LLVM_DEBUG(dbgs() << "\tUpdate direction, constraint ="); 3220 LLVM_DEBUG(CurConstraint.dump(dbgs())); 3221 if (CurConstraint.isAny()) 3222 ; // use defaults 3223 else if (CurConstraint.isDistance()) { 3224 // this one is consistent, the others aren't 3225 Level.Scalar = false; 3226 Level.Distance = CurConstraint.getD(); 3227 unsigned NewDirection = Dependence::DVEntry::NONE; 3228 if (!SE->isKnownNonZero(Level.Distance)) // if may be zero 3229 NewDirection = Dependence::DVEntry::EQ; 3230 if (!SE->isKnownNonPositive(Level.Distance)) // if may be positive 3231 NewDirection |= Dependence::DVEntry::LT; 3232 if (!SE->isKnownNonNegative(Level.Distance)) // if may be negative 3233 NewDirection |= Dependence::DVEntry::GT; 3234 Level.Direction &= NewDirection; 3235 } 3236 else if (CurConstraint.isLine()) { 3237 Level.Scalar = false; 3238 Level.Distance = nullptr; 3239 // direction should be accurate 3240 } 3241 else if (CurConstraint.isPoint()) { 3242 Level.Scalar = false; 3243 Level.Distance = nullptr; 3244 unsigned NewDirection = Dependence::DVEntry::NONE; 3245 if (!isKnownPredicate(CmpInst::ICMP_NE, 3246 CurConstraint.getY(), 3247 CurConstraint.getX())) 3248 // if X may be = Y 3249 NewDirection |= Dependence::DVEntry::EQ; 3250 if (!isKnownPredicate(CmpInst::ICMP_SLE, 3251 CurConstraint.getY(), 3252 CurConstraint.getX())) 3253 // if Y may be > X 3254 NewDirection |= Dependence::DVEntry::LT; 3255 if (!isKnownPredicate(CmpInst::ICMP_SGE, 3256 CurConstraint.getY(), 3257 CurConstraint.getX())) 3258 // if Y may be < X 3259 NewDirection |= Dependence::DVEntry::GT; 3260 Level.Direction &= NewDirection; 3261 } 3262 else 3263 llvm_unreachable("constraint has unexpected kind"); 3264 } 3265 3266 /// Check if we can delinearize the subscripts. If the SCEVs representing the 3267 /// source and destination array references are recurrences on a nested loop, 3268 /// this function flattens the nested recurrences into separate recurrences 3269 /// for each loop level. 3270 bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst, 3271 SmallVectorImpl<Subscript> &Pair) { 3272 assert(isLoadOrStore(Src) && "instruction is not load or store"); 3273 assert(isLoadOrStore(Dst) && "instruction is not load or store"); 3274 Value *SrcPtr = getLoadStorePointerOperand(Src); 3275 Value *DstPtr = getLoadStorePointerOperand(Dst); 3276 3277 Loop *SrcLoop = LI->getLoopFor(Src->getParent()); 3278 Loop *DstLoop = LI->getLoopFor(Dst->getParent()); 3279 3280 // Below code mimics the code in Delinearization.cpp 3281 const SCEV *SrcAccessFn = 3282 SE->getSCEVAtScope(SrcPtr, SrcLoop); 3283 const SCEV *DstAccessFn = 3284 SE->getSCEVAtScope(DstPtr, DstLoop); 3285 3286 const SCEVUnknown *SrcBase = 3287 dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn)); 3288 const SCEVUnknown *DstBase = 3289 dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn)); 3290 3291 if (!SrcBase || !DstBase || SrcBase != DstBase) 3292 return false; 3293 3294 const SCEV *ElementSize = SE->getElementSize(Src); 3295 if (ElementSize != SE->getElementSize(Dst)) 3296 return false; 3297 3298 const SCEV *SrcSCEV = SE->getMinusSCEV(SrcAccessFn, SrcBase); 3299 const SCEV *DstSCEV = SE->getMinusSCEV(DstAccessFn, DstBase); 3300 3301 const SCEVAddRecExpr *SrcAR = dyn_cast<SCEVAddRecExpr>(SrcSCEV); 3302 const SCEVAddRecExpr *DstAR = dyn_cast<SCEVAddRecExpr>(DstSCEV); 3303 if (!SrcAR || !DstAR || !SrcAR->isAffine() || !DstAR->isAffine()) 3304 return false; 3305 3306 // First step: collect parametric terms in both array references. 3307 SmallVector<const SCEV *, 4> Terms; 3308 SE->collectParametricTerms(SrcAR, Terms); 3309 SE->collectParametricTerms(DstAR, Terms); 3310 3311 // Second step: find subscript sizes. 3312 SmallVector<const SCEV *, 4> Sizes; 3313 SE->findArrayDimensions(Terms, Sizes, ElementSize); 3314 3315 // Third step: compute the access functions for each subscript. 3316 SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts; 3317 SE->computeAccessFunctions(SrcAR, SrcSubscripts, Sizes); 3318 SE->computeAccessFunctions(DstAR, DstSubscripts, Sizes); 3319 3320 // Fail when there is only a subscript: that's a linearized access function. 3321 if (SrcSubscripts.size() < 2 || DstSubscripts.size() < 2 || 3322 SrcSubscripts.size() != DstSubscripts.size()) 3323 return false; 3324 3325 int size = SrcSubscripts.size(); 3326 3327 // Statically check that the array bounds are in-range. The first subscript we 3328 // don't have a size for and it cannot overflow into another subscript, so is 3329 // always safe. The others need to be 0 <= subscript[i] < bound, for both src 3330 // and dst. 3331 // FIXME: It may be better to record these sizes and add them as constraints 3332 // to the dependency checks. 3333 if (!DisableDelinearizationChecks) 3334 for (int i = 1; i < size; ++i) { 3335 if (!isKnownNonNegative(SrcSubscripts[i], SrcPtr)) 3336 return false; 3337 3338 if (!isKnownLessThan(SrcSubscripts[i], Sizes[i - 1])) 3339 return false; 3340 3341 if (!isKnownNonNegative(DstSubscripts[i], DstPtr)) 3342 return false; 3343 3344 if (!isKnownLessThan(DstSubscripts[i], Sizes[i - 1])) 3345 return false; 3346 } 3347 3348 LLVM_DEBUG({ 3349 dbgs() << "\nSrcSubscripts: "; 3350 for (int i = 0; i < size; i++) 3351 dbgs() << *SrcSubscripts[i]; 3352 dbgs() << "\nDstSubscripts: "; 3353 for (int i = 0; i < size; i++) 3354 dbgs() << *DstSubscripts[i]; 3355 }); 3356 3357 // The delinearization transforms a single-subscript MIV dependence test into 3358 // a multi-subscript SIV dependence test that is easier to compute. So we 3359 // resize Pair to contain as many pairs of subscripts as the delinearization 3360 // has found, and then initialize the pairs following the delinearization. 3361 Pair.resize(size); 3362 for (int i = 0; i < size; ++i) { 3363 Pair[i].Src = SrcSubscripts[i]; 3364 Pair[i].Dst = DstSubscripts[i]; 3365 unifySubscriptType(&Pair[i]); 3366 } 3367 3368 return true; 3369 } 3370 3371 //===----------------------------------------------------------------------===// 3372 3373 #ifndef NDEBUG 3374 // For debugging purposes, dump a small bit vector to dbgs(). 3375 static void dumpSmallBitVector(SmallBitVector &BV) { 3376 dbgs() << "{"; 3377 for (unsigned VI : BV.set_bits()) { 3378 dbgs() << VI; 3379 if (BV.find_next(VI) >= 0) 3380 dbgs() << ' '; 3381 } 3382 dbgs() << "}\n"; 3383 } 3384 #endif 3385 3386 bool DependenceInfo::invalidate(Function &F, const PreservedAnalyses &PA, 3387 FunctionAnalysisManager::Invalidator &Inv) { 3388 // Check if the analysis itself has been invalidated. 3389 auto PAC = PA.getChecker<DependenceAnalysis>(); 3390 if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>()) 3391 return true; 3392 3393 // Check transitive dependencies. 3394 return Inv.invalidate<AAManager>(F, PA) || 3395 Inv.invalidate<ScalarEvolutionAnalysis>(F, PA) || 3396 Inv.invalidate<LoopAnalysis>(F, PA); 3397 } 3398 3399 // depends - 3400 // Returns NULL if there is no dependence. 3401 // Otherwise, return a Dependence with as many details as possible. 3402 // Corresponds to Section 3.1 in the paper 3403 // 3404 // Practical Dependence Testing 3405 // Goff, Kennedy, Tseng 3406 // PLDI 1991 3407 // 3408 // Care is required to keep the routine below, getSplitIteration(), 3409 // up to date with respect to this routine. 3410 std::unique_ptr<Dependence> 3411 DependenceInfo::depends(Instruction *Src, Instruction *Dst, 3412 bool PossiblyLoopIndependent) { 3413 if (Src == Dst) 3414 PossiblyLoopIndependent = false; 3415 3416 if ((!Src->mayReadFromMemory() && !Src->mayWriteToMemory()) || 3417 (!Dst->mayReadFromMemory() && !Dst->mayWriteToMemory())) 3418 // if both instructions don't reference memory, there's no dependence 3419 return nullptr; 3420 3421 if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) { 3422 // can only analyze simple loads and stores, i.e., no calls, invokes, etc. 3423 LLVM_DEBUG(dbgs() << "can only handle simple loads and stores\n"); 3424 return std::make_unique<Dependence>(Src, Dst); 3425 } 3426 3427 assert(isLoadOrStore(Src) && "instruction is not load or store"); 3428 assert(isLoadOrStore(Dst) && "instruction is not load or store"); 3429 Value *SrcPtr = getLoadStorePointerOperand(Src); 3430 Value *DstPtr = getLoadStorePointerOperand(Dst); 3431 3432 switch (underlyingObjectsAlias(AA, F->getParent()->getDataLayout(), 3433 MemoryLocation::get(Dst), 3434 MemoryLocation::get(Src))) { 3435 case MayAlias: 3436 case PartialAlias: 3437 // cannot analyse objects if we don't understand their aliasing. 3438 LLVM_DEBUG(dbgs() << "can't analyze may or partial alias\n"); 3439 return std::make_unique<Dependence>(Src, Dst); 3440 case NoAlias: 3441 // If the objects noalias, they are distinct, accesses are independent. 3442 LLVM_DEBUG(dbgs() << "no alias\n"); 3443 return nullptr; 3444 case MustAlias: 3445 break; // The underlying objects alias; test accesses for dependence. 3446 } 3447 3448 // establish loop nesting levels 3449 establishNestingLevels(Src, Dst); 3450 LLVM_DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n"); 3451 LLVM_DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n"); 3452 3453 FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels); 3454 ++TotalArrayPairs; 3455 3456 unsigned Pairs = 1; 3457 SmallVector<Subscript, 2> Pair(Pairs); 3458 const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); 3459 const SCEV *DstSCEV = SE->getSCEV(DstPtr); 3460 LLVM_DEBUG(dbgs() << " SrcSCEV = " << *SrcSCEV << "\n"); 3461 LLVM_DEBUG(dbgs() << " DstSCEV = " << *DstSCEV << "\n"); 3462 Pair[0].Src = SrcSCEV; 3463 Pair[0].Dst = DstSCEV; 3464 3465 if (Delinearize) { 3466 if (tryDelinearize(Src, Dst, Pair)) { 3467 LLVM_DEBUG(dbgs() << " delinearized\n"); 3468 Pairs = Pair.size(); 3469 } 3470 } 3471 3472 for (unsigned P = 0; P < Pairs; ++P) { 3473 Pair[P].Loops.resize(MaxLevels + 1); 3474 Pair[P].GroupLoops.resize(MaxLevels + 1); 3475 Pair[P].Group.resize(Pairs); 3476 removeMatchingExtensions(&Pair[P]); 3477 Pair[P].Classification = 3478 classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), 3479 Pair[P].Dst, LI->getLoopFor(Dst->getParent()), 3480 Pair[P].Loops); 3481 Pair[P].GroupLoops = Pair[P].Loops; 3482 Pair[P].Group.set(P); 3483 LLVM_DEBUG(dbgs() << " subscript " << P << "\n"); 3484 LLVM_DEBUG(dbgs() << "\tsrc = " << *Pair[P].Src << "\n"); 3485 LLVM_DEBUG(dbgs() << "\tdst = " << *Pair[P].Dst << "\n"); 3486 LLVM_DEBUG(dbgs() << "\tclass = " << Pair[P].Classification << "\n"); 3487 LLVM_DEBUG(dbgs() << "\tloops = "); 3488 LLVM_DEBUG(dumpSmallBitVector(Pair[P].Loops)); 3489 } 3490 3491 SmallBitVector Separable(Pairs); 3492 SmallBitVector Coupled(Pairs); 3493 3494 // Partition subscripts into separable and minimally-coupled groups 3495 // Algorithm in paper is algorithmically better; 3496 // this may be faster in practice. Check someday. 3497 // 3498 // Here's an example of how it works. Consider this code: 3499 // 3500 // for (i = ...) { 3501 // for (j = ...) { 3502 // for (k = ...) { 3503 // for (l = ...) { 3504 // for (m = ...) { 3505 // A[i][j][k][m] = ...; 3506 // ... = A[0][j][l][i + j]; 3507 // } 3508 // } 3509 // } 3510 // } 3511 // } 3512 // 3513 // There are 4 subscripts here: 3514 // 0 [i] and [0] 3515 // 1 [j] and [j] 3516 // 2 [k] and [l] 3517 // 3 [m] and [i + j] 3518 // 3519 // We've already classified each subscript pair as ZIV, SIV, etc., 3520 // and collected all the loops mentioned by pair P in Pair[P].Loops. 3521 // In addition, we've initialized Pair[P].GroupLoops to Pair[P].Loops 3522 // and set Pair[P].Group = {P}. 3523 // 3524 // Src Dst Classification Loops GroupLoops Group 3525 // 0 [i] [0] SIV {1} {1} {0} 3526 // 1 [j] [j] SIV {2} {2} {1} 3527 // 2 [k] [l] RDIV {3,4} {3,4} {2} 3528 // 3 [m] [i + j] MIV {1,2,5} {1,2,5} {3} 3529 // 3530 // For each subscript SI 0 .. 3, we consider each remaining subscript, SJ. 3531 // So, 0 is compared against 1, 2, and 3; 1 is compared against 2 and 3, etc. 3532 // 3533 // We begin by comparing 0 and 1. The intersection of the GroupLoops is empty. 3534 // Next, 0 and 2. Again, the intersection of their GroupLoops is empty. 3535 // Next 0 and 3. The intersection of their GroupLoop = {1}, not empty, 3536 // so Pair[3].Group = {0,3} and Done = false (that is, 0 will not be added 3537 // to either Separable or Coupled). 3538 // 3539 // Next, we consider 1 and 2. The intersection of the GroupLoops is empty. 3540 // Next, 1 and 3. The intersection of their GroupLoops = {2}, not empty, 3541 // so Pair[3].Group = {0, 1, 3} and Done = false. 3542 // 3543 // Next, we compare 2 against 3. The intersection of the GroupLoops is empty. 3544 // Since Done remains true, we add 2 to the set of Separable pairs. 3545 // 3546 // Finally, we consider 3. There's nothing to compare it with, 3547 // so Done remains true and we add it to the Coupled set. 3548 // Pair[3].Group = {0, 1, 3} and GroupLoops = {1, 2, 5}. 3549 // 3550 // In the end, we've got 1 separable subscript and 1 coupled group. 3551 for (unsigned SI = 0; SI < Pairs; ++SI) { 3552 if (Pair[SI].Classification == Subscript::NonLinear) { 3553 // ignore these, but collect loops for later 3554 ++NonlinearSubscriptPairs; 3555 collectCommonLoops(Pair[SI].Src, 3556 LI->getLoopFor(Src->getParent()), 3557 Pair[SI].Loops); 3558 collectCommonLoops(Pair[SI].Dst, 3559 LI->getLoopFor(Dst->getParent()), 3560 Pair[SI].Loops); 3561 Result.Consistent = false; 3562 } else if (Pair[SI].Classification == Subscript::ZIV) { 3563 // always separable 3564 Separable.set(SI); 3565 } 3566 else { 3567 // SIV, RDIV, or MIV, so check for coupled group 3568 bool Done = true; 3569 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { 3570 SmallBitVector Intersection = Pair[SI].GroupLoops; 3571 Intersection &= Pair[SJ].GroupLoops; 3572 if (Intersection.any()) { 3573 // accumulate set of all the loops in group 3574 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; 3575 // accumulate set of all subscripts in group 3576 Pair[SJ].Group |= Pair[SI].Group; 3577 Done = false; 3578 } 3579 } 3580 if (Done) { 3581 if (Pair[SI].Group.count() == 1) { 3582 Separable.set(SI); 3583 ++SeparableSubscriptPairs; 3584 } 3585 else { 3586 Coupled.set(SI); 3587 ++CoupledSubscriptPairs; 3588 } 3589 } 3590 } 3591 } 3592 3593 LLVM_DEBUG(dbgs() << " Separable = "); 3594 LLVM_DEBUG(dumpSmallBitVector(Separable)); 3595 LLVM_DEBUG(dbgs() << " Coupled = "); 3596 LLVM_DEBUG(dumpSmallBitVector(Coupled)); 3597 3598 Constraint NewConstraint; 3599 NewConstraint.setAny(SE); 3600 3601 // test separable subscripts 3602 for (unsigned SI : Separable.set_bits()) { 3603 LLVM_DEBUG(dbgs() << "testing subscript " << SI); 3604 switch (Pair[SI].Classification) { 3605 case Subscript::ZIV: 3606 LLVM_DEBUG(dbgs() << ", ZIV\n"); 3607 if (testZIV(Pair[SI].Src, Pair[SI].Dst, Result)) 3608 return nullptr; 3609 break; 3610 case Subscript::SIV: { 3611 LLVM_DEBUG(dbgs() << ", SIV\n"); 3612 unsigned Level; 3613 const SCEV *SplitIter = nullptr; 3614 if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level, Result, NewConstraint, 3615 SplitIter)) 3616 return nullptr; 3617 break; 3618 } 3619 case Subscript::RDIV: 3620 LLVM_DEBUG(dbgs() << ", RDIV\n"); 3621 if (testRDIV(Pair[SI].Src, Pair[SI].Dst, Result)) 3622 return nullptr; 3623 break; 3624 case Subscript::MIV: 3625 LLVM_DEBUG(dbgs() << ", MIV\n"); 3626 if (testMIV(Pair[SI].Src, Pair[SI].Dst, Pair[SI].Loops, Result)) 3627 return nullptr; 3628 break; 3629 default: 3630 llvm_unreachable("subscript has unexpected classification"); 3631 } 3632 } 3633 3634 if (Coupled.count()) { 3635 // test coupled subscript groups 3636 LLVM_DEBUG(dbgs() << "starting on coupled subscripts\n"); 3637 LLVM_DEBUG(dbgs() << "MaxLevels + 1 = " << MaxLevels + 1 << "\n"); 3638 SmallVector<Constraint, 4> Constraints(MaxLevels + 1); 3639 for (unsigned II = 0; II <= MaxLevels; ++II) 3640 Constraints[II].setAny(SE); 3641 for (unsigned SI : Coupled.set_bits()) { 3642 LLVM_DEBUG(dbgs() << "testing subscript group " << SI << " { "); 3643 SmallBitVector Group(Pair[SI].Group); 3644 SmallBitVector Sivs(Pairs); 3645 SmallBitVector Mivs(Pairs); 3646 SmallBitVector ConstrainedLevels(MaxLevels + 1); 3647 SmallVector<Subscript *, 4> PairsInGroup; 3648 for (unsigned SJ : Group.set_bits()) { 3649 LLVM_DEBUG(dbgs() << SJ << " "); 3650 if (Pair[SJ].Classification == Subscript::SIV) 3651 Sivs.set(SJ); 3652 else 3653 Mivs.set(SJ); 3654 PairsInGroup.push_back(&Pair[SJ]); 3655 } 3656 unifySubscriptType(PairsInGroup); 3657 LLVM_DEBUG(dbgs() << "}\n"); 3658 while (Sivs.any()) { 3659 bool Changed = false; 3660 for (unsigned SJ : Sivs.set_bits()) { 3661 LLVM_DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n"); 3662 // SJ is an SIV subscript that's part of the current coupled group 3663 unsigned Level; 3664 const SCEV *SplitIter = nullptr; 3665 LLVM_DEBUG(dbgs() << "SIV\n"); 3666 if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, Result, NewConstraint, 3667 SplitIter)) 3668 return nullptr; 3669 ConstrainedLevels.set(Level); 3670 if (intersectConstraints(&Constraints[Level], &NewConstraint)) { 3671 if (Constraints[Level].isEmpty()) { 3672 ++DeltaIndependence; 3673 return nullptr; 3674 } 3675 Changed = true; 3676 } 3677 Sivs.reset(SJ); 3678 } 3679 if (Changed) { 3680 // propagate, possibly creating new SIVs and ZIVs 3681 LLVM_DEBUG(dbgs() << " propagating\n"); 3682 LLVM_DEBUG(dbgs() << "\tMivs = "); 3683 LLVM_DEBUG(dumpSmallBitVector(Mivs)); 3684 for (unsigned SJ : Mivs.set_bits()) { 3685 // SJ is an MIV subscript that's part of the current coupled group 3686 LLVM_DEBUG(dbgs() << "\tSJ = " << SJ << "\n"); 3687 if (propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, 3688 Constraints, Result.Consistent)) { 3689 LLVM_DEBUG(dbgs() << "\t Changed\n"); 3690 ++DeltaPropagations; 3691 Pair[SJ].Classification = 3692 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), 3693 Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), 3694 Pair[SJ].Loops); 3695 switch (Pair[SJ].Classification) { 3696 case Subscript::ZIV: 3697 LLVM_DEBUG(dbgs() << "ZIV\n"); 3698 if (testZIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) 3699 return nullptr; 3700 Mivs.reset(SJ); 3701 break; 3702 case Subscript::SIV: 3703 Sivs.set(SJ); 3704 Mivs.reset(SJ); 3705 break; 3706 case Subscript::RDIV: 3707 case Subscript::MIV: 3708 break; 3709 default: 3710 llvm_unreachable("bad subscript classification"); 3711 } 3712 } 3713 } 3714 } 3715 } 3716 3717 // test & propagate remaining RDIVs 3718 for (unsigned SJ : Mivs.set_bits()) { 3719 if (Pair[SJ].Classification == Subscript::RDIV) { 3720 LLVM_DEBUG(dbgs() << "RDIV test\n"); 3721 if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) 3722 return nullptr; 3723 // I don't yet understand how to propagate RDIV results 3724 Mivs.reset(SJ); 3725 } 3726 } 3727 3728 // test remaining MIVs 3729 // This code is temporary. 3730 // Better to somehow test all remaining subscripts simultaneously. 3731 for (unsigned SJ : Mivs.set_bits()) { 3732 if (Pair[SJ].Classification == Subscript::MIV) { 3733 LLVM_DEBUG(dbgs() << "MIV test\n"); 3734 if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Result)) 3735 return nullptr; 3736 } 3737 else 3738 llvm_unreachable("expected only MIV subscripts at this point"); 3739 } 3740 3741 // update Result.DV from constraint vector 3742 LLVM_DEBUG(dbgs() << " updating\n"); 3743 for (unsigned SJ : ConstrainedLevels.set_bits()) { 3744 if (SJ > CommonLevels) 3745 break; 3746 updateDirection(Result.DV[SJ - 1], Constraints[SJ]); 3747 if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE) 3748 return nullptr; 3749 } 3750 } 3751 } 3752 3753 // Make sure the Scalar flags are set correctly. 3754 SmallBitVector CompleteLoops(MaxLevels + 1); 3755 for (unsigned SI = 0; SI < Pairs; ++SI) 3756 CompleteLoops |= Pair[SI].Loops; 3757 for (unsigned II = 1; II <= CommonLevels; ++II) 3758 if (CompleteLoops[II]) 3759 Result.DV[II - 1].Scalar = false; 3760 3761 if (PossiblyLoopIndependent) { 3762 // Make sure the LoopIndependent flag is set correctly. 3763 // All directions must include equal, otherwise no 3764 // loop-independent dependence is possible. 3765 for (unsigned II = 1; II <= CommonLevels; ++II) { 3766 if (!(Result.getDirection(II) & Dependence::DVEntry::EQ)) { 3767 Result.LoopIndependent = false; 3768 break; 3769 } 3770 } 3771 } 3772 else { 3773 // On the other hand, if all directions are equal and there's no 3774 // loop-independent dependence possible, then no dependence exists. 3775 bool AllEqual = true; 3776 for (unsigned II = 1; II <= CommonLevels; ++II) { 3777 if (Result.getDirection(II) != Dependence::DVEntry::EQ) { 3778 AllEqual = false; 3779 break; 3780 } 3781 } 3782 if (AllEqual) 3783 return nullptr; 3784 } 3785 3786 return std::make_unique<FullDependence>(std::move(Result)); 3787 } 3788 3789 3790 3791 //===----------------------------------------------------------------------===// 3792 // getSplitIteration - 3793 // Rather than spend rarely-used space recording the splitting iteration 3794 // during the Weak-Crossing SIV test, we re-compute it on demand. 3795 // The re-computation is basically a repeat of the entire dependence test, 3796 // though simplified since we know that the dependence exists. 3797 // It's tedious, since we must go through all propagations, etc. 3798 // 3799 // Care is required to keep this code up to date with respect to the routine 3800 // above, depends(). 3801 // 3802 // Generally, the dependence analyzer will be used to build 3803 // a dependence graph for a function (basically a map from instructions 3804 // to dependences). Looking for cycles in the graph shows us loops 3805 // that cannot be trivially vectorized/parallelized. 3806 // 3807 // We can try to improve the situation by examining all the dependences 3808 // that make up the cycle, looking for ones we can break. 3809 // Sometimes, peeling the first or last iteration of a loop will break 3810 // dependences, and we've got flags for those possibilities. 3811 // Sometimes, splitting a loop at some other iteration will do the trick, 3812 // and we've got a flag for that case. Rather than waste the space to 3813 // record the exact iteration (since we rarely know), we provide 3814 // a method that calculates the iteration. It's a drag that it must work 3815 // from scratch, but wonderful in that it's possible. 3816 // 3817 // Here's an example: 3818 // 3819 // for (i = 0; i < 10; i++) 3820 // A[i] = ... 3821 // ... = A[11 - i] 3822 // 3823 // There's a loop-carried flow dependence from the store to the load, 3824 // found by the weak-crossing SIV test. The dependence will have a flag, 3825 // indicating that the dependence can be broken by splitting the loop. 3826 // Calling getSplitIteration will return 5. 3827 // Splitting the loop breaks the dependence, like so: 3828 // 3829 // for (i = 0; i <= 5; i++) 3830 // A[i] = ... 3831 // ... = A[11 - i] 3832 // for (i = 6; i < 10; i++) 3833 // A[i] = ... 3834 // ... = A[11 - i] 3835 // 3836 // breaks the dependence and allows us to vectorize/parallelize 3837 // both loops. 3838 const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep, 3839 unsigned SplitLevel) { 3840 assert(Dep.isSplitable(SplitLevel) && 3841 "Dep should be splitable at SplitLevel"); 3842 Instruction *Src = Dep.getSrc(); 3843 Instruction *Dst = Dep.getDst(); 3844 assert(Src->mayReadFromMemory() || Src->mayWriteToMemory()); 3845 assert(Dst->mayReadFromMemory() || Dst->mayWriteToMemory()); 3846 assert(isLoadOrStore(Src)); 3847 assert(isLoadOrStore(Dst)); 3848 Value *SrcPtr = getLoadStorePointerOperand(Src); 3849 Value *DstPtr = getLoadStorePointerOperand(Dst); 3850 assert(underlyingObjectsAlias(AA, F->getParent()->getDataLayout(), 3851 MemoryLocation::get(Dst), 3852 MemoryLocation::get(Src)) == MustAlias); 3853 3854 // establish loop nesting levels 3855 establishNestingLevels(Src, Dst); 3856 3857 FullDependence Result(Src, Dst, false, CommonLevels); 3858 3859 unsigned Pairs = 1; 3860 SmallVector<Subscript, 2> Pair(Pairs); 3861 const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); 3862 const SCEV *DstSCEV = SE->getSCEV(DstPtr); 3863 Pair[0].Src = SrcSCEV; 3864 Pair[0].Dst = DstSCEV; 3865 3866 if (Delinearize) { 3867 if (tryDelinearize(Src, Dst, Pair)) { 3868 LLVM_DEBUG(dbgs() << " delinearized\n"); 3869 Pairs = Pair.size(); 3870 } 3871 } 3872 3873 for (unsigned P = 0; P < Pairs; ++P) { 3874 Pair[P].Loops.resize(MaxLevels + 1); 3875 Pair[P].GroupLoops.resize(MaxLevels + 1); 3876 Pair[P].Group.resize(Pairs); 3877 removeMatchingExtensions(&Pair[P]); 3878 Pair[P].Classification = 3879 classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), 3880 Pair[P].Dst, LI->getLoopFor(Dst->getParent()), 3881 Pair[P].Loops); 3882 Pair[P].GroupLoops = Pair[P].Loops; 3883 Pair[P].Group.set(P); 3884 } 3885 3886 SmallBitVector Separable(Pairs); 3887 SmallBitVector Coupled(Pairs); 3888 3889 // partition subscripts into separable and minimally-coupled groups 3890 for (unsigned SI = 0; SI < Pairs; ++SI) { 3891 if (Pair[SI].Classification == Subscript::NonLinear) { 3892 // ignore these, but collect loops for later 3893 collectCommonLoops(Pair[SI].Src, 3894 LI->getLoopFor(Src->getParent()), 3895 Pair[SI].Loops); 3896 collectCommonLoops(Pair[SI].Dst, 3897 LI->getLoopFor(Dst->getParent()), 3898 Pair[SI].Loops); 3899 Result.Consistent = false; 3900 } 3901 else if (Pair[SI].Classification == Subscript::ZIV) 3902 Separable.set(SI); 3903 else { 3904 // SIV, RDIV, or MIV, so check for coupled group 3905 bool Done = true; 3906 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { 3907 SmallBitVector Intersection = Pair[SI].GroupLoops; 3908 Intersection &= Pair[SJ].GroupLoops; 3909 if (Intersection.any()) { 3910 // accumulate set of all the loops in group 3911 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; 3912 // accumulate set of all subscripts in group 3913 Pair[SJ].Group |= Pair[SI].Group; 3914 Done = false; 3915 } 3916 } 3917 if (Done) { 3918 if (Pair[SI].Group.count() == 1) 3919 Separable.set(SI); 3920 else 3921 Coupled.set(SI); 3922 } 3923 } 3924 } 3925 3926 Constraint NewConstraint; 3927 NewConstraint.setAny(SE); 3928 3929 // test separable subscripts 3930 for (unsigned SI : Separable.set_bits()) { 3931 switch (Pair[SI].Classification) { 3932 case Subscript::SIV: { 3933 unsigned Level; 3934 const SCEV *SplitIter = nullptr; 3935 (void) testSIV(Pair[SI].Src, Pair[SI].Dst, Level, 3936 Result, NewConstraint, SplitIter); 3937 if (Level == SplitLevel) { 3938 assert(SplitIter != nullptr); 3939 return SplitIter; 3940 } 3941 break; 3942 } 3943 case Subscript::ZIV: 3944 case Subscript::RDIV: 3945 case Subscript::MIV: 3946 break; 3947 default: 3948 llvm_unreachable("subscript has unexpected classification"); 3949 } 3950 } 3951 3952 if (Coupled.count()) { 3953 // test coupled subscript groups 3954 SmallVector<Constraint, 4> Constraints(MaxLevels + 1); 3955 for (unsigned II = 0; II <= MaxLevels; ++II) 3956 Constraints[II].setAny(SE); 3957 for (unsigned SI : Coupled.set_bits()) { 3958 SmallBitVector Group(Pair[SI].Group); 3959 SmallBitVector Sivs(Pairs); 3960 SmallBitVector Mivs(Pairs); 3961 SmallBitVector ConstrainedLevels(MaxLevels + 1); 3962 for (unsigned SJ : Group.set_bits()) { 3963 if (Pair[SJ].Classification == Subscript::SIV) 3964 Sivs.set(SJ); 3965 else 3966 Mivs.set(SJ); 3967 } 3968 while (Sivs.any()) { 3969 bool Changed = false; 3970 for (unsigned SJ : Sivs.set_bits()) { 3971 // SJ is an SIV subscript that's part of the current coupled group 3972 unsigned Level; 3973 const SCEV *SplitIter = nullptr; 3974 (void) testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, 3975 Result, NewConstraint, SplitIter); 3976 if (Level == SplitLevel && SplitIter) 3977 return SplitIter; 3978 ConstrainedLevels.set(Level); 3979 if (intersectConstraints(&Constraints[Level], &NewConstraint)) 3980 Changed = true; 3981 Sivs.reset(SJ); 3982 } 3983 if (Changed) { 3984 // propagate, possibly creating new SIVs and ZIVs 3985 for (unsigned SJ : Mivs.set_bits()) { 3986 // SJ is an MIV subscript that's part of the current coupled group 3987 if (propagate(Pair[SJ].Src, Pair[SJ].Dst, 3988 Pair[SJ].Loops, Constraints, Result.Consistent)) { 3989 Pair[SJ].Classification = 3990 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), 3991 Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), 3992 Pair[SJ].Loops); 3993 switch (Pair[SJ].Classification) { 3994 case Subscript::ZIV: 3995 Mivs.reset(SJ); 3996 break; 3997 case Subscript::SIV: 3998 Sivs.set(SJ); 3999 Mivs.reset(SJ); 4000 break; 4001 case Subscript::RDIV: 4002 case Subscript::MIV: 4003 break; 4004 default: 4005 llvm_unreachable("bad subscript classification"); 4006 } 4007 } 4008 } 4009 } 4010 } 4011 } 4012 } 4013 llvm_unreachable("somehow reached end of routine"); 4014 return nullptr; 4015 } 4016