1 //===-- DependenceAnalysis.cpp - DA Implementation --------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // DependenceAnalysis is an LLVM pass that analyses dependences between memory 11 // accesses. Currently, it is an (incomplete) implementation of the approach 12 // described in 13 // 14 // Practical Dependence Testing 15 // Goff, Kennedy, Tseng 16 // PLDI 1991 17 // 18 // There's a single entry point that analyzes the dependence between a pair 19 // of memory references in a function, returning either NULL, for no dependence, 20 // or a more-or-less detailed description of the dependence between them. 21 // 22 // Currently, the implementation cannot propagate constraints between 23 // coupled RDIV subscripts and lacks a multi-subscript MIV test. 24 // Both of these are conservative weaknesses; 25 // that is, not a source of correctness problems. 26 // 27 // Since Clang linearizes some array subscripts, the dependence 28 // analysis is using SCEV->delinearize to recover the representation of multiple 29 // subscripts, and thus avoid the more expensive and less precise MIV tests. The 30 // delinearization is controlled by the flag -da-delinearize. 31 // 32 // We should pay some careful attention to the possibility of integer overflow 33 // in the implementation of the various tests. This could happen with Add, 34 // Subtract, or Multiply, with both APInt's and SCEV's. 35 // 36 // Some non-linear subscript pairs can be handled by the GCD test 37 // (and perhaps other tests). 38 // Should explore how often these things occur. 39 // 40 // Finally, it seems like certain test cases expose weaknesses in the SCEV 41 // simplification, especially in the handling of sign and zero extensions. 42 // It could be useful to spend time exploring these. 43 // 44 // Please note that this is work in progress and the interface is subject to 45 // change. 46 // 47 //===----------------------------------------------------------------------===// 48 // // 49 // In memory of Ken Kennedy, 1945 - 2007 // 50 // // 51 //===----------------------------------------------------------------------===// 52 53 #include "llvm/Analysis/DependenceAnalysis.h" 54 #include "llvm/ADT/STLExtras.h" 55 #include "llvm/ADT/Statistic.h" 56 #include "llvm/Analysis/AliasAnalysis.h" 57 #include "llvm/Analysis/LoopInfo.h" 58 #include "llvm/Analysis/ScalarEvolution.h" 59 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 60 #include "llvm/Analysis/ValueTracking.h" 61 #include "llvm/Config/llvm-config.h" 62 #include "llvm/IR/InstIterator.h" 63 #include "llvm/IR/Module.h" 64 #include "llvm/IR/Operator.h" 65 #include "llvm/Support/CommandLine.h" 66 #include "llvm/Support/Debug.h" 67 #include "llvm/Support/ErrorHandling.h" 68 #include "llvm/Support/raw_ostream.h" 69 70 using namespace llvm; 71 72 #define DEBUG_TYPE "da" 73 74 //===----------------------------------------------------------------------===// 75 // statistics 76 77 STATISTIC(TotalArrayPairs, "Array pairs tested"); 78 STATISTIC(SeparableSubscriptPairs, "Separable subscript pairs"); 79 STATISTIC(CoupledSubscriptPairs, "Coupled subscript pairs"); 80 STATISTIC(NonlinearSubscriptPairs, "Nonlinear subscript pairs"); 81 STATISTIC(ZIVapplications, "ZIV applications"); 82 STATISTIC(ZIVindependence, "ZIV independence"); 83 STATISTIC(StrongSIVapplications, "Strong SIV applications"); 84 STATISTIC(StrongSIVsuccesses, "Strong SIV successes"); 85 STATISTIC(StrongSIVindependence, "Strong SIV independence"); 86 STATISTIC(WeakCrossingSIVapplications, "Weak-Crossing SIV applications"); 87 STATISTIC(WeakCrossingSIVsuccesses, "Weak-Crossing SIV successes"); 88 STATISTIC(WeakCrossingSIVindependence, "Weak-Crossing SIV independence"); 89 STATISTIC(ExactSIVapplications, "Exact SIV applications"); 90 STATISTIC(ExactSIVsuccesses, "Exact SIV successes"); 91 STATISTIC(ExactSIVindependence, "Exact SIV independence"); 92 STATISTIC(WeakZeroSIVapplications, "Weak-Zero SIV applications"); 93 STATISTIC(WeakZeroSIVsuccesses, "Weak-Zero SIV successes"); 94 STATISTIC(WeakZeroSIVindependence, "Weak-Zero SIV independence"); 95 STATISTIC(ExactRDIVapplications, "Exact RDIV applications"); 96 STATISTIC(ExactRDIVindependence, "Exact RDIV independence"); 97 STATISTIC(SymbolicRDIVapplications, "Symbolic RDIV applications"); 98 STATISTIC(SymbolicRDIVindependence, "Symbolic RDIV independence"); 99 STATISTIC(DeltaApplications, "Delta applications"); 100 STATISTIC(DeltaSuccesses, "Delta successes"); 101 STATISTIC(DeltaIndependence, "Delta independence"); 102 STATISTIC(DeltaPropagations, "Delta propagations"); 103 STATISTIC(GCDapplications, "GCD applications"); 104 STATISTIC(GCDsuccesses, "GCD successes"); 105 STATISTIC(GCDindependence, "GCD independence"); 106 STATISTIC(BanerjeeApplications, "Banerjee applications"); 107 STATISTIC(BanerjeeIndependence, "Banerjee independence"); 108 STATISTIC(BanerjeeSuccesses, "Banerjee successes"); 109 110 static cl::opt<bool> 111 Delinearize("da-delinearize", cl::init(true), cl::Hidden, cl::ZeroOrMore, 112 cl::desc("Try to delinearize array references.")); 113 114 //===----------------------------------------------------------------------===// 115 // basics 116 117 DependenceAnalysis::Result 118 DependenceAnalysis::run(Function &F, FunctionAnalysisManager &FAM) { 119 auto &AA = FAM.getResult<AAManager>(F); 120 auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F); 121 auto &LI = FAM.getResult<LoopAnalysis>(F); 122 return DependenceInfo(&F, &AA, &SE, &LI); 123 } 124 125 AnalysisKey DependenceAnalysis::Key; 126 127 INITIALIZE_PASS_BEGIN(DependenceAnalysisWrapperPass, "da", 128 "Dependence Analysis", true, true) 129 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 130 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) 131 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 132 INITIALIZE_PASS_END(DependenceAnalysisWrapperPass, "da", "Dependence Analysis", 133 true, true) 134 135 char DependenceAnalysisWrapperPass::ID = 0; 136 137 FunctionPass *llvm::createDependenceAnalysisWrapperPass() { 138 return new DependenceAnalysisWrapperPass(); 139 } 140 141 bool DependenceAnalysisWrapperPass::runOnFunction(Function &F) { 142 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); 143 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 144 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 145 info.reset(new DependenceInfo(&F, &AA, &SE, &LI)); 146 return false; 147 } 148 149 DependenceInfo &DependenceAnalysisWrapperPass::getDI() const { return *info; } 150 151 void DependenceAnalysisWrapperPass::releaseMemory() { info.reset(); } 152 153 void DependenceAnalysisWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { 154 AU.setPreservesAll(); 155 AU.addRequiredTransitive<AAResultsWrapperPass>(); 156 AU.addRequiredTransitive<ScalarEvolutionWrapperPass>(); 157 AU.addRequiredTransitive<LoopInfoWrapperPass>(); 158 } 159 160 161 // Used to test the dependence analyzer. 162 // Looks through the function, noting loads and stores. 163 // Calls depends() on every possible pair and prints out the result. 164 // Ignores all other instructions. 165 static void dumpExampleDependence(raw_ostream &OS, DependenceInfo *DA) { 166 auto *F = DA->getFunction(); 167 for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F); SrcI != SrcE; 168 ++SrcI) { 169 if (isa<StoreInst>(*SrcI) || isa<LoadInst>(*SrcI)) { 170 for (inst_iterator DstI = SrcI, DstE = inst_end(F); 171 DstI != DstE; ++DstI) { 172 if (isa<StoreInst>(*DstI) || isa<LoadInst>(*DstI)) { 173 OS << "da analyze - "; 174 if (auto D = DA->depends(&*SrcI, &*DstI, true)) { 175 D->dump(OS); 176 for (unsigned Level = 1; Level <= D->getLevels(); Level++) { 177 if (D->isSplitable(Level)) { 178 OS << "da analyze - split level = " << Level; 179 OS << ", iteration = " << *DA->getSplitIteration(*D, Level); 180 OS << "!\n"; 181 } 182 } 183 } 184 else 185 OS << "none!\n"; 186 } 187 } 188 } 189 } 190 } 191 192 void DependenceAnalysisWrapperPass::print(raw_ostream &OS, 193 const Module *) const { 194 dumpExampleDependence(OS, info.get()); 195 } 196 197 PreservedAnalyses 198 DependenceAnalysisPrinterPass::run(Function &F, FunctionAnalysisManager &FAM) { 199 OS << "'Dependence Analysis' for function '" << F.getName() << "':\n"; 200 dumpExampleDependence(OS, &FAM.getResult<DependenceAnalysis>(F)); 201 return PreservedAnalyses::all(); 202 } 203 204 //===----------------------------------------------------------------------===// 205 // Dependence methods 206 207 // Returns true if this is an input dependence. 208 bool Dependence::isInput() const { 209 return Src->mayReadFromMemory() && Dst->mayReadFromMemory(); 210 } 211 212 213 // Returns true if this is an output dependence. 214 bool Dependence::isOutput() const { 215 return Src->mayWriteToMemory() && Dst->mayWriteToMemory(); 216 } 217 218 219 // Returns true if this is an flow (aka true) dependence. 220 bool Dependence::isFlow() const { 221 return Src->mayWriteToMemory() && Dst->mayReadFromMemory(); 222 } 223 224 225 // Returns true if this is an anti dependence. 226 bool Dependence::isAnti() const { 227 return Src->mayReadFromMemory() && Dst->mayWriteToMemory(); 228 } 229 230 231 // Returns true if a particular level is scalar; that is, 232 // if no subscript in the source or destination mention the induction 233 // variable associated with the loop at this level. 234 // Leave this out of line, so it will serve as a virtual method anchor 235 bool Dependence::isScalar(unsigned level) const { 236 return false; 237 } 238 239 240 //===----------------------------------------------------------------------===// 241 // FullDependence methods 242 243 FullDependence::FullDependence(Instruction *Source, Instruction *Destination, 244 bool PossiblyLoopIndependent, 245 unsigned CommonLevels) 246 : Dependence(Source, Destination), Levels(CommonLevels), 247 LoopIndependent(PossiblyLoopIndependent) { 248 Consistent = true; 249 if (CommonLevels) 250 DV = make_unique<DVEntry[]>(CommonLevels); 251 } 252 253 // The rest are simple getters that hide the implementation. 254 255 // getDirection - Returns the direction associated with a particular level. 256 unsigned FullDependence::getDirection(unsigned Level) const { 257 assert(0 < Level && Level <= Levels && "Level out of range"); 258 return DV[Level - 1].Direction; 259 } 260 261 262 // Returns the distance (or NULL) associated with a particular level. 263 const SCEV *FullDependence::getDistance(unsigned Level) const { 264 assert(0 < Level && Level <= Levels && "Level out of range"); 265 return DV[Level - 1].Distance; 266 } 267 268 269 // Returns true if a particular level is scalar; that is, 270 // if no subscript in the source or destination mention the induction 271 // variable associated with the loop at this level. 272 bool FullDependence::isScalar(unsigned Level) const { 273 assert(0 < Level && Level <= Levels && "Level out of range"); 274 return DV[Level - 1].Scalar; 275 } 276 277 278 // Returns true if peeling the first iteration from this loop 279 // will break this dependence. 280 bool FullDependence::isPeelFirst(unsigned Level) const { 281 assert(0 < Level && Level <= Levels && "Level out of range"); 282 return DV[Level - 1].PeelFirst; 283 } 284 285 286 // Returns true if peeling the last iteration from this loop 287 // will break this dependence. 288 bool FullDependence::isPeelLast(unsigned Level) const { 289 assert(0 < Level && Level <= Levels && "Level out of range"); 290 return DV[Level - 1].PeelLast; 291 } 292 293 294 // Returns true if splitting this loop will break the dependence. 295 bool FullDependence::isSplitable(unsigned Level) const { 296 assert(0 < Level && Level <= Levels && "Level out of range"); 297 return DV[Level - 1].Splitable; 298 } 299 300 301 //===----------------------------------------------------------------------===// 302 // DependenceInfo::Constraint methods 303 304 // If constraint is a point <X, Y>, returns X. 305 // Otherwise assert. 306 const SCEV *DependenceInfo::Constraint::getX() const { 307 assert(Kind == Point && "Kind should be Point"); 308 return A; 309 } 310 311 312 // If constraint is a point <X, Y>, returns Y. 313 // Otherwise assert. 314 const SCEV *DependenceInfo::Constraint::getY() const { 315 assert(Kind == Point && "Kind should be Point"); 316 return B; 317 } 318 319 320 // If constraint is a line AX + BY = C, returns A. 321 // Otherwise assert. 322 const SCEV *DependenceInfo::Constraint::getA() const { 323 assert((Kind == Line || Kind == Distance) && 324 "Kind should be Line (or Distance)"); 325 return A; 326 } 327 328 329 // If constraint is a line AX + BY = C, returns B. 330 // Otherwise assert. 331 const SCEV *DependenceInfo::Constraint::getB() const { 332 assert((Kind == Line || Kind == Distance) && 333 "Kind should be Line (or Distance)"); 334 return B; 335 } 336 337 338 // If constraint is a line AX + BY = C, returns C. 339 // Otherwise assert. 340 const SCEV *DependenceInfo::Constraint::getC() const { 341 assert((Kind == Line || Kind == Distance) && 342 "Kind should be Line (or Distance)"); 343 return C; 344 } 345 346 347 // If constraint is a distance, returns D. 348 // Otherwise assert. 349 const SCEV *DependenceInfo::Constraint::getD() const { 350 assert(Kind == Distance && "Kind should be Distance"); 351 return SE->getNegativeSCEV(C); 352 } 353 354 355 // Returns the loop associated with this constraint. 356 const Loop *DependenceInfo::Constraint::getAssociatedLoop() const { 357 assert((Kind == Distance || Kind == Line || Kind == Point) && 358 "Kind should be Distance, Line, or Point"); 359 return AssociatedLoop; 360 } 361 362 void DependenceInfo::Constraint::setPoint(const SCEV *X, const SCEV *Y, 363 const Loop *CurLoop) { 364 Kind = Point; 365 A = X; 366 B = Y; 367 AssociatedLoop = CurLoop; 368 } 369 370 void DependenceInfo::Constraint::setLine(const SCEV *AA, const SCEV *BB, 371 const SCEV *CC, const Loop *CurLoop) { 372 Kind = Line; 373 A = AA; 374 B = BB; 375 C = CC; 376 AssociatedLoop = CurLoop; 377 } 378 379 void DependenceInfo::Constraint::setDistance(const SCEV *D, 380 const Loop *CurLoop) { 381 Kind = Distance; 382 A = SE->getOne(D->getType()); 383 B = SE->getNegativeSCEV(A); 384 C = SE->getNegativeSCEV(D); 385 AssociatedLoop = CurLoop; 386 } 387 388 void DependenceInfo::Constraint::setEmpty() { Kind = Empty; } 389 390 void DependenceInfo::Constraint::setAny(ScalarEvolution *NewSE) { 391 SE = NewSE; 392 Kind = Any; 393 } 394 395 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 396 // For debugging purposes. Dumps the constraint out to OS. 397 LLVM_DUMP_METHOD void DependenceInfo::Constraint::dump(raw_ostream &OS) const { 398 if (isEmpty()) 399 OS << " Empty\n"; 400 else if (isAny()) 401 OS << " Any\n"; 402 else if (isPoint()) 403 OS << " Point is <" << *getX() << ", " << *getY() << ">\n"; 404 else if (isDistance()) 405 OS << " Distance is " << *getD() << 406 " (" << *getA() << "*X + " << *getB() << "*Y = " << *getC() << ")\n"; 407 else if (isLine()) 408 OS << " Line is " << *getA() << "*X + " << 409 *getB() << "*Y = " << *getC() << "\n"; 410 else 411 llvm_unreachable("unknown constraint type in Constraint::dump"); 412 } 413 #endif 414 415 416 // Updates X with the intersection 417 // of the Constraints X and Y. Returns true if X has changed. 418 // Corresponds to Figure 4 from the paper 419 // 420 // Practical Dependence Testing 421 // Goff, Kennedy, Tseng 422 // PLDI 1991 423 bool DependenceInfo::intersectConstraints(Constraint *X, const Constraint *Y) { 424 ++DeltaApplications; 425 LLVM_DEBUG(dbgs() << "\tintersect constraints\n"); 426 LLVM_DEBUG(dbgs() << "\t X ="; X->dump(dbgs())); 427 LLVM_DEBUG(dbgs() << "\t Y ="; Y->dump(dbgs())); 428 assert(!Y->isPoint() && "Y must not be a Point"); 429 if (X->isAny()) { 430 if (Y->isAny()) 431 return false; 432 *X = *Y; 433 return true; 434 } 435 if (X->isEmpty()) 436 return false; 437 if (Y->isEmpty()) { 438 X->setEmpty(); 439 return true; 440 } 441 442 if (X->isDistance() && Y->isDistance()) { 443 LLVM_DEBUG(dbgs() << "\t intersect 2 distances\n"); 444 if (isKnownPredicate(CmpInst::ICMP_EQ, X->getD(), Y->getD())) 445 return false; 446 if (isKnownPredicate(CmpInst::ICMP_NE, X->getD(), Y->getD())) { 447 X->setEmpty(); 448 ++DeltaSuccesses; 449 return true; 450 } 451 // Hmmm, interesting situation. 452 // I guess if either is constant, keep it and ignore the other. 453 if (isa<SCEVConstant>(Y->getD())) { 454 *X = *Y; 455 return true; 456 } 457 return false; 458 } 459 460 // At this point, the pseudo-code in Figure 4 of the paper 461 // checks if (X->isPoint() && Y->isPoint()). 462 // This case can't occur in our implementation, 463 // since a Point can only arise as the result of intersecting 464 // two Line constraints, and the right-hand value, Y, is never 465 // the result of an intersection. 466 assert(!(X->isPoint() && Y->isPoint()) && 467 "We shouldn't ever see X->isPoint() && Y->isPoint()"); 468 469 if (X->isLine() && Y->isLine()) { 470 LLVM_DEBUG(dbgs() << "\t intersect 2 lines\n"); 471 const SCEV *Prod1 = SE->getMulExpr(X->getA(), Y->getB()); 472 const SCEV *Prod2 = SE->getMulExpr(X->getB(), Y->getA()); 473 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) { 474 // slopes are equal, so lines are parallel 475 LLVM_DEBUG(dbgs() << "\t\tsame slope\n"); 476 Prod1 = SE->getMulExpr(X->getC(), Y->getB()); 477 Prod2 = SE->getMulExpr(X->getB(), Y->getC()); 478 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) 479 return false; 480 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { 481 X->setEmpty(); 482 ++DeltaSuccesses; 483 return true; 484 } 485 return false; 486 } 487 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { 488 // slopes differ, so lines intersect 489 LLVM_DEBUG(dbgs() << "\t\tdifferent slopes\n"); 490 const SCEV *C1B2 = SE->getMulExpr(X->getC(), Y->getB()); 491 const SCEV *C1A2 = SE->getMulExpr(X->getC(), Y->getA()); 492 const SCEV *C2B1 = SE->getMulExpr(Y->getC(), X->getB()); 493 const SCEV *C2A1 = SE->getMulExpr(Y->getC(), X->getA()); 494 const SCEV *A1B2 = SE->getMulExpr(X->getA(), Y->getB()); 495 const SCEV *A2B1 = SE->getMulExpr(Y->getA(), X->getB()); 496 const SCEVConstant *C1A2_C2A1 = 497 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1A2, C2A1)); 498 const SCEVConstant *C1B2_C2B1 = 499 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1B2, C2B1)); 500 const SCEVConstant *A1B2_A2B1 = 501 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A1B2, A2B1)); 502 const SCEVConstant *A2B1_A1B2 = 503 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A2B1, A1B2)); 504 if (!C1B2_C2B1 || !C1A2_C2A1 || 505 !A1B2_A2B1 || !A2B1_A1B2) 506 return false; 507 APInt Xtop = C1B2_C2B1->getAPInt(); 508 APInt Xbot = A1B2_A2B1->getAPInt(); 509 APInt Ytop = C1A2_C2A1->getAPInt(); 510 APInt Ybot = A2B1_A1B2->getAPInt(); 511 LLVM_DEBUG(dbgs() << "\t\tXtop = " << Xtop << "\n"); 512 LLVM_DEBUG(dbgs() << "\t\tXbot = " << Xbot << "\n"); 513 LLVM_DEBUG(dbgs() << "\t\tYtop = " << Ytop << "\n"); 514 LLVM_DEBUG(dbgs() << "\t\tYbot = " << Ybot << "\n"); 515 APInt Xq = Xtop; // these need to be initialized, even 516 APInt Xr = Xtop; // though they're just going to be overwritten 517 APInt::sdivrem(Xtop, Xbot, Xq, Xr); 518 APInt Yq = Ytop; 519 APInt Yr = Ytop; 520 APInt::sdivrem(Ytop, Ybot, Yq, Yr); 521 if (Xr != 0 || Yr != 0) { 522 X->setEmpty(); 523 ++DeltaSuccesses; 524 return true; 525 } 526 LLVM_DEBUG(dbgs() << "\t\tX = " << Xq << ", Y = " << Yq << "\n"); 527 if (Xq.slt(0) || Yq.slt(0)) { 528 X->setEmpty(); 529 ++DeltaSuccesses; 530 return true; 531 } 532 if (const SCEVConstant *CUB = 533 collectConstantUpperBound(X->getAssociatedLoop(), Prod1->getType())) { 534 const APInt &UpperBound = CUB->getAPInt(); 535 LLVM_DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n"); 536 if (Xq.sgt(UpperBound) || Yq.sgt(UpperBound)) { 537 X->setEmpty(); 538 ++DeltaSuccesses; 539 return true; 540 } 541 } 542 X->setPoint(SE->getConstant(Xq), 543 SE->getConstant(Yq), 544 X->getAssociatedLoop()); 545 ++DeltaSuccesses; 546 return true; 547 } 548 return false; 549 } 550 551 // if (X->isLine() && Y->isPoint()) This case can't occur. 552 assert(!(X->isLine() && Y->isPoint()) && "This case should never occur"); 553 554 if (X->isPoint() && Y->isLine()) { 555 LLVM_DEBUG(dbgs() << "\t intersect Point and Line\n"); 556 const SCEV *A1X1 = SE->getMulExpr(Y->getA(), X->getX()); 557 const SCEV *B1Y1 = SE->getMulExpr(Y->getB(), X->getY()); 558 const SCEV *Sum = SE->getAddExpr(A1X1, B1Y1); 559 if (isKnownPredicate(CmpInst::ICMP_EQ, Sum, Y->getC())) 560 return false; 561 if (isKnownPredicate(CmpInst::ICMP_NE, Sum, Y->getC())) { 562 X->setEmpty(); 563 ++DeltaSuccesses; 564 return true; 565 } 566 return false; 567 } 568 569 llvm_unreachable("shouldn't reach the end of Constraint intersection"); 570 return false; 571 } 572 573 574 //===----------------------------------------------------------------------===// 575 // DependenceInfo methods 576 577 // For debugging purposes. Dumps a dependence to OS. 578 void Dependence::dump(raw_ostream &OS) const { 579 bool Splitable = false; 580 if (isConfused()) 581 OS << "confused"; 582 else { 583 if (isConsistent()) 584 OS << "consistent "; 585 if (isFlow()) 586 OS << "flow"; 587 else if (isOutput()) 588 OS << "output"; 589 else if (isAnti()) 590 OS << "anti"; 591 else if (isInput()) 592 OS << "input"; 593 unsigned Levels = getLevels(); 594 OS << " ["; 595 for (unsigned II = 1; II <= Levels; ++II) { 596 if (isSplitable(II)) 597 Splitable = true; 598 if (isPeelFirst(II)) 599 OS << 'p'; 600 const SCEV *Distance = getDistance(II); 601 if (Distance) 602 OS << *Distance; 603 else if (isScalar(II)) 604 OS << "S"; 605 else { 606 unsigned Direction = getDirection(II); 607 if (Direction == DVEntry::ALL) 608 OS << "*"; 609 else { 610 if (Direction & DVEntry::LT) 611 OS << "<"; 612 if (Direction & DVEntry::EQ) 613 OS << "="; 614 if (Direction & DVEntry::GT) 615 OS << ">"; 616 } 617 } 618 if (isPeelLast(II)) 619 OS << 'p'; 620 if (II < Levels) 621 OS << " "; 622 } 623 if (isLoopIndependent()) 624 OS << "|<"; 625 OS << "]"; 626 if (Splitable) 627 OS << " splitable"; 628 } 629 OS << "!\n"; 630 } 631 632 // Returns NoAlias/MayAliass/MustAlias for two memory locations based upon their 633 // underlaying objects. If LocA and LocB are known to not alias (for any reason: 634 // tbaa, non-overlapping regions etc), then it is known there is no dependecy. 635 // Otherwise the underlying objects are checked to see if they point to 636 // different identifiable objects. 637 static AliasResult underlyingObjectsAlias(AliasAnalysis *AA, 638 const DataLayout &DL, 639 const MemoryLocation &LocA, 640 const MemoryLocation &LocB) { 641 // Check the original locations (minus size) for noalias, which can happen for 642 // tbaa, incompatible underlying object locations, etc. 643 MemoryLocation LocAS(LocA.Ptr, LocationSize::unknown(), LocA.AATags); 644 MemoryLocation LocBS(LocB.Ptr, LocationSize::unknown(), LocB.AATags); 645 if (AA->alias(LocAS, LocBS) == NoAlias) 646 return NoAlias; 647 648 // Check the underlying objects are the same 649 const Value *AObj = GetUnderlyingObject(LocA.Ptr, DL); 650 const Value *BObj = GetUnderlyingObject(LocB.Ptr, DL); 651 652 // If the underlying objects are the same, they must alias 653 if (AObj == BObj) 654 return MustAlias; 655 656 // We may have hit the recursion limit for underlying objects, or have 657 // underlying objects where we don't know they will alias. 658 if (!isIdentifiedObject(AObj) || !isIdentifiedObject(BObj)) 659 return MayAlias; 660 661 // Otherwise we know the objects are different and both identified objects so 662 // must not alias. 663 return NoAlias; 664 } 665 666 667 // Returns true if the load or store can be analyzed. Atomic and volatile 668 // operations have properties which this analysis does not understand. 669 static 670 bool isLoadOrStore(const Instruction *I) { 671 if (const LoadInst *LI = dyn_cast<LoadInst>(I)) 672 return LI->isUnordered(); 673 else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) 674 return SI->isUnordered(); 675 return false; 676 } 677 678 679 // Examines the loop nesting of the Src and Dst 680 // instructions and establishes their shared loops. Sets the variables 681 // CommonLevels, SrcLevels, and MaxLevels. 682 // The source and destination instructions needn't be contained in the same 683 // loop. The routine establishNestingLevels finds the level of most deeply 684 // nested loop that contains them both, CommonLevels. An instruction that's 685 // not contained in a loop is at level = 0. MaxLevels is equal to the level 686 // of the source plus the level of the destination, minus CommonLevels. 687 // This lets us allocate vectors MaxLevels in length, with room for every 688 // distinct loop referenced in both the source and destination subscripts. 689 // The variable SrcLevels is the nesting depth of the source instruction. 690 // It's used to help calculate distinct loops referenced by the destination. 691 // Here's the map from loops to levels: 692 // 0 - unused 693 // 1 - outermost common loop 694 // ... - other common loops 695 // CommonLevels - innermost common loop 696 // ... - loops containing Src but not Dst 697 // SrcLevels - innermost loop containing Src but not Dst 698 // ... - loops containing Dst but not Src 699 // MaxLevels - innermost loops containing Dst but not Src 700 // Consider the follow code fragment: 701 // for (a = ...) { 702 // for (b = ...) { 703 // for (c = ...) { 704 // for (d = ...) { 705 // A[] = ...; 706 // } 707 // } 708 // for (e = ...) { 709 // for (f = ...) { 710 // for (g = ...) { 711 // ... = A[]; 712 // } 713 // } 714 // } 715 // } 716 // } 717 // If we're looking at the possibility of a dependence between the store 718 // to A (the Src) and the load from A (the Dst), we'll note that they 719 // have 2 loops in common, so CommonLevels will equal 2 and the direction 720 // vector for Result will have 2 entries. SrcLevels = 4 and MaxLevels = 7. 721 // A map from loop names to loop numbers would look like 722 // a - 1 723 // b - 2 = CommonLevels 724 // c - 3 725 // d - 4 = SrcLevels 726 // e - 5 727 // f - 6 728 // g - 7 = MaxLevels 729 void DependenceInfo::establishNestingLevels(const Instruction *Src, 730 const Instruction *Dst) { 731 const BasicBlock *SrcBlock = Src->getParent(); 732 const BasicBlock *DstBlock = Dst->getParent(); 733 unsigned SrcLevel = LI->getLoopDepth(SrcBlock); 734 unsigned DstLevel = LI->getLoopDepth(DstBlock); 735 const Loop *SrcLoop = LI->getLoopFor(SrcBlock); 736 const Loop *DstLoop = LI->getLoopFor(DstBlock); 737 SrcLevels = SrcLevel; 738 MaxLevels = SrcLevel + DstLevel; 739 while (SrcLevel > DstLevel) { 740 SrcLoop = SrcLoop->getParentLoop(); 741 SrcLevel--; 742 } 743 while (DstLevel > SrcLevel) { 744 DstLoop = DstLoop->getParentLoop(); 745 DstLevel--; 746 } 747 while (SrcLoop != DstLoop) { 748 SrcLoop = SrcLoop->getParentLoop(); 749 DstLoop = DstLoop->getParentLoop(); 750 SrcLevel--; 751 } 752 CommonLevels = SrcLevel; 753 MaxLevels -= CommonLevels; 754 } 755 756 757 // Given one of the loops containing the source, return 758 // its level index in our numbering scheme. 759 unsigned DependenceInfo::mapSrcLoop(const Loop *SrcLoop) const { 760 return SrcLoop->getLoopDepth(); 761 } 762 763 764 // Given one of the loops containing the destination, 765 // return its level index in our numbering scheme. 766 unsigned DependenceInfo::mapDstLoop(const Loop *DstLoop) const { 767 unsigned D = DstLoop->getLoopDepth(); 768 if (D > CommonLevels) 769 return D - CommonLevels + SrcLevels; 770 else 771 return D; 772 } 773 774 775 // Returns true if Expression is loop invariant in LoopNest. 776 bool DependenceInfo::isLoopInvariant(const SCEV *Expression, 777 const Loop *LoopNest) const { 778 if (!LoopNest) 779 return true; 780 return SE->isLoopInvariant(Expression, LoopNest) && 781 isLoopInvariant(Expression, LoopNest->getParentLoop()); 782 } 783 784 785 786 // Finds the set of loops from the LoopNest that 787 // have a level <= CommonLevels and are referred to by the SCEV Expression. 788 void DependenceInfo::collectCommonLoops(const SCEV *Expression, 789 const Loop *LoopNest, 790 SmallBitVector &Loops) const { 791 while (LoopNest) { 792 unsigned Level = LoopNest->getLoopDepth(); 793 if (Level <= CommonLevels && !SE->isLoopInvariant(Expression, LoopNest)) 794 Loops.set(Level); 795 LoopNest = LoopNest->getParentLoop(); 796 } 797 } 798 799 void DependenceInfo::unifySubscriptType(ArrayRef<Subscript *> Pairs) { 800 801 unsigned widestWidthSeen = 0; 802 Type *widestType; 803 804 // Go through each pair and find the widest bit to which we need 805 // to extend all of them. 806 for (Subscript *Pair : Pairs) { 807 const SCEV *Src = Pair->Src; 808 const SCEV *Dst = Pair->Dst; 809 IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType()); 810 IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType()); 811 if (SrcTy == nullptr || DstTy == nullptr) { 812 assert(SrcTy == DstTy && "This function only unify integer types and " 813 "expect Src and Dst share the same type " 814 "otherwise."); 815 continue; 816 } 817 if (SrcTy->getBitWidth() > widestWidthSeen) { 818 widestWidthSeen = SrcTy->getBitWidth(); 819 widestType = SrcTy; 820 } 821 if (DstTy->getBitWidth() > widestWidthSeen) { 822 widestWidthSeen = DstTy->getBitWidth(); 823 widestType = DstTy; 824 } 825 } 826 827 828 assert(widestWidthSeen > 0); 829 830 // Now extend each pair to the widest seen. 831 for (Subscript *Pair : Pairs) { 832 const SCEV *Src = Pair->Src; 833 const SCEV *Dst = Pair->Dst; 834 IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType()); 835 IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType()); 836 if (SrcTy == nullptr || DstTy == nullptr) { 837 assert(SrcTy == DstTy && "This function only unify integer types and " 838 "expect Src and Dst share the same type " 839 "otherwise."); 840 continue; 841 } 842 if (SrcTy->getBitWidth() < widestWidthSeen) 843 // Sign-extend Src to widestType 844 Pair->Src = SE->getSignExtendExpr(Src, widestType); 845 if (DstTy->getBitWidth() < widestWidthSeen) { 846 // Sign-extend Dst to widestType 847 Pair->Dst = SE->getSignExtendExpr(Dst, widestType); 848 } 849 } 850 } 851 852 // removeMatchingExtensions - Examines a subscript pair. 853 // If the source and destination are identically sign (or zero) 854 // extended, it strips off the extension in an effect to simplify 855 // the actual analysis. 856 void DependenceInfo::removeMatchingExtensions(Subscript *Pair) { 857 const SCEV *Src = Pair->Src; 858 const SCEV *Dst = Pair->Dst; 859 if ((isa<SCEVZeroExtendExpr>(Src) && isa<SCEVZeroExtendExpr>(Dst)) || 860 (isa<SCEVSignExtendExpr>(Src) && isa<SCEVSignExtendExpr>(Dst))) { 861 const SCEVCastExpr *SrcCast = cast<SCEVCastExpr>(Src); 862 const SCEVCastExpr *DstCast = cast<SCEVCastExpr>(Dst); 863 const SCEV *SrcCastOp = SrcCast->getOperand(); 864 const SCEV *DstCastOp = DstCast->getOperand(); 865 if (SrcCastOp->getType() == DstCastOp->getType()) { 866 Pair->Src = SrcCastOp; 867 Pair->Dst = DstCastOp; 868 } 869 } 870 } 871 872 873 // Examine the scev and return true iff it's linear. 874 // Collect any loops mentioned in the set of "Loops". 875 bool DependenceInfo::checkSrcSubscript(const SCEV *Src, const Loop *LoopNest, 876 SmallBitVector &Loops) { 877 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Src); 878 if (!AddRec) 879 return isLoopInvariant(Src, LoopNest); 880 const SCEV *Start = AddRec->getStart(); 881 const SCEV *Step = AddRec->getStepRecurrence(*SE); 882 const SCEV *UB = SE->getBackedgeTakenCount(AddRec->getLoop()); 883 if (!isa<SCEVCouldNotCompute>(UB)) { 884 if (SE->getTypeSizeInBits(Start->getType()) < 885 SE->getTypeSizeInBits(UB->getType())) { 886 if (!AddRec->getNoWrapFlags()) 887 return false; 888 } 889 } 890 if (!isLoopInvariant(Step, LoopNest)) 891 return false; 892 Loops.set(mapSrcLoop(AddRec->getLoop())); 893 return checkSrcSubscript(Start, LoopNest, Loops); 894 } 895 896 897 898 // Examine the scev and return true iff it's linear. 899 // Collect any loops mentioned in the set of "Loops". 900 bool DependenceInfo::checkDstSubscript(const SCEV *Dst, const Loop *LoopNest, 901 SmallBitVector &Loops) { 902 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Dst); 903 if (!AddRec) 904 return isLoopInvariant(Dst, LoopNest); 905 const SCEV *Start = AddRec->getStart(); 906 const SCEV *Step = AddRec->getStepRecurrence(*SE); 907 const SCEV *UB = SE->getBackedgeTakenCount(AddRec->getLoop()); 908 if (!isa<SCEVCouldNotCompute>(UB)) { 909 if (SE->getTypeSizeInBits(Start->getType()) < 910 SE->getTypeSizeInBits(UB->getType())) { 911 if (!AddRec->getNoWrapFlags()) 912 return false; 913 } 914 } 915 if (!isLoopInvariant(Step, LoopNest)) 916 return false; 917 Loops.set(mapDstLoop(AddRec->getLoop())); 918 return checkDstSubscript(Start, LoopNest, Loops); 919 } 920 921 922 // Examines the subscript pair (the Src and Dst SCEVs) 923 // and classifies it as either ZIV, SIV, RDIV, MIV, or Nonlinear. 924 // Collects the associated loops in a set. 925 DependenceInfo::Subscript::ClassificationKind 926 DependenceInfo::classifyPair(const SCEV *Src, const Loop *SrcLoopNest, 927 const SCEV *Dst, const Loop *DstLoopNest, 928 SmallBitVector &Loops) { 929 SmallBitVector SrcLoops(MaxLevels + 1); 930 SmallBitVector DstLoops(MaxLevels + 1); 931 if (!checkSrcSubscript(Src, SrcLoopNest, SrcLoops)) 932 return Subscript::NonLinear; 933 if (!checkDstSubscript(Dst, DstLoopNest, DstLoops)) 934 return Subscript::NonLinear; 935 Loops = SrcLoops; 936 Loops |= DstLoops; 937 unsigned N = Loops.count(); 938 if (N == 0) 939 return Subscript::ZIV; 940 if (N == 1) 941 return Subscript::SIV; 942 if (N == 2 && (SrcLoops.count() == 0 || 943 DstLoops.count() == 0 || 944 (SrcLoops.count() == 1 && DstLoops.count() == 1))) 945 return Subscript::RDIV; 946 return Subscript::MIV; 947 } 948 949 950 // A wrapper around SCEV::isKnownPredicate. 951 // Looks for cases where we're interested in comparing for equality. 952 // If both X and Y have been identically sign or zero extended, 953 // it strips off the (confusing) extensions before invoking 954 // SCEV::isKnownPredicate. Perhaps, someday, the ScalarEvolution package 955 // will be similarly updated. 956 // 957 // If SCEV::isKnownPredicate can't prove the predicate, 958 // we try simple subtraction, which seems to help in some cases 959 // involving symbolics. 960 bool DependenceInfo::isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *X, 961 const SCEV *Y) const { 962 if (Pred == CmpInst::ICMP_EQ || 963 Pred == CmpInst::ICMP_NE) { 964 if ((isa<SCEVSignExtendExpr>(X) && 965 isa<SCEVSignExtendExpr>(Y)) || 966 (isa<SCEVZeroExtendExpr>(X) && 967 isa<SCEVZeroExtendExpr>(Y))) { 968 const SCEVCastExpr *CX = cast<SCEVCastExpr>(X); 969 const SCEVCastExpr *CY = cast<SCEVCastExpr>(Y); 970 const SCEV *Xop = CX->getOperand(); 971 const SCEV *Yop = CY->getOperand(); 972 if (Xop->getType() == Yop->getType()) { 973 X = Xop; 974 Y = Yop; 975 } 976 } 977 } 978 if (SE->isKnownPredicate(Pred, X, Y)) 979 return true; 980 // If SE->isKnownPredicate can't prove the condition, 981 // we try the brute-force approach of subtracting 982 // and testing the difference. 983 // By testing with SE->isKnownPredicate first, we avoid 984 // the possibility of overflow when the arguments are constants. 985 const SCEV *Delta = SE->getMinusSCEV(X, Y); 986 switch (Pred) { 987 case CmpInst::ICMP_EQ: 988 return Delta->isZero(); 989 case CmpInst::ICMP_NE: 990 return SE->isKnownNonZero(Delta); 991 case CmpInst::ICMP_SGE: 992 return SE->isKnownNonNegative(Delta); 993 case CmpInst::ICMP_SLE: 994 return SE->isKnownNonPositive(Delta); 995 case CmpInst::ICMP_SGT: 996 return SE->isKnownPositive(Delta); 997 case CmpInst::ICMP_SLT: 998 return SE->isKnownNegative(Delta); 999 default: 1000 llvm_unreachable("unexpected predicate in isKnownPredicate"); 1001 } 1002 } 1003 1004 /// Compare to see if S is less than Size, using isKnownNegative(S - max(Size, 1)) 1005 /// with some extra checking if S is an AddRec and we can prove less-than using 1006 /// the loop bounds. 1007 bool DependenceInfo::isKnownLessThan(const SCEV *S, const SCEV *Size) const { 1008 // First unify to the same type 1009 auto *SType = dyn_cast<IntegerType>(S->getType()); 1010 auto *SizeType = dyn_cast<IntegerType>(Size->getType()); 1011 if (!SType || !SizeType) 1012 return false; 1013 Type *MaxType = 1014 (SType->getBitWidth() >= SizeType->getBitWidth()) ? SType : SizeType; 1015 S = SE->getTruncateOrZeroExtend(S, MaxType); 1016 Size = SE->getTruncateOrZeroExtend(Size, MaxType); 1017 1018 // Special check for addrecs using BE taken count 1019 const SCEV *Bound = SE->getMinusSCEV(S, Size); 1020 if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Bound)) { 1021 if (AddRec->isAffine()) { 1022 const SCEV *BECount = SE->getBackedgeTakenCount(AddRec->getLoop()); 1023 if (!isa<SCEVCouldNotCompute>(BECount)) { 1024 const SCEV *Limit = AddRec->evaluateAtIteration(BECount, *SE); 1025 if (SE->isKnownNegative(Limit)) 1026 return true; 1027 } 1028 } 1029 } 1030 1031 // Check using normal isKnownNegative 1032 const SCEV *LimitedBound = 1033 SE->getMinusSCEV(S, SE->getSMaxExpr(Size, SE->getOne(Size->getType()))); 1034 return SE->isKnownNegative(LimitedBound); 1035 } 1036 1037 bool DependenceInfo::isKnownNonNegative(const SCEV *S, const Value *Ptr) const { 1038 bool Inbounds = false; 1039 if (auto *SrcGEP = dyn_cast<GetElementPtrInst>(Ptr)) 1040 Inbounds = SrcGEP->isInBounds(); 1041 if (Inbounds) { 1042 if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) { 1043 if (AddRec->isAffine()) { 1044 // We know S is for Ptr, the operand on a load/store, so doesn't wrap. 1045 // If both parts are NonNegative, the end result will be NonNegative 1046 if (SE->isKnownNonNegative(AddRec->getStart()) && 1047 SE->isKnownNonNegative(AddRec->getOperand(1))) 1048 return true; 1049 } 1050 } 1051 } 1052 1053 return SE->isKnownNonNegative(S); 1054 } 1055 1056 // All subscripts are all the same type. 1057 // Loop bound may be smaller (e.g., a char). 1058 // Should zero extend loop bound, since it's always >= 0. 1059 // This routine collects upper bound and extends or truncates if needed. 1060 // Truncating is safe when subscripts are known not to wrap. Cases without 1061 // nowrap flags should have been rejected earlier. 1062 // Return null if no bound available. 1063 const SCEV *DependenceInfo::collectUpperBound(const Loop *L, Type *T) const { 1064 if (SE->hasLoopInvariantBackedgeTakenCount(L)) { 1065 const SCEV *UB = SE->getBackedgeTakenCount(L); 1066 return SE->getTruncateOrZeroExtend(UB, T); 1067 } 1068 return nullptr; 1069 } 1070 1071 1072 // Calls collectUpperBound(), then attempts to cast it to SCEVConstant. 1073 // If the cast fails, returns NULL. 1074 const SCEVConstant *DependenceInfo::collectConstantUpperBound(const Loop *L, 1075 Type *T) const { 1076 if (const SCEV *UB = collectUpperBound(L, T)) 1077 return dyn_cast<SCEVConstant>(UB); 1078 return nullptr; 1079 } 1080 1081 1082 // testZIV - 1083 // When we have a pair of subscripts of the form [c1] and [c2], 1084 // where c1 and c2 are both loop invariant, we attack it using 1085 // the ZIV test. Basically, we test by comparing the two values, 1086 // but there are actually three possible results: 1087 // 1) the values are equal, so there's a dependence 1088 // 2) the values are different, so there's no dependence 1089 // 3) the values might be equal, so we have to assume a dependence. 1090 // 1091 // Return true if dependence disproved. 1092 bool DependenceInfo::testZIV(const SCEV *Src, const SCEV *Dst, 1093 FullDependence &Result) const { 1094 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n"); 1095 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n"); 1096 ++ZIVapplications; 1097 if (isKnownPredicate(CmpInst::ICMP_EQ, Src, Dst)) { 1098 LLVM_DEBUG(dbgs() << " provably dependent\n"); 1099 return false; // provably dependent 1100 } 1101 if (isKnownPredicate(CmpInst::ICMP_NE, Src, Dst)) { 1102 LLVM_DEBUG(dbgs() << " provably independent\n"); 1103 ++ZIVindependence; 1104 return true; // provably independent 1105 } 1106 LLVM_DEBUG(dbgs() << " possibly dependent\n"); 1107 Result.Consistent = false; 1108 return false; // possibly dependent 1109 } 1110 1111 1112 // strongSIVtest - 1113 // From the paper, Practical Dependence Testing, Section 4.2.1 1114 // 1115 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 + a*i], 1116 // where i is an induction variable, c1 and c2 are loop invariant, 1117 // and a is a constant, we can solve it exactly using the Strong SIV test. 1118 // 1119 // Can prove independence. Failing that, can compute distance (and direction). 1120 // In the presence of symbolic terms, we can sometimes make progress. 1121 // 1122 // If there's a dependence, 1123 // 1124 // c1 + a*i = c2 + a*i' 1125 // 1126 // The dependence distance is 1127 // 1128 // d = i' - i = (c1 - c2)/a 1129 // 1130 // A dependence only exists if d is an integer and abs(d) <= U, where U is the 1131 // loop's upper bound. If a dependence exists, the dependence direction is 1132 // defined as 1133 // 1134 // { < if d > 0 1135 // direction = { = if d = 0 1136 // { > if d < 0 1137 // 1138 // Return true if dependence disproved. 1139 bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst, 1140 const SCEV *DstConst, const Loop *CurLoop, 1141 unsigned Level, FullDependence &Result, 1142 Constraint &NewConstraint) const { 1143 LLVM_DEBUG(dbgs() << "\tStrong SIV test\n"); 1144 LLVM_DEBUG(dbgs() << "\t Coeff = " << *Coeff); 1145 LLVM_DEBUG(dbgs() << ", " << *Coeff->getType() << "\n"); 1146 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst); 1147 LLVM_DEBUG(dbgs() << ", " << *SrcConst->getType() << "\n"); 1148 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst); 1149 LLVM_DEBUG(dbgs() << ", " << *DstConst->getType() << "\n"); 1150 ++StrongSIVapplications; 1151 assert(0 < Level && Level <= CommonLevels && "level out of range"); 1152 Level--; 1153 1154 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); 1155 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta); 1156 LLVM_DEBUG(dbgs() << ", " << *Delta->getType() << "\n"); 1157 1158 // check that |Delta| < iteration count 1159 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1160 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound); 1161 LLVM_DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n"); 1162 const SCEV *AbsDelta = 1163 SE->isKnownNonNegative(Delta) ? Delta : SE->getNegativeSCEV(Delta); 1164 const SCEV *AbsCoeff = 1165 SE->isKnownNonNegative(Coeff) ? Coeff : SE->getNegativeSCEV(Coeff); 1166 const SCEV *Product = SE->getMulExpr(UpperBound, AbsCoeff); 1167 if (isKnownPredicate(CmpInst::ICMP_SGT, AbsDelta, Product)) { 1168 // Distance greater than trip count - no dependence 1169 ++StrongSIVindependence; 1170 ++StrongSIVsuccesses; 1171 return true; 1172 } 1173 } 1174 1175 // Can we compute distance? 1176 if (isa<SCEVConstant>(Delta) && isa<SCEVConstant>(Coeff)) { 1177 APInt ConstDelta = cast<SCEVConstant>(Delta)->getAPInt(); 1178 APInt ConstCoeff = cast<SCEVConstant>(Coeff)->getAPInt(); 1179 APInt Distance = ConstDelta; // these need to be initialized 1180 APInt Remainder = ConstDelta; 1181 APInt::sdivrem(ConstDelta, ConstCoeff, Distance, Remainder); 1182 LLVM_DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); 1183 LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1184 // Make sure Coeff divides Delta exactly 1185 if (Remainder != 0) { 1186 // Coeff doesn't divide Distance, no dependence 1187 ++StrongSIVindependence; 1188 ++StrongSIVsuccesses; 1189 return true; 1190 } 1191 Result.DV[Level].Distance = SE->getConstant(Distance); 1192 NewConstraint.setDistance(SE->getConstant(Distance), CurLoop); 1193 if (Distance.sgt(0)) 1194 Result.DV[Level].Direction &= Dependence::DVEntry::LT; 1195 else if (Distance.slt(0)) 1196 Result.DV[Level].Direction &= Dependence::DVEntry::GT; 1197 else 1198 Result.DV[Level].Direction &= Dependence::DVEntry::EQ; 1199 ++StrongSIVsuccesses; 1200 } 1201 else if (Delta->isZero()) { 1202 // since 0/X == 0 1203 Result.DV[Level].Distance = Delta; 1204 NewConstraint.setDistance(Delta, CurLoop); 1205 Result.DV[Level].Direction &= Dependence::DVEntry::EQ; 1206 ++StrongSIVsuccesses; 1207 } 1208 else { 1209 if (Coeff->isOne()) { 1210 LLVM_DEBUG(dbgs() << "\t Distance = " << *Delta << "\n"); 1211 Result.DV[Level].Distance = Delta; // since X/1 == X 1212 NewConstraint.setDistance(Delta, CurLoop); 1213 } 1214 else { 1215 Result.Consistent = false; 1216 NewConstraint.setLine(Coeff, 1217 SE->getNegativeSCEV(Coeff), 1218 SE->getNegativeSCEV(Delta), CurLoop); 1219 } 1220 1221 // maybe we can get a useful direction 1222 bool DeltaMaybeZero = !SE->isKnownNonZero(Delta); 1223 bool DeltaMaybePositive = !SE->isKnownNonPositive(Delta); 1224 bool DeltaMaybeNegative = !SE->isKnownNonNegative(Delta); 1225 bool CoeffMaybePositive = !SE->isKnownNonPositive(Coeff); 1226 bool CoeffMaybeNegative = !SE->isKnownNonNegative(Coeff); 1227 // The double negatives above are confusing. 1228 // It helps to read !SE->isKnownNonZero(Delta) 1229 // as "Delta might be Zero" 1230 unsigned NewDirection = Dependence::DVEntry::NONE; 1231 if ((DeltaMaybePositive && CoeffMaybePositive) || 1232 (DeltaMaybeNegative && CoeffMaybeNegative)) 1233 NewDirection = Dependence::DVEntry::LT; 1234 if (DeltaMaybeZero) 1235 NewDirection |= Dependence::DVEntry::EQ; 1236 if ((DeltaMaybeNegative && CoeffMaybePositive) || 1237 (DeltaMaybePositive && CoeffMaybeNegative)) 1238 NewDirection |= Dependence::DVEntry::GT; 1239 if (NewDirection < Result.DV[Level].Direction) 1240 ++StrongSIVsuccesses; 1241 Result.DV[Level].Direction &= NewDirection; 1242 } 1243 return false; 1244 } 1245 1246 1247 // weakCrossingSIVtest - 1248 // From the paper, Practical Dependence Testing, Section 4.2.2 1249 // 1250 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 - a*i], 1251 // where i is an induction variable, c1 and c2 are loop invariant, 1252 // and a is a constant, we can solve it exactly using the 1253 // Weak-Crossing SIV test. 1254 // 1255 // Given c1 + a*i = c2 - a*i', we can look for the intersection of 1256 // the two lines, where i = i', yielding 1257 // 1258 // c1 + a*i = c2 - a*i 1259 // 2a*i = c2 - c1 1260 // i = (c2 - c1)/2a 1261 // 1262 // If i < 0, there is no dependence. 1263 // If i > upperbound, there is no dependence. 1264 // If i = 0 (i.e., if c1 = c2), there's a dependence with distance = 0. 1265 // If i = upperbound, there's a dependence with distance = 0. 1266 // If i is integral, there's a dependence (all directions). 1267 // If the non-integer part = 1/2, there's a dependence (<> directions). 1268 // Otherwise, there's no dependence. 1269 // 1270 // Can prove independence. Failing that, 1271 // can sometimes refine the directions. 1272 // Can determine iteration for splitting. 1273 // 1274 // Return true if dependence disproved. 1275 bool DependenceInfo::weakCrossingSIVtest( 1276 const SCEV *Coeff, const SCEV *SrcConst, const SCEV *DstConst, 1277 const Loop *CurLoop, unsigned Level, FullDependence &Result, 1278 Constraint &NewConstraint, const SCEV *&SplitIter) const { 1279 LLVM_DEBUG(dbgs() << "\tWeak-Crossing SIV test\n"); 1280 LLVM_DEBUG(dbgs() << "\t Coeff = " << *Coeff << "\n"); 1281 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1282 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1283 ++WeakCrossingSIVapplications; 1284 assert(0 < Level && Level <= CommonLevels && "Level out of range"); 1285 Level--; 1286 Result.Consistent = false; 1287 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1288 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1289 NewConstraint.setLine(Coeff, Coeff, Delta, CurLoop); 1290 if (Delta->isZero()) { 1291 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT); 1292 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT); 1293 ++WeakCrossingSIVsuccesses; 1294 if (!Result.DV[Level].Direction) { 1295 ++WeakCrossingSIVindependence; 1296 return true; 1297 } 1298 Result.DV[Level].Distance = Delta; // = 0 1299 return false; 1300 } 1301 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Coeff); 1302 if (!ConstCoeff) 1303 return false; 1304 1305 Result.DV[Level].Splitable = true; 1306 if (SE->isKnownNegative(ConstCoeff)) { 1307 ConstCoeff = dyn_cast<SCEVConstant>(SE->getNegativeSCEV(ConstCoeff)); 1308 assert(ConstCoeff && 1309 "dynamic cast of negative of ConstCoeff should yield constant"); 1310 Delta = SE->getNegativeSCEV(Delta); 1311 } 1312 assert(SE->isKnownPositive(ConstCoeff) && "ConstCoeff should be positive"); 1313 1314 // compute SplitIter for use by DependenceInfo::getSplitIteration() 1315 SplitIter = SE->getUDivExpr( 1316 SE->getSMaxExpr(SE->getZero(Delta->getType()), Delta), 1317 SE->getMulExpr(SE->getConstant(Delta->getType(), 2), ConstCoeff)); 1318 LLVM_DEBUG(dbgs() << "\t Split iter = " << *SplitIter << "\n"); 1319 1320 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1321 if (!ConstDelta) 1322 return false; 1323 1324 // We're certain that ConstCoeff > 0; therefore, 1325 // if Delta < 0, then no dependence. 1326 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1327 LLVM_DEBUG(dbgs() << "\t ConstCoeff = " << *ConstCoeff << "\n"); 1328 if (SE->isKnownNegative(Delta)) { 1329 // No dependence, Delta < 0 1330 ++WeakCrossingSIVindependence; 1331 ++WeakCrossingSIVsuccesses; 1332 return true; 1333 } 1334 1335 // We're certain that Delta > 0 and ConstCoeff > 0. 1336 // Check Delta/(2*ConstCoeff) against upper loop bound 1337 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1338 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1339 const SCEV *ConstantTwo = SE->getConstant(UpperBound->getType(), 2); 1340 const SCEV *ML = SE->getMulExpr(SE->getMulExpr(ConstCoeff, UpperBound), 1341 ConstantTwo); 1342 LLVM_DEBUG(dbgs() << "\t ML = " << *ML << "\n"); 1343 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, ML)) { 1344 // Delta too big, no dependence 1345 ++WeakCrossingSIVindependence; 1346 ++WeakCrossingSIVsuccesses; 1347 return true; 1348 } 1349 if (isKnownPredicate(CmpInst::ICMP_EQ, Delta, ML)) { 1350 // i = i' = UB 1351 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT); 1352 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT); 1353 ++WeakCrossingSIVsuccesses; 1354 if (!Result.DV[Level].Direction) { 1355 ++WeakCrossingSIVindependence; 1356 return true; 1357 } 1358 Result.DV[Level].Splitable = false; 1359 Result.DV[Level].Distance = SE->getZero(Delta->getType()); 1360 return false; 1361 } 1362 } 1363 1364 // check that Coeff divides Delta 1365 APInt APDelta = ConstDelta->getAPInt(); 1366 APInt APCoeff = ConstCoeff->getAPInt(); 1367 APInt Distance = APDelta; // these need to be initialzed 1368 APInt Remainder = APDelta; 1369 APInt::sdivrem(APDelta, APCoeff, Distance, Remainder); 1370 LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1371 if (Remainder != 0) { 1372 // Coeff doesn't divide Delta, no dependence 1373 ++WeakCrossingSIVindependence; 1374 ++WeakCrossingSIVsuccesses; 1375 return true; 1376 } 1377 LLVM_DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); 1378 1379 // if 2*Coeff doesn't divide Delta, then the equal direction isn't possible 1380 APInt Two = APInt(Distance.getBitWidth(), 2, true); 1381 Remainder = Distance.srem(Two); 1382 LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1383 if (Remainder != 0) { 1384 // Equal direction isn't possible 1385 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::EQ); 1386 ++WeakCrossingSIVsuccesses; 1387 } 1388 return false; 1389 } 1390 1391 1392 // Kirch's algorithm, from 1393 // 1394 // Optimizing Supercompilers for Supercomputers 1395 // Michael Wolfe 1396 // MIT Press, 1989 1397 // 1398 // Program 2.1, page 29. 1399 // Computes the GCD of AM and BM. 1400 // Also finds a solution to the equation ax - by = gcd(a, b). 1401 // Returns true if dependence disproved; i.e., gcd does not divide Delta. 1402 static bool findGCD(unsigned Bits, const APInt &AM, const APInt &BM, 1403 const APInt &Delta, APInt &G, APInt &X, APInt &Y) { 1404 APInt A0(Bits, 1, true), A1(Bits, 0, true); 1405 APInt B0(Bits, 0, true), B1(Bits, 1, true); 1406 APInt G0 = AM.abs(); 1407 APInt G1 = BM.abs(); 1408 APInt Q = G0; // these need to be initialized 1409 APInt R = G0; 1410 APInt::sdivrem(G0, G1, Q, R); 1411 while (R != 0) { 1412 APInt A2 = A0 - Q*A1; A0 = A1; A1 = A2; 1413 APInt B2 = B0 - Q*B1; B0 = B1; B1 = B2; 1414 G0 = G1; G1 = R; 1415 APInt::sdivrem(G0, G1, Q, R); 1416 } 1417 G = G1; 1418 LLVM_DEBUG(dbgs() << "\t GCD = " << G << "\n"); 1419 X = AM.slt(0) ? -A1 : A1; 1420 Y = BM.slt(0) ? B1 : -B1; 1421 1422 // make sure gcd divides Delta 1423 R = Delta.srem(G); 1424 if (R != 0) 1425 return true; // gcd doesn't divide Delta, no dependence 1426 Q = Delta.sdiv(G); 1427 X *= Q; 1428 Y *= Q; 1429 return false; 1430 } 1431 1432 static APInt floorOfQuotient(const APInt &A, const APInt &B) { 1433 APInt Q = A; // these need to be initialized 1434 APInt R = A; 1435 APInt::sdivrem(A, B, Q, R); 1436 if (R == 0) 1437 return Q; 1438 if ((A.sgt(0) && B.sgt(0)) || 1439 (A.slt(0) && B.slt(0))) 1440 return Q; 1441 else 1442 return Q - 1; 1443 } 1444 1445 static APInt ceilingOfQuotient(const APInt &A, const APInt &B) { 1446 APInt Q = A; // these need to be initialized 1447 APInt R = A; 1448 APInt::sdivrem(A, B, Q, R); 1449 if (R == 0) 1450 return Q; 1451 if ((A.sgt(0) && B.sgt(0)) || 1452 (A.slt(0) && B.slt(0))) 1453 return Q + 1; 1454 else 1455 return Q; 1456 } 1457 1458 1459 static 1460 APInt maxAPInt(APInt A, APInt B) { 1461 return A.sgt(B) ? A : B; 1462 } 1463 1464 1465 static 1466 APInt minAPInt(APInt A, APInt B) { 1467 return A.slt(B) ? A : B; 1468 } 1469 1470 1471 // exactSIVtest - 1472 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i], 1473 // where i is an induction variable, c1 and c2 are loop invariant, and a1 1474 // and a2 are constant, we can solve it exactly using an algorithm developed 1475 // by Banerjee and Wolfe. See Section 2.5.3 in 1476 // 1477 // Optimizing Supercompilers for Supercomputers 1478 // Michael Wolfe 1479 // MIT Press, 1989 1480 // 1481 // It's slower than the specialized tests (strong SIV, weak-zero SIV, etc), 1482 // so use them if possible. They're also a bit better with symbolics and, 1483 // in the case of the strong SIV test, can compute Distances. 1484 // 1485 // Return true if dependence disproved. 1486 bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, 1487 const SCEV *SrcConst, const SCEV *DstConst, 1488 const Loop *CurLoop, unsigned Level, 1489 FullDependence &Result, 1490 Constraint &NewConstraint) const { 1491 LLVM_DEBUG(dbgs() << "\tExact SIV test\n"); 1492 LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); 1493 LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); 1494 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1495 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1496 ++ExactSIVapplications; 1497 assert(0 < Level && Level <= CommonLevels && "Level out of range"); 1498 Level--; 1499 Result.Consistent = false; 1500 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1501 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1502 NewConstraint.setLine(SrcCoeff, SE->getNegativeSCEV(DstCoeff), 1503 Delta, CurLoop); 1504 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1505 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1506 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1507 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) 1508 return false; 1509 1510 // find gcd 1511 APInt G, X, Y; 1512 APInt AM = ConstSrcCoeff->getAPInt(); 1513 APInt BM = ConstDstCoeff->getAPInt(); 1514 unsigned Bits = AM.getBitWidth(); 1515 if (findGCD(Bits, AM, BM, ConstDelta->getAPInt(), G, X, Y)) { 1516 // gcd doesn't divide Delta, no dependence 1517 ++ExactSIVindependence; 1518 ++ExactSIVsuccesses; 1519 return true; 1520 } 1521 1522 LLVM_DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); 1523 1524 // since SCEV construction normalizes, LM = 0 1525 APInt UM(Bits, 1, true); 1526 bool UMvalid = false; 1527 // UM is perhaps unavailable, let's check 1528 if (const SCEVConstant *CUB = 1529 collectConstantUpperBound(CurLoop, Delta->getType())) { 1530 UM = CUB->getAPInt(); 1531 LLVM_DEBUG(dbgs() << "\t UM = " << UM << "\n"); 1532 UMvalid = true; 1533 } 1534 1535 APInt TU(APInt::getSignedMaxValue(Bits)); 1536 APInt TL(APInt::getSignedMinValue(Bits)); 1537 1538 // test(BM/G, LM-X) and test(-BM/G, X-UM) 1539 APInt TMUL = BM.sdiv(G); 1540 if (TMUL.sgt(0)) { 1541 TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL)); 1542 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1543 if (UMvalid) { 1544 TU = minAPInt(TU, floorOfQuotient(UM - X, TMUL)); 1545 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1546 } 1547 } 1548 else { 1549 TU = minAPInt(TU, floorOfQuotient(-X, TMUL)); 1550 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1551 if (UMvalid) { 1552 TL = maxAPInt(TL, ceilingOfQuotient(UM - X, TMUL)); 1553 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1554 } 1555 } 1556 1557 // test(AM/G, LM-Y) and test(-AM/G, Y-UM) 1558 TMUL = AM.sdiv(G); 1559 if (TMUL.sgt(0)) { 1560 TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL)); 1561 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1562 if (UMvalid) { 1563 TU = minAPInt(TU, floorOfQuotient(UM - Y, TMUL)); 1564 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1565 } 1566 } 1567 else { 1568 TU = minAPInt(TU, floorOfQuotient(-Y, TMUL)); 1569 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1570 if (UMvalid) { 1571 TL = maxAPInt(TL, ceilingOfQuotient(UM - Y, TMUL)); 1572 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1573 } 1574 } 1575 if (TL.sgt(TU)) { 1576 ++ExactSIVindependence; 1577 ++ExactSIVsuccesses; 1578 return true; 1579 } 1580 1581 // explore directions 1582 unsigned NewDirection = Dependence::DVEntry::NONE; 1583 1584 // less than 1585 APInt SaveTU(TU); // save these 1586 APInt SaveTL(TL); 1587 LLVM_DEBUG(dbgs() << "\t exploring LT direction\n"); 1588 TMUL = AM - BM; 1589 if (TMUL.sgt(0)) { 1590 TL = maxAPInt(TL, ceilingOfQuotient(X - Y + 1, TMUL)); 1591 LLVM_DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1592 } 1593 else { 1594 TU = minAPInt(TU, floorOfQuotient(X - Y + 1, TMUL)); 1595 LLVM_DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1596 } 1597 if (TL.sle(TU)) { 1598 NewDirection |= Dependence::DVEntry::LT; 1599 ++ExactSIVsuccesses; 1600 } 1601 1602 // equal 1603 TU = SaveTU; // restore 1604 TL = SaveTL; 1605 LLVM_DEBUG(dbgs() << "\t exploring EQ direction\n"); 1606 if (TMUL.sgt(0)) { 1607 TL = maxAPInt(TL, ceilingOfQuotient(X - Y, TMUL)); 1608 LLVM_DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1609 } 1610 else { 1611 TU = minAPInt(TU, floorOfQuotient(X - Y, TMUL)); 1612 LLVM_DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1613 } 1614 TMUL = BM - AM; 1615 if (TMUL.sgt(0)) { 1616 TL = maxAPInt(TL, ceilingOfQuotient(Y - X, TMUL)); 1617 LLVM_DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1618 } 1619 else { 1620 TU = minAPInt(TU, floorOfQuotient(Y - X, TMUL)); 1621 LLVM_DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1622 } 1623 if (TL.sle(TU)) { 1624 NewDirection |= Dependence::DVEntry::EQ; 1625 ++ExactSIVsuccesses; 1626 } 1627 1628 // greater than 1629 TU = SaveTU; // restore 1630 TL = SaveTL; 1631 LLVM_DEBUG(dbgs() << "\t exploring GT direction\n"); 1632 if (TMUL.sgt(0)) { 1633 TL = maxAPInt(TL, ceilingOfQuotient(Y - X + 1, TMUL)); 1634 LLVM_DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1635 } 1636 else { 1637 TU = minAPInt(TU, floorOfQuotient(Y - X + 1, TMUL)); 1638 LLVM_DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1639 } 1640 if (TL.sle(TU)) { 1641 NewDirection |= Dependence::DVEntry::GT; 1642 ++ExactSIVsuccesses; 1643 } 1644 1645 // finished 1646 Result.DV[Level].Direction &= NewDirection; 1647 if (Result.DV[Level].Direction == Dependence::DVEntry::NONE) 1648 ++ExactSIVindependence; 1649 return Result.DV[Level].Direction == Dependence::DVEntry::NONE; 1650 } 1651 1652 1653 1654 // Return true if the divisor evenly divides the dividend. 1655 static 1656 bool isRemainderZero(const SCEVConstant *Dividend, 1657 const SCEVConstant *Divisor) { 1658 const APInt &ConstDividend = Dividend->getAPInt(); 1659 const APInt &ConstDivisor = Divisor->getAPInt(); 1660 return ConstDividend.srem(ConstDivisor) == 0; 1661 } 1662 1663 1664 // weakZeroSrcSIVtest - 1665 // From the paper, Practical Dependence Testing, Section 4.2.2 1666 // 1667 // When we have a pair of subscripts of the form [c1] and [c2 + a*i], 1668 // where i is an induction variable, c1 and c2 are loop invariant, 1669 // and a is a constant, we can solve it exactly using the 1670 // Weak-Zero SIV test. 1671 // 1672 // Given 1673 // 1674 // c1 = c2 + a*i 1675 // 1676 // we get 1677 // 1678 // (c1 - c2)/a = i 1679 // 1680 // If i is not an integer, there's no dependence. 1681 // If i < 0 or > UB, there's no dependence. 1682 // If i = 0, the direction is >= and peeling the 1683 // 1st iteration will break the dependence. 1684 // If i = UB, the direction is <= and peeling the 1685 // last iteration will break the dependence. 1686 // Otherwise, the direction is *. 1687 // 1688 // Can prove independence. Failing that, we can sometimes refine 1689 // the directions. Can sometimes show that first or last 1690 // iteration carries all the dependences (so worth peeling). 1691 // 1692 // (see also weakZeroDstSIVtest) 1693 // 1694 // Return true if dependence disproved. 1695 bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *DstCoeff, 1696 const SCEV *SrcConst, 1697 const SCEV *DstConst, 1698 const Loop *CurLoop, unsigned Level, 1699 FullDependence &Result, 1700 Constraint &NewConstraint) const { 1701 // For the WeakSIV test, it's possible the loop isn't common to 1702 // the Src and Dst loops. If it isn't, then there's no need to 1703 // record a direction. 1704 LLVM_DEBUG(dbgs() << "\tWeak-Zero (src) SIV test\n"); 1705 LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << "\n"); 1706 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1707 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1708 ++WeakZeroSIVapplications; 1709 assert(0 < Level && Level <= MaxLevels && "Level out of range"); 1710 Level--; 1711 Result.Consistent = false; 1712 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); 1713 NewConstraint.setLine(SE->getZero(Delta->getType()), DstCoeff, Delta, 1714 CurLoop); 1715 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1716 if (isKnownPredicate(CmpInst::ICMP_EQ, SrcConst, DstConst)) { 1717 if (Level < CommonLevels) { 1718 Result.DV[Level].Direction &= Dependence::DVEntry::GE; 1719 Result.DV[Level].PeelFirst = true; 1720 ++WeakZeroSIVsuccesses; 1721 } 1722 return false; // dependences caused by first iteration 1723 } 1724 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1725 if (!ConstCoeff) 1726 return false; 1727 const SCEV *AbsCoeff = 1728 SE->isKnownNegative(ConstCoeff) ? 1729 SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; 1730 const SCEV *NewDelta = 1731 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta; 1732 1733 // check that Delta/SrcCoeff < iteration count 1734 // really check NewDelta < count*AbsCoeff 1735 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1736 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1737 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); 1738 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { 1739 ++WeakZeroSIVindependence; 1740 ++WeakZeroSIVsuccesses; 1741 return true; 1742 } 1743 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { 1744 // dependences caused by last iteration 1745 if (Level < CommonLevels) { 1746 Result.DV[Level].Direction &= Dependence::DVEntry::LE; 1747 Result.DV[Level].PeelLast = true; 1748 ++WeakZeroSIVsuccesses; 1749 } 1750 return false; 1751 } 1752 } 1753 1754 // check that Delta/SrcCoeff >= 0 1755 // really check that NewDelta >= 0 1756 if (SE->isKnownNegative(NewDelta)) { 1757 // No dependence, newDelta < 0 1758 ++WeakZeroSIVindependence; 1759 ++WeakZeroSIVsuccesses; 1760 return true; 1761 } 1762 1763 // if SrcCoeff doesn't divide Delta, then no dependence 1764 if (isa<SCEVConstant>(Delta) && 1765 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { 1766 ++WeakZeroSIVindependence; 1767 ++WeakZeroSIVsuccesses; 1768 return true; 1769 } 1770 return false; 1771 } 1772 1773 1774 // weakZeroDstSIVtest - 1775 // From the paper, Practical Dependence Testing, Section 4.2.2 1776 // 1777 // When we have a pair of subscripts of the form [c1 + a*i] and [c2], 1778 // where i is an induction variable, c1 and c2 are loop invariant, 1779 // and a is a constant, we can solve it exactly using the 1780 // Weak-Zero SIV test. 1781 // 1782 // Given 1783 // 1784 // c1 + a*i = c2 1785 // 1786 // we get 1787 // 1788 // i = (c2 - c1)/a 1789 // 1790 // If i is not an integer, there's no dependence. 1791 // If i < 0 or > UB, there's no dependence. 1792 // If i = 0, the direction is <= and peeling the 1793 // 1st iteration will break the dependence. 1794 // If i = UB, the direction is >= and peeling the 1795 // last iteration will break the dependence. 1796 // Otherwise, the direction is *. 1797 // 1798 // Can prove independence. Failing that, we can sometimes refine 1799 // the directions. Can sometimes show that first or last 1800 // iteration carries all the dependences (so worth peeling). 1801 // 1802 // (see also weakZeroSrcSIVtest) 1803 // 1804 // Return true if dependence disproved. 1805 bool DependenceInfo::weakZeroDstSIVtest(const SCEV *SrcCoeff, 1806 const SCEV *SrcConst, 1807 const SCEV *DstConst, 1808 const Loop *CurLoop, unsigned Level, 1809 FullDependence &Result, 1810 Constraint &NewConstraint) const { 1811 // For the WeakSIV test, it's possible the loop isn't common to the 1812 // Src and Dst loops. If it isn't, then there's no need to record a direction. 1813 LLVM_DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n"); 1814 LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << "\n"); 1815 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1816 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1817 ++WeakZeroSIVapplications; 1818 assert(0 < Level && Level <= SrcLevels && "Level out of range"); 1819 Level--; 1820 Result.Consistent = false; 1821 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1822 NewConstraint.setLine(SrcCoeff, SE->getZero(Delta->getType()), Delta, 1823 CurLoop); 1824 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1825 if (isKnownPredicate(CmpInst::ICMP_EQ, DstConst, SrcConst)) { 1826 if (Level < CommonLevels) { 1827 Result.DV[Level].Direction &= Dependence::DVEntry::LE; 1828 Result.DV[Level].PeelFirst = true; 1829 ++WeakZeroSIVsuccesses; 1830 } 1831 return false; // dependences caused by first iteration 1832 } 1833 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1834 if (!ConstCoeff) 1835 return false; 1836 const SCEV *AbsCoeff = 1837 SE->isKnownNegative(ConstCoeff) ? 1838 SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; 1839 const SCEV *NewDelta = 1840 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta; 1841 1842 // check that Delta/SrcCoeff < iteration count 1843 // really check NewDelta < count*AbsCoeff 1844 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1845 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1846 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); 1847 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { 1848 ++WeakZeroSIVindependence; 1849 ++WeakZeroSIVsuccesses; 1850 return true; 1851 } 1852 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { 1853 // dependences caused by last iteration 1854 if (Level < CommonLevels) { 1855 Result.DV[Level].Direction &= Dependence::DVEntry::GE; 1856 Result.DV[Level].PeelLast = true; 1857 ++WeakZeroSIVsuccesses; 1858 } 1859 return false; 1860 } 1861 } 1862 1863 // check that Delta/SrcCoeff >= 0 1864 // really check that NewDelta >= 0 1865 if (SE->isKnownNegative(NewDelta)) { 1866 // No dependence, newDelta < 0 1867 ++WeakZeroSIVindependence; 1868 ++WeakZeroSIVsuccesses; 1869 return true; 1870 } 1871 1872 // if SrcCoeff doesn't divide Delta, then no dependence 1873 if (isa<SCEVConstant>(Delta) && 1874 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { 1875 ++WeakZeroSIVindependence; 1876 ++WeakZeroSIVsuccesses; 1877 return true; 1878 } 1879 return false; 1880 } 1881 1882 1883 // exactRDIVtest - Tests the RDIV subscript pair for dependence. 1884 // Things of the form [c1 + a*i] and [c2 + b*j], 1885 // where i and j are induction variable, c1 and c2 are loop invariant, 1886 // and a and b are constants. 1887 // Returns true if any possible dependence is disproved. 1888 // Marks the result as inconsistent. 1889 // Works in some cases that symbolicRDIVtest doesn't, and vice versa. 1890 bool DependenceInfo::exactRDIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, 1891 const SCEV *SrcConst, const SCEV *DstConst, 1892 const Loop *SrcLoop, const Loop *DstLoop, 1893 FullDependence &Result) const { 1894 LLVM_DEBUG(dbgs() << "\tExact RDIV test\n"); 1895 LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); 1896 LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); 1897 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1898 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1899 ++ExactRDIVapplications; 1900 Result.Consistent = false; 1901 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1902 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1903 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1904 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1905 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1906 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) 1907 return false; 1908 1909 // find gcd 1910 APInt G, X, Y; 1911 APInt AM = ConstSrcCoeff->getAPInt(); 1912 APInt BM = ConstDstCoeff->getAPInt(); 1913 unsigned Bits = AM.getBitWidth(); 1914 if (findGCD(Bits, AM, BM, ConstDelta->getAPInt(), G, X, Y)) { 1915 // gcd doesn't divide Delta, no dependence 1916 ++ExactRDIVindependence; 1917 return true; 1918 } 1919 1920 LLVM_DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); 1921 1922 // since SCEV construction seems to normalize, LM = 0 1923 APInt SrcUM(Bits, 1, true); 1924 bool SrcUMvalid = false; 1925 // SrcUM is perhaps unavailable, let's check 1926 if (const SCEVConstant *UpperBound = 1927 collectConstantUpperBound(SrcLoop, Delta->getType())) { 1928 SrcUM = UpperBound->getAPInt(); 1929 LLVM_DEBUG(dbgs() << "\t SrcUM = " << SrcUM << "\n"); 1930 SrcUMvalid = true; 1931 } 1932 1933 APInt DstUM(Bits, 1, true); 1934 bool DstUMvalid = false; 1935 // UM is perhaps unavailable, let's check 1936 if (const SCEVConstant *UpperBound = 1937 collectConstantUpperBound(DstLoop, Delta->getType())) { 1938 DstUM = UpperBound->getAPInt(); 1939 LLVM_DEBUG(dbgs() << "\t DstUM = " << DstUM << "\n"); 1940 DstUMvalid = true; 1941 } 1942 1943 APInt TU(APInt::getSignedMaxValue(Bits)); 1944 APInt TL(APInt::getSignedMinValue(Bits)); 1945 1946 // test(BM/G, LM-X) and test(-BM/G, X-UM) 1947 APInt TMUL = BM.sdiv(G); 1948 if (TMUL.sgt(0)) { 1949 TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL)); 1950 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1951 if (SrcUMvalid) { 1952 TU = minAPInt(TU, floorOfQuotient(SrcUM - X, TMUL)); 1953 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1954 } 1955 } 1956 else { 1957 TU = minAPInt(TU, floorOfQuotient(-X, TMUL)); 1958 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1959 if (SrcUMvalid) { 1960 TL = maxAPInt(TL, ceilingOfQuotient(SrcUM - X, TMUL)); 1961 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1962 } 1963 } 1964 1965 // test(AM/G, LM-Y) and test(-AM/G, Y-UM) 1966 TMUL = AM.sdiv(G); 1967 if (TMUL.sgt(0)) { 1968 TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL)); 1969 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1970 if (DstUMvalid) { 1971 TU = minAPInt(TU, floorOfQuotient(DstUM - Y, TMUL)); 1972 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1973 } 1974 } 1975 else { 1976 TU = minAPInt(TU, floorOfQuotient(-Y, TMUL)); 1977 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1978 if (DstUMvalid) { 1979 TL = maxAPInt(TL, ceilingOfQuotient(DstUM - Y, TMUL)); 1980 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1981 } 1982 } 1983 if (TL.sgt(TU)) 1984 ++ExactRDIVindependence; 1985 return TL.sgt(TU); 1986 } 1987 1988 1989 // symbolicRDIVtest - 1990 // In Section 4.5 of the Practical Dependence Testing paper,the authors 1991 // introduce a special case of Banerjee's Inequalities (also called the 1992 // Extreme-Value Test) that can handle some of the SIV and RDIV cases, 1993 // particularly cases with symbolics. Since it's only able to disprove 1994 // dependence (not compute distances or directions), we'll use it as a 1995 // fall back for the other tests. 1996 // 1997 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] 1998 // where i and j are induction variables and c1 and c2 are loop invariants, 1999 // we can use the symbolic tests to disprove some dependences, serving as a 2000 // backup for the RDIV test. Note that i and j can be the same variable, 2001 // letting this test serve as a backup for the various SIV tests. 2002 // 2003 // For a dependence to exist, c1 + a1*i must equal c2 + a2*j for some 2004 // 0 <= i <= N1 and some 0 <= j <= N2, where N1 and N2 are the (normalized) 2005 // loop bounds for the i and j loops, respectively. So, ... 2006 // 2007 // c1 + a1*i = c2 + a2*j 2008 // a1*i - a2*j = c2 - c1 2009 // 2010 // To test for a dependence, we compute c2 - c1 and make sure it's in the 2011 // range of the maximum and minimum possible values of a1*i - a2*j. 2012 // Considering the signs of a1 and a2, we have 4 possible cases: 2013 // 2014 // 1) If a1 >= 0 and a2 >= 0, then 2015 // a1*0 - a2*N2 <= c2 - c1 <= a1*N1 - a2*0 2016 // -a2*N2 <= c2 - c1 <= a1*N1 2017 // 2018 // 2) If a1 >= 0 and a2 <= 0, then 2019 // a1*0 - a2*0 <= c2 - c1 <= a1*N1 - a2*N2 2020 // 0 <= c2 - c1 <= a1*N1 - a2*N2 2021 // 2022 // 3) If a1 <= 0 and a2 >= 0, then 2023 // a1*N1 - a2*N2 <= c2 - c1 <= a1*0 - a2*0 2024 // a1*N1 - a2*N2 <= c2 - c1 <= 0 2025 // 2026 // 4) If a1 <= 0 and a2 <= 0, then 2027 // a1*N1 - a2*0 <= c2 - c1 <= a1*0 - a2*N2 2028 // a1*N1 <= c2 - c1 <= -a2*N2 2029 // 2030 // return true if dependence disproved 2031 bool DependenceInfo::symbolicRDIVtest(const SCEV *A1, const SCEV *A2, 2032 const SCEV *C1, const SCEV *C2, 2033 const Loop *Loop1, 2034 const Loop *Loop2) const { 2035 ++SymbolicRDIVapplications; 2036 LLVM_DEBUG(dbgs() << "\ttry symbolic RDIV test\n"); 2037 LLVM_DEBUG(dbgs() << "\t A1 = " << *A1); 2038 LLVM_DEBUG(dbgs() << ", type = " << *A1->getType() << "\n"); 2039 LLVM_DEBUG(dbgs() << "\t A2 = " << *A2 << "\n"); 2040 LLVM_DEBUG(dbgs() << "\t C1 = " << *C1 << "\n"); 2041 LLVM_DEBUG(dbgs() << "\t C2 = " << *C2 << "\n"); 2042 const SCEV *N1 = collectUpperBound(Loop1, A1->getType()); 2043 const SCEV *N2 = collectUpperBound(Loop2, A1->getType()); 2044 LLVM_DEBUG(if (N1) dbgs() << "\t N1 = " << *N1 << "\n"); 2045 LLVM_DEBUG(if (N2) dbgs() << "\t N2 = " << *N2 << "\n"); 2046 const SCEV *C2_C1 = SE->getMinusSCEV(C2, C1); 2047 const SCEV *C1_C2 = SE->getMinusSCEV(C1, C2); 2048 LLVM_DEBUG(dbgs() << "\t C2 - C1 = " << *C2_C1 << "\n"); 2049 LLVM_DEBUG(dbgs() << "\t C1 - C2 = " << *C1_C2 << "\n"); 2050 if (SE->isKnownNonNegative(A1)) { 2051 if (SE->isKnownNonNegative(A2)) { 2052 // A1 >= 0 && A2 >= 0 2053 if (N1) { 2054 // make sure that c2 - c1 <= a1*N1 2055 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2056 LLVM_DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); 2057 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1)) { 2058 ++SymbolicRDIVindependence; 2059 return true; 2060 } 2061 } 2062 if (N2) { 2063 // make sure that -a2*N2 <= c2 - c1, or a2*N2 >= c1 - c2 2064 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2065 LLVM_DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); 2066 if (isKnownPredicate(CmpInst::ICMP_SLT, A2N2, C1_C2)) { 2067 ++SymbolicRDIVindependence; 2068 return true; 2069 } 2070 } 2071 } 2072 else if (SE->isKnownNonPositive(A2)) { 2073 // a1 >= 0 && a2 <= 0 2074 if (N1 && N2) { 2075 // make sure that c2 - c1 <= a1*N1 - a2*N2 2076 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2077 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2078 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); 2079 LLVM_DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); 2080 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1_A2N2)) { 2081 ++SymbolicRDIVindependence; 2082 return true; 2083 } 2084 } 2085 // make sure that 0 <= c2 - c1 2086 if (SE->isKnownNegative(C2_C1)) { 2087 ++SymbolicRDIVindependence; 2088 return true; 2089 } 2090 } 2091 } 2092 else if (SE->isKnownNonPositive(A1)) { 2093 if (SE->isKnownNonNegative(A2)) { 2094 // a1 <= 0 && a2 >= 0 2095 if (N1 && N2) { 2096 // make sure that a1*N1 - a2*N2 <= c2 - c1 2097 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2098 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2099 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); 2100 LLVM_DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); 2101 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1_A2N2, C2_C1)) { 2102 ++SymbolicRDIVindependence; 2103 return true; 2104 } 2105 } 2106 // make sure that c2 - c1 <= 0 2107 if (SE->isKnownPositive(C2_C1)) { 2108 ++SymbolicRDIVindependence; 2109 return true; 2110 } 2111 } 2112 else if (SE->isKnownNonPositive(A2)) { 2113 // a1 <= 0 && a2 <= 0 2114 if (N1) { 2115 // make sure that a1*N1 <= c2 - c1 2116 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2117 LLVM_DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); 2118 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1, C2_C1)) { 2119 ++SymbolicRDIVindependence; 2120 return true; 2121 } 2122 } 2123 if (N2) { 2124 // make sure that c2 - c1 <= -a2*N2, or c1 - c2 >= a2*N2 2125 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2126 LLVM_DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); 2127 if (isKnownPredicate(CmpInst::ICMP_SLT, C1_C2, A2N2)) { 2128 ++SymbolicRDIVindependence; 2129 return true; 2130 } 2131 } 2132 } 2133 } 2134 return false; 2135 } 2136 2137 2138 // testSIV - 2139 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 - a2*i] 2140 // where i is an induction variable, c1 and c2 are loop invariant, and a1 and 2141 // a2 are constant, we attack it with an SIV test. While they can all be 2142 // solved with the Exact SIV test, it's worthwhile to use simpler tests when 2143 // they apply; they're cheaper and sometimes more precise. 2144 // 2145 // Return true if dependence disproved. 2146 bool DependenceInfo::testSIV(const SCEV *Src, const SCEV *Dst, unsigned &Level, 2147 FullDependence &Result, Constraint &NewConstraint, 2148 const SCEV *&SplitIter) const { 2149 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n"); 2150 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2151 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); 2152 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); 2153 if (SrcAddRec && DstAddRec) { 2154 const SCEV *SrcConst = SrcAddRec->getStart(); 2155 const SCEV *DstConst = DstAddRec->getStart(); 2156 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2157 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); 2158 const Loop *CurLoop = SrcAddRec->getLoop(); 2159 assert(CurLoop == DstAddRec->getLoop() && 2160 "both loops in SIV should be same"); 2161 Level = mapSrcLoop(CurLoop); 2162 bool disproven; 2163 if (SrcCoeff == DstCoeff) 2164 disproven = strongSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2165 Level, Result, NewConstraint); 2166 else if (SrcCoeff == SE->getNegativeSCEV(DstCoeff)) 2167 disproven = weakCrossingSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2168 Level, Result, NewConstraint, SplitIter); 2169 else 2170 disproven = exactSIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, 2171 Level, Result, NewConstraint); 2172 return disproven || 2173 gcdMIVtest(Src, Dst, Result) || 2174 symbolicRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, CurLoop); 2175 } 2176 if (SrcAddRec) { 2177 const SCEV *SrcConst = SrcAddRec->getStart(); 2178 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2179 const SCEV *DstConst = Dst; 2180 const Loop *CurLoop = SrcAddRec->getLoop(); 2181 Level = mapSrcLoop(CurLoop); 2182 return weakZeroDstSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2183 Level, Result, NewConstraint) || 2184 gcdMIVtest(Src, Dst, Result); 2185 } 2186 if (DstAddRec) { 2187 const SCEV *DstConst = DstAddRec->getStart(); 2188 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); 2189 const SCEV *SrcConst = Src; 2190 const Loop *CurLoop = DstAddRec->getLoop(); 2191 Level = mapDstLoop(CurLoop); 2192 return weakZeroSrcSIVtest(DstCoeff, SrcConst, DstConst, 2193 CurLoop, Level, Result, NewConstraint) || 2194 gcdMIVtest(Src, Dst, Result); 2195 } 2196 llvm_unreachable("SIV test expected at least one AddRec"); 2197 return false; 2198 } 2199 2200 2201 // testRDIV - 2202 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] 2203 // where i and j are induction variables, c1 and c2 are loop invariant, 2204 // and a1 and a2 are constant, we can solve it exactly with an easy adaptation 2205 // of the Exact SIV test, the Restricted Double Index Variable (RDIV) test. 2206 // It doesn't make sense to talk about distance or direction in this case, 2207 // so there's no point in making special versions of the Strong SIV test or 2208 // the Weak-crossing SIV test. 2209 // 2210 // With minor algebra, this test can also be used for things like 2211 // [c1 + a1*i + a2*j][c2]. 2212 // 2213 // Return true if dependence disproved. 2214 bool DependenceInfo::testRDIV(const SCEV *Src, const SCEV *Dst, 2215 FullDependence &Result) const { 2216 // we have 3 possible situations here: 2217 // 1) [a*i + b] and [c*j + d] 2218 // 2) [a*i + c*j + b] and [d] 2219 // 3) [b] and [a*i + c*j + d] 2220 // We need to find what we've got and get organized 2221 2222 const SCEV *SrcConst, *DstConst; 2223 const SCEV *SrcCoeff, *DstCoeff; 2224 const Loop *SrcLoop, *DstLoop; 2225 2226 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n"); 2227 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2228 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); 2229 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); 2230 if (SrcAddRec && DstAddRec) { 2231 SrcConst = SrcAddRec->getStart(); 2232 SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2233 SrcLoop = SrcAddRec->getLoop(); 2234 DstConst = DstAddRec->getStart(); 2235 DstCoeff = DstAddRec->getStepRecurrence(*SE); 2236 DstLoop = DstAddRec->getLoop(); 2237 } 2238 else if (SrcAddRec) { 2239 if (const SCEVAddRecExpr *tmpAddRec = 2240 dyn_cast<SCEVAddRecExpr>(SrcAddRec->getStart())) { 2241 SrcConst = tmpAddRec->getStart(); 2242 SrcCoeff = tmpAddRec->getStepRecurrence(*SE); 2243 SrcLoop = tmpAddRec->getLoop(); 2244 DstConst = Dst; 2245 DstCoeff = SE->getNegativeSCEV(SrcAddRec->getStepRecurrence(*SE)); 2246 DstLoop = SrcAddRec->getLoop(); 2247 } 2248 else 2249 llvm_unreachable("RDIV reached by surprising SCEVs"); 2250 } 2251 else if (DstAddRec) { 2252 if (const SCEVAddRecExpr *tmpAddRec = 2253 dyn_cast<SCEVAddRecExpr>(DstAddRec->getStart())) { 2254 DstConst = tmpAddRec->getStart(); 2255 DstCoeff = tmpAddRec->getStepRecurrence(*SE); 2256 DstLoop = tmpAddRec->getLoop(); 2257 SrcConst = Src; 2258 SrcCoeff = SE->getNegativeSCEV(DstAddRec->getStepRecurrence(*SE)); 2259 SrcLoop = DstAddRec->getLoop(); 2260 } 2261 else 2262 llvm_unreachable("RDIV reached by surprising SCEVs"); 2263 } 2264 else 2265 llvm_unreachable("RDIV expected at least one AddRec"); 2266 return exactRDIVtest(SrcCoeff, DstCoeff, 2267 SrcConst, DstConst, 2268 SrcLoop, DstLoop, 2269 Result) || 2270 gcdMIVtest(Src, Dst, Result) || 2271 symbolicRDIVtest(SrcCoeff, DstCoeff, 2272 SrcConst, DstConst, 2273 SrcLoop, DstLoop); 2274 } 2275 2276 2277 // Tests the single-subscript MIV pair (Src and Dst) for dependence. 2278 // Return true if dependence disproved. 2279 // Can sometimes refine direction vectors. 2280 bool DependenceInfo::testMIV(const SCEV *Src, const SCEV *Dst, 2281 const SmallBitVector &Loops, 2282 FullDependence &Result) const { 2283 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n"); 2284 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2285 Result.Consistent = false; 2286 return gcdMIVtest(Src, Dst, Result) || 2287 banerjeeMIVtest(Src, Dst, Loops, Result); 2288 } 2289 2290 2291 // Given a product, e.g., 10*X*Y, returns the first constant operand, 2292 // in this case 10. If there is no constant part, returns NULL. 2293 static 2294 const SCEVConstant *getConstantPart(const SCEV *Expr) { 2295 if (const auto *Constant = dyn_cast<SCEVConstant>(Expr)) 2296 return Constant; 2297 else if (const auto *Product = dyn_cast<SCEVMulExpr>(Expr)) 2298 if (const auto *Constant = dyn_cast<SCEVConstant>(Product->getOperand(0))) 2299 return Constant; 2300 return nullptr; 2301 } 2302 2303 2304 //===----------------------------------------------------------------------===// 2305 // gcdMIVtest - 2306 // Tests an MIV subscript pair for dependence. 2307 // Returns true if any possible dependence is disproved. 2308 // Marks the result as inconsistent. 2309 // Can sometimes disprove the equal direction for 1 or more loops, 2310 // as discussed in Michael Wolfe's book, 2311 // High Performance Compilers for Parallel Computing, page 235. 2312 // 2313 // We spend some effort (code!) to handle cases like 2314 // [10*i + 5*N*j + 15*M + 6], where i and j are induction variables, 2315 // but M and N are just loop-invariant variables. 2316 // This should help us handle linearized subscripts; 2317 // also makes this test a useful backup to the various SIV tests. 2318 // 2319 // It occurs to me that the presence of loop-invariant variables 2320 // changes the nature of the test from "greatest common divisor" 2321 // to "a common divisor". 2322 bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst, 2323 FullDependence &Result) const { 2324 LLVM_DEBUG(dbgs() << "starting gcd\n"); 2325 ++GCDapplications; 2326 unsigned BitWidth = SE->getTypeSizeInBits(Src->getType()); 2327 APInt RunningGCD = APInt::getNullValue(BitWidth); 2328 2329 // Examine Src coefficients. 2330 // Compute running GCD and record source constant. 2331 // Because we're looking for the constant at the end of the chain, 2332 // we can't quit the loop just because the GCD == 1. 2333 const SCEV *Coefficients = Src; 2334 while (const SCEVAddRecExpr *AddRec = 2335 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2336 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2337 // If the coefficient is the product of a constant and other stuff, 2338 // we can use the constant in the GCD computation. 2339 const auto *Constant = getConstantPart(Coeff); 2340 if (!Constant) 2341 return false; 2342 APInt ConstCoeff = Constant->getAPInt(); 2343 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2344 Coefficients = AddRec->getStart(); 2345 } 2346 const SCEV *SrcConst = Coefficients; 2347 2348 // Examine Dst coefficients. 2349 // Compute running GCD and record destination constant. 2350 // Because we're looking for the constant at the end of the chain, 2351 // we can't quit the loop just because the GCD == 1. 2352 Coefficients = Dst; 2353 while (const SCEVAddRecExpr *AddRec = 2354 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2355 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2356 // If the coefficient is the product of a constant and other stuff, 2357 // we can use the constant in the GCD computation. 2358 const auto *Constant = getConstantPart(Coeff); 2359 if (!Constant) 2360 return false; 2361 APInt ConstCoeff = Constant->getAPInt(); 2362 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2363 Coefficients = AddRec->getStart(); 2364 } 2365 const SCEV *DstConst = Coefficients; 2366 2367 APInt ExtraGCD = APInt::getNullValue(BitWidth); 2368 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 2369 LLVM_DEBUG(dbgs() << " Delta = " << *Delta << "\n"); 2370 const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Delta); 2371 if (const SCEVAddExpr *Sum = dyn_cast<SCEVAddExpr>(Delta)) { 2372 // If Delta is a sum of products, we may be able to make further progress. 2373 for (unsigned Op = 0, Ops = Sum->getNumOperands(); Op < Ops; Op++) { 2374 const SCEV *Operand = Sum->getOperand(Op); 2375 if (isa<SCEVConstant>(Operand)) { 2376 assert(!Constant && "Surprised to find multiple constants"); 2377 Constant = cast<SCEVConstant>(Operand); 2378 } 2379 else if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Operand)) { 2380 // Search for constant operand to participate in GCD; 2381 // If none found; return false. 2382 const SCEVConstant *ConstOp = getConstantPart(Product); 2383 if (!ConstOp) 2384 return false; 2385 APInt ConstOpValue = ConstOp->getAPInt(); 2386 ExtraGCD = APIntOps::GreatestCommonDivisor(ExtraGCD, 2387 ConstOpValue.abs()); 2388 } 2389 else 2390 return false; 2391 } 2392 } 2393 if (!Constant) 2394 return false; 2395 APInt ConstDelta = cast<SCEVConstant>(Constant)->getAPInt(); 2396 LLVM_DEBUG(dbgs() << " ConstDelta = " << ConstDelta << "\n"); 2397 if (ConstDelta == 0) 2398 return false; 2399 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ExtraGCD); 2400 LLVM_DEBUG(dbgs() << " RunningGCD = " << RunningGCD << "\n"); 2401 APInt Remainder = ConstDelta.srem(RunningGCD); 2402 if (Remainder != 0) { 2403 ++GCDindependence; 2404 return true; 2405 } 2406 2407 // Try to disprove equal directions. 2408 // For example, given a subscript pair [3*i + 2*j] and [i' + 2*j' - 1], 2409 // the code above can't disprove the dependence because the GCD = 1. 2410 // So we consider what happen if i = i' and what happens if j = j'. 2411 // If i = i', we can simplify the subscript to [2*i + 2*j] and [2*j' - 1], 2412 // which is infeasible, so we can disallow the = direction for the i level. 2413 // Setting j = j' doesn't help matters, so we end up with a direction vector 2414 // of [<>, *] 2415 // 2416 // Given A[5*i + 10*j*M + 9*M*N] and A[15*i + 20*j*M - 21*N*M + 5], 2417 // we need to remember that the constant part is 5 and the RunningGCD should 2418 // be initialized to ExtraGCD = 30. 2419 LLVM_DEBUG(dbgs() << " ExtraGCD = " << ExtraGCD << '\n'); 2420 2421 bool Improved = false; 2422 Coefficients = Src; 2423 while (const SCEVAddRecExpr *AddRec = 2424 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2425 Coefficients = AddRec->getStart(); 2426 const Loop *CurLoop = AddRec->getLoop(); 2427 RunningGCD = ExtraGCD; 2428 const SCEV *SrcCoeff = AddRec->getStepRecurrence(*SE); 2429 const SCEV *DstCoeff = SE->getMinusSCEV(SrcCoeff, SrcCoeff); 2430 const SCEV *Inner = Src; 2431 while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { 2432 AddRec = cast<SCEVAddRecExpr>(Inner); 2433 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2434 if (CurLoop == AddRec->getLoop()) 2435 ; // SrcCoeff == Coeff 2436 else { 2437 // If the coefficient is the product of a constant and other stuff, 2438 // we can use the constant in the GCD computation. 2439 Constant = getConstantPart(Coeff); 2440 if (!Constant) 2441 return false; 2442 APInt ConstCoeff = Constant->getAPInt(); 2443 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2444 } 2445 Inner = AddRec->getStart(); 2446 } 2447 Inner = Dst; 2448 while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { 2449 AddRec = cast<SCEVAddRecExpr>(Inner); 2450 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2451 if (CurLoop == AddRec->getLoop()) 2452 DstCoeff = Coeff; 2453 else { 2454 // If the coefficient is the product of a constant and other stuff, 2455 // we can use the constant in the GCD computation. 2456 Constant = getConstantPart(Coeff); 2457 if (!Constant) 2458 return false; 2459 APInt ConstCoeff = Constant->getAPInt(); 2460 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2461 } 2462 Inner = AddRec->getStart(); 2463 } 2464 Delta = SE->getMinusSCEV(SrcCoeff, DstCoeff); 2465 // If the coefficient is the product of a constant and other stuff, 2466 // we can use the constant in the GCD computation. 2467 Constant = getConstantPart(Delta); 2468 if (!Constant) 2469 // The difference of the two coefficients might not be a product 2470 // or constant, in which case we give up on this direction. 2471 continue; 2472 APInt ConstCoeff = Constant->getAPInt(); 2473 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2474 LLVM_DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n"); 2475 if (RunningGCD != 0) { 2476 Remainder = ConstDelta.srem(RunningGCD); 2477 LLVM_DEBUG(dbgs() << "\tRemainder = " << Remainder << "\n"); 2478 if (Remainder != 0) { 2479 unsigned Level = mapSrcLoop(CurLoop); 2480 Result.DV[Level - 1].Direction &= unsigned(~Dependence::DVEntry::EQ); 2481 Improved = true; 2482 } 2483 } 2484 } 2485 if (Improved) 2486 ++GCDsuccesses; 2487 LLVM_DEBUG(dbgs() << "all done\n"); 2488 return false; 2489 } 2490 2491 2492 //===----------------------------------------------------------------------===// 2493 // banerjeeMIVtest - 2494 // Use Banerjee's Inequalities to test an MIV subscript pair. 2495 // (Wolfe, in the race-car book, calls this the Extreme Value Test.) 2496 // Generally follows the discussion in Section 2.5.2 of 2497 // 2498 // Optimizing Supercompilers for Supercomputers 2499 // Michael Wolfe 2500 // 2501 // The inequalities given on page 25 are simplified in that loops are 2502 // normalized so that the lower bound is always 0 and the stride is always 1. 2503 // For example, Wolfe gives 2504 // 2505 // LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2506 // 2507 // where A_k is the coefficient of the kth index in the source subscript, 2508 // B_k is the coefficient of the kth index in the destination subscript, 2509 // U_k is the upper bound of the kth index, L_k is the lower bound of the Kth 2510 // index, and N_k is the stride of the kth index. Since all loops are normalized 2511 // by the SCEV package, N_k = 1 and L_k = 0, allowing us to simplify the 2512 // equation to 2513 // 2514 // LB^<_k = (A^-_k - B_k)^- (U_k - 0 - 1) + (A_k - B_k)0 - B_k 1 2515 // = (A^-_k - B_k)^- (U_k - 1) - B_k 2516 // 2517 // Similar simplifications are possible for the other equations. 2518 // 2519 // When we can't determine the number of iterations for a loop, 2520 // we use NULL as an indicator for the worst case, infinity. 2521 // When computing the upper bound, NULL denotes +inf; 2522 // for the lower bound, NULL denotes -inf. 2523 // 2524 // Return true if dependence disproved. 2525 bool DependenceInfo::banerjeeMIVtest(const SCEV *Src, const SCEV *Dst, 2526 const SmallBitVector &Loops, 2527 FullDependence &Result) const { 2528 LLVM_DEBUG(dbgs() << "starting Banerjee\n"); 2529 ++BanerjeeApplications; 2530 LLVM_DEBUG(dbgs() << " Src = " << *Src << '\n'); 2531 const SCEV *A0; 2532 CoefficientInfo *A = collectCoeffInfo(Src, true, A0); 2533 LLVM_DEBUG(dbgs() << " Dst = " << *Dst << '\n'); 2534 const SCEV *B0; 2535 CoefficientInfo *B = collectCoeffInfo(Dst, false, B0); 2536 BoundInfo *Bound = new BoundInfo[MaxLevels + 1]; 2537 const SCEV *Delta = SE->getMinusSCEV(B0, A0); 2538 LLVM_DEBUG(dbgs() << "\tDelta = " << *Delta << '\n'); 2539 2540 // Compute bounds for all the * directions. 2541 LLVM_DEBUG(dbgs() << "\tBounds[*]\n"); 2542 for (unsigned K = 1; K <= MaxLevels; ++K) { 2543 Bound[K].Iterations = A[K].Iterations ? A[K].Iterations : B[K].Iterations; 2544 Bound[K].Direction = Dependence::DVEntry::ALL; 2545 Bound[K].DirSet = Dependence::DVEntry::NONE; 2546 findBoundsALL(A, B, Bound, K); 2547 #ifndef NDEBUG 2548 LLVM_DEBUG(dbgs() << "\t " << K << '\t'); 2549 if (Bound[K].Lower[Dependence::DVEntry::ALL]) 2550 LLVM_DEBUG(dbgs() << *Bound[K].Lower[Dependence::DVEntry::ALL] << '\t'); 2551 else 2552 LLVM_DEBUG(dbgs() << "-inf\t"); 2553 if (Bound[K].Upper[Dependence::DVEntry::ALL]) 2554 LLVM_DEBUG(dbgs() << *Bound[K].Upper[Dependence::DVEntry::ALL] << '\n'); 2555 else 2556 LLVM_DEBUG(dbgs() << "+inf\n"); 2557 #endif 2558 } 2559 2560 // Test the *, *, *, ... case. 2561 bool Disproved = false; 2562 if (testBounds(Dependence::DVEntry::ALL, 0, Bound, Delta)) { 2563 // Explore the direction vector hierarchy. 2564 unsigned DepthExpanded = 0; 2565 unsigned NewDeps = exploreDirections(1, A, B, Bound, 2566 Loops, DepthExpanded, Delta); 2567 if (NewDeps > 0) { 2568 bool Improved = false; 2569 for (unsigned K = 1; K <= CommonLevels; ++K) { 2570 if (Loops[K]) { 2571 unsigned Old = Result.DV[K - 1].Direction; 2572 Result.DV[K - 1].Direction = Old & Bound[K].DirSet; 2573 Improved |= Old != Result.DV[K - 1].Direction; 2574 if (!Result.DV[K - 1].Direction) { 2575 Improved = false; 2576 Disproved = true; 2577 break; 2578 } 2579 } 2580 } 2581 if (Improved) 2582 ++BanerjeeSuccesses; 2583 } 2584 else { 2585 ++BanerjeeIndependence; 2586 Disproved = true; 2587 } 2588 } 2589 else { 2590 ++BanerjeeIndependence; 2591 Disproved = true; 2592 } 2593 delete [] Bound; 2594 delete [] A; 2595 delete [] B; 2596 return Disproved; 2597 } 2598 2599 2600 // Hierarchically expands the direction vector 2601 // search space, combining the directions of discovered dependences 2602 // in the DirSet field of Bound. Returns the number of distinct 2603 // dependences discovered. If the dependence is disproved, 2604 // it will return 0. 2605 unsigned DependenceInfo::exploreDirections(unsigned Level, CoefficientInfo *A, 2606 CoefficientInfo *B, BoundInfo *Bound, 2607 const SmallBitVector &Loops, 2608 unsigned &DepthExpanded, 2609 const SCEV *Delta) const { 2610 if (Level > CommonLevels) { 2611 // record result 2612 LLVM_DEBUG(dbgs() << "\t["); 2613 for (unsigned K = 1; K <= CommonLevels; ++K) { 2614 if (Loops[K]) { 2615 Bound[K].DirSet |= Bound[K].Direction; 2616 #ifndef NDEBUG 2617 switch (Bound[K].Direction) { 2618 case Dependence::DVEntry::LT: 2619 LLVM_DEBUG(dbgs() << " <"); 2620 break; 2621 case Dependence::DVEntry::EQ: 2622 LLVM_DEBUG(dbgs() << " ="); 2623 break; 2624 case Dependence::DVEntry::GT: 2625 LLVM_DEBUG(dbgs() << " >"); 2626 break; 2627 case Dependence::DVEntry::ALL: 2628 LLVM_DEBUG(dbgs() << " *"); 2629 break; 2630 default: 2631 llvm_unreachable("unexpected Bound[K].Direction"); 2632 } 2633 #endif 2634 } 2635 } 2636 LLVM_DEBUG(dbgs() << " ]\n"); 2637 return 1; 2638 } 2639 if (Loops[Level]) { 2640 if (Level > DepthExpanded) { 2641 DepthExpanded = Level; 2642 // compute bounds for <, =, > at current level 2643 findBoundsLT(A, B, Bound, Level); 2644 findBoundsGT(A, B, Bound, Level); 2645 findBoundsEQ(A, B, Bound, Level); 2646 #ifndef NDEBUG 2647 LLVM_DEBUG(dbgs() << "\tBound for level = " << Level << '\n'); 2648 LLVM_DEBUG(dbgs() << "\t <\t"); 2649 if (Bound[Level].Lower[Dependence::DVEntry::LT]) 2650 LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::LT] 2651 << '\t'); 2652 else 2653 LLVM_DEBUG(dbgs() << "-inf\t"); 2654 if (Bound[Level].Upper[Dependence::DVEntry::LT]) 2655 LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::LT] 2656 << '\n'); 2657 else 2658 LLVM_DEBUG(dbgs() << "+inf\n"); 2659 LLVM_DEBUG(dbgs() << "\t =\t"); 2660 if (Bound[Level].Lower[Dependence::DVEntry::EQ]) 2661 LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::EQ] 2662 << '\t'); 2663 else 2664 LLVM_DEBUG(dbgs() << "-inf\t"); 2665 if (Bound[Level].Upper[Dependence::DVEntry::EQ]) 2666 LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::EQ] 2667 << '\n'); 2668 else 2669 LLVM_DEBUG(dbgs() << "+inf\n"); 2670 LLVM_DEBUG(dbgs() << "\t >\t"); 2671 if (Bound[Level].Lower[Dependence::DVEntry::GT]) 2672 LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::GT] 2673 << '\t'); 2674 else 2675 LLVM_DEBUG(dbgs() << "-inf\t"); 2676 if (Bound[Level].Upper[Dependence::DVEntry::GT]) 2677 LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::GT] 2678 << '\n'); 2679 else 2680 LLVM_DEBUG(dbgs() << "+inf\n"); 2681 #endif 2682 } 2683 2684 unsigned NewDeps = 0; 2685 2686 // test bounds for <, *, *, ... 2687 if (testBounds(Dependence::DVEntry::LT, Level, Bound, Delta)) 2688 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2689 Loops, DepthExpanded, Delta); 2690 2691 // Test bounds for =, *, *, ... 2692 if (testBounds(Dependence::DVEntry::EQ, Level, Bound, Delta)) 2693 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2694 Loops, DepthExpanded, Delta); 2695 2696 // test bounds for >, *, *, ... 2697 if (testBounds(Dependence::DVEntry::GT, Level, Bound, Delta)) 2698 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2699 Loops, DepthExpanded, Delta); 2700 2701 Bound[Level].Direction = Dependence::DVEntry::ALL; 2702 return NewDeps; 2703 } 2704 else 2705 return exploreDirections(Level + 1, A, B, Bound, Loops, DepthExpanded, Delta); 2706 } 2707 2708 2709 // Returns true iff the current bounds are plausible. 2710 bool DependenceInfo::testBounds(unsigned char DirKind, unsigned Level, 2711 BoundInfo *Bound, const SCEV *Delta) const { 2712 Bound[Level].Direction = DirKind; 2713 if (const SCEV *LowerBound = getLowerBound(Bound)) 2714 if (isKnownPredicate(CmpInst::ICMP_SGT, LowerBound, Delta)) 2715 return false; 2716 if (const SCEV *UpperBound = getUpperBound(Bound)) 2717 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, UpperBound)) 2718 return false; 2719 return true; 2720 } 2721 2722 2723 // Computes the upper and lower bounds for level K 2724 // using the * direction. Records them in Bound. 2725 // Wolfe gives the equations 2726 // 2727 // LB^*_k = (A^-_k - B^+_k)(U_k - L_k) + (A_k - B_k)L_k 2728 // UB^*_k = (A^+_k - B^-_k)(U_k - L_k) + (A_k - B_k)L_k 2729 // 2730 // Since we normalize loops, we can simplify these equations to 2731 // 2732 // LB^*_k = (A^-_k - B^+_k)U_k 2733 // UB^*_k = (A^+_k - B^-_k)U_k 2734 // 2735 // We must be careful to handle the case where the upper bound is unknown. 2736 // Note that the lower bound is always <= 0 2737 // and the upper bound is always >= 0. 2738 void DependenceInfo::findBoundsALL(CoefficientInfo *A, CoefficientInfo *B, 2739 BoundInfo *Bound, unsigned K) const { 2740 Bound[K].Lower[Dependence::DVEntry::ALL] = nullptr; // Default value = -infinity. 2741 Bound[K].Upper[Dependence::DVEntry::ALL] = nullptr; // Default value = +infinity. 2742 if (Bound[K].Iterations) { 2743 Bound[K].Lower[Dependence::DVEntry::ALL] = 2744 SE->getMulExpr(SE->getMinusSCEV(A[K].NegPart, B[K].PosPart), 2745 Bound[K].Iterations); 2746 Bound[K].Upper[Dependence::DVEntry::ALL] = 2747 SE->getMulExpr(SE->getMinusSCEV(A[K].PosPart, B[K].NegPart), 2748 Bound[K].Iterations); 2749 } 2750 else { 2751 // If the difference is 0, we won't need to know the number of iterations. 2752 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].NegPart, B[K].PosPart)) 2753 Bound[K].Lower[Dependence::DVEntry::ALL] = 2754 SE->getZero(A[K].Coeff->getType()); 2755 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].PosPart, B[K].NegPart)) 2756 Bound[K].Upper[Dependence::DVEntry::ALL] = 2757 SE->getZero(A[K].Coeff->getType()); 2758 } 2759 } 2760 2761 2762 // Computes the upper and lower bounds for level K 2763 // using the = direction. Records them in Bound. 2764 // Wolfe gives the equations 2765 // 2766 // LB^=_k = (A_k - B_k)^- (U_k - L_k) + (A_k - B_k)L_k 2767 // UB^=_k = (A_k - B_k)^+ (U_k - L_k) + (A_k - B_k)L_k 2768 // 2769 // Since we normalize loops, we can simplify these equations to 2770 // 2771 // LB^=_k = (A_k - B_k)^- U_k 2772 // UB^=_k = (A_k - B_k)^+ U_k 2773 // 2774 // We must be careful to handle the case where the upper bound is unknown. 2775 // Note that the lower bound is always <= 0 2776 // and the upper bound is always >= 0. 2777 void DependenceInfo::findBoundsEQ(CoefficientInfo *A, CoefficientInfo *B, 2778 BoundInfo *Bound, unsigned K) const { 2779 Bound[K].Lower[Dependence::DVEntry::EQ] = nullptr; // Default value = -infinity. 2780 Bound[K].Upper[Dependence::DVEntry::EQ] = nullptr; // Default value = +infinity. 2781 if (Bound[K].Iterations) { 2782 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); 2783 const SCEV *NegativePart = getNegativePart(Delta); 2784 Bound[K].Lower[Dependence::DVEntry::EQ] = 2785 SE->getMulExpr(NegativePart, Bound[K].Iterations); 2786 const SCEV *PositivePart = getPositivePart(Delta); 2787 Bound[K].Upper[Dependence::DVEntry::EQ] = 2788 SE->getMulExpr(PositivePart, Bound[K].Iterations); 2789 } 2790 else { 2791 // If the positive/negative part of the difference is 0, 2792 // we won't need to know the number of iterations. 2793 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); 2794 const SCEV *NegativePart = getNegativePart(Delta); 2795 if (NegativePart->isZero()) 2796 Bound[K].Lower[Dependence::DVEntry::EQ] = NegativePart; // Zero 2797 const SCEV *PositivePart = getPositivePart(Delta); 2798 if (PositivePart->isZero()) 2799 Bound[K].Upper[Dependence::DVEntry::EQ] = PositivePart; // Zero 2800 } 2801 } 2802 2803 2804 // Computes the upper and lower bounds for level K 2805 // using the < direction. Records them in Bound. 2806 // Wolfe gives the equations 2807 // 2808 // LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2809 // UB^<_k = (A^+_k - B_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2810 // 2811 // Since we normalize loops, we can simplify these equations to 2812 // 2813 // LB^<_k = (A^-_k - B_k)^- (U_k - 1) - B_k 2814 // UB^<_k = (A^+_k - B_k)^+ (U_k - 1) - B_k 2815 // 2816 // We must be careful to handle the case where the upper bound is unknown. 2817 void DependenceInfo::findBoundsLT(CoefficientInfo *A, CoefficientInfo *B, 2818 BoundInfo *Bound, unsigned K) const { 2819 Bound[K].Lower[Dependence::DVEntry::LT] = nullptr; // Default value = -infinity. 2820 Bound[K].Upper[Dependence::DVEntry::LT] = nullptr; // Default value = +infinity. 2821 if (Bound[K].Iterations) { 2822 const SCEV *Iter_1 = SE->getMinusSCEV( 2823 Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType())); 2824 const SCEV *NegPart = 2825 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); 2826 Bound[K].Lower[Dependence::DVEntry::LT] = 2827 SE->getMinusSCEV(SE->getMulExpr(NegPart, Iter_1), B[K].Coeff); 2828 const SCEV *PosPart = 2829 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); 2830 Bound[K].Upper[Dependence::DVEntry::LT] = 2831 SE->getMinusSCEV(SE->getMulExpr(PosPart, Iter_1), B[K].Coeff); 2832 } 2833 else { 2834 // If the positive/negative part of the difference is 0, 2835 // we won't need to know the number of iterations. 2836 const SCEV *NegPart = 2837 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); 2838 if (NegPart->isZero()) 2839 Bound[K].Lower[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); 2840 const SCEV *PosPart = 2841 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); 2842 if (PosPart->isZero()) 2843 Bound[K].Upper[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); 2844 } 2845 } 2846 2847 2848 // Computes the upper and lower bounds for level K 2849 // using the > direction. Records them in Bound. 2850 // Wolfe gives the equations 2851 // 2852 // LB^>_k = (A_k - B^+_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k 2853 // UB^>_k = (A_k - B^-_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k 2854 // 2855 // Since we normalize loops, we can simplify these equations to 2856 // 2857 // LB^>_k = (A_k - B^+_k)^- (U_k - 1) + A_k 2858 // UB^>_k = (A_k - B^-_k)^+ (U_k - 1) + A_k 2859 // 2860 // We must be careful to handle the case where the upper bound is unknown. 2861 void DependenceInfo::findBoundsGT(CoefficientInfo *A, CoefficientInfo *B, 2862 BoundInfo *Bound, unsigned K) const { 2863 Bound[K].Lower[Dependence::DVEntry::GT] = nullptr; // Default value = -infinity. 2864 Bound[K].Upper[Dependence::DVEntry::GT] = nullptr; // Default value = +infinity. 2865 if (Bound[K].Iterations) { 2866 const SCEV *Iter_1 = SE->getMinusSCEV( 2867 Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType())); 2868 const SCEV *NegPart = 2869 getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); 2870 Bound[K].Lower[Dependence::DVEntry::GT] = 2871 SE->getAddExpr(SE->getMulExpr(NegPart, Iter_1), A[K].Coeff); 2872 const SCEV *PosPart = 2873 getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); 2874 Bound[K].Upper[Dependence::DVEntry::GT] = 2875 SE->getAddExpr(SE->getMulExpr(PosPart, Iter_1), A[K].Coeff); 2876 } 2877 else { 2878 // If the positive/negative part of the difference is 0, 2879 // we won't need to know the number of iterations. 2880 const SCEV *NegPart = getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); 2881 if (NegPart->isZero()) 2882 Bound[K].Lower[Dependence::DVEntry::GT] = A[K].Coeff; 2883 const SCEV *PosPart = getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); 2884 if (PosPart->isZero()) 2885 Bound[K].Upper[Dependence::DVEntry::GT] = A[K].Coeff; 2886 } 2887 } 2888 2889 2890 // X^+ = max(X, 0) 2891 const SCEV *DependenceInfo::getPositivePart(const SCEV *X) const { 2892 return SE->getSMaxExpr(X, SE->getZero(X->getType())); 2893 } 2894 2895 2896 // X^- = min(X, 0) 2897 const SCEV *DependenceInfo::getNegativePart(const SCEV *X) const { 2898 return SE->getSMinExpr(X, SE->getZero(X->getType())); 2899 } 2900 2901 2902 // Walks through the subscript, 2903 // collecting each coefficient, the associated loop bounds, 2904 // and recording its positive and negative parts for later use. 2905 DependenceInfo::CoefficientInfo * 2906 DependenceInfo::collectCoeffInfo(const SCEV *Subscript, bool SrcFlag, 2907 const SCEV *&Constant) const { 2908 const SCEV *Zero = SE->getZero(Subscript->getType()); 2909 CoefficientInfo *CI = new CoefficientInfo[MaxLevels + 1]; 2910 for (unsigned K = 1; K <= MaxLevels; ++K) { 2911 CI[K].Coeff = Zero; 2912 CI[K].PosPart = Zero; 2913 CI[K].NegPart = Zero; 2914 CI[K].Iterations = nullptr; 2915 } 2916 while (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Subscript)) { 2917 const Loop *L = AddRec->getLoop(); 2918 unsigned K = SrcFlag ? mapSrcLoop(L) : mapDstLoop(L); 2919 CI[K].Coeff = AddRec->getStepRecurrence(*SE); 2920 CI[K].PosPart = getPositivePart(CI[K].Coeff); 2921 CI[K].NegPart = getNegativePart(CI[K].Coeff); 2922 CI[K].Iterations = collectUpperBound(L, Subscript->getType()); 2923 Subscript = AddRec->getStart(); 2924 } 2925 Constant = Subscript; 2926 #ifndef NDEBUG 2927 LLVM_DEBUG(dbgs() << "\tCoefficient Info\n"); 2928 for (unsigned K = 1; K <= MaxLevels; ++K) { 2929 LLVM_DEBUG(dbgs() << "\t " << K << "\t" << *CI[K].Coeff); 2930 LLVM_DEBUG(dbgs() << "\tPos Part = "); 2931 LLVM_DEBUG(dbgs() << *CI[K].PosPart); 2932 LLVM_DEBUG(dbgs() << "\tNeg Part = "); 2933 LLVM_DEBUG(dbgs() << *CI[K].NegPart); 2934 LLVM_DEBUG(dbgs() << "\tUpper Bound = "); 2935 if (CI[K].Iterations) 2936 LLVM_DEBUG(dbgs() << *CI[K].Iterations); 2937 else 2938 LLVM_DEBUG(dbgs() << "+inf"); 2939 LLVM_DEBUG(dbgs() << '\n'); 2940 } 2941 LLVM_DEBUG(dbgs() << "\t Constant = " << *Subscript << '\n'); 2942 #endif 2943 return CI; 2944 } 2945 2946 2947 // Looks through all the bounds info and 2948 // computes the lower bound given the current direction settings 2949 // at each level. If the lower bound for any level is -inf, 2950 // the result is -inf. 2951 const SCEV *DependenceInfo::getLowerBound(BoundInfo *Bound) const { 2952 const SCEV *Sum = Bound[1].Lower[Bound[1].Direction]; 2953 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { 2954 if (Bound[K].Lower[Bound[K].Direction]) 2955 Sum = SE->getAddExpr(Sum, Bound[K].Lower[Bound[K].Direction]); 2956 else 2957 Sum = nullptr; 2958 } 2959 return Sum; 2960 } 2961 2962 2963 // Looks through all the bounds info and 2964 // computes the upper bound given the current direction settings 2965 // at each level. If the upper bound at any level is +inf, 2966 // the result is +inf. 2967 const SCEV *DependenceInfo::getUpperBound(BoundInfo *Bound) const { 2968 const SCEV *Sum = Bound[1].Upper[Bound[1].Direction]; 2969 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { 2970 if (Bound[K].Upper[Bound[K].Direction]) 2971 Sum = SE->getAddExpr(Sum, Bound[K].Upper[Bound[K].Direction]); 2972 else 2973 Sum = nullptr; 2974 } 2975 return Sum; 2976 } 2977 2978 2979 //===----------------------------------------------------------------------===// 2980 // Constraint manipulation for Delta test. 2981 2982 // Given a linear SCEV, 2983 // return the coefficient (the step) 2984 // corresponding to the specified loop. 2985 // If there isn't one, return 0. 2986 // For example, given a*i + b*j + c*k, finding the coefficient 2987 // corresponding to the j loop would yield b. 2988 const SCEV *DependenceInfo::findCoefficient(const SCEV *Expr, 2989 const Loop *TargetLoop) const { 2990 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 2991 if (!AddRec) 2992 return SE->getZero(Expr->getType()); 2993 if (AddRec->getLoop() == TargetLoop) 2994 return AddRec->getStepRecurrence(*SE); 2995 return findCoefficient(AddRec->getStart(), TargetLoop); 2996 } 2997 2998 2999 // Given a linear SCEV, 3000 // return the SCEV given by zeroing out the coefficient 3001 // corresponding to the specified loop. 3002 // For example, given a*i + b*j + c*k, zeroing the coefficient 3003 // corresponding to the j loop would yield a*i + c*k. 3004 const SCEV *DependenceInfo::zeroCoefficient(const SCEV *Expr, 3005 const Loop *TargetLoop) const { 3006 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 3007 if (!AddRec) 3008 return Expr; // ignore 3009 if (AddRec->getLoop() == TargetLoop) 3010 return AddRec->getStart(); 3011 return SE->getAddRecExpr(zeroCoefficient(AddRec->getStart(), TargetLoop), 3012 AddRec->getStepRecurrence(*SE), 3013 AddRec->getLoop(), 3014 AddRec->getNoWrapFlags()); 3015 } 3016 3017 3018 // Given a linear SCEV Expr, 3019 // return the SCEV given by adding some Value to the 3020 // coefficient corresponding to the specified TargetLoop. 3021 // For example, given a*i + b*j + c*k, adding 1 to the coefficient 3022 // corresponding to the j loop would yield a*i + (b+1)*j + c*k. 3023 const SCEV *DependenceInfo::addToCoefficient(const SCEV *Expr, 3024 const Loop *TargetLoop, 3025 const SCEV *Value) const { 3026 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 3027 if (!AddRec) // create a new addRec 3028 return SE->getAddRecExpr(Expr, 3029 Value, 3030 TargetLoop, 3031 SCEV::FlagAnyWrap); // Worst case, with no info. 3032 if (AddRec->getLoop() == TargetLoop) { 3033 const SCEV *Sum = SE->getAddExpr(AddRec->getStepRecurrence(*SE), Value); 3034 if (Sum->isZero()) 3035 return AddRec->getStart(); 3036 return SE->getAddRecExpr(AddRec->getStart(), 3037 Sum, 3038 AddRec->getLoop(), 3039 AddRec->getNoWrapFlags()); 3040 } 3041 if (SE->isLoopInvariant(AddRec, TargetLoop)) 3042 return SE->getAddRecExpr(AddRec, Value, TargetLoop, SCEV::FlagAnyWrap); 3043 return SE->getAddRecExpr( 3044 addToCoefficient(AddRec->getStart(), TargetLoop, Value), 3045 AddRec->getStepRecurrence(*SE), AddRec->getLoop(), 3046 AddRec->getNoWrapFlags()); 3047 } 3048 3049 3050 // Review the constraints, looking for opportunities 3051 // to simplify a subscript pair (Src and Dst). 3052 // Return true if some simplification occurs. 3053 // If the simplification isn't exact (that is, if it is conservative 3054 // in terms of dependence), set consistent to false. 3055 // Corresponds to Figure 5 from the paper 3056 // 3057 // Practical Dependence Testing 3058 // Goff, Kennedy, Tseng 3059 // PLDI 1991 3060 bool DependenceInfo::propagate(const SCEV *&Src, const SCEV *&Dst, 3061 SmallBitVector &Loops, 3062 SmallVectorImpl<Constraint> &Constraints, 3063 bool &Consistent) { 3064 bool Result = false; 3065 for (unsigned LI : Loops.set_bits()) { 3066 LLVM_DEBUG(dbgs() << "\t Constraint[" << LI << "] is"); 3067 LLVM_DEBUG(Constraints[LI].dump(dbgs())); 3068 if (Constraints[LI].isDistance()) 3069 Result |= propagateDistance(Src, Dst, Constraints[LI], Consistent); 3070 else if (Constraints[LI].isLine()) 3071 Result |= propagateLine(Src, Dst, Constraints[LI], Consistent); 3072 else if (Constraints[LI].isPoint()) 3073 Result |= propagatePoint(Src, Dst, Constraints[LI]); 3074 } 3075 return Result; 3076 } 3077 3078 3079 // Attempt to propagate a distance 3080 // constraint into a subscript pair (Src and Dst). 3081 // Return true if some simplification occurs. 3082 // If the simplification isn't exact (that is, if it is conservative 3083 // in terms of dependence), set consistent to false. 3084 bool DependenceInfo::propagateDistance(const SCEV *&Src, const SCEV *&Dst, 3085 Constraint &CurConstraint, 3086 bool &Consistent) { 3087 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3088 LLVM_DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); 3089 const SCEV *A_K = findCoefficient(Src, CurLoop); 3090 if (A_K->isZero()) 3091 return false; 3092 const SCEV *DA_K = SE->getMulExpr(A_K, CurConstraint.getD()); 3093 Src = SE->getMinusSCEV(Src, DA_K); 3094 Src = zeroCoefficient(Src, CurLoop); 3095 LLVM_DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); 3096 LLVM_DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); 3097 Dst = addToCoefficient(Dst, CurLoop, SE->getNegativeSCEV(A_K)); 3098 LLVM_DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); 3099 if (!findCoefficient(Dst, CurLoop)->isZero()) 3100 Consistent = false; 3101 return true; 3102 } 3103 3104 3105 // Attempt to propagate a line 3106 // constraint into a subscript pair (Src and Dst). 3107 // Return true if some simplification occurs. 3108 // If the simplification isn't exact (that is, if it is conservative 3109 // in terms of dependence), set consistent to false. 3110 bool DependenceInfo::propagateLine(const SCEV *&Src, const SCEV *&Dst, 3111 Constraint &CurConstraint, 3112 bool &Consistent) { 3113 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3114 const SCEV *A = CurConstraint.getA(); 3115 const SCEV *B = CurConstraint.getB(); 3116 const SCEV *C = CurConstraint.getC(); 3117 LLVM_DEBUG(dbgs() << "\t\tA = " << *A << ", B = " << *B << ", C = " << *C 3118 << "\n"); 3119 LLVM_DEBUG(dbgs() << "\t\tSrc = " << *Src << "\n"); 3120 LLVM_DEBUG(dbgs() << "\t\tDst = " << *Dst << "\n"); 3121 if (A->isZero()) { 3122 const SCEVConstant *Bconst = dyn_cast<SCEVConstant>(B); 3123 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3124 if (!Bconst || !Cconst) return false; 3125 APInt Beta = Bconst->getAPInt(); 3126 APInt Charlie = Cconst->getAPInt(); 3127 APInt CdivB = Charlie.sdiv(Beta); 3128 assert(Charlie.srem(Beta) == 0 && "C should be evenly divisible by B"); 3129 const SCEV *AP_K = findCoefficient(Dst, CurLoop); 3130 // Src = SE->getAddExpr(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); 3131 Src = SE->getMinusSCEV(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); 3132 Dst = zeroCoefficient(Dst, CurLoop); 3133 if (!findCoefficient(Src, CurLoop)->isZero()) 3134 Consistent = false; 3135 } 3136 else if (B->isZero()) { 3137 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); 3138 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3139 if (!Aconst || !Cconst) return false; 3140 APInt Alpha = Aconst->getAPInt(); 3141 APInt Charlie = Cconst->getAPInt(); 3142 APInt CdivA = Charlie.sdiv(Alpha); 3143 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); 3144 const SCEV *A_K = findCoefficient(Src, CurLoop); 3145 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); 3146 Src = zeroCoefficient(Src, CurLoop); 3147 if (!findCoefficient(Dst, CurLoop)->isZero()) 3148 Consistent = false; 3149 } 3150 else if (isKnownPredicate(CmpInst::ICMP_EQ, A, B)) { 3151 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); 3152 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3153 if (!Aconst || !Cconst) return false; 3154 APInt Alpha = Aconst->getAPInt(); 3155 APInt Charlie = Cconst->getAPInt(); 3156 APInt CdivA = Charlie.sdiv(Alpha); 3157 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); 3158 const SCEV *A_K = findCoefficient(Src, CurLoop); 3159 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); 3160 Src = zeroCoefficient(Src, CurLoop); 3161 Dst = addToCoefficient(Dst, CurLoop, A_K); 3162 if (!findCoefficient(Dst, CurLoop)->isZero()) 3163 Consistent = false; 3164 } 3165 else { 3166 // paper is incorrect here, or perhaps just misleading 3167 const SCEV *A_K = findCoefficient(Src, CurLoop); 3168 Src = SE->getMulExpr(Src, A); 3169 Dst = SE->getMulExpr(Dst, A); 3170 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, C)); 3171 Src = zeroCoefficient(Src, CurLoop); 3172 Dst = addToCoefficient(Dst, CurLoop, SE->getMulExpr(A_K, B)); 3173 if (!findCoefficient(Dst, CurLoop)->isZero()) 3174 Consistent = false; 3175 } 3176 LLVM_DEBUG(dbgs() << "\t\tnew Src = " << *Src << "\n"); 3177 LLVM_DEBUG(dbgs() << "\t\tnew Dst = " << *Dst << "\n"); 3178 return true; 3179 } 3180 3181 3182 // Attempt to propagate a point 3183 // constraint into a subscript pair (Src and Dst). 3184 // Return true if some simplification occurs. 3185 bool DependenceInfo::propagatePoint(const SCEV *&Src, const SCEV *&Dst, 3186 Constraint &CurConstraint) { 3187 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3188 const SCEV *A_K = findCoefficient(Src, CurLoop); 3189 const SCEV *AP_K = findCoefficient(Dst, CurLoop); 3190 const SCEV *XA_K = SE->getMulExpr(A_K, CurConstraint.getX()); 3191 const SCEV *YAP_K = SE->getMulExpr(AP_K, CurConstraint.getY()); 3192 LLVM_DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); 3193 Src = SE->getAddExpr(Src, SE->getMinusSCEV(XA_K, YAP_K)); 3194 Src = zeroCoefficient(Src, CurLoop); 3195 LLVM_DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); 3196 LLVM_DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); 3197 Dst = zeroCoefficient(Dst, CurLoop); 3198 LLVM_DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); 3199 return true; 3200 } 3201 3202 3203 // Update direction vector entry based on the current constraint. 3204 void DependenceInfo::updateDirection(Dependence::DVEntry &Level, 3205 const Constraint &CurConstraint) const { 3206 LLVM_DEBUG(dbgs() << "\tUpdate direction, constraint ="); 3207 LLVM_DEBUG(CurConstraint.dump(dbgs())); 3208 if (CurConstraint.isAny()) 3209 ; // use defaults 3210 else if (CurConstraint.isDistance()) { 3211 // this one is consistent, the others aren't 3212 Level.Scalar = false; 3213 Level.Distance = CurConstraint.getD(); 3214 unsigned NewDirection = Dependence::DVEntry::NONE; 3215 if (!SE->isKnownNonZero(Level.Distance)) // if may be zero 3216 NewDirection = Dependence::DVEntry::EQ; 3217 if (!SE->isKnownNonPositive(Level.Distance)) // if may be positive 3218 NewDirection |= Dependence::DVEntry::LT; 3219 if (!SE->isKnownNonNegative(Level.Distance)) // if may be negative 3220 NewDirection |= Dependence::DVEntry::GT; 3221 Level.Direction &= NewDirection; 3222 } 3223 else if (CurConstraint.isLine()) { 3224 Level.Scalar = false; 3225 Level.Distance = nullptr; 3226 // direction should be accurate 3227 } 3228 else if (CurConstraint.isPoint()) { 3229 Level.Scalar = false; 3230 Level.Distance = nullptr; 3231 unsigned NewDirection = Dependence::DVEntry::NONE; 3232 if (!isKnownPredicate(CmpInst::ICMP_NE, 3233 CurConstraint.getY(), 3234 CurConstraint.getX())) 3235 // if X may be = Y 3236 NewDirection |= Dependence::DVEntry::EQ; 3237 if (!isKnownPredicate(CmpInst::ICMP_SLE, 3238 CurConstraint.getY(), 3239 CurConstraint.getX())) 3240 // if Y may be > X 3241 NewDirection |= Dependence::DVEntry::LT; 3242 if (!isKnownPredicate(CmpInst::ICMP_SGE, 3243 CurConstraint.getY(), 3244 CurConstraint.getX())) 3245 // if Y may be < X 3246 NewDirection |= Dependence::DVEntry::GT; 3247 Level.Direction &= NewDirection; 3248 } 3249 else 3250 llvm_unreachable("constraint has unexpected kind"); 3251 } 3252 3253 /// Check if we can delinearize the subscripts. If the SCEVs representing the 3254 /// source and destination array references are recurrences on a nested loop, 3255 /// this function flattens the nested recurrences into separate recurrences 3256 /// for each loop level. 3257 bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst, 3258 SmallVectorImpl<Subscript> &Pair) { 3259 assert(isLoadOrStore(Src) && "instruction is not load or store"); 3260 assert(isLoadOrStore(Dst) && "instruction is not load or store"); 3261 Value *SrcPtr = getLoadStorePointerOperand(Src); 3262 Value *DstPtr = getLoadStorePointerOperand(Dst); 3263 3264 Loop *SrcLoop = LI->getLoopFor(Src->getParent()); 3265 Loop *DstLoop = LI->getLoopFor(Dst->getParent()); 3266 3267 // Below code mimics the code in Delinearization.cpp 3268 const SCEV *SrcAccessFn = 3269 SE->getSCEVAtScope(SrcPtr, SrcLoop); 3270 const SCEV *DstAccessFn = 3271 SE->getSCEVAtScope(DstPtr, DstLoop); 3272 3273 const SCEVUnknown *SrcBase = 3274 dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn)); 3275 const SCEVUnknown *DstBase = 3276 dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn)); 3277 3278 if (!SrcBase || !DstBase || SrcBase != DstBase) 3279 return false; 3280 3281 const SCEV *ElementSize = SE->getElementSize(Src); 3282 if (ElementSize != SE->getElementSize(Dst)) 3283 return false; 3284 3285 const SCEV *SrcSCEV = SE->getMinusSCEV(SrcAccessFn, SrcBase); 3286 const SCEV *DstSCEV = SE->getMinusSCEV(DstAccessFn, DstBase); 3287 3288 const SCEVAddRecExpr *SrcAR = dyn_cast<SCEVAddRecExpr>(SrcSCEV); 3289 const SCEVAddRecExpr *DstAR = dyn_cast<SCEVAddRecExpr>(DstSCEV); 3290 if (!SrcAR || !DstAR || !SrcAR->isAffine() || !DstAR->isAffine()) 3291 return false; 3292 3293 // First step: collect parametric terms in both array references. 3294 SmallVector<const SCEV *, 4> Terms; 3295 SE->collectParametricTerms(SrcAR, Terms); 3296 SE->collectParametricTerms(DstAR, Terms); 3297 3298 // Second step: find subscript sizes. 3299 SmallVector<const SCEV *, 4> Sizes; 3300 SE->findArrayDimensions(Terms, Sizes, ElementSize); 3301 3302 // Third step: compute the access functions for each subscript. 3303 SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts; 3304 SE->computeAccessFunctions(SrcAR, SrcSubscripts, Sizes); 3305 SE->computeAccessFunctions(DstAR, DstSubscripts, Sizes); 3306 3307 // Fail when there is only a subscript: that's a linearized access function. 3308 if (SrcSubscripts.size() < 2 || DstSubscripts.size() < 2 || 3309 SrcSubscripts.size() != DstSubscripts.size()) 3310 return false; 3311 3312 int size = SrcSubscripts.size(); 3313 3314 // Statically check that the array bounds are in-range. The first subscript we 3315 // don't have a size for and it cannot overflow into another subscript, so is 3316 // always safe. The others need to be 0 <= subscript[i] < bound, for both src 3317 // and dst. 3318 // FIXME: It may be better to record these sizes and add them as constraints 3319 // to the dependency checks. 3320 for (int i = 1; i < size; ++i) { 3321 if (!isKnownNonNegative(SrcSubscripts[i], SrcPtr)) 3322 return false; 3323 3324 if (!isKnownLessThan(SrcSubscripts[i], Sizes[i - 1])) 3325 return false; 3326 3327 if (!isKnownNonNegative(DstSubscripts[i], DstPtr)) 3328 return false; 3329 3330 if (!isKnownLessThan(DstSubscripts[i], Sizes[i - 1])) 3331 return false; 3332 } 3333 3334 LLVM_DEBUG({ 3335 dbgs() << "\nSrcSubscripts: "; 3336 for (int i = 0; i < size; i++) 3337 dbgs() << *SrcSubscripts[i]; 3338 dbgs() << "\nDstSubscripts: "; 3339 for (int i = 0; i < size; i++) 3340 dbgs() << *DstSubscripts[i]; 3341 }); 3342 3343 // The delinearization transforms a single-subscript MIV dependence test into 3344 // a multi-subscript SIV dependence test that is easier to compute. So we 3345 // resize Pair to contain as many pairs of subscripts as the delinearization 3346 // has found, and then initialize the pairs following the delinearization. 3347 Pair.resize(size); 3348 for (int i = 0; i < size; ++i) { 3349 Pair[i].Src = SrcSubscripts[i]; 3350 Pair[i].Dst = DstSubscripts[i]; 3351 unifySubscriptType(&Pair[i]); 3352 } 3353 3354 return true; 3355 } 3356 3357 //===----------------------------------------------------------------------===// 3358 3359 #ifndef NDEBUG 3360 // For debugging purposes, dump a small bit vector to dbgs(). 3361 static void dumpSmallBitVector(SmallBitVector &BV) { 3362 dbgs() << "{"; 3363 for (unsigned VI : BV.set_bits()) { 3364 dbgs() << VI; 3365 if (BV.find_next(VI) >= 0) 3366 dbgs() << ' '; 3367 } 3368 dbgs() << "}\n"; 3369 } 3370 #endif 3371 3372 // depends - 3373 // Returns NULL if there is no dependence. 3374 // Otherwise, return a Dependence with as many details as possible. 3375 // Corresponds to Section 3.1 in the paper 3376 // 3377 // Practical Dependence Testing 3378 // Goff, Kennedy, Tseng 3379 // PLDI 1991 3380 // 3381 // Care is required to keep the routine below, getSplitIteration(), 3382 // up to date with respect to this routine. 3383 std::unique_ptr<Dependence> 3384 DependenceInfo::depends(Instruction *Src, Instruction *Dst, 3385 bool PossiblyLoopIndependent) { 3386 if (Src == Dst) 3387 PossiblyLoopIndependent = false; 3388 3389 if ((!Src->mayReadFromMemory() && !Src->mayWriteToMemory()) || 3390 (!Dst->mayReadFromMemory() && !Dst->mayWriteToMemory())) 3391 // if both instructions don't reference memory, there's no dependence 3392 return nullptr; 3393 3394 if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) { 3395 // can only analyze simple loads and stores, i.e., no calls, invokes, etc. 3396 LLVM_DEBUG(dbgs() << "can only handle simple loads and stores\n"); 3397 return make_unique<Dependence>(Src, Dst); 3398 } 3399 3400 assert(isLoadOrStore(Src) && "instruction is not load or store"); 3401 assert(isLoadOrStore(Dst) && "instruction is not load or store"); 3402 Value *SrcPtr = getLoadStorePointerOperand(Src); 3403 Value *DstPtr = getLoadStorePointerOperand(Dst); 3404 3405 switch (underlyingObjectsAlias(AA, F->getParent()->getDataLayout(), 3406 MemoryLocation::get(Dst), 3407 MemoryLocation::get(Src))) { 3408 case MayAlias: 3409 case PartialAlias: 3410 // cannot analyse objects if we don't understand their aliasing. 3411 LLVM_DEBUG(dbgs() << "can't analyze may or partial alias\n"); 3412 return make_unique<Dependence>(Src, Dst); 3413 case NoAlias: 3414 // If the objects noalias, they are distinct, accesses are independent. 3415 LLVM_DEBUG(dbgs() << "no alias\n"); 3416 return nullptr; 3417 case MustAlias: 3418 break; // The underlying objects alias; test accesses for dependence. 3419 } 3420 3421 // establish loop nesting levels 3422 establishNestingLevels(Src, Dst); 3423 LLVM_DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n"); 3424 LLVM_DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n"); 3425 3426 FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels); 3427 ++TotalArrayPairs; 3428 3429 unsigned Pairs = 1; 3430 SmallVector<Subscript, 2> Pair(Pairs); 3431 const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); 3432 const SCEV *DstSCEV = SE->getSCEV(DstPtr); 3433 LLVM_DEBUG(dbgs() << " SrcSCEV = " << *SrcSCEV << "\n"); 3434 LLVM_DEBUG(dbgs() << " DstSCEV = " << *DstSCEV << "\n"); 3435 Pair[0].Src = SrcSCEV; 3436 Pair[0].Dst = DstSCEV; 3437 3438 if (Delinearize) { 3439 if (tryDelinearize(Src, Dst, Pair)) { 3440 LLVM_DEBUG(dbgs() << " delinearized\n"); 3441 Pairs = Pair.size(); 3442 } 3443 } 3444 3445 for (unsigned P = 0; P < Pairs; ++P) { 3446 Pair[P].Loops.resize(MaxLevels + 1); 3447 Pair[P].GroupLoops.resize(MaxLevels + 1); 3448 Pair[P].Group.resize(Pairs); 3449 removeMatchingExtensions(&Pair[P]); 3450 Pair[P].Classification = 3451 classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), 3452 Pair[P].Dst, LI->getLoopFor(Dst->getParent()), 3453 Pair[P].Loops); 3454 Pair[P].GroupLoops = Pair[P].Loops; 3455 Pair[P].Group.set(P); 3456 LLVM_DEBUG(dbgs() << " subscript " << P << "\n"); 3457 LLVM_DEBUG(dbgs() << "\tsrc = " << *Pair[P].Src << "\n"); 3458 LLVM_DEBUG(dbgs() << "\tdst = " << *Pair[P].Dst << "\n"); 3459 LLVM_DEBUG(dbgs() << "\tclass = " << Pair[P].Classification << "\n"); 3460 LLVM_DEBUG(dbgs() << "\tloops = "); 3461 LLVM_DEBUG(dumpSmallBitVector(Pair[P].Loops)); 3462 } 3463 3464 SmallBitVector Separable(Pairs); 3465 SmallBitVector Coupled(Pairs); 3466 3467 // Partition subscripts into separable and minimally-coupled groups 3468 // Algorithm in paper is algorithmically better; 3469 // this may be faster in practice. Check someday. 3470 // 3471 // Here's an example of how it works. Consider this code: 3472 // 3473 // for (i = ...) { 3474 // for (j = ...) { 3475 // for (k = ...) { 3476 // for (l = ...) { 3477 // for (m = ...) { 3478 // A[i][j][k][m] = ...; 3479 // ... = A[0][j][l][i + j]; 3480 // } 3481 // } 3482 // } 3483 // } 3484 // } 3485 // 3486 // There are 4 subscripts here: 3487 // 0 [i] and [0] 3488 // 1 [j] and [j] 3489 // 2 [k] and [l] 3490 // 3 [m] and [i + j] 3491 // 3492 // We've already classified each subscript pair as ZIV, SIV, etc., 3493 // and collected all the loops mentioned by pair P in Pair[P].Loops. 3494 // In addition, we've initialized Pair[P].GroupLoops to Pair[P].Loops 3495 // and set Pair[P].Group = {P}. 3496 // 3497 // Src Dst Classification Loops GroupLoops Group 3498 // 0 [i] [0] SIV {1} {1} {0} 3499 // 1 [j] [j] SIV {2} {2} {1} 3500 // 2 [k] [l] RDIV {3,4} {3,4} {2} 3501 // 3 [m] [i + j] MIV {1,2,5} {1,2,5} {3} 3502 // 3503 // For each subscript SI 0 .. 3, we consider each remaining subscript, SJ. 3504 // So, 0 is compared against 1, 2, and 3; 1 is compared against 2 and 3, etc. 3505 // 3506 // We begin by comparing 0 and 1. The intersection of the GroupLoops is empty. 3507 // Next, 0 and 2. Again, the intersection of their GroupLoops is empty. 3508 // Next 0 and 3. The intersection of their GroupLoop = {1}, not empty, 3509 // so Pair[3].Group = {0,3} and Done = false (that is, 0 will not be added 3510 // to either Separable or Coupled). 3511 // 3512 // Next, we consider 1 and 2. The intersection of the GroupLoops is empty. 3513 // Next, 1 and 3. The intersectionof their GroupLoops = {2}, not empty, 3514 // so Pair[3].Group = {0, 1, 3} and Done = false. 3515 // 3516 // Next, we compare 2 against 3. The intersection of the GroupLoops is empty. 3517 // Since Done remains true, we add 2 to the set of Separable pairs. 3518 // 3519 // Finally, we consider 3. There's nothing to compare it with, 3520 // so Done remains true and we add it to the Coupled set. 3521 // Pair[3].Group = {0, 1, 3} and GroupLoops = {1, 2, 5}. 3522 // 3523 // In the end, we've got 1 separable subscript and 1 coupled group. 3524 for (unsigned SI = 0; SI < Pairs; ++SI) { 3525 if (Pair[SI].Classification == Subscript::NonLinear) { 3526 // ignore these, but collect loops for later 3527 ++NonlinearSubscriptPairs; 3528 collectCommonLoops(Pair[SI].Src, 3529 LI->getLoopFor(Src->getParent()), 3530 Pair[SI].Loops); 3531 collectCommonLoops(Pair[SI].Dst, 3532 LI->getLoopFor(Dst->getParent()), 3533 Pair[SI].Loops); 3534 Result.Consistent = false; 3535 } else if (Pair[SI].Classification == Subscript::ZIV) { 3536 // always separable 3537 Separable.set(SI); 3538 } 3539 else { 3540 // SIV, RDIV, or MIV, so check for coupled group 3541 bool Done = true; 3542 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { 3543 SmallBitVector Intersection = Pair[SI].GroupLoops; 3544 Intersection &= Pair[SJ].GroupLoops; 3545 if (Intersection.any()) { 3546 // accumulate set of all the loops in group 3547 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; 3548 // accumulate set of all subscripts in group 3549 Pair[SJ].Group |= Pair[SI].Group; 3550 Done = false; 3551 } 3552 } 3553 if (Done) { 3554 if (Pair[SI].Group.count() == 1) { 3555 Separable.set(SI); 3556 ++SeparableSubscriptPairs; 3557 } 3558 else { 3559 Coupled.set(SI); 3560 ++CoupledSubscriptPairs; 3561 } 3562 } 3563 } 3564 } 3565 3566 LLVM_DEBUG(dbgs() << " Separable = "); 3567 LLVM_DEBUG(dumpSmallBitVector(Separable)); 3568 LLVM_DEBUG(dbgs() << " Coupled = "); 3569 LLVM_DEBUG(dumpSmallBitVector(Coupled)); 3570 3571 Constraint NewConstraint; 3572 NewConstraint.setAny(SE); 3573 3574 // test separable subscripts 3575 for (unsigned SI : Separable.set_bits()) { 3576 LLVM_DEBUG(dbgs() << "testing subscript " << SI); 3577 switch (Pair[SI].Classification) { 3578 case Subscript::ZIV: 3579 LLVM_DEBUG(dbgs() << ", ZIV\n"); 3580 if (testZIV(Pair[SI].Src, Pair[SI].Dst, Result)) 3581 return nullptr; 3582 break; 3583 case Subscript::SIV: { 3584 LLVM_DEBUG(dbgs() << ", SIV\n"); 3585 unsigned Level; 3586 const SCEV *SplitIter = nullptr; 3587 if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level, Result, NewConstraint, 3588 SplitIter)) 3589 return nullptr; 3590 break; 3591 } 3592 case Subscript::RDIV: 3593 LLVM_DEBUG(dbgs() << ", RDIV\n"); 3594 if (testRDIV(Pair[SI].Src, Pair[SI].Dst, Result)) 3595 return nullptr; 3596 break; 3597 case Subscript::MIV: 3598 LLVM_DEBUG(dbgs() << ", MIV\n"); 3599 if (testMIV(Pair[SI].Src, Pair[SI].Dst, Pair[SI].Loops, Result)) 3600 return nullptr; 3601 break; 3602 default: 3603 llvm_unreachable("subscript has unexpected classification"); 3604 } 3605 } 3606 3607 if (Coupled.count()) { 3608 // test coupled subscript groups 3609 LLVM_DEBUG(dbgs() << "starting on coupled subscripts\n"); 3610 LLVM_DEBUG(dbgs() << "MaxLevels + 1 = " << MaxLevels + 1 << "\n"); 3611 SmallVector<Constraint, 4> Constraints(MaxLevels + 1); 3612 for (unsigned II = 0; II <= MaxLevels; ++II) 3613 Constraints[II].setAny(SE); 3614 for (unsigned SI : Coupled.set_bits()) { 3615 LLVM_DEBUG(dbgs() << "testing subscript group " << SI << " { "); 3616 SmallBitVector Group(Pair[SI].Group); 3617 SmallBitVector Sivs(Pairs); 3618 SmallBitVector Mivs(Pairs); 3619 SmallBitVector ConstrainedLevels(MaxLevels + 1); 3620 SmallVector<Subscript *, 4> PairsInGroup; 3621 for (unsigned SJ : Group.set_bits()) { 3622 LLVM_DEBUG(dbgs() << SJ << " "); 3623 if (Pair[SJ].Classification == Subscript::SIV) 3624 Sivs.set(SJ); 3625 else 3626 Mivs.set(SJ); 3627 PairsInGroup.push_back(&Pair[SJ]); 3628 } 3629 unifySubscriptType(PairsInGroup); 3630 LLVM_DEBUG(dbgs() << "}\n"); 3631 while (Sivs.any()) { 3632 bool Changed = false; 3633 for (unsigned SJ : Sivs.set_bits()) { 3634 LLVM_DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n"); 3635 // SJ is an SIV subscript that's part of the current coupled group 3636 unsigned Level; 3637 const SCEV *SplitIter = nullptr; 3638 LLVM_DEBUG(dbgs() << "SIV\n"); 3639 if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, Result, NewConstraint, 3640 SplitIter)) 3641 return nullptr; 3642 ConstrainedLevels.set(Level); 3643 if (intersectConstraints(&Constraints[Level], &NewConstraint)) { 3644 if (Constraints[Level].isEmpty()) { 3645 ++DeltaIndependence; 3646 return nullptr; 3647 } 3648 Changed = true; 3649 } 3650 Sivs.reset(SJ); 3651 } 3652 if (Changed) { 3653 // propagate, possibly creating new SIVs and ZIVs 3654 LLVM_DEBUG(dbgs() << " propagating\n"); 3655 LLVM_DEBUG(dbgs() << "\tMivs = "); 3656 LLVM_DEBUG(dumpSmallBitVector(Mivs)); 3657 for (unsigned SJ : Mivs.set_bits()) { 3658 // SJ is an MIV subscript that's part of the current coupled group 3659 LLVM_DEBUG(dbgs() << "\tSJ = " << SJ << "\n"); 3660 if (propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, 3661 Constraints, Result.Consistent)) { 3662 LLVM_DEBUG(dbgs() << "\t Changed\n"); 3663 ++DeltaPropagations; 3664 Pair[SJ].Classification = 3665 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), 3666 Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), 3667 Pair[SJ].Loops); 3668 switch (Pair[SJ].Classification) { 3669 case Subscript::ZIV: 3670 LLVM_DEBUG(dbgs() << "ZIV\n"); 3671 if (testZIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) 3672 return nullptr; 3673 Mivs.reset(SJ); 3674 break; 3675 case Subscript::SIV: 3676 Sivs.set(SJ); 3677 Mivs.reset(SJ); 3678 break; 3679 case Subscript::RDIV: 3680 case Subscript::MIV: 3681 break; 3682 default: 3683 llvm_unreachable("bad subscript classification"); 3684 } 3685 } 3686 } 3687 } 3688 } 3689 3690 // test & propagate remaining RDIVs 3691 for (unsigned SJ : Mivs.set_bits()) { 3692 if (Pair[SJ].Classification == Subscript::RDIV) { 3693 LLVM_DEBUG(dbgs() << "RDIV test\n"); 3694 if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) 3695 return nullptr; 3696 // I don't yet understand how to propagate RDIV results 3697 Mivs.reset(SJ); 3698 } 3699 } 3700 3701 // test remaining MIVs 3702 // This code is temporary. 3703 // Better to somehow test all remaining subscripts simultaneously. 3704 for (unsigned SJ : Mivs.set_bits()) { 3705 if (Pair[SJ].Classification == Subscript::MIV) { 3706 LLVM_DEBUG(dbgs() << "MIV test\n"); 3707 if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Result)) 3708 return nullptr; 3709 } 3710 else 3711 llvm_unreachable("expected only MIV subscripts at this point"); 3712 } 3713 3714 // update Result.DV from constraint vector 3715 LLVM_DEBUG(dbgs() << " updating\n"); 3716 for (unsigned SJ : ConstrainedLevels.set_bits()) { 3717 if (SJ > CommonLevels) 3718 break; 3719 updateDirection(Result.DV[SJ - 1], Constraints[SJ]); 3720 if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE) 3721 return nullptr; 3722 } 3723 } 3724 } 3725 3726 // Make sure the Scalar flags are set correctly. 3727 SmallBitVector CompleteLoops(MaxLevels + 1); 3728 for (unsigned SI = 0; SI < Pairs; ++SI) 3729 CompleteLoops |= Pair[SI].Loops; 3730 for (unsigned II = 1; II <= CommonLevels; ++II) 3731 if (CompleteLoops[II]) 3732 Result.DV[II - 1].Scalar = false; 3733 3734 if (PossiblyLoopIndependent) { 3735 // Make sure the LoopIndependent flag is set correctly. 3736 // All directions must include equal, otherwise no 3737 // loop-independent dependence is possible. 3738 for (unsigned II = 1; II <= CommonLevels; ++II) { 3739 if (!(Result.getDirection(II) & Dependence::DVEntry::EQ)) { 3740 Result.LoopIndependent = false; 3741 break; 3742 } 3743 } 3744 } 3745 else { 3746 // On the other hand, if all directions are equal and there's no 3747 // loop-independent dependence possible, then no dependence exists. 3748 bool AllEqual = true; 3749 for (unsigned II = 1; II <= CommonLevels; ++II) { 3750 if (Result.getDirection(II) != Dependence::DVEntry::EQ) { 3751 AllEqual = false; 3752 break; 3753 } 3754 } 3755 if (AllEqual) 3756 return nullptr; 3757 } 3758 3759 return make_unique<FullDependence>(std::move(Result)); 3760 } 3761 3762 3763 3764 //===----------------------------------------------------------------------===// 3765 // getSplitIteration - 3766 // Rather than spend rarely-used space recording the splitting iteration 3767 // during the Weak-Crossing SIV test, we re-compute it on demand. 3768 // The re-computation is basically a repeat of the entire dependence test, 3769 // though simplified since we know that the dependence exists. 3770 // It's tedious, since we must go through all propagations, etc. 3771 // 3772 // Care is required to keep this code up to date with respect to the routine 3773 // above, depends(). 3774 // 3775 // Generally, the dependence analyzer will be used to build 3776 // a dependence graph for a function (basically a map from instructions 3777 // to dependences). Looking for cycles in the graph shows us loops 3778 // that cannot be trivially vectorized/parallelized. 3779 // 3780 // We can try to improve the situation by examining all the dependences 3781 // that make up the cycle, looking for ones we can break. 3782 // Sometimes, peeling the first or last iteration of a loop will break 3783 // dependences, and we've got flags for those possibilities. 3784 // Sometimes, splitting a loop at some other iteration will do the trick, 3785 // and we've got a flag for that case. Rather than waste the space to 3786 // record the exact iteration (since we rarely know), we provide 3787 // a method that calculates the iteration. It's a drag that it must work 3788 // from scratch, but wonderful in that it's possible. 3789 // 3790 // Here's an example: 3791 // 3792 // for (i = 0; i < 10; i++) 3793 // A[i] = ... 3794 // ... = A[11 - i] 3795 // 3796 // There's a loop-carried flow dependence from the store to the load, 3797 // found by the weak-crossing SIV test. The dependence will have a flag, 3798 // indicating that the dependence can be broken by splitting the loop. 3799 // Calling getSplitIteration will return 5. 3800 // Splitting the loop breaks the dependence, like so: 3801 // 3802 // for (i = 0; i <= 5; i++) 3803 // A[i] = ... 3804 // ... = A[11 - i] 3805 // for (i = 6; i < 10; i++) 3806 // A[i] = ... 3807 // ... = A[11 - i] 3808 // 3809 // breaks the dependence and allows us to vectorize/parallelize 3810 // both loops. 3811 const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep, 3812 unsigned SplitLevel) { 3813 assert(Dep.isSplitable(SplitLevel) && 3814 "Dep should be splitable at SplitLevel"); 3815 Instruction *Src = Dep.getSrc(); 3816 Instruction *Dst = Dep.getDst(); 3817 assert(Src->mayReadFromMemory() || Src->mayWriteToMemory()); 3818 assert(Dst->mayReadFromMemory() || Dst->mayWriteToMemory()); 3819 assert(isLoadOrStore(Src)); 3820 assert(isLoadOrStore(Dst)); 3821 Value *SrcPtr = getLoadStorePointerOperand(Src); 3822 Value *DstPtr = getLoadStorePointerOperand(Dst); 3823 assert(underlyingObjectsAlias(AA, F->getParent()->getDataLayout(), 3824 MemoryLocation::get(Dst), 3825 MemoryLocation::get(Src)) == MustAlias); 3826 3827 // establish loop nesting levels 3828 establishNestingLevels(Src, Dst); 3829 3830 FullDependence Result(Src, Dst, false, CommonLevels); 3831 3832 unsigned Pairs = 1; 3833 SmallVector<Subscript, 2> Pair(Pairs); 3834 const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); 3835 const SCEV *DstSCEV = SE->getSCEV(DstPtr); 3836 Pair[0].Src = SrcSCEV; 3837 Pair[0].Dst = DstSCEV; 3838 3839 if (Delinearize) { 3840 if (tryDelinearize(Src, Dst, Pair)) { 3841 LLVM_DEBUG(dbgs() << " delinearized\n"); 3842 Pairs = Pair.size(); 3843 } 3844 } 3845 3846 for (unsigned P = 0; P < Pairs; ++P) { 3847 Pair[P].Loops.resize(MaxLevels + 1); 3848 Pair[P].GroupLoops.resize(MaxLevels + 1); 3849 Pair[P].Group.resize(Pairs); 3850 removeMatchingExtensions(&Pair[P]); 3851 Pair[P].Classification = 3852 classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), 3853 Pair[P].Dst, LI->getLoopFor(Dst->getParent()), 3854 Pair[P].Loops); 3855 Pair[P].GroupLoops = Pair[P].Loops; 3856 Pair[P].Group.set(P); 3857 } 3858 3859 SmallBitVector Separable(Pairs); 3860 SmallBitVector Coupled(Pairs); 3861 3862 // partition subscripts into separable and minimally-coupled groups 3863 for (unsigned SI = 0; SI < Pairs; ++SI) { 3864 if (Pair[SI].Classification == Subscript::NonLinear) { 3865 // ignore these, but collect loops for later 3866 collectCommonLoops(Pair[SI].Src, 3867 LI->getLoopFor(Src->getParent()), 3868 Pair[SI].Loops); 3869 collectCommonLoops(Pair[SI].Dst, 3870 LI->getLoopFor(Dst->getParent()), 3871 Pair[SI].Loops); 3872 Result.Consistent = false; 3873 } 3874 else if (Pair[SI].Classification == Subscript::ZIV) 3875 Separable.set(SI); 3876 else { 3877 // SIV, RDIV, or MIV, so check for coupled group 3878 bool Done = true; 3879 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { 3880 SmallBitVector Intersection = Pair[SI].GroupLoops; 3881 Intersection &= Pair[SJ].GroupLoops; 3882 if (Intersection.any()) { 3883 // accumulate set of all the loops in group 3884 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; 3885 // accumulate set of all subscripts in group 3886 Pair[SJ].Group |= Pair[SI].Group; 3887 Done = false; 3888 } 3889 } 3890 if (Done) { 3891 if (Pair[SI].Group.count() == 1) 3892 Separable.set(SI); 3893 else 3894 Coupled.set(SI); 3895 } 3896 } 3897 } 3898 3899 Constraint NewConstraint; 3900 NewConstraint.setAny(SE); 3901 3902 // test separable subscripts 3903 for (unsigned SI : Separable.set_bits()) { 3904 switch (Pair[SI].Classification) { 3905 case Subscript::SIV: { 3906 unsigned Level; 3907 const SCEV *SplitIter = nullptr; 3908 (void) testSIV(Pair[SI].Src, Pair[SI].Dst, Level, 3909 Result, NewConstraint, SplitIter); 3910 if (Level == SplitLevel) { 3911 assert(SplitIter != nullptr); 3912 return SplitIter; 3913 } 3914 break; 3915 } 3916 case Subscript::ZIV: 3917 case Subscript::RDIV: 3918 case Subscript::MIV: 3919 break; 3920 default: 3921 llvm_unreachable("subscript has unexpected classification"); 3922 } 3923 } 3924 3925 if (Coupled.count()) { 3926 // test coupled subscript groups 3927 SmallVector<Constraint, 4> Constraints(MaxLevels + 1); 3928 for (unsigned II = 0; II <= MaxLevels; ++II) 3929 Constraints[II].setAny(SE); 3930 for (unsigned SI : Coupled.set_bits()) { 3931 SmallBitVector Group(Pair[SI].Group); 3932 SmallBitVector Sivs(Pairs); 3933 SmallBitVector Mivs(Pairs); 3934 SmallBitVector ConstrainedLevels(MaxLevels + 1); 3935 for (unsigned SJ : Group.set_bits()) { 3936 if (Pair[SJ].Classification == Subscript::SIV) 3937 Sivs.set(SJ); 3938 else 3939 Mivs.set(SJ); 3940 } 3941 while (Sivs.any()) { 3942 bool Changed = false; 3943 for (unsigned SJ : Sivs.set_bits()) { 3944 // SJ is an SIV subscript that's part of the current coupled group 3945 unsigned Level; 3946 const SCEV *SplitIter = nullptr; 3947 (void) testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, 3948 Result, NewConstraint, SplitIter); 3949 if (Level == SplitLevel && SplitIter) 3950 return SplitIter; 3951 ConstrainedLevels.set(Level); 3952 if (intersectConstraints(&Constraints[Level], &NewConstraint)) 3953 Changed = true; 3954 Sivs.reset(SJ); 3955 } 3956 if (Changed) { 3957 // propagate, possibly creating new SIVs and ZIVs 3958 for (unsigned SJ : Mivs.set_bits()) { 3959 // SJ is an MIV subscript that's part of the current coupled group 3960 if (propagate(Pair[SJ].Src, Pair[SJ].Dst, 3961 Pair[SJ].Loops, Constraints, Result.Consistent)) { 3962 Pair[SJ].Classification = 3963 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), 3964 Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), 3965 Pair[SJ].Loops); 3966 switch (Pair[SJ].Classification) { 3967 case Subscript::ZIV: 3968 Mivs.reset(SJ); 3969 break; 3970 case Subscript::SIV: 3971 Sivs.set(SJ); 3972 Mivs.reset(SJ); 3973 break; 3974 case Subscript::RDIV: 3975 case Subscript::MIV: 3976 break; 3977 default: 3978 llvm_unreachable("bad subscript classification"); 3979 } 3980 } 3981 } 3982 } 3983 } 3984 } 3985 } 3986 llvm_unreachable("somehow reached end of routine"); 3987 return nullptr; 3988 } 3989