1 //===-- DependenceAnalysis.cpp - DA Implementation --------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // DependenceAnalysis is an LLVM pass that analyses dependences between memory 11 // accesses. Currently, it is an (incomplete) implementation of the approach 12 // described in 13 // 14 // Practical Dependence Testing 15 // Goff, Kennedy, Tseng 16 // PLDI 1991 17 // 18 // There's a single entry point that analyzes the dependence between a pair 19 // of memory references in a function, returning either NULL, for no dependence, 20 // or a more-or-less detailed description of the dependence between them. 21 // 22 // Currently, the implementation cannot propagate constraints between 23 // coupled RDIV subscripts and lacks a multi-subscript MIV test. 24 // Both of these are conservative weaknesses; 25 // that is, not a source of correctness problems. 26 // 27 // Since Clang linearizes some array subscripts, the dependence 28 // analysis is using SCEV->delinearize to recover the representation of multiple 29 // subscripts, and thus avoid the more expensive and less precise MIV tests. The 30 // delinearization is controlled by the flag -da-delinearize. 31 // 32 // We should pay some careful attention to the possibility of integer overflow 33 // in the implementation of the various tests. This could happen with Add, 34 // Subtract, or Multiply, with both APInt's and SCEV's. 35 // 36 // Some non-linear subscript pairs can be handled by the GCD test 37 // (and perhaps other tests). 38 // Should explore how often these things occur. 39 // 40 // Finally, it seems like certain test cases expose weaknesses in the SCEV 41 // simplification, especially in the handling of sign and zero extensions. 42 // It could be useful to spend time exploring these. 43 // 44 // Please note that this is work in progress and the interface is subject to 45 // change. 46 // 47 //===----------------------------------------------------------------------===// 48 // // 49 // In memory of Ken Kennedy, 1945 - 2007 // 50 // // 51 //===----------------------------------------------------------------------===// 52 53 #include "llvm/Analysis/DependenceAnalysis.h" 54 #include "llvm/ADT/STLExtras.h" 55 #include "llvm/ADT/Statistic.h" 56 #include "llvm/Analysis/AliasAnalysis.h" 57 #include "llvm/Analysis/LoopInfo.h" 58 #include "llvm/Analysis/ScalarEvolution.h" 59 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 60 #include "llvm/Analysis/ValueTracking.h" 61 #include "llvm/Config/llvm-config.h" 62 #include "llvm/IR/InstIterator.h" 63 #include "llvm/IR/Module.h" 64 #include "llvm/IR/Operator.h" 65 #include "llvm/Support/CommandLine.h" 66 #include "llvm/Support/Debug.h" 67 #include "llvm/Support/ErrorHandling.h" 68 #include "llvm/Support/raw_ostream.h" 69 70 using namespace llvm; 71 72 #define DEBUG_TYPE "da" 73 74 //===----------------------------------------------------------------------===// 75 // statistics 76 77 STATISTIC(TotalArrayPairs, "Array pairs tested"); 78 STATISTIC(SeparableSubscriptPairs, "Separable subscript pairs"); 79 STATISTIC(CoupledSubscriptPairs, "Coupled subscript pairs"); 80 STATISTIC(NonlinearSubscriptPairs, "Nonlinear subscript pairs"); 81 STATISTIC(ZIVapplications, "ZIV applications"); 82 STATISTIC(ZIVindependence, "ZIV independence"); 83 STATISTIC(StrongSIVapplications, "Strong SIV applications"); 84 STATISTIC(StrongSIVsuccesses, "Strong SIV successes"); 85 STATISTIC(StrongSIVindependence, "Strong SIV independence"); 86 STATISTIC(WeakCrossingSIVapplications, "Weak-Crossing SIV applications"); 87 STATISTIC(WeakCrossingSIVsuccesses, "Weak-Crossing SIV successes"); 88 STATISTIC(WeakCrossingSIVindependence, "Weak-Crossing SIV independence"); 89 STATISTIC(ExactSIVapplications, "Exact SIV applications"); 90 STATISTIC(ExactSIVsuccesses, "Exact SIV successes"); 91 STATISTIC(ExactSIVindependence, "Exact SIV independence"); 92 STATISTIC(WeakZeroSIVapplications, "Weak-Zero SIV applications"); 93 STATISTIC(WeakZeroSIVsuccesses, "Weak-Zero SIV successes"); 94 STATISTIC(WeakZeroSIVindependence, "Weak-Zero SIV independence"); 95 STATISTIC(ExactRDIVapplications, "Exact RDIV applications"); 96 STATISTIC(ExactRDIVindependence, "Exact RDIV independence"); 97 STATISTIC(SymbolicRDIVapplications, "Symbolic RDIV applications"); 98 STATISTIC(SymbolicRDIVindependence, "Symbolic RDIV independence"); 99 STATISTIC(DeltaApplications, "Delta applications"); 100 STATISTIC(DeltaSuccesses, "Delta successes"); 101 STATISTIC(DeltaIndependence, "Delta independence"); 102 STATISTIC(DeltaPropagations, "Delta propagations"); 103 STATISTIC(GCDapplications, "GCD applications"); 104 STATISTIC(GCDsuccesses, "GCD successes"); 105 STATISTIC(GCDindependence, "GCD independence"); 106 STATISTIC(BanerjeeApplications, "Banerjee applications"); 107 STATISTIC(BanerjeeIndependence, "Banerjee independence"); 108 STATISTIC(BanerjeeSuccesses, "Banerjee successes"); 109 110 static cl::opt<bool> 111 Delinearize("da-delinearize", cl::init(false), cl::Hidden, cl::ZeroOrMore, 112 cl::desc("Try to delinearize array references.")); 113 114 //===----------------------------------------------------------------------===// 115 // basics 116 117 DependenceAnalysis::Result 118 DependenceAnalysis::run(Function &F, FunctionAnalysisManager &FAM) { 119 auto &AA = FAM.getResult<AAManager>(F); 120 auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F); 121 auto &LI = FAM.getResult<LoopAnalysis>(F); 122 return DependenceInfo(&F, &AA, &SE, &LI); 123 } 124 125 AnalysisKey DependenceAnalysis::Key; 126 127 INITIALIZE_PASS_BEGIN(DependenceAnalysisWrapperPass, "da", 128 "Dependence Analysis", true, true) 129 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 130 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) 131 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 132 INITIALIZE_PASS_END(DependenceAnalysisWrapperPass, "da", "Dependence Analysis", 133 true, true) 134 135 char DependenceAnalysisWrapperPass::ID = 0; 136 137 FunctionPass *llvm::createDependenceAnalysisWrapperPass() { 138 return new DependenceAnalysisWrapperPass(); 139 } 140 141 bool DependenceAnalysisWrapperPass::runOnFunction(Function &F) { 142 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); 143 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 144 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 145 info.reset(new DependenceInfo(&F, &AA, &SE, &LI)); 146 return false; 147 } 148 149 DependenceInfo &DependenceAnalysisWrapperPass::getDI() const { return *info; } 150 151 void DependenceAnalysisWrapperPass::releaseMemory() { info.reset(); } 152 153 void DependenceAnalysisWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { 154 AU.setPreservesAll(); 155 AU.addRequiredTransitive<AAResultsWrapperPass>(); 156 AU.addRequiredTransitive<ScalarEvolutionWrapperPass>(); 157 AU.addRequiredTransitive<LoopInfoWrapperPass>(); 158 } 159 160 161 // Used to test the dependence analyzer. 162 // Looks through the function, noting loads and stores. 163 // Calls depends() on every possible pair and prints out the result. 164 // Ignores all other instructions. 165 static void dumpExampleDependence(raw_ostream &OS, DependenceInfo *DA) { 166 auto *F = DA->getFunction(); 167 for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F); SrcI != SrcE; 168 ++SrcI) { 169 if (isa<StoreInst>(*SrcI) || isa<LoadInst>(*SrcI)) { 170 for (inst_iterator DstI = SrcI, DstE = inst_end(F); 171 DstI != DstE; ++DstI) { 172 if (isa<StoreInst>(*DstI) || isa<LoadInst>(*DstI)) { 173 OS << "da analyze - "; 174 if (auto D = DA->depends(&*SrcI, &*DstI, true)) { 175 D->dump(OS); 176 for (unsigned Level = 1; Level <= D->getLevels(); Level++) { 177 if (D->isSplitable(Level)) { 178 OS << "da analyze - split level = " << Level; 179 OS << ", iteration = " << *DA->getSplitIteration(*D, Level); 180 OS << "!\n"; 181 } 182 } 183 } 184 else 185 OS << "none!\n"; 186 } 187 } 188 } 189 } 190 } 191 192 void DependenceAnalysisWrapperPass::print(raw_ostream &OS, 193 const Module *) const { 194 dumpExampleDependence(OS, info.get()); 195 } 196 197 //===----------------------------------------------------------------------===// 198 // Dependence methods 199 200 // Returns true if this is an input dependence. 201 bool Dependence::isInput() const { 202 return Src->mayReadFromMemory() && Dst->mayReadFromMemory(); 203 } 204 205 206 // Returns true if this is an output dependence. 207 bool Dependence::isOutput() const { 208 return Src->mayWriteToMemory() && Dst->mayWriteToMemory(); 209 } 210 211 212 // Returns true if this is an flow (aka true) dependence. 213 bool Dependence::isFlow() const { 214 return Src->mayWriteToMemory() && Dst->mayReadFromMemory(); 215 } 216 217 218 // Returns true if this is an anti dependence. 219 bool Dependence::isAnti() const { 220 return Src->mayReadFromMemory() && Dst->mayWriteToMemory(); 221 } 222 223 224 // Returns true if a particular level is scalar; that is, 225 // if no subscript in the source or destination mention the induction 226 // variable associated with the loop at this level. 227 // Leave this out of line, so it will serve as a virtual method anchor 228 bool Dependence::isScalar(unsigned level) const { 229 return false; 230 } 231 232 233 //===----------------------------------------------------------------------===// 234 // FullDependence methods 235 236 FullDependence::FullDependence(Instruction *Source, Instruction *Destination, 237 bool PossiblyLoopIndependent, 238 unsigned CommonLevels) 239 : Dependence(Source, Destination), Levels(CommonLevels), 240 LoopIndependent(PossiblyLoopIndependent) { 241 Consistent = true; 242 if (CommonLevels) 243 DV = make_unique<DVEntry[]>(CommonLevels); 244 } 245 246 // The rest are simple getters that hide the implementation. 247 248 // getDirection - Returns the direction associated with a particular level. 249 unsigned FullDependence::getDirection(unsigned Level) const { 250 assert(0 < Level && Level <= Levels && "Level out of range"); 251 return DV[Level - 1].Direction; 252 } 253 254 255 // Returns the distance (or NULL) associated with a particular level. 256 const SCEV *FullDependence::getDistance(unsigned Level) const { 257 assert(0 < Level && Level <= Levels && "Level out of range"); 258 return DV[Level - 1].Distance; 259 } 260 261 262 // Returns true if a particular level is scalar; that is, 263 // if no subscript in the source or destination mention the induction 264 // variable associated with the loop at this level. 265 bool FullDependence::isScalar(unsigned Level) const { 266 assert(0 < Level && Level <= Levels && "Level out of range"); 267 return DV[Level - 1].Scalar; 268 } 269 270 271 // Returns true if peeling the first iteration from this loop 272 // will break this dependence. 273 bool FullDependence::isPeelFirst(unsigned Level) const { 274 assert(0 < Level && Level <= Levels && "Level out of range"); 275 return DV[Level - 1].PeelFirst; 276 } 277 278 279 // Returns true if peeling the last iteration from this loop 280 // will break this dependence. 281 bool FullDependence::isPeelLast(unsigned Level) const { 282 assert(0 < Level && Level <= Levels && "Level out of range"); 283 return DV[Level - 1].PeelLast; 284 } 285 286 287 // Returns true if splitting this loop will break the dependence. 288 bool FullDependence::isSplitable(unsigned Level) const { 289 assert(0 < Level && Level <= Levels && "Level out of range"); 290 return DV[Level - 1].Splitable; 291 } 292 293 294 //===----------------------------------------------------------------------===// 295 // DependenceInfo::Constraint methods 296 297 // If constraint is a point <X, Y>, returns X. 298 // Otherwise assert. 299 const SCEV *DependenceInfo::Constraint::getX() const { 300 assert(Kind == Point && "Kind should be Point"); 301 return A; 302 } 303 304 305 // If constraint is a point <X, Y>, returns Y. 306 // Otherwise assert. 307 const SCEV *DependenceInfo::Constraint::getY() const { 308 assert(Kind == Point && "Kind should be Point"); 309 return B; 310 } 311 312 313 // If constraint is a line AX + BY = C, returns A. 314 // Otherwise assert. 315 const SCEV *DependenceInfo::Constraint::getA() const { 316 assert((Kind == Line || Kind == Distance) && 317 "Kind should be Line (or Distance)"); 318 return A; 319 } 320 321 322 // If constraint is a line AX + BY = C, returns B. 323 // Otherwise assert. 324 const SCEV *DependenceInfo::Constraint::getB() const { 325 assert((Kind == Line || Kind == Distance) && 326 "Kind should be Line (or Distance)"); 327 return B; 328 } 329 330 331 // If constraint is a line AX + BY = C, returns C. 332 // Otherwise assert. 333 const SCEV *DependenceInfo::Constraint::getC() const { 334 assert((Kind == Line || Kind == Distance) && 335 "Kind should be Line (or Distance)"); 336 return C; 337 } 338 339 340 // If constraint is a distance, returns D. 341 // Otherwise assert. 342 const SCEV *DependenceInfo::Constraint::getD() const { 343 assert(Kind == Distance && "Kind should be Distance"); 344 return SE->getNegativeSCEV(C); 345 } 346 347 348 // Returns the loop associated with this constraint. 349 const Loop *DependenceInfo::Constraint::getAssociatedLoop() const { 350 assert((Kind == Distance || Kind == Line || Kind == Point) && 351 "Kind should be Distance, Line, or Point"); 352 return AssociatedLoop; 353 } 354 355 void DependenceInfo::Constraint::setPoint(const SCEV *X, const SCEV *Y, 356 const Loop *CurLoop) { 357 Kind = Point; 358 A = X; 359 B = Y; 360 AssociatedLoop = CurLoop; 361 } 362 363 void DependenceInfo::Constraint::setLine(const SCEV *AA, const SCEV *BB, 364 const SCEV *CC, const Loop *CurLoop) { 365 Kind = Line; 366 A = AA; 367 B = BB; 368 C = CC; 369 AssociatedLoop = CurLoop; 370 } 371 372 void DependenceInfo::Constraint::setDistance(const SCEV *D, 373 const Loop *CurLoop) { 374 Kind = Distance; 375 A = SE->getOne(D->getType()); 376 B = SE->getNegativeSCEV(A); 377 C = SE->getNegativeSCEV(D); 378 AssociatedLoop = CurLoop; 379 } 380 381 void DependenceInfo::Constraint::setEmpty() { Kind = Empty; } 382 383 void DependenceInfo::Constraint::setAny(ScalarEvolution *NewSE) { 384 SE = NewSE; 385 Kind = Any; 386 } 387 388 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 389 // For debugging purposes. Dumps the constraint out to OS. 390 LLVM_DUMP_METHOD void DependenceInfo::Constraint::dump(raw_ostream &OS) const { 391 if (isEmpty()) 392 OS << " Empty\n"; 393 else if (isAny()) 394 OS << " Any\n"; 395 else if (isPoint()) 396 OS << " Point is <" << *getX() << ", " << *getY() << ">\n"; 397 else if (isDistance()) 398 OS << " Distance is " << *getD() << 399 " (" << *getA() << "*X + " << *getB() << "*Y = " << *getC() << ")\n"; 400 else if (isLine()) 401 OS << " Line is " << *getA() << "*X + " << 402 *getB() << "*Y = " << *getC() << "\n"; 403 else 404 llvm_unreachable("unknown constraint type in Constraint::dump"); 405 } 406 #endif 407 408 409 // Updates X with the intersection 410 // of the Constraints X and Y. Returns true if X has changed. 411 // Corresponds to Figure 4 from the paper 412 // 413 // Practical Dependence Testing 414 // Goff, Kennedy, Tseng 415 // PLDI 1991 416 bool DependenceInfo::intersectConstraints(Constraint *X, const Constraint *Y) { 417 ++DeltaApplications; 418 LLVM_DEBUG(dbgs() << "\tintersect constraints\n"); 419 LLVM_DEBUG(dbgs() << "\t X ="; X->dump(dbgs())); 420 LLVM_DEBUG(dbgs() << "\t Y ="; Y->dump(dbgs())); 421 assert(!Y->isPoint() && "Y must not be a Point"); 422 if (X->isAny()) { 423 if (Y->isAny()) 424 return false; 425 *X = *Y; 426 return true; 427 } 428 if (X->isEmpty()) 429 return false; 430 if (Y->isEmpty()) { 431 X->setEmpty(); 432 return true; 433 } 434 435 if (X->isDistance() && Y->isDistance()) { 436 LLVM_DEBUG(dbgs() << "\t intersect 2 distances\n"); 437 if (isKnownPredicate(CmpInst::ICMP_EQ, X->getD(), Y->getD())) 438 return false; 439 if (isKnownPredicate(CmpInst::ICMP_NE, X->getD(), Y->getD())) { 440 X->setEmpty(); 441 ++DeltaSuccesses; 442 return true; 443 } 444 // Hmmm, interesting situation. 445 // I guess if either is constant, keep it and ignore the other. 446 if (isa<SCEVConstant>(Y->getD())) { 447 *X = *Y; 448 return true; 449 } 450 return false; 451 } 452 453 // At this point, the pseudo-code in Figure 4 of the paper 454 // checks if (X->isPoint() && Y->isPoint()). 455 // This case can't occur in our implementation, 456 // since a Point can only arise as the result of intersecting 457 // two Line constraints, and the right-hand value, Y, is never 458 // the result of an intersection. 459 assert(!(X->isPoint() && Y->isPoint()) && 460 "We shouldn't ever see X->isPoint() && Y->isPoint()"); 461 462 if (X->isLine() && Y->isLine()) { 463 LLVM_DEBUG(dbgs() << "\t intersect 2 lines\n"); 464 const SCEV *Prod1 = SE->getMulExpr(X->getA(), Y->getB()); 465 const SCEV *Prod2 = SE->getMulExpr(X->getB(), Y->getA()); 466 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) { 467 // slopes are equal, so lines are parallel 468 LLVM_DEBUG(dbgs() << "\t\tsame slope\n"); 469 Prod1 = SE->getMulExpr(X->getC(), Y->getB()); 470 Prod2 = SE->getMulExpr(X->getB(), Y->getC()); 471 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) 472 return false; 473 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { 474 X->setEmpty(); 475 ++DeltaSuccesses; 476 return true; 477 } 478 return false; 479 } 480 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { 481 // slopes differ, so lines intersect 482 LLVM_DEBUG(dbgs() << "\t\tdifferent slopes\n"); 483 const SCEV *C1B2 = SE->getMulExpr(X->getC(), Y->getB()); 484 const SCEV *C1A2 = SE->getMulExpr(X->getC(), Y->getA()); 485 const SCEV *C2B1 = SE->getMulExpr(Y->getC(), X->getB()); 486 const SCEV *C2A1 = SE->getMulExpr(Y->getC(), X->getA()); 487 const SCEV *A1B2 = SE->getMulExpr(X->getA(), Y->getB()); 488 const SCEV *A2B1 = SE->getMulExpr(Y->getA(), X->getB()); 489 const SCEVConstant *C1A2_C2A1 = 490 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1A2, C2A1)); 491 const SCEVConstant *C1B2_C2B1 = 492 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1B2, C2B1)); 493 const SCEVConstant *A1B2_A2B1 = 494 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A1B2, A2B1)); 495 const SCEVConstant *A2B1_A1B2 = 496 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A2B1, A1B2)); 497 if (!C1B2_C2B1 || !C1A2_C2A1 || 498 !A1B2_A2B1 || !A2B1_A1B2) 499 return false; 500 APInt Xtop = C1B2_C2B1->getAPInt(); 501 APInt Xbot = A1B2_A2B1->getAPInt(); 502 APInt Ytop = C1A2_C2A1->getAPInt(); 503 APInt Ybot = A2B1_A1B2->getAPInt(); 504 LLVM_DEBUG(dbgs() << "\t\tXtop = " << Xtop << "\n"); 505 LLVM_DEBUG(dbgs() << "\t\tXbot = " << Xbot << "\n"); 506 LLVM_DEBUG(dbgs() << "\t\tYtop = " << Ytop << "\n"); 507 LLVM_DEBUG(dbgs() << "\t\tYbot = " << Ybot << "\n"); 508 APInt Xq = Xtop; // these need to be initialized, even 509 APInt Xr = Xtop; // though they're just going to be overwritten 510 APInt::sdivrem(Xtop, Xbot, Xq, Xr); 511 APInt Yq = Ytop; 512 APInt Yr = Ytop; 513 APInt::sdivrem(Ytop, Ybot, Yq, Yr); 514 if (Xr != 0 || Yr != 0) { 515 X->setEmpty(); 516 ++DeltaSuccesses; 517 return true; 518 } 519 LLVM_DEBUG(dbgs() << "\t\tX = " << Xq << ", Y = " << Yq << "\n"); 520 if (Xq.slt(0) || Yq.slt(0)) { 521 X->setEmpty(); 522 ++DeltaSuccesses; 523 return true; 524 } 525 if (const SCEVConstant *CUB = 526 collectConstantUpperBound(X->getAssociatedLoop(), Prod1->getType())) { 527 const APInt &UpperBound = CUB->getAPInt(); 528 LLVM_DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n"); 529 if (Xq.sgt(UpperBound) || Yq.sgt(UpperBound)) { 530 X->setEmpty(); 531 ++DeltaSuccesses; 532 return true; 533 } 534 } 535 X->setPoint(SE->getConstant(Xq), 536 SE->getConstant(Yq), 537 X->getAssociatedLoop()); 538 ++DeltaSuccesses; 539 return true; 540 } 541 return false; 542 } 543 544 // if (X->isLine() && Y->isPoint()) This case can't occur. 545 assert(!(X->isLine() && Y->isPoint()) && "This case should never occur"); 546 547 if (X->isPoint() && Y->isLine()) { 548 LLVM_DEBUG(dbgs() << "\t intersect Point and Line\n"); 549 const SCEV *A1X1 = SE->getMulExpr(Y->getA(), X->getX()); 550 const SCEV *B1Y1 = SE->getMulExpr(Y->getB(), X->getY()); 551 const SCEV *Sum = SE->getAddExpr(A1X1, B1Y1); 552 if (isKnownPredicate(CmpInst::ICMP_EQ, Sum, Y->getC())) 553 return false; 554 if (isKnownPredicate(CmpInst::ICMP_NE, Sum, Y->getC())) { 555 X->setEmpty(); 556 ++DeltaSuccesses; 557 return true; 558 } 559 return false; 560 } 561 562 llvm_unreachable("shouldn't reach the end of Constraint intersection"); 563 return false; 564 } 565 566 567 //===----------------------------------------------------------------------===// 568 // DependenceInfo methods 569 570 // For debugging purposes. Dumps a dependence to OS. 571 void Dependence::dump(raw_ostream &OS) const { 572 bool Splitable = false; 573 if (isConfused()) 574 OS << "confused"; 575 else { 576 if (isConsistent()) 577 OS << "consistent "; 578 if (isFlow()) 579 OS << "flow"; 580 else if (isOutput()) 581 OS << "output"; 582 else if (isAnti()) 583 OS << "anti"; 584 else if (isInput()) 585 OS << "input"; 586 unsigned Levels = getLevels(); 587 OS << " ["; 588 for (unsigned II = 1; II <= Levels; ++II) { 589 if (isSplitable(II)) 590 Splitable = true; 591 if (isPeelFirst(II)) 592 OS << 'p'; 593 const SCEV *Distance = getDistance(II); 594 if (Distance) 595 OS << *Distance; 596 else if (isScalar(II)) 597 OS << "S"; 598 else { 599 unsigned Direction = getDirection(II); 600 if (Direction == DVEntry::ALL) 601 OS << "*"; 602 else { 603 if (Direction & DVEntry::LT) 604 OS << "<"; 605 if (Direction & DVEntry::EQ) 606 OS << "="; 607 if (Direction & DVEntry::GT) 608 OS << ">"; 609 } 610 } 611 if (isPeelLast(II)) 612 OS << 'p'; 613 if (II < Levels) 614 OS << " "; 615 } 616 if (isLoopIndependent()) 617 OS << "|<"; 618 OS << "]"; 619 if (Splitable) 620 OS << " splitable"; 621 } 622 OS << "!\n"; 623 } 624 625 // Returns NoAlias/MayAliass/MustAlias for two memory locations based upon their 626 // underlaying objects. If LocA and LocB are known to not alias (for any reason: 627 // tbaa, non-overlapping regions etc), then it is known there is no dependecy. 628 // Otherwise the underlying objects are checked to see if they point to 629 // different identifiable objects. 630 static AliasResult underlyingObjectsAlias(AliasAnalysis *AA, 631 const DataLayout &DL, 632 const MemoryLocation &LocA, 633 const MemoryLocation &LocB) { 634 // Check the original locations (minus size) for noalias, which can happen for 635 // tbaa, incompatible underlying object locations, etc. 636 MemoryLocation LocAS(LocA.Ptr, MemoryLocation::UnknownSize, LocA.AATags); 637 MemoryLocation LocBS(LocB.Ptr, MemoryLocation::UnknownSize, LocB.AATags); 638 if (AA->alias(LocAS, LocBS) == NoAlias) 639 return NoAlias; 640 641 // Check the underlying objects are the same 642 const Value *AObj = GetUnderlyingObject(LocA.Ptr, DL); 643 const Value *BObj = GetUnderlyingObject(LocB.Ptr, DL); 644 645 // If the underlying objects are the same, they must alias 646 if (AObj == BObj) 647 return MustAlias; 648 649 // We may have hit the recursion limit for underlying objects, or have 650 // underlying objects where we don't know they will alias. 651 if (!isIdentifiedObject(AObj) || !isIdentifiedObject(BObj)) 652 return MayAlias; 653 654 // Otherwise we know the objects are different and both identified objects so 655 // must not alias. 656 return NoAlias; 657 } 658 659 660 // Returns true if the load or store can be analyzed. Atomic and volatile 661 // operations have properties which this analysis does not understand. 662 static 663 bool isLoadOrStore(const Instruction *I) { 664 if (const LoadInst *LI = dyn_cast<LoadInst>(I)) 665 return LI->isUnordered(); 666 else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) 667 return SI->isUnordered(); 668 return false; 669 } 670 671 672 // Examines the loop nesting of the Src and Dst 673 // instructions and establishes their shared loops. Sets the variables 674 // CommonLevels, SrcLevels, and MaxLevels. 675 // The source and destination instructions needn't be contained in the same 676 // loop. The routine establishNestingLevels finds the level of most deeply 677 // nested loop that contains them both, CommonLevels. An instruction that's 678 // not contained in a loop is at level = 0. MaxLevels is equal to the level 679 // of the source plus the level of the destination, minus CommonLevels. 680 // This lets us allocate vectors MaxLevels in length, with room for every 681 // distinct loop referenced in both the source and destination subscripts. 682 // The variable SrcLevels is the nesting depth of the source instruction. 683 // It's used to help calculate distinct loops referenced by the destination. 684 // Here's the map from loops to levels: 685 // 0 - unused 686 // 1 - outermost common loop 687 // ... - other common loops 688 // CommonLevels - innermost common loop 689 // ... - loops containing Src but not Dst 690 // SrcLevels - innermost loop containing Src but not Dst 691 // ... - loops containing Dst but not Src 692 // MaxLevels - innermost loops containing Dst but not Src 693 // Consider the follow code fragment: 694 // for (a = ...) { 695 // for (b = ...) { 696 // for (c = ...) { 697 // for (d = ...) { 698 // A[] = ...; 699 // } 700 // } 701 // for (e = ...) { 702 // for (f = ...) { 703 // for (g = ...) { 704 // ... = A[]; 705 // } 706 // } 707 // } 708 // } 709 // } 710 // If we're looking at the possibility of a dependence between the store 711 // to A (the Src) and the load from A (the Dst), we'll note that they 712 // have 2 loops in common, so CommonLevels will equal 2 and the direction 713 // vector for Result will have 2 entries. SrcLevels = 4 and MaxLevels = 7. 714 // A map from loop names to loop numbers would look like 715 // a - 1 716 // b - 2 = CommonLevels 717 // c - 3 718 // d - 4 = SrcLevels 719 // e - 5 720 // f - 6 721 // g - 7 = MaxLevels 722 void DependenceInfo::establishNestingLevels(const Instruction *Src, 723 const Instruction *Dst) { 724 const BasicBlock *SrcBlock = Src->getParent(); 725 const BasicBlock *DstBlock = Dst->getParent(); 726 unsigned SrcLevel = LI->getLoopDepth(SrcBlock); 727 unsigned DstLevel = LI->getLoopDepth(DstBlock); 728 const Loop *SrcLoop = LI->getLoopFor(SrcBlock); 729 const Loop *DstLoop = LI->getLoopFor(DstBlock); 730 SrcLevels = SrcLevel; 731 MaxLevels = SrcLevel + DstLevel; 732 while (SrcLevel > DstLevel) { 733 SrcLoop = SrcLoop->getParentLoop(); 734 SrcLevel--; 735 } 736 while (DstLevel > SrcLevel) { 737 DstLoop = DstLoop->getParentLoop(); 738 DstLevel--; 739 } 740 while (SrcLoop != DstLoop) { 741 SrcLoop = SrcLoop->getParentLoop(); 742 DstLoop = DstLoop->getParentLoop(); 743 SrcLevel--; 744 } 745 CommonLevels = SrcLevel; 746 MaxLevels -= CommonLevels; 747 } 748 749 750 // Given one of the loops containing the source, return 751 // its level index in our numbering scheme. 752 unsigned DependenceInfo::mapSrcLoop(const Loop *SrcLoop) const { 753 return SrcLoop->getLoopDepth(); 754 } 755 756 757 // Given one of the loops containing the destination, 758 // return its level index in our numbering scheme. 759 unsigned DependenceInfo::mapDstLoop(const Loop *DstLoop) const { 760 unsigned D = DstLoop->getLoopDepth(); 761 if (D > CommonLevels) 762 return D - CommonLevels + SrcLevels; 763 else 764 return D; 765 } 766 767 768 // Returns true if Expression is loop invariant in LoopNest. 769 bool DependenceInfo::isLoopInvariant(const SCEV *Expression, 770 const Loop *LoopNest) const { 771 if (!LoopNest) 772 return true; 773 return SE->isLoopInvariant(Expression, LoopNest) && 774 isLoopInvariant(Expression, LoopNest->getParentLoop()); 775 } 776 777 778 779 // Finds the set of loops from the LoopNest that 780 // have a level <= CommonLevels and are referred to by the SCEV Expression. 781 void DependenceInfo::collectCommonLoops(const SCEV *Expression, 782 const Loop *LoopNest, 783 SmallBitVector &Loops) const { 784 while (LoopNest) { 785 unsigned Level = LoopNest->getLoopDepth(); 786 if (Level <= CommonLevels && !SE->isLoopInvariant(Expression, LoopNest)) 787 Loops.set(Level); 788 LoopNest = LoopNest->getParentLoop(); 789 } 790 } 791 792 void DependenceInfo::unifySubscriptType(ArrayRef<Subscript *> Pairs) { 793 794 unsigned widestWidthSeen = 0; 795 Type *widestType; 796 797 // Go through each pair and find the widest bit to which we need 798 // to extend all of them. 799 for (Subscript *Pair : Pairs) { 800 const SCEV *Src = Pair->Src; 801 const SCEV *Dst = Pair->Dst; 802 IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType()); 803 IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType()); 804 if (SrcTy == nullptr || DstTy == nullptr) { 805 assert(SrcTy == DstTy && "This function only unify integer types and " 806 "expect Src and Dst share the same type " 807 "otherwise."); 808 continue; 809 } 810 if (SrcTy->getBitWidth() > widestWidthSeen) { 811 widestWidthSeen = SrcTy->getBitWidth(); 812 widestType = SrcTy; 813 } 814 if (DstTy->getBitWidth() > widestWidthSeen) { 815 widestWidthSeen = DstTy->getBitWidth(); 816 widestType = DstTy; 817 } 818 } 819 820 821 assert(widestWidthSeen > 0); 822 823 // Now extend each pair to the widest seen. 824 for (Subscript *Pair : Pairs) { 825 const SCEV *Src = Pair->Src; 826 const SCEV *Dst = Pair->Dst; 827 IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType()); 828 IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType()); 829 if (SrcTy == nullptr || DstTy == nullptr) { 830 assert(SrcTy == DstTy && "This function only unify integer types and " 831 "expect Src and Dst share the same type " 832 "otherwise."); 833 continue; 834 } 835 if (SrcTy->getBitWidth() < widestWidthSeen) 836 // Sign-extend Src to widestType 837 Pair->Src = SE->getSignExtendExpr(Src, widestType); 838 if (DstTy->getBitWidth() < widestWidthSeen) { 839 // Sign-extend Dst to widestType 840 Pair->Dst = SE->getSignExtendExpr(Dst, widestType); 841 } 842 } 843 } 844 845 // removeMatchingExtensions - Examines a subscript pair. 846 // If the source and destination are identically sign (or zero) 847 // extended, it strips off the extension in an effect to simplify 848 // the actual analysis. 849 void DependenceInfo::removeMatchingExtensions(Subscript *Pair) { 850 const SCEV *Src = Pair->Src; 851 const SCEV *Dst = Pair->Dst; 852 if ((isa<SCEVZeroExtendExpr>(Src) && isa<SCEVZeroExtendExpr>(Dst)) || 853 (isa<SCEVSignExtendExpr>(Src) && isa<SCEVSignExtendExpr>(Dst))) { 854 const SCEVCastExpr *SrcCast = cast<SCEVCastExpr>(Src); 855 const SCEVCastExpr *DstCast = cast<SCEVCastExpr>(Dst); 856 const SCEV *SrcCastOp = SrcCast->getOperand(); 857 const SCEV *DstCastOp = DstCast->getOperand(); 858 if (SrcCastOp->getType() == DstCastOp->getType()) { 859 Pair->Src = SrcCastOp; 860 Pair->Dst = DstCastOp; 861 } 862 } 863 } 864 865 866 // Examine the scev and return true iff it's linear. 867 // Collect any loops mentioned in the set of "Loops". 868 bool DependenceInfo::checkSrcSubscript(const SCEV *Src, const Loop *LoopNest, 869 SmallBitVector &Loops) { 870 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Src); 871 if (!AddRec) 872 return isLoopInvariant(Src, LoopNest); 873 const SCEV *Start = AddRec->getStart(); 874 const SCEV *Step = AddRec->getStepRecurrence(*SE); 875 const SCEV *UB = SE->getBackedgeTakenCount(AddRec->getLoop()); 876 if (!isa<SCEVCouldNotCompute>(UB)) { 877 if (SE->getTypeSizeInBits(Start->getType()) < 878 SE->getTypeSizeInBits(UB->getType())) { 879 if (!AddRec->getNoWrapFlags()) 880 return false; 881 } 882 } 883 if (!isLoopInvariant(Step, LoopNest)) 884 return false; 885 Loops.set(mapSrcLoop(AddRec->getLoop())); 886 return checkSrcSubscript(Start, LoopNest, Loops); 887 } 888 889 890 891 // Examine the scev and return true iff it's linear. 892 // Collect any loops mentioned in the set of "Loops". 893 bool DependenceInfo::checkDstSubscript(const SCEV *Dst, const Loop *LoopNest, 894 SmallBitVector &Loops) { 895 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Dst); 896 if (!AddRec) 897 return isLoopInvariant(Dst, LoopNest); 898 const SCEV *Start = AddRec->getStart(); 899 const SCEV *Step = AddRec->getStepRecurrence(*SE); 900 const SCEV *UB = SE->getBackedgeTakenCount(AddRec->getLoop()); 901 if (!isa<SCEVCouldNotCompute>(UB)) { 902 if (SE->getTypeSizeInBits(Start->getType()) < 903 SE->getTypeSizeInBits(UB->getType())) { 904 if (!AddRec->getNoWrapFlags()) 905 return false; 906 } 907 } 908 if (!isLoopInvariant(Step, LoopNest)) 909 return false; 910 Loops.set(mapDstLoop(AddRec->getLoop())); 911 return checkDstSubscript(Start, LoopNest, Loops); 912 } 913 914 915 // Examines the subscript pair (the Src and Dst SCEVs) 916 // and classifies it as either ZIV, SIV, RDIV, MIV, or Nonlinear. 917 // Collects the associated loops in a set. 918 DependenceInfo::Subscript::ClassificationKind 919 DependenceInfo::classifyPair(const SCEV *Src, const Loop *SrcLoopNest, 920 const SCEV *Dst, const Loop *DstLoopNest, 921 SmallBitVector &Loops) { 922 SmallBitVector SrcLoops(MaxLevels + 1); 923 SmallBitVector DstLoops(MaxLevels + 1); 924 if (!checkSrcSubscript(Src, SrcLoopNest, SrcLoops)) 925 return Subscript::NonLinear; 926 if (!checkDstSubscript(Dst, DstLoopNest, DstLoops)) 927 return Subscript::NonLinear; 928 Loops = SrcLoops; 929 Loops |= DstLoops; 930 unsigned N = Loops.count(); 931 if (N == 0) 932 return Subscript::ZIV; 933 if (N == 1) 934 return Subscript::SIV; 935 if (N == 2 && (SrcLoops.count() == 0 || 936 DstLoops.count() == 0 || 937 (SrcLoops.count() == 1 && DstLoops.count() == 1))) 938 return Subscript::RDIV; 939 return Subscript::MIV; 940 } 941 942 943 // A wrapper around SCEV::isKnownPredicate. 944 // Looks for cases where we're interested in comparing for equality. 945 // If both X and Y have been identically sign or zero extended, 946 // it strips off the (confusing) extensions before invoking 947 // SCEV::isKnownPredicate. Perhaps, someday, the ScalarEvolution package 948 // will be similarly updated. 949 // 950 // If SCEV::isKnownPredicate can't prove the predicate, 951 // we try simple subtraction, which seems to help in some cases 952 // involving symbolics. 953 bool DependenceInfo::isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *X, 954 const SCEV *Y) const { 955 if (Pred == CmpInst::ICMP_EQ || 956 Pred == CmpInst::ICMP_NE) { 957 if ((isa<SCEVSignExtendExpr>(X) && 958 isa<SCEVSignExtendExpr>(Y)) || 959 (isa<SCEVZeroExtendExpr>(X) && 960 isa<SCEVZeroExtendExpr>(Y))) { 961 const SCEVCastExpr *CX = cast<SCEVCastExpr>(X); 962 const SCEVCastExpr *CY = cast<SCEVCastExpr>(Y); 963 const SCEV *Xop = CX->getOperand(); 964 const SCEV *Yop = CY->getOperand(); 965 if (Xop->getType() == Yop->getType()) { 966 X = Xop; 967 Y = Yop; 968 } 969 } 970 } 971 if (SE->isKnownPredicate(Pred, X, Y)) 972 return true; 973 // If SE->isKnownPredicate can't prove the condition, 974 // we try the brute-force approach of subtracting 975 // and testing the difference. 976 // By testing with SE->isKnownPredicate first, we avoid 977 // the possibility of overflow when the arguments are constants. 978 const SCEV *Delta = SE->getMinusSCEV(X, Y); 979 switch (Pred) { 980 case CmpInst::ICMP_EQ: 981 return Delta->isZero(); 982 case CmpInst::ICMP_NE: 983 return SE->isKnownNonZero(Delta); 984 case CmpInst::ICMP_SGE: 985 return SE->isKnownNonNegative(Delta); 986 case CmpInst::ICMP_SLE: 987 return SE->isKnownNonPositive(Delta); 988 case CmpInst::ICMP_SGT: 989 return SE->isKnownPositive(Delta); 990 case CmpInst::ICMP_SLT: 991 return SE->isKnownNegative(Delta); 992 default: 993 llvm_unreachable("unexpected predicate in isKnownPredicate"); 994 } 995 } 996 997 998 // All subscripts are all the same type. 999 // Loop bound may be smaller (e.g., a char). 1000 // Should zero extend loop bound, since it's always >= 0. 1001 // This routine collects upper bound and extends or truncates if needed. 1002 // Truncating is safe when subscripts are known not to wrap. Cases without 1003 // nowrap flags should have been rejected earlier. 1004 // Return null if no bound available. 1005 const SCEV *DependenceInfo::collectUpperBound(const Loop *L, Type *T) const { 1006 if (SE->hasLoopInvariantBackedgeTakenCount(L)) { 1007 const SCEV *UB = SE->getBackedgeTakenCount(L); 1008 return SE->getTruncateOrZeroExtend(UB, T); 1009 } 1010 return nullptr; 1011 } 1012 1013 1014 // Calls collectUpperBound(), then attempts to cast it to SCEVConstant. 1015 // If the cast fails, returns NULL. 1016 const SCEVConstant *DependenceInfo::collectConstantUpperBound(const Loop *L, 1017 Type *T) const { 1018 if (const SCEV *UB = collectUpperBound(L, T)) 1019 return dyn_cast<SCEVConstant>(UB); 1020 return nullptr; 1021 } 1022 1023 1024 // testZIV - 1025 // When we have a pair of subscripts of the form [c1] and [c2], 1026 // where c1 and c2 are both loop invariant, we attack it using 1027 // the ZIV test. Basically, we test by comparing the two values, 1028 // but there are actually three possible results: 1029 // 1) the values are equal, so there's a dependence 1030 // 2) the values are different, so there's no dependence 1031 // 3) the values might be equal, so we have to assume a dependence. 1032 // 1033 // Return true if dependence disproved. 1034 bool DependenceInfo::testZIV(const SCEV *Src, const SCEV *Dst, 1035 FullDependence &Result) const { 1036 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n"); 1037 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n"); 1038 ++ZIVapplications; 1039 if (isKnownPredicate(CmpInst::ICMP_EQ, Src, Dst)) { 1040 LLVM_DEBUG(dbgs() << " provably dependent\n"); 1041 return false; // provably dependent 1042 } 1043 if (isKnownPredicate(CmpInst::ICMP_NE, Src, Dst)) { 1044 LLVM_DEBUG(dbgs() << " provably independent\n"); 1045 ++ZIVindependence; 1046 return true; // provably independent 1047 } 1048 LLVM_DEBUG(dbgs() << " possibly dependent\n"); 1049 Result.Consistent = false; 1050 return false; // possibly dependent 1051 } 1052 1053 1054 // strongSIVtest - 1055 // From the paper, Practical Dependence Testing, Section 4.2.1 1056 // 1057 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 + a*i], 1058 // where i is an induction variable, c1 and c2 are loop invariant, 1059 // and a is a constant, we can solve it exactly using the Strong SIV test. 1060 // 1061 // Can prove independence. Failing that, can compute distance (and direction). 1062 // In the presence of symbolic terms, we can sometimes make progress. 1063 // 1064 // If there's a dependence, 1065 // 1066 // c1 + a*i = c2 + a*i' 1067 // 1068 // The dependence distance is 1069 // 1070 // d = i' - i = (c1 - c2)/a 1071 // 1072 // A dependence only exists if d is an integer and abs(d) <= U, where U is the 1073 // loop's upper bound. If a dependence exists, the dependence direction is 1074 // defined as 1075 // 1076 // { < if d > 0 1077 // direction = { = if d = 0 1078 // { > if d < 0 1079 // 1080 // Return true if dependence disproved. 1081 bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst, 1082 const SCEV *DstConst, const Loop *CurLoop, 1083 unsigned Level, FullDependence &Result, 1084 Constraint &NewConstraint) const { 1085 LLVM_DEBUG(dbgs() << "\tStrong SIV test\n"); 1086 LLVM_DEBUG(dbgs() << "\t Coeff = " << *Coeff); 1087 LLVM_DEBUG(dbgs() << ", " << *Coeff->getType() << "\n"); 1088 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst); 1089 LLVM_DEBUG(dbgs() << ", " << *SrcConst->getType() << "\n"); 1090 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst); 1091 LLVM_DEBUG(dbgs() << ", " << *DstConst->getType() << "\n"); 1092 ++StrongSIVapplications; 1093 assert(0 < Level && Level <= CommonLevels && "level out of range"); 1094 Level--; 1095 1096 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); 1097 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta); 1098 LLVM_DEBUG(dbgs() << ", " << *Delta->getType() << "\n"); 1099 1100 // check that |Delta| < iteration count 1101 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1102 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound); 1103 LLVM_DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n"); 1104 const SCEV *AbsDelta = 1105 SE->isKnownNonNegative(Delta) ? Delta : SE->getNegativeSCEV(Delta); 1106 const SCEV *AbsCoeff = 1107 SE->isKnownNonNegative(Coeff) ? Coeff : SE->getNegativeSCEV(Coeff); 1108 const SCEV *Product = SE->getMulExpr(UpperBound, AbsCoeff); 1109 if (isKnownPredicate(CmpInst::ICMP_SGT, AbsDelta, Product)) { 1110 // Distance greater than trip count - no dependence 1111 ++StrongSIVindependence; 1112 ++StrongSIVsuccesses; 1113 return true; 1114 } 1115 } 1116 1117 // Can we compute distance? 1118 if (isa<SCEVConstant>(Delta) && isa<SCEVConstant>(Coeff)) { 1119 APInt ConstDelta = cast<SCEVConstant>(Delta)->getAPInt(); 1120 APInt ConstCoeff = cast<SCEVConstant>(Coeff)->getAPInt(); 1121 APInt Distance = ConstDelta; // these need to be initialized 1122 APInt Remainder = ConstDelta; 1123 APInt::sdivrem(ConstDelta, ConstCoeff, Distance, Remainder); 1124 LLVM_DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); 1125 LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1126 // Make sure Coeff divides Delta exactly 1127 if (Remainder != 0) { 1128 // Coeff doesn't divide Distance, no dependence 1129 ++StrongSIVindependence; 1130 ++StrongSIVsuccesses; 1131 return true; 1132 } 1133 Result.DV[Level].Distance = SE->getConstant(Distance); 1134 NewConstraint.setDistance(SE->getConstant(Distance), CurLoop); 1135 if (Distance.sgt(0)) 1136 Result.DV[Level].Direction &= Dependence::DVEntry::LT; 1137 else if (Distance.slt(0)) 1138 Result.DV[Level].Direction &= Dependence::DVEntry::GT; 1139 else 1140 Result.DV[Level].Direction &= Dependence::DVEntry::EQ; 1141 ++StrongSIVsuccesses; 1142 } 1143 else if (Delta->isZero()) { 1144 // since 0/X == 0 1145 Result.DV[Level].Distance = Delta; 1146 NewConstraint.setDistance(Delta, CurLoop); 1147 Result.DV[Level].Direction &= Dependence::DVEntry::EQ; 1148 ++StrongSIVsuccesses; 1149 } 1150 else { 1151 if (Coeff->isOne()) { 1152 LLVM_DEBUG(dbgs() << "\t Distance = " << *Delta << "\n"); 1153 Result.DV[Level].Distance = Delta; // since X/1 == X 1154 NewConstraint.setDistance(Delta, CurLoop); 1155 } 1156 else { 1157 Result.Consistent = false; 1158 NewConstraint.setLine(Coeff, 1159 SE->getNegativeSCEV(Coeff), 1160 SE->getNegativeSCEV(Delta), CurLoop); 1161 } 1162 1163 // maybe we can get a useful direction 1164 bool DeltaMaybeZero = !SE->isKnownNonZero(Delta); 1165 bool DeltaMaybePositive = !SE->isKnownNonPositive(Delta); 1166 bool DeltaMaybeNegative = !SE->isKnownNonNegative(Delta); 1167 bool CoeffMaybePositive = !SE->isKnownNonPositive(Coeff); 1168 bool CoeffMaybeNegative = !SE->isKnownNonNegative(Coeff); 1169 // The double negatives above are confusing. 1170 // It helps to read !SE->isKnownNonZero(Delta) 1171 // as "Delta might be Zero" 1172 unsigned NewDirection = Dependence::DVEntry::NONE; 1173 if ((DeltaMaybePositive && CoeffMaybePositive) || 1174 (DeltaMaybeNegative && CoeffMaybeNegative)) 1175 NewDirection = Dependence::DVEntry::LT; 1176 if (DeltaMaybeZero) 1177 NewDirection |= Dependence::DVEntry::EQ; 1178 if ((DeltaMaybeNegative && CoeffMaybePositive) || 1179 (DeltaMaybePositive && CoeffMaybeNegative)) 1180 NewDirection |= Dependence::DVEntry::GT; 1181 if (NewDirection < Result.DV[Level].Direction) 1182 ++StrongSIVsuccesses; 1183 Result.DV[Level].Direction &= NewDirection; 1184 } 1185 return false; 1186 } 1187 1188 1189 // weakCrossingSIVtest - 1190 // From the paper, Practical Dependence Testing, Section 4.2.2 1191 // 1192 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 - a*i], 1193 // where i is an induction variable, c1 and c2 are loop invariant, 1194 // and a is a constant, we can solve it exactly using the 1195 // Weak-Crossing SIV test. 1196 // 1197 // Given c1 + a*i = c2 - a*i', we can look for the intersection of 1198 // the two lines, where i = i', yielding 1199 // 1200 // c1 + a*i = c2 - a*i 1201 // 2a*i = c2 - c1 1202 // i = (c2 - c1)/2a 1203 // 1204 // If i < 0, there is no dependence. 1205 // If i > upperbound, there is no dependence. 1206 // If i = 0 (i.e., if c1 = c2), there's a dependence with distance = 0. 1207 // If i = upperbound, there's a dependence with distance = 0. 1208 // If i is integral, there's a dependence (all directions). 1209 // If the non-integer part = 1/2, there's a dependence (<> directions). 1210 // Otherwise, there's no dependence. 1211 // 1212 // Can prove independence. Failing that, 1213 // can sometimes refine the directions. 1214 // Can determine iteration for splitting. 1215 // 1216 // Return true if dependence disproved. 1217 bool DependenceInfo::weakCrossingSIVtest( 1218 const SCEV *Coeff, const SCEV *SrcConst, const SCEV *DstConst, 1219 const Loop *CurLoop, unsigned Level, FullDependence &Result, 1220 Constraint &NewConstraint, const SCEV *&SplitIter) const { 1221 LLVM_DEBUG(dbgs() << "\tWeak-Crossing SIV test\n"); 1222 LLVM_DEBUG(dbgs() << "\t Coeff = " << *Coeff << "\n"); 1223 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1224 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1225 ++WeakCrossingSIVapplications; 1226 assert(0 < Level && Level <= CommonLevels && "Level out of range"); 1227 Level--; 1228 Result.Consistent = false; 1229 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1230 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1231 NewConstraint.setLine(Coeff, Coeff, Delta, CurLoop); 1232 if (Delta->isZero()) { 1233 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT); 1234 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT); 1235 ++WeakCrossingSIVsuccesses; 1236 if (!Result.DV[Level].Direction) { 1237 ++WeakCrossingSIVindependence; 1238 return true; 1239 } 1240 Result.DV[Level].Distance = Delta; // = 0 1241 return false; 1242 } 1243 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Coeff); 1244 if (!ConstCoeff) 1245 return false; 1246 1247 Result.DV[Level].Splitable = true; 1248 if (SE->isKnownNegative(ConstCoeff)) { 1249 ConstCoeff = dyn_cast<SCEVConstant>(SE->getNegativeSCEV(ConstCoeff)); 1250 assert(ConstCoeff && 1251 "dynamic cast of negative of ConstCoeff should yield constant"); 1252 Delta = SE->getNegativeSCEV(Delta); 1253 } 1254 assert(SE->isKnownPositive(ConstCoeff) && "ConstCoeff should be positive"); 1255 1256 // compute SplitIter for use by DependenceInfo::getSplitIteration() 1257 SplitIter = SE->getUDivExpr( 1258 SE->getSMaxExpr(SE->getZero(Delta->getType()), Delta), 1259 SE->getMulExpr(SE->getConstant(Delta->getType(), 2), ConstCoeff)); 1260 LLVM_DEBUG(dbgs() << "\t Split iter = " << *SplitIter << "\n"); 1261 1262 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1263 if (!ConstDelta) 1264 return false; 1265 1266 // We're certain that ConstCoeff > 0; therefore, 1267 // if Delta < 0, then no dependence. 1268 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1269 LLVM_DEBUG(dbgs() << "\t ConstCoeff = " << *ConstCoeff << "\n"); 1270 if (SE->isKnownNegative(Delta)) { 1271 // No dependence, Delta < 0 1272 ++WeakCrossingSIVindependence; 1273 ++WeakCrossingSIVsuccesses; 1274 return true; 1275 } 1276 1277 // We're certain that Delta > 0 and ConstCoeff > 0. 1278 // Check Delta/(2*ConstCoeff) against upper loop bound 1279 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1280 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1281 const SCEV *ConstantTwo = SE->getConstant(UpperBound->getType(), 2); 1282 const SCEV *ML = SE->getMulExpr(SE->getMulExpr(ConstCoeff, UpperBound), 1283 ConstantTwo); 1284 LLVM_DEBUG(dbgs() << "\t ML = " << *ML << "\n"); 1285 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, ML)) { 1286 // Delta too big, no dependence 1287 ++WeakCrossingSIVindependence; 1288 ++WeakCrossingSIVsuccesses; 1289 return true; 1290 } 1291 if (isKnownPredicate(CmpInst::ICMP_EQ, Delta, ML)) { 1292 // i = i' = UB 1293 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT); 1294 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT); 1295 ++WeakCrossingSIVsuccesses; 1296 if (!Result.DV[Level].Direction) { 1297 ++WeakCrossingSIVindependence; 1298 return true; 1299 } 1300 Result.DV[Level].Splitable = false; 1301 Result.DV[Level].Distance = SE->getZero(Delta->getType()); 1302 return false; 1303 } 1304 } 1305 1306 // check that Coeff divides Delta 1307 APInt APDelta = ConstDelta->getAPInt(); 1308 APInt APCoeff = ConstCoeff->getAPInt(); 1309 APInt Distance = APDelta; // these need to be initialzed 1310 APInt Remainder = APDelta; 1311 APInt::sdivrem(APDelta, APCoeff, Distance, Remainder); 1312 LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1313 if (Remainder != 0) { 1314 // Coeff doesn't divide Delta, no dependence 1315 ++WeakCrossingSIVindependence; 1316 ++WeakCrossingSIVsuccesses; 1317 return true; 1318 } 1319 LLVM_DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); 1320 1321 // if 2*Coeff doesn't divide Delta, then the equal direction isn't possible 1322 APInt Two = APInt(Distance.getBitWidth(), 2, true); 1323 Remainder = Distance.srem(Two); 1324 LLVM_DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1325 if (Remainder != 0) { 1326 // Equal direction isn't possible 1327 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::EQ); 1328 ++WeakCrossingSIVsuccesses; 1329 } 1330 return false; 1331 } 1332 1333 1334 // Kirch's algorithm, from 1335 // 1336 // Optimizing Supercompilers for Supercomputers 1337 // Michael Wolfe 1338 // MIT Press, 1989 1339 // 1340 // Program 2.1, page 29. 1341 // Computes the GCD of AM and BM. 1342 // Also finds a solution to the equation ax - by = gcd(a, b). 1343 // Returns true if dependence disproved; i.e., gcd does not divide Delta. 1344 static bool findGCD(unsigned Bits, const APInt &AM, const APInt &BM, 1345 const APInt &Delta, APInt &G, APInt &X, APInt &Y) { 1346 APInt A0(Bits, 1, true), A1(Bits, 0, true); 1347 APInt B0(Bits, 0, true), B1(Bits, 1, true); 1348 APInt G0 = AM.abs(); 1349 APInt G1 = BM.abs(); 1350 APInt Q = G0; // these need to be initialized 1351 APInt R = G0; 1352 APInt::sdivrem(G0, G1, Q, R); 1353 while (R != 0) { 1354 APInt A2 = A0 - Q*A1; A0 = A1; A1 = A2; 1355 APInt B2 = B0 - Q*B1; B0 = B1; B1 = B2; 1356 G0 = G1; G1 = R; 1357 APInt::sdivrem(G0, G1, Q, R); 1358 } 1359 G = G1; 1360 LLVM_DEBUG(dbgs() << "\t GCD = " << G << "\n"); 1361 X = AM.slt(0) ? -A1 : A1; 1362 Y = BM.slt(0) ? B1 : -B1; 1363 1364 // make sure gcd divides Delta 1365 R = Delta.srem(G); 1366 if (R != 0) 1367 return true; // gcd doesn't divide Delta, no dependence 1368 Q = Delta.sdiv(G); 1369 X *= Q; 1370 Y *= Q; 1371 return false; 1372 } 1373 1374 static APInt floorOfQuotient(const APInt &A, const APInt &B) { 1375 APInt Q = A; // these need to be initialized 1376 APInt R = A; 1377 APInt::sdivrem(A, B, Q, R); 1378 if (R == 0) 1379 return Q; 1380 if ((A.sgt(0) && B.sgt(0)) || 1381 (A.slt(0) && B.slt(0))) 1382 return Q; 1383 else 1384 return Q - 1; 1385 } 1386 1387 static APInt ceilingOfQuotient(const APInt &A, const APInt &B) { 1388 APInt Q = A; // these need to be initialized 1389 APInt R = A; 1390 APInt::sdivrem(A, B, Q, R); 1391 if (R == 0) 1392 return Q; 1393 if ((A.sgt(0) && B.sgt(0)) || 1394 (A.slt(0) && B.slt(0))) 1395 return Q + 1; 1396 else 1397 return Q; 1398 } 1399 1400 1401 static 1402 APInt maxAPInt(APInt A, APInt B) { 1403 return A.sgt(B) ? A : B; 1404 } 1405 1406 1407 static 1408 APInt minAPInt(APInt A, APInt B) { 1409 return A.slt(B) ? A : B; 1410 } 1411 1412 1413 // exactSIVtest - 1414 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i], 1415 // where i is an induction variable, c1 and c2 are loop invariant, and a1 1416 // and a2 are constant, we can solve it exactly using an algorithm developed 1417 // by Banerjee and Wolfe. See Section 2.5.3 in 1418 // 1419 // Optimizing Supercompilers for Supercomputers 1420 // Michael Wolfe 1421 // MIT Press, 1989 1422 // 1423 // It's slower than the specialized tests (strong SIV, weak-zero SIV, etc), 1424 // so use them if possible. They're also a bit better with symbolics and, 1425 // in the case of the strong SIV test, can compute Distances. 1426 // 1427 // Return true if dependence disproved. 1428 bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, 1429 const SCEV *SrcConst, const SCEV *DstConst, 1430 const Loop *CurLoop, unsigned Level, 1431 FullDependence &Result, 1432 Constraint &NewConstraint) const { 1433 LLVM_DEBUG(dbgs() << "\tExact SIV test\n"); 1434 LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); 1435 LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); 1436 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1437 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1438 ++ExactSIVapplications; 1439 assert(0 < Level && Level <= CommonLevels && "Level out of range"); 1440 Level--; 1441 Result.Consistent = false; 1442 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1443 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1444 NewConstraint.setLine(SrcCoeff, SE->getNegativeSCEV(DstCoeff), 1445 Delta, CurLoop); 1446 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1447 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1448 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1449 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) 1450 return false; 1451 1452 // find gcd 1453 APInt G, X, Y; 1454 APInt AM = ConstSrcCoeff->getAPInt(); 1455 APInt BM = ConstDstCoeff->getAPInt(); 1456 unsigned Bits = AM.getBitWidth(); 1457 if (findGCD(Bits, AM, BM, ConstDelta->getAPInt(), G, X, Y)) { 1458 // gcd doesn't divide Delta, no dependence 1459 ++ExactSIVindependence; 1460 ++ExactSIVsuccesses; 1461 return true; 1462 } 1463 1464 LLVM_DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); 1465 1466 // since SCEV construction normalizes, LM = 0 1467 APInt UM(Bits, 1, true); 1468 bool UMvalid = false; 1469 // UM is perhaps unavailable, let's check 1470 if (const SCEVConstant *CUB = 1471 collectConstantUpperBound(CurLoop, Delta->getType())) { 1472 UM = CUB->getAPInt(); 1473 LLVM_DEBUG(dbgs() << "\t UM = " << UM << "\n"); 1474 UMvalid = true; 1475 } 1476 1477 APInt TU(APInt::getSignedMaxValue(Bits)); 1478 APInt TL(APInt::getSignedMinValue(Bits)); 1479 1480 // test(BM/G, LM-X) and test(-BM/G, X-UM) 1481 APInt TMUL = BM.sdiv(G); 1482 if (TMUL.sgt(0)) { 1483 TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL)); 1484 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1485 if (UMvalid) { 1486 TU = minAPInt(TU, floorOfQuotient(UM - X, TMUL)); 1487 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1488 } 1489 } 1490 else { 1491 TU = minAPInt(TU, floorOfQuotient(-X, TMUL)); 1492 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1493 if (UMvalid) { 1494 TL = maxAPInt(TL, ceilingOfQuotient(UM - X, TMUL)); 1495 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1496 } 1497 } 1498 1499 // test(AM/G, LM-Y) and test(-AM/G, Y-UM) 1500 TMUL = AM.sdiv(G); 1501 if (TMUL.sgt(0)) { 1502 TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL)); 1503 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1504 if (UMvalid) { 1505 TU = minAPInt(TU, floorOfQuotient(UM - Y, TMUL)); 1506 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1507 } 1508 } 1509 else { 1510 TU = minAPInt(TU, floorOfQuotient(-Y, TMUL)); 1511 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1512 if (UMvalid) { 1513 TL = maxAPInt(TL, ceilingOfQuotient(UM - Y, TMUL)); 1514 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1515 } 1516 } 1517 if (TL.sgt(TU)) { 1518 ++ExactSIVindependence; 1519 ++ExactSIVsuccesses; 1520 return true; 1521 } 1522 1523 // explore directions 1524 unsigned NewDirection = Dependence::DVEntry::NONE; 1525 1526 // less than 1527 APInt SaveTU(TU); // save these 1528 APInt SaveTL(TL); 1529 LLVM_DEBUG(dbgs() << "\t exploring LT direction\n"); 1530 TMUL = AM - BM; 1531 if (TMUL.sgt(0)) { 1532 TL = maxAPInt(TL, ceilingOfQuotient(X - Y + 1, TMUL)); 1533 LLVM_DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1534 } 1535 else { 1536 TU = minAPInt(TU, floorOfQuotient(X - Y + 1, TMUL)); 1537 LLVM_DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1538 } 1539 if (TL.sle(TU)) { 1540 NewDirection |= Dependence::DVEntry::LT; 1541 ++ExactSIVsuccesses; 1542 } 1543 1544 // equal 1545 TU = SaveTU; // restore 1546 TL = SaveTL; 1547 LLVM_DEBUG(dbgs() << "\t exploring EQ direction\n"); 1548 if (TMUL.sgt(0)) { 1549 TL = maxAPInt(TL, ceilingOfQuotient(X - Y, TMUL)); 1550 LLVM_DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1551 } 1552 else { 1553 TU = minAPInt(TU, floorOfQuotient(X - Y, TMUL)); 1554 LLVM_DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1555 } 1556 TMUL = BM - AM; 1557 if (TMUL.sgt(0)) { 1558 TL = maxAPInt(TL, ceilingOfQuotient(Y - X, TMUL)); 1559 LLVM_DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1560 } 1561 else { 1562 TU = minAPInt(TU, floorOfQuotient(Y - X, TMUL)); 1563 LLVM_DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1564 } 1565 if (TL.sle(TU)) { 1566 NewDirection |= Dependence::DVEntry::EQ; 1567 ++ExactSIVsuccesses; 1568 } 1569 1570 // greater than 1571 TU = SaveTU; // restore 1572 TL = SaveTL; 1573 LLVM_DEBUG(dbgs() << "\t exploring GT direction\n"); 1574 if (TMUL.sgt(0)) { 1575 TL = maxAPInt(TL, ceilingOfQuotient(Y - X + 1, TMUL)); 1576 LLVM_DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1577 } 1578 else { 1579 TU = minAPInt(TU, floorOfQuotient(Y - X + 1, TMUL)); 1580 LLVM_DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1581 } 1582 if (TL.sle(TU)) { 1583 NewDirection |= Dependence::DVEntry::GT; 1584 ++ExactSIVsuccesses; 1585 } 1586 1587 // finished 1588 Result.DV[Level].Direction &= NewDirection; 1589 if (Result.DV[Level].Direction == Dependence::DVEntry::NONE) 1590 ++ExactSIVindependence; 1591 return Result.DV[Level].Direction == Dependence::DVEntry::NONE; 1592 } 1593 1594 1595 1596 // Return true if the divisor evenly divides the dividend. 1597 static 1598 bool isRemainderZero(const SCEVConstant *Dividend, 1599 const SCEVConstant *Divisor) { 1600 const APInt &ConstDividend = Dividend->getAPInt(); 1601 const APInt &ConstDivisor = Divisor->getAPInt(); 1602 return ConstDividend.srem(ConstDivisor) == 0; 1603 } 1604 1605 1606 // weakZeroSrcSIVtest - 1607 // From the paper, Practical Dependence Testing, Section 4.2.2 1608 // 1609 // When we have a pair of subscripts of the form [c1] and [c2 + a*i], 1610 // where i is an induction variable, c1 and c2 are loop invariant, 1611 // and a is a constant, we can solve it exactly using the 1612 // Weak-Zero SIV test. 1613 // 1614 // Given 1615 // 1616 // c1 = c2 + a*i 1617 // 1618 // we get 1619 // 1620 // (c1 - c2)/a = i 1621 // 1622 // If i is not an integer, there's no dependence. 1623 // If i < 0 or > UB, there's no dependence. 1624 // If i = 0, the direction is >= and peeling the 1625 // 1st iteration will break the dependence. 1626 // If i = UB, the direction is <= and peeling the 1627 // last iteration will break the dependence. 1628 // Otherwise, the direction is *. 1629 // 1630 // Can prove independence. Failing that, we can sometimes refine 1631 // the directions. Can sometimes show that first or last 1632 // iteration carries all the dependences (so worth peeling). 1633 // 1634 // (see also weakZeroDstSIVtest) 1635 // 1636 // Return true if dependence disproved. 1637 bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *DstCoeff, 1638 const SCEV *SrcConst, 1639 const SCEV *DstConst, 1640 const Loop *CurLoop, unsigned Level, 1641 FullDependence &Result, 1642 Constraint &NewConstraint) const { 1643 // For the WeakSIV test, it's possible the loop isn't common to 1644 // the Src and Dst loops. If it isn't, then there's no need to 1645 // record a direction. 1646 LLVM_DEBUG(dbgs() << "\tWeak-Zero (src) SIV test\n"); 1647 LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << "\n"); 1648 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1649 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1650 ++WeakZeroSIVapplications; 1651 assert(0 < Level && Level <= MaxLevels && "Level out of range"); 1652 Level--; 1653 Result.Consistent = false; 1654 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); 1655 NewConstraint.setLine(SE->getZero(Delta->getType()), DstCoeff, Delta, 1656 CurLoop); 1657 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1658 if (isKnownPredicate(CmpInst::ICMP_EQ, SrcConst, DstConst)) { 1659 if (Level < CommonLevels) { 1660 Result.DV[Level].Direction &= Dependence::DVEntry::GE; 1661 Result.DV[Level].PeelFirst = true; 1662 ++WeakZeroSIVsuccesses; 1663 } 1664 return false; // dependences caused by first iteration 1665 } 1666 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1667 if (!ConstCoeff) 1668 return false; 1669 const SCEV *AbsCoeff = 1670 SE->isKnownNegative(ConstCoeff) ? 1671 SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; 1672 const SCEV *NewDelta = 1673 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta; 1674 1675 // check that Delta/SrcCoeff < iteration count 1676 // really check NewDelta < count*AbsCoeff 1677 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1678 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1679 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); 1680 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { 1681 ++WeakZeroSIVindependence; 1682 ++WeakZeroSIVsuccesses; 1683 return true; 1684 } 1685 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { 1686 // dependences caused by last iteration 1687 if (Level < CommonLevels) { 1688 Result.DV[Level].Direction &= Dependence::DVEntry::LE; 1689 Result.DV[Level].PeelLast = true; 1690 ++WeakZeroSIVsuccesses; 1691 } 1692 return false; 1693 } 1694 } 1695 1696 // check that Delta/SrcCoeff >= 0 1697 // really check that NewDelta >= 0 1698 if (SE->isKnownNegative(NewDelta)) { 1699 // No dependence, newDelta < 0 1700 ++WeakZeroSIVindependence; 1701 ++WeakZeroSIVsuccesses; 1702 return true; 1703 } 1704 1705 // if SrcCoeff doesn't divide Delta, then no dependence 1706 if (isa<SCEVConstant>(Delta) && 1707 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { 1708 ++WeakZeroSIVindependence; 1709 ++WeakZeroSIVsuccesses; 1710 return true; 1711 } 1712 return false; 1713 } 1714 1715 1716 // weakZeroDstSIVtest - 1717 // From the paper, Practical Dependence Testing, Section 4.2.2 1718 // 1719 // When we have a pair of subscripts of the form [c1 + a*i] and [c2], 1720 // where i is an induction variable, c1 and c2 are loop invariant, 1721 // and a is a constant, we can solve it exactly using the 1722 // Weak-Zero SIV test. 1723 // 1724 // Given 1725 // 1726 // c1 + a*i = c2 1727 // 1728 // we get 1729 // 1730 // i = (c2 - c1)/a 1731 // 1732 // If i is not an integer, there's no dependence. 1733 // If i < 0 or > UB, there's no dependence. 1734 // If i = 0, the direction is <= and peeling the 1735 // 1st iteration will break the dependence. 1736 // If i = UB, the direction is >= and peeling the 1737 // last iteration will break the dependence. 1738 // Otherwise, the direction is *. 1739 // 1740 // Can prove independence. Failing that, we can sometimes refine 1741 // the directions. Can sometimes show that first or last 1742 // iteration carries all the dependences (so worth peeling). 1743 // 1744 // (see also weakZeroSrcSIVtest) 1745 // 1746 // Return true if dependence disproved. 1747 bool DependenceInfo::weakZeroDstSIVtest(const SCEV *SrcCoeff, 1748 const SCEV *SrcConst, 1749 const SCEV *DstConst, 1750 const Loop *CurLoop, unsigned Level, 1751 FullDependence &Result, 1752 Constraint &NewConstraint) const { 1753 // For the WeakSIV test, it's possible the loop isn't common to the 1754 // Src and Dst loops. If it isn't, then there's no need to record a direction. 1755 LLVM_DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n"); 1756 LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << "\n"); 1757 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1758 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1759 ++WeakZeroSIVapplications; 1760 assert(0 < Level && Level <= SrcLevels && "Level out of range"); 1761 Level--; 1762 Result.Consistent = false; 1763 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1764 NewConstraint.setLine(SrcCoeff, SE->getZero(Delta->getType()), Delta, 1765 CurLoop); 1766 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1767 if (isKnownPredicate(CmpInst::ICMP_EQ, DstConst, SrcConst)) { 1768 if (Level < CommonLevels) { 1769 Result.DV[Level].Direction &= Dependence::DVEntry::LE; 1770 Result.DV[Level].PeelFirst = true; 1771 ++WeakZeroSIVsuccesses; 1772 } 1773 return false; // dependences caused by first iteration 1774 } 1775 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1776 if (!ConstCoeff) 1777 return false; 1778 const SCEV *AbsCoeff = 1779 SE->isKnownNegative(ConstCoeff) ? 1780 SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; 1781 const SCEV *NewDelta = 1782 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta; 1783 1784 // check that Delta/SrcCoeff < iteration count 1785 // really check NewDelta < count*AbsCoeff 1786 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1787 LLVM_DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1788 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); 1789 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { 1790 ++WeakZeroSIVindependence; 1791 ++WeakZeroSIVsuccesses; 1792 return true; 1793 } 1794 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { 1795 // dependences caused by last iteration 1796 if (Level < CommonLevels) { 1797 Result.DV[Level].Direction &= Dependence::DVEntry::GE; 1798 Result.DV[Level].PeelLast = true; 1799 ++WeakZeroSIVsuccesses; 1800 } 1801 return false; 1802 } 1803 } 1804 1805 // check that Delta/SrcCoeff >= 0 1806 // really check that NewDelta >= 0 1807 if (SE->isKnownNegative(NewDelta)) { 1808 // No dependence, newDelta < 0 1809 ++WeakZeroSIVindependence; 1810 ++WeakZeroSIVsuccesses; 1811 return true; 1812 } 1813 1814 // if SrcCoeff doesn't divide Delta, then no dependence 1815 if (isa<SCEVConstant>(Delta) && 1816 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { 1817 ++WeakZeroSIVindependence; 1818 ++WeakZeroSIVsuccesses; 1819 return true; 1820 } 1821 return false; 1822 } 1823 1824 1825 // exactRDIVtest - Tests the RDIV subscript pair for dependence. 1826 // Things of the form [c1 + a*i] and [c2 + b*j], 1827 // where i and j are induction variable, c1 and c2 are loop invariant, 1828 // and a and b are constants. 1829 // Returns true if any possible dependence is disproved. 1830 // Marks the result as inconsistent. 1831 // Works in some cases that symbolicRDIVtest doesn't, and vice versa. 1832 bool DependenceInfo::exactRDIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, 1833 const SCEV *SrcConst, const SCEV *DstConst, 1834 const Loop *SrcLoop, const Loop *DstLoop, 1835 FullDependence &Result) const { 1836 LLVM_DEBUG(dbgs() << "\tExact RDIV test\n"); 1837 LLVM_DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); 1838 LLVM_DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); 1839 LLVM_DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1840 LLVM_DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1841 ++ExactRDIVapplications; 1842 Result.Consistent = false; 1843 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1844 LLVM_DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1845 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1846 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1847 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1848 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) 1849 return false; 1850 1851 // find gcd 1852 APInt G, X, Y; 1853 APInt AM = ConstSrcCoeff->getAPInt(); 1854 APInt BM = ConstDstCoeff->getAPInt(); 1855 unsigned Bits = AM.getBitWidth(); 1856 if (findGCD(Bits, AM, BM, ConstDelta->getAPInt(), G, X, Y)) { 1857 // gcd doesn't divide Delta, no dependence 1858 ++ExactRDIVindependence; 1859 return true; 1860 } 1861 1862 LLVM_DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); 1863 1864 // since SCEV construction seems to normalize, LM = 0 1865 APInt SrcUM(Bits, 1, true); 1866 bool SrcUMvalid = false; 1867 // SrcUM is perhaps unavailable, let's check 1868 if (const SCEVConstant *UpperBound = 1869 collectConstantUpperBound(SrcLoop, Delta->getType())) { 1870 SrcUM = UpperBound->getAPInt(); 1871 LLVM_DEBUG(dbgs() << "\t SrcUM = " << SrcUM << "\n"); 1872 SrcUMvalid = true; 1873 } 1874 1875 APInt DstUM(Bits, 1, true); 1876 bool DstUMvalid = false; 1877 // UM is perhaps unavailable, let's check 1878 if (const SCEVConstant *UpperBound = 1879 collectConstantUpperBound(DstLoop, Delta->getType())) { 1880 DstUM = UpperBound->getAPInt(); 1881 LLVM_DEBUG(dbgs() << "\t DstUM = " << DstUM << "\n"); 1882 DstUMvalid = true; 1883 } 1884 1885 APInt TU(APInt::getSignedMaxValue(Bits)); 1886 APInt TL(APInt::getSignedMinValue(Bits)); 1887 1888 // test(BM/G, LM-X) and test(-BM/G, X-UM) 1889 APInt TMUL = BM.sdiv(G); 1890 if (TMUL.sgt(0)) { 1891 TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL)); 1892 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1893 if (SrcUMvalid) { 1894 TU = minAPInt(TU, floorOfQuotient(SrcUM - X, TMUL)); 1895 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1896 } 1897 } 1898 else { 1899 TU = minAPInt(TU, floorOfQuotient(-X, TMUL)); 1900 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1901 if (SrcUMvalid) { 1902 TL = maxAPInt(TL, ceilingOfQuotient(SrcUM - X, TMUL)); 1903 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1904 } 1905 } 1906 1907 // test(AM/G, LM-Y) and test(-AM/G, Y-UM) 1908 TMUL = AM.sdiv(G); 1909 if (TMUL.sgt(0)) { 1910 TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL)); 1911 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1912 if (DstUMvalid) { 1913 TU = minAPInt(TU, floorOfQuotient(DstUM - Y, TMUL)); 1914 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1915 } 1916 } 1917 else { 1918 TU = minAPInt(TU, floorOfQuotient(-Y, TMUL)); 1919 LLVM_DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1920 if (DstUMvalid) { 1921 TL = maxAPInt(TL, ceilingOfQuotient(DstUM - Y, TMUL)); 1922 LLVM_DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1923 } 1924 } 1925 if (TL.sgt(TU)) 1926 ++ExactRDIVindependence; 1927 return TL.sgt(TU); 1928 } 1929 1930 1931 // symbolicRDIVtest - 1932 // In Section 4.5 of the Practical Dependence Testing paper,the authors 1933 // introduce a special case of Banerjee's Inequalities (also called the 1934 // Extreme-Value Test) that can handle some of the SIV and RDIV cases, 1935 // particularly cases with symbolics. Since it's only able to disprove 1936 // dependence (not compute distances or directions), we'll use it as a 1937 // fall back for the other tests. 1938 // 1939 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] 1940 // where i and j are induction variables and c1 and c2 are loop invariants, 1941 // we can use the symbolic tests to disprove some dependences, serving as a 1942 // backup for the RDIV test. Note that i and j can be the same variable, 1943 // letting this test serve as a backup for the various SIV tests. 1944 // 1945 // For a dependence to exist, c1 + a1*i must equal c2 + a2*j for some 1946 // 0 <= i <= N1 and some 0 <= j <= N2, where N1 and N2 are the (normalized) 1947 // loop bounds for the i and j loops, respectively. So, ... 1948 // 1949 // c1 + a1*i = c2 + a2*j 1950 // a1*i - a2*j = c2 - c1 1951 // 1952 // To test for a dependence, we compute c2 - c1 and make sure it's in the 1953 // range of the maximum and minimum possible values of a1*i - a2*j. 1954 // Considering the signs of a1 and a2, we have 4 possible cases: 1955 // 1956 // 1) If a1 >= 0 and a2 >= 0, then 1957 // a1*0 - a2*N2 <= c2 - c1 <= a1*N1 - a2*0 1958 // -a2*N2 <= c2 - c1 <= a1*N1 1959 // 1960 // 2) If a1 >= 0 and a2 <= 0, then 1961 // a1*0 - a2*0 <= c2 - c1 <= a1*N1 - a2*N2 1962 // 0 <= c2 - c1 <= a1*N1 - a2*N2 1963 // 1964 // 3) If a1 <= 0 and a2 >= 0, then 1965 // a1*N1 - a2*N2 <= c2 - c1 <= a1*0 - a2*0 1966 // a1*N1 - a2*N2 <= c2 - c1 <= 0 1967 // 1968 // 4) If a1 <= 0 and a2 <= 0, then 1969 // a1*N1 - a2*0 <= c2 - c1 <= a1*0 - a2*N2 1970 // a1*N1 <= c2 - c1 <= -a2*N2 1971 // 1972 // return true if dependence disproved 1973 bool DependenceInfo::symbolicRDIVtest(const SCEV *A1, const SCEV *A2, 1974 const SCEV *C1, const SCEV *C2, 1975 const Loop *Loop1, 1976 const Loop *Loop2) const { 1977 ++SymbolicRDIVapplications; 1978 LLVM_DEBUG(dbgs() << "\ttry symbolic RDIV test\n"); 1979 LLVM_DEBUG(dbgs() << "\t A1 = " << *A1); 1980 LLVM_DEBUG(dbgs() << ", type = " << *A1->getType() << "\n"); 1981 LLVM_DEBUG(dbgs() << "\t A2 = " << *A2 << "\n"); 1982 LLVM_DEBUG(dbgs() << "\t C1 = " << *C1 << "\n"); 1983 LLVM_DEBUG(dbgs() << "\t C2 = " << *C2 << "\n"); 1984 const SCEV *N1 = collectUpperBound(Loop1, A1->getType()); 1985 const SCEV *N2 = collectUpperBound(Loop2, A1->getType()); 1986 LLVM_DEBUG(if (N1) dbgs() << "\t N1 = " << *N1 << "\n"); 1987 LLVM_DEBUG(if (N2) dbgs() << "\t N2 = " << *N2 << "\n"); 1988 const SCEV *C2_C1 = SE->getMinusSCEV(C2, C1); 1989 const SCEV *C1_C2 = SE->getMinusSCEV(C1, C2); 1990 LLVM_DEBUG(dbgs() << "\t C2 - C1 = " << *C2_C1 << "\n"); 1991 LLVM_DEBUG(dbgs() << "\t C1 - C2 = " << *C1_C2 << "\n"); 1992 if (SE->isKnownNonNegative(A1)) { 1993 if (SE->isKnownNonNegative(A2)) { 1994 // A1 >= 0 && A2 >= 0 1995 if (N1) { 1996 // make sure that c2 - c1 <= a1*N1 1997 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 1998 LLVM_DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); 1999 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1)) { 2000 ++SymbolicRDIVindependence; 2001 return true; 2002 } 2003 } 2004 if (N2) { 2005 // make sure that -a2*N2 <= c2 - c1, or a2*N2 >= c1 - c2 2006 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2007 LLVM_DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); 2008 if (isKnownPredicate(CmpInst::ICMP_SLT, A2N2, C1_C2)) { 2009 ++SymbolicRDIVindependence; 2010 return true; 2011 } 2012 } 2013 } 2014 else if (SE->isKnownNonPositive(A2)) { 2015 // a1 >= 0 && a2 <= 0 2016 if (N1 && N2) { 2017 // make sure that c2 - c1 <= a1*N1 - a2*N2 2018 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2019 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2020 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); 2021 LLVM_DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); 2022 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1_A2N2)) { 2023 ++SymbolicRDIVindependence; 2024 return true; 2025 } 2026 } 2027 // make sure that 0 <= c2 - c1 2028 if (SE->isKnownNegative(C2_C1)) { 2029 ++SymbolicRDIVindependence; 2030 return true; 2031 } 2032 } 2033 } 2034 else if (SE->isKnownNonPositive(A1)) { 2035 if (SE->isKnownNonNegative(A2)) { 2036 // a1 <= 0 && a2 >= 0 2037 if (N1 && N2) { 2038 // make sure that a1*N1 - a2*N2 <= c2 - c1 2039 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2040 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2041 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); 2042 LLVM_DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); 2043 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1_A2N2, C2_C1)) { 2044 ++SymbolicRDIVindependence; 2045 return true; 2046 } 2047 } 2048 // make sure that c2 - c1 <= 0 2049 if (SE->isKnownPositive(C2_C1)) { 2050 ++SymbolicRDIVindependence; 2051 return true; 2052 } 2053 } 2054 else if (SE->isKnownNonPositive(A2)) { 2055 // a1 <= 0 && a2 <= 0 2056 if (N1) { 2057 // make sure that a1*N1 <= c2 - c1 2058 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2059 LLVM_DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); 2060 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1, C2_C1)) { 2061 ++SymbolicRDIVindependence; 2062 return true; 2063 } 2064 } 2065 if (N2) { 2066 // make sure that c2 - c1 <= -a2*N2, or c1 - c2 >= a2*N2 2067 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2068 LLVM_DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); 2069 if (isKnownPredicate(CmpInst::ICMP_SLT, C1_C2, A2N2)) { 2070 ++SymbolicRDIVindependence; 2071 return true; 2072 } 2073 } 2074 } 2075 } 2076 return false; 2077 } 2078 2079 2080 // testSIV - 2081 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 - a2*i] 2082 // where i is an induction variable, c1 and c2 are loop invariant, and a1 and 2083 // a2 are constant, we attack it with an SIV test. While they can all be 2084 // solved with the Exact SIV test, it's worthwhile to use simpler tests when 2085 // they apply; they're cheaper and sometimes more precise. 2086 // 2087 // Return true if dependence disproved. 2088 bool DependenceInfo::testSIV(const SCEV *Src, const SCEV *Dst, unsigned &Level, 2089 FullDependence &Result, Constraint &NewConstraint, 2090 const SCEV *&SplitIter) const { 2091 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n"); 2092 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2093 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); 2094 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); 2095 if (SrcAddRec && DstAddRec) { 2096 const SCEV *SrcConst = SrcAddRec->getStart(); 2097 const SCEV *DstConst = DstAddRec->getStart(); 2098 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2099 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); 2100 const Loop *CurLoop = SrcAddRec->getLoop(); 2101 assert(CurLoop == DstAddRec->getLoop() && 2102 "both loops in SIV should be same"); 2103 Level = mapSrcLoop(CurLoop); 2104 bool disproven; 2105 if (SrcCoeff == DstCoeff) 2106 disproven = strongSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2107 Level, Result, NewConstraint); 2108 else if (SrcCoeff == SE->getNegativeSCEV(DstCoeff)) 2109 disproven = weakCrossingSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2110 Level, Result, NewConstraint, SplitIter); 2111 else 2112 disproven = exactSIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, 2113 Level, Result, NewConstraint); 2114 return disproven || 2115 gcdMIVtest(Src, Dst, Result) || 2116 symbolicRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, CurLoop); 2117 } 2118 if (SrcAddRec) { 2119 const SCEV *SrcConst = SrcAddRec->getStart(); 2120 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2121 const SCEV *DstConst = Dst; 2122 const Loop *CurLoop = SrcAddRec->getLoop(); 2123 Level = mapSrcLoop(CurLoop); 2124 return weakZeroDstSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2125 Level, Result, NewConstraint) || 2126 gcdMIVtest(Src, Dst, Result); 2127 } 2128 if (DstAddRec) { 2129 const SCEV *DstConst = DstAddRec->getStart(); 2130 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); 2131 const SCEV *SrcConst = Src; 2132 const Loop *CurLoop = DstAddRec->getLoop(); 2133 Level = mapDstLoop(CurLoop); 2134 return weakZeroSrcSIVtest(DstCoeff, SrcConst, DstConst, 2135 CurLoop, Level, Result, NewConstraint) || 2136 gcdMIVtest(Src, Dst, Result); 2137 } 2138 llvm_unreachable("SIV test expected at least one AddRec"); 2139 return false; 2140 } 2141 2142 2143 // testRDIV - 2144 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] 2145 // where i and j are induction variables, c1 and c2 are loop invariant, 2146 // and a1 and a2 are constant, we can solve it exactly with an easy adaptation 2147 // of the Exact SIV test, the Restricted Double Index Variable (RDIV) test. 2148 // It doesn't make sense to talk about distance or direction in this case, 2149 // so there's no point in making special versions of the Strong SIV test or 2150 // the Weak-crossing SIV test. 2151 // 2152 // With minor algebra, this test can also be used for things like 2153 // [c1 + a1*i + a2*j][c2]. 2154 // 2155 // Return true if dependence disproved. 2156 bool DependenceInfo::testRDIV(const SCEV *Src, const SCEV *Dst, 2157 FullDependence &Result) const { 2158 // we have 3 possible situations here: 2159 // 1) [a*i + b] and [c*j + d] 2160 // 2) [a*i + c*j + b] and [d] 2161 // 3) [b] and [a*i + c*j + d] 2162 // We need to find what we've got and get organized 2163 2164 const SCEV *SrcConst, *DstConst; 2165 const SCEV *SrcCoeff, *DstCoeff; 2166 const Loop *SrcLoop, *DstLoop; 2167 2168 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n"); 2169 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2170 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); 2171 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); 2172 if (SrcAddRec && DstAddRec) { 2173 SrcConst = SrcAddRec->getStart(); 2174 SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2175 SrcLoop = SrcAddRec->getLoop(); 2176 DstConst = DstAddRec->getStart(); 2177 DstCoeff = DstAddRec->getStepRecurrence(*SE); 2178 DstLoop = DstAddRec->getLoop(); 2179 } 2180 else if (SrcAddRec) { 2181 if (const SCEVAddRecExpr *tmpAddRec = 2182 dyn_cast<SCEVAddRecExpr>(SrcAddRec->getStart())) { 2183 SrcConst = tmpAddRec->getStart(); 2184 SrcCoeff = tmpAddRec->getStepRecurrence(*SE); 2185 SrcLoop = tmpAddRec->getLoop(); 2186 DstConst = Dst; 2187 DstCoeff = SE->getNegativeSCEV(SrcAddRec->getStepRecurrence(*SE)); 2188 DstLoop = SrcAddRec->getLoop(); 2189 } 2190 else 2191 llvm_unreachable("RDIV reached by surprising SCEVs"); 2192 } 2193 else if (DstAddRec) { 2194 if (const SCEVAddRecExpr *tmpAddRec = 2195 dyn_cast<SCEVAddRecExpr>(DstAddRec->getStart())) { 2196 DstConst = tmpAddRec->getStart(); 2197 DstCoeff = tmpAddRec->getStepRecurrence(*SE); 2198 DstLoop = tmpAddRec->getLoop(); 2199 SrcConst = Src; 2200 SrcCoeff = SE->getNegativeSCEV(DstAddRec->getStepRecurrence(*SE)); 2201 SrcLoop = DstAddRec->getLoop(); 2202 } 2203 else 2204 llvm_unreachable("RDIV reached by surprising SCEVs"); 2205 } 2206 else 2207 llvm_unreachable("RDIV expected at least one AddRec"); 2208 return exactRDIVtest(SrcCoeff, DstCoeff, 2209 SrcConst, DstConst, 2210 SrcLoop, DstLoop, 2211 Result) || 2212 gcdMIVtest(Src, Dst, Result) || 2213 symbolicRDIVtest(SrcCoeff, DstCoeff, 2214 SrcConst, DstConst, 2215 SrcLoop, DstLoop); 2216 } 2217 2218 2219 // Tests the single-subscript MIV pair (Src and Dst) for dependence. 2220 // Return true if dependence disproved. 2221 // Can sometimes refine direction vectors. 2222 bool DependenceInfo::testMIV(const SCEV *Src, const SCEV *Dst, 2223 const SmallBitVector &Loops, 2224 FullDependence &Result) const { 2225 LLVM_DEBUG(dbgs() << " src = " << *Src << "\n"); 2226 LLVM_DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2227 Result.Consistent = false; 2228 return gcdMIVtest(Src, Dst, Result) || 2229 banerjeeMIVtest(Src, Dst, Loops, Result); 2230 } 2231 2232 2233 // Given a product, e.g., 10*X*Y, returns the first constant operand, 2234 // in this case 10. If there is no constant part, returns NULL. 2235 static 2236 const SCEVConstant *getConstantPart(const SCEV *Expr) { 2237 if (const auto *Constant = dyn_cast<SCEVConstant>(Expr)) 2238 return Constant; 2239 else if (const auto *Product = dyn_cast<SCEVMulExpr>(Expr)) 2240 if (const auto *Constant = dyn_cast<SCEVConstant>(Product->getOperand(0))) 2241 return Constant; 2242 return nullptr; 2243 } 2244 2245 2246 //===----------------------------------------------------------------------===// 2247 // gcdMIVtest - 2248 // Tests an MIV subscript pair for dependence. 2249 // Returns true if any possible dependence is disproved. 2250 // Marks the result as inconsistent. 2251 // Can sometimes disprove the equal direction for 1 or more loops, 2252 // as discussed in Michael Wolfe's book, 2253 // High Performance Compilers for Parallel Computing, page 235. 2254 // 2255 // We spend some effort (code!) to handle cases like 2256 // [10*i + 5*N*j + 15*M + 6], where i and j are induction variables, 2257 // but M and N are just loop-invariant variables. 2258 // This should help us handle linearized subscripts; 2259 // also makes this test a useful backup to the various SIV tests. 2260 // 2261 // It occurs to me that the presence of loop-invariant variables 2262 // changes the nature of the test from "greatest common divisor" 2263 // to "a common divisor". 2264 bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst, 2265 FullDependence &Result) const { 2266 LLVM_DEBUG(dbgs() << "starting gcd\n"); 2267 ++GCDapplications; 2268 unsigned BitWidth = SE->getTypeSizeInBits(Src->getType()); 2269 APInt RunningGCD = APInt::getNullValue(BitWidth); 2270 2271 // Examine Src coefficients. 2272 // Compute running GCD and record source constant. 2273 // Because we're looking for the constant at the end of the chain, 2274 // we can't quit the loop just because the GCD == 1. 2275 const SCEV *Coefficients = Src; 2276 while (const SCEVAddRecExpr *AddRec = 2277 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2278 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2279 // If the coefficient is the product of a constant and other stuff, 2280 // we can use the constant in the GCD computation. 2281 const auto *Constant = getConstantPart(Coeff); 2282 if (!Constant) 2283 return false; 2284 APInt ConstCoeff = Constant->getAPInt(); 2285 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2286 Coefficients = AddRec->getStart(); 2287 } 2288 const SCEV *SrcConst = Coefficients; 2289 2290 // Examine Dst coefficients. 2291 // Compute running GCD and record destination constant. 2292 // Because we're looking for the constant at the end of the chain, 2293 // we can't quit the loop just because the GCD == 1. 2294 Coefficients = Dst; 2295 while (const SCEVAddRecExpr *AddRec = 2296 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2297 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2298 // If the coefficient is the product of a constant and other stuff, 2299 // we can use the constant in the GCD computation. 2300 const auto *Constant = getConstantPart(Coeff); 2301 if (!Constant) 2302 return false; 2303 APInt ConstCoeff = Constant->getAPInt(); 2304 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2305 Coefficients = AddRec->getStart(); 2306 } 2307 const SCEV *DstConst = Coefficients; 2308 2309 APInt ExtraGCD = APInt::getNullValue(BitWidth); 2310 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 2311 LLVM_DEBUG(dbgs() << " Delta = " << *Delta << "\n"); 2312 const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Delta); 2313 if (const SCEVAddExpr *Sum = dyn_cast<SCEVAddExpr>(Delta)) { 2314 // If Delta is a sum of products, we may be able to make further progress. 2315 for (unsigned Op = 0, Ops = Sum->getNumOperands(); Op < Ops; Op++) { 2316 const SCEV *Operand = Sum->getOperand(Op); 2317 if (isa<SCEVConstant>(Operand)) { 2318 assert(!Constant && "Surprised to find multiple constants"); 2319 Constant = cast<SCEVConstant>(Operand); 2320 } 2321 else if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Operand)) { 2322 // Search for constant operand to participate in GCD; 2323 // If none found; return false. 2324 const SCEVConstant *ConstOp = getConstantPart(Product); 2325 if (!ConstOp) 2326 return false; 2327 APInt ConstOpValue = ConstOp->getAPInt(); 2328 ExtraGCD = APIntOps::GreatestCommonDivisor(ExtraGCD, 2329 ConstOpValue.abs()); 2330 } 2331 else 2332 return false; 2333 } 2334 } 2335 if (!Constant) 2336 return false; 2337 APInt ConstDelta = cast<SCEVConstant>(Constant)->getAPInt(); 2338 LLVM_DEBUG(dbgs() << " ConstDelta = " << ConstDelta << "\n"); 2339 if (ConstDelta == 0) 2340 return false; 2341 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ExtraGCD); 2342 LLVM_DEBUG(dbgs() << " RunningGCD = " << RunningGCD << "\n"); 2343 APInt Remainder = ConstDelta.srem(RunningGCD); 2344 if (Remainder != 0) { 2345 ++GCDindependence; 2346 return true; 2347 } 2348 2349 // Try to disprove equal directions. 2350 // For example, given a subscript pair [3*i + 2*j] and [i' + 2*j' - 1], 2351 // the code above can't disprove the dependence because the GCD = 1. 2352 // So we consider what happen if i = i' and what happens if j = j'. 2353 // If i = i', we can simplify the subscript to [2*i + 2*j] and [2*j' - 1], 2354 // which is infeasible, so we can disallow the = direction for the i level. 2355 // Setting j = j' doesn't help matters, so we end up with a direction vector 2356 // of [<>, *] 2357 // 2358 // Given A[5*i + 10*j*M + 9*M*N] and A[15*i + 20*j*M - 21*N*M + 5], 2359 // we need to remember that the constant part is 5 and the RunningGCD should 2360 // be initialized to ExtraGCD = 30. 2361 LLVM_DEBUG(dbgs() << " ExtraGCD = " << ExtraGCD << '\n'); 2362 2363 bool Improved = false; 2364 Coefficients = Src; 2365 while (const SCEVAddRecExpr *AddRec = 2366 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2367 Coefficients = AddRec->getStart(); 2368 const Loop *CurLoop = AddRec->getLoop(); 2369 RunningGCD = ExtraGCD; 2370 const SCEV *SrcCoeff = AddRec->getStepRecurrence(*SE); 2371 const SCEV *DstCoeff = SE->getMinusSCEV(SrcCoeff, SrcCoeff); 2372 const SCEV *Inner = Src; 2373 while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { 2374 AddRec = cast<SCEVAddRecExpr>(Inner); 2375 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2376 if (CurLoop == AddRec->getLoop()) 2377 ; // SrcCoeff == Coeff 2378 else { 2379 // If the coefficient is the product of a constant and other stuff, 2380 // we can use the constant in the GCD computation. 2381 Constant = getConstantPart(Coeff); 2382 if (!Constant) 2383 return false; 2384 APInt ConstCoeff = Constant->getAPInt(); 2385 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2386 } 2387 Inner = AddRec->getStart(); 2388 } 2389 Inner = Dst; 2390 while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { 2391 AddRec = cast<SCEVAddRecExpr>(Inner); 2392 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2393 if (CurLoop == AddRec->getLoop()) 2394 DstCoeff = Coeff; 2395 else { 2396 // If the coefficient is the product of a constant and other stuff, 2397 // we can use the constant in the GCD computation. 2398 Constant = getConstantPart(Coeff); 2399 if (!Constant) 2400 return false; 2401 APInt ConstCoeff = Constant->getAPInt(); 2402 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2403 } 2404 Inner = AddRec->getStart(); 2405 } 2406 Delta = SE->getMinusSCEV(SrcCoeff, DstCoeff); 2407 // If the coefficient is the product of a constant and other stuff, 2408 // we can use the constant in the GCD computation. 2409 Constant = getConstantPart(Delta); 2410 if (!Constant) 2411 // The difference of the two coefficients might not be a product 2412 // or constant, in which case we give up on this direction. 2413 continue; 2414 APInt ConstCoeff = Constant->getAPInt(); 2415 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2416 LLVM_DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n"); 2417 if (RunningGCD != 0) { 2418 Remainder = ConstDelta.srem(RunningGCD); 2419 LLVM_DEBUG(dbgs() << "\tRemainder = " << Remainder << "\n"); 2420 if (Remainder != 0) { 2421 unsigned Level = mapSrcLoop(CurLoop); 2422 Result.DV[Level - 1].Direction &= unsigned(~Dependence::DVEntry::EQ); 2423 Improved = true; 2424 } 2425 } 2426 } 2427 if (Improved) 2428 ++GCDsuccesses; 2429 LLVM_DEBUG(dbgs() << "all done\n"); 2430 return false; 2431 } 2432 2433 2434 //===----------------------------------------------------------------------===// 2435 // banerjeeMIVtest - 2436 // Use Banerjee's Inequalities to test an MIV subscript pair. 2437 // (Wolfe, in the race-car book, calls this the Extreme Value Test.) 2438 // Generally follows the discussion in Section 2.5.2 of 2439 // 2440 // Optimizing Supercompilers for Supercomputers 2441 // Michael Wolfe 2442 // 2443 // The inequalities given on page 25 are simplified in that loops are 2444 // normalized so that the lower bound is always 0 and the stride is always 1. 2445 // For example, Wolfe gives 2446 // 2447 // LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2448 // 2449 // where A_k is the coefficient of the kth index in the source subscript, 2450 // B_k is the coefficient of the kth index in the destination subscript, 2451 // U_k is the upper bound of the kth index, L_k is the lower bound of the Kth 2452 // index, and N_k is the stride of the kth index. Since all loops are normalized 2453 // by the SCEV package, N_k = 1 and L_k = 0, allowing us to simplify the 2454 // equation to 2455 // 2456 // LB^<_k = (A^-_k - B_k)^- (U_k - 0 - 1) + (A_k - B_k)0 - B_k 1 2457 // = (A^-_k - B_k)^- (U_k - 1) - B_k 2458 // 2459 // Similar simplifications are possible for the other equations. 2460 // 2461 // When we can't determine the number of iterations for a loop, 2462 // we use NULL as an indicator for the worst case, infinity. 2463 // When computing the upper bound, NULL denotes +inf; 2464 // for the lower bound, NULL denotes -inf. 2465 // 2466 // Return true if dependence disproved. 2467 bool DependenceInfo::banerjeeMIVtest(const SCEV *Src, const SCEV *Dst, 2468 const SmallBitVector &Loops, 2469 FullDependence &Result) const { 2470 LLVM_DEBUG(dbgs() << "starting Banerjee\n"); 2471 ++BanerjeeApplications; 2472 LLVM_DEBUG(dbgs() << " Src = " << *Src << '\n'); 2473 const SCEV *A0; 2474 CoefficientInfo *A = collectCoeffInfo(Src, true, A0); 2475 LLVM_DEBUG(dbgs() << " Dst = " << *Dst << '\n'); 2476 const SCEV *B0; 2477 CoefficientInfo *B = collectCoeffInfo(Dst, false, B0); 2478 BoundInfo *Bound = new BoundInfo[MaxLevels + 1]; 2479 const SCEV *Delta = SE->getMinusSCEV(B0, A0); 2480 LLVM_DEBUG(dbgs() << "\tDelta = " << *Delta << '\n'); 2481 2482 // Compute bounds for all the * directions. 2483 LLVM_DEBUG(dbgs() << "\tBounds[*]\n"); 2484 for (unsigned K = 1; K <= MaxLevels; ++K) { 2485 Bound[K].Iterations = A[K].Iterations ? A[K].Iterations : B[K].Iterations; 2486 Bound[K].Direction = Dependence::DVEntry::ALL; 2487 Bound[K].DirSet = Dependence::DVEntry::NONE; 2488 findBoundsALL(A, B, Bound, K); 2489 #ifndef NDEBUG 2490 LLVM_DEBUG(dbgs() << "\t " << K << '\t'); 2491 if (Bound[K].Lower[Dependence::DVEntry::ALL]) 2492 LLVM_DEBUG(dbgs() << *Bound[K].Lower[Dependence::DVEntry::ALL] << '\t'); 2493 else 2494 LLVM_DEBUG(dbgs() << "-inf\t"); 2495 if (Bound[K].Upper[Dependence::DVEntry::ALL]) 2496 LLVM_DEBUG(dbgs() << *Bound[K].Upper[Dependence::DVEntry::ALL] << '\n'); 2497 else 2498 LLVM_DEBUG(dbgs() << "+inf\n"); 2499 #endif 2500 } 2501 2502 // Test the *, *, *, ... case. 2503 bool Disproved = false; 2504 if (testBounds(Dependence::DVEntry::ALL, 0, Bound, Delta)) { 2505 // Explore the direction vector hierarchy. 2506 unsigned DepthExpanded = 0; 2507 unsigned NewDeps = exploreDirections(1, A, B, Bound, 2508 Loops, DepthExpanded, Delta); 2509 if (NewDeps > 0) { 2510 bool Improved = false; 2511 for (unsigned K = 1; K <= CommonLevels; ++K) { 2512 if (Loops[K]) { 2513 unsigned Old = Result.DV[K - 1].Direction; 2514 Result.DV[K - 1].Direction = Old & Bound[K].DirSet; 2515 Improved |= Old != Result.DV[K - 1].Direction; 2516 if (!Result.DV[K - 1].Direction) { 2517 Improved = false; 2518 Disproved = true; 2519 break; 2520 } 2521 } 2522 } 2523 if (Improved) 2524 ++BanerjeeSuccesses; 2525 } 2526 else { 2527 ++BanerjeeIndependence; 2528 Disproved = true; 2529 } 2530 } 2531 else { 2532 ++BanerjeeIndependence; 2533 Disproved = true; 2534 } 2535 delete [] Bound; 2536 delete [] A; 2537 delete [] B; 2538 return Disproved; 2539 } 2540 2541 2542 // Hierarchically expands the direction vector 2543 // search space, combining the directions of discovered dependences 2544 // in the DirSet field of Bound. Returns the number of distinct 2545 // dependences discovered. If the dependence is disproved, 2546 // it will return 0. 2547 unsigned DependenceInfo::exploreDirections(unsigned Level, CoefficientInfo *A, 2548 CoefficientInfo *B, BoundInfo *Bound, 2549 const SmallBitVector &Loops, 2550 unsigned &DepthExpanded, 2551 const SCEV *Delta) const { 2552 if (Level > CommonLevels) { 2553 // record result 2554 LLVM_DEBUG(dbgs() << "\t["); 2555 for (unsigned K = 1; K <= CommonLevels; ++K) { 2556 if (Loops[K]) { 2557 Bound[K].DirSet |= Bound[K].Direction; 2558 #ifndef NDEBUG 2559 switch (Bound[K].Direction) { 2560 case Dependence::DVEntry::LT: 2561 LLVM_DEBUG(dbgs() << " <"); 2562 break; 2563 case Dependence::DVEntry::EQ: 2564 LLVM_DEBUG(dbgs() << " ="); 2565 break; 2566 case Dependence::DVEntry::GT: 2567 LLVM_DEBUG(dbgs() << " >"); 2568 break; 2569 case Dependence::DVEntry::ALL: 2570 LLVM_DEBUG(dbgs() << " *"); 2571 break; 2572 default: 2573 llvm_unreachable("unexpected Bound[K].Direction"); 2574 } 2575 #endif 2576 } 2577 } 2578 LLVM_DEBUG(dbgs() << " ]\n"); 2579 return 1; 2580 } 2581 if (Loops[Level]) { 2582 if (Level > DepthExpanded) { 2583 DepthExpanded = Level; 2584 // compute bounds for <, =, > at current level 2585 findBoundsLT(A, B, Bound, Level); 2586 findBoundsGT(A, B, Bound, Level); 2587 findBoundsEQ(A, B, Bound, Level); 2588 #ifndef NDEBUG 2589 LLVM_DEBUG(dbgs() << "\tBound for level = " << Level << '\n'); 2590 LLVM_DEBUG(dbgs() << "\t <\t"); 2591 if (Bound[Level].Lower[Dependence::DVEntry::LT]) 2592 LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::LT] 2593 << '\t'); 2594 else 2595 LLVM_DEBUG(dbgs() << "-inf\t"); 2596 if (Bound[Level].Upper[Dependence::DVEntry::LT]) 2597 LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::LT] 2598 << '\n'); 2599 else 2600 LLVM_DEBUG(dbgs() << "+inf\n"); 2601 LLVM_DEBUG(dbgs() << "\t =\t"); 2602 if (Bound[Level].Lower[Dependence::DVEntry::EQ]) 2603 LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::EQ] 2604 << '\t'); 2605 else 2606 LLVM_DEBUG(dbgs() << "-inf\t"); 2607 if (Bound[Level].Upper[Dependence::DVEntry::EQ]) 2608 LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::EQ] 2609 << '\n'); 2610 else 2611 LLVM_DEBUG(dbgs() << "+inf\n"); 2612 LLVM_DEBUG(dbgs() << "\t >\t"); 2613 if (Bound[Level].Lower[Dependence::DVEntry::GT]) 2614 LLVM_DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::GT] 2615 << '\t'); 2616 else 2617 LLVM_DEBUG(dbgs() << "-inf\t"); 2618 if (Bound[Level].Upper[Dependence::DVEntry::GT]) 2619 LLVM_DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::GT] 2620 << '\n'); 2621 else 2622 LLVM_DEBUG(dbgs() << "+inf\n"); 2623 #endif 2624 } 2625 2626 unsigned NewDeps = 0; 2627 2628 // test bounds for <, *, *, ... 2629 if (testBounds(Dependence::DVEntry::LT, Level, Bound, Delta)) 2630 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2631 Loops, DepthExpanded, Delta); 2632 2633 // Test bounds for =, *, *, ... 2634 if (testBounds(Dependence::DVEntry::EQ, Level, Bound, Delta)) 2635 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2636 Loops, DepthExpanded, Delta); 2637 2638 // test bounds for >, *, *, ... 2639 if (testBounds(Dependence::DVEntry::GT, Level, Bound, Delta)) 2640 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2641 Loops, DepthExpanded, Delta); 2642 2643 Bound[Level].Direction = Dependence::DVEntry::ALL; 2644 return NewDeps; 2645 } 2646 else 2647 return exploreDirections(Level + 1, A, B, Bound, Loops, DepthExpanded, Delta); 2648 } 2649 2650 2651 // Returns true iff the current bounds are plausible. 2652 bool DependenceInfo::testBounds(unsigned char DirKind, unsigned Level, 2653 BoundInfo *Bound, const SCEV *Delta) const { 2654 Bound[Level].Direction = DirKind; 2655 if (const SCEV *LowerBound = getLowerBound(Bound)) 2656 if (isKnownPredicate(CmpInst::ICMP_SGT, LowerBound, Delta)) 2657 return false; 2658 if (const SCEV *UpperBound = getUpperBound(Bound)) 2659 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, UpperBound)) 2660 return false; 2661 return true; 2662 } 2663 2664 2665 // Computes the upper and lower bounds for level K 2666 // using the * direction. Records them in Bound. 2667 // Wolfe gives the equations 2668 // 2669 // LB^*_k = (A^-_k - B^+_k)(U_k - L_k) + (A_k - B_k)L_k 2670 // UB^*_k = (A^+_k - B^-_k)(U_k - L_k) + (A_k - B_k)L_k 2671 // 2672 // Since we normalize loops, we can simplify these equations to 2673 // 2674 // LB^*_k = (A^-_k - B^+_k)U_k 2675 // UB^*_k = (A^+_k - B^-_k)U_k 2676 // 2677 // We must be careful to handle the case where the upper bound is unknown. 2678 // Note that the lower bound is always <= 0 2679 // and the upper bound is always >= 0. 2680 void DependenceInfo::findBoundsALL(CoefficientInfo *A, CoefficientInfo *B, 2681 BoundInfo *Bound, unsigned K) const { 2682 Bound[K].Lower[Dependence::DVEntry::ALL] = nullptr; // Default value = -infinity. 2683 Bound[K].Upper[Dependence::DVEntry::ALL] = nullptr; // Default value = +infinity. 2684 if (Bound[K].Iterations) { 2685 Bound[K].Lower[Dependence::DVEntry::ALL] = 2686 SE->getMulExpr(SE->getMinusSCEV(A[K].NegPart, B[K].PosPart), 2687 Bound[K].Iterations); 2688 Bound[K].Upper[Dependence::DVEntry::ALL] = 2689 SE->getMulExpr(SE->getMinusSCEV(A[K].PosPart, B[K].NegPart), 2690 Bound[K].Iterations); 2691 } 2692 else { 2693 // If the difference is 0, we won't need to know the number of iterations. 2694 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].NegPart, B[K].PosPart)) 2695 Bound[K].Lower[Dependence::DVEntry::ALL] = 2696 SE->getZero(A[K].Coeff->getType()); 2697 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].PosPart, B[K].NegPart)) 2698 Bound[K].Upper[Dependence::DVEntry::ALL] = 2699 SE->getZero(A[K].Coeff->getType()); 2700 } 2701 } 2702 2703 2704 // Computes the upper and lower bounds for level K 2705 // using the = direction. Records them in Bound. 2706 // Wolfe gives the equations 2707 // 2708 // LB^=_k = (A_k - B_k)^- (U_k - L_k) + (A_k - B_k)L_k 2709 // UB^=_k = (A_k - B_k)^+ (U_k - L_k) + (A_k - B_k)L_k 2710 // 2711 // Since we normalize loops, we can simplify these equations to 2712 // 2713 // LB^=_k = (A_k - B_k)^- U_k 2714 // UB^=_k = (A_k - B_k)^+ U_k 2715 // 2716 // We must be careful to handle the case where the upper bound is unknown. 2717 // Note that the lower bound is always <= 0 2718 // and the upper bound is always >= 0. 2719 void DependenceInfo::findBoundsEQ(CoefficientInfo *A, CoefficientInfo *B, 2720 BoundInfo *Bound, unsigned K) const { 2721 Bound[K].Lower[Dependence::DVEntry::EQ] = nullptr; // Default value = -infinity. 2722 Bound[K].Upper[Dependence::DVEntry::EQ] = nullptr; // Default value = +infinity. 2723 if (Bound[K].Iterations) { 2724 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); 2725 const SCEV *NegativePart = getNegativePart(Delta); 2726 Bound[K].Lower[Dependence::DVEntry::EQ] = 2727 SE->getMulExpr(NegativePart, Bound[K].Iterations); 2728 const SCEV *PositivePart = getPositivePart(Delta); 2729 Bound[K].Upper[Dependence::DVEntry::EQ] = 2730 SE->getMulExpr(PositivePart, Bound[K].Iterations); 2731 } 2732 else { 2733 // If the positive/negative part of the difference is 0, 2734 // we won't need to know the number of iterations. 2735 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); 2736 const SCEV *NegativePart = getNegativePart(Delta); 2737 if (NegativePart->isZero()) 2738 Bound[K].Lower[Dependence::DVEntry::EQ] = NegativePart; // Zero 2739 const SCEV *PositivePart = getPositivePart(Delta); 2740 if (PositivePart->isZero()) 2741 Bound[K].Upper[Dependence::DVEntry::EQ] = PositivePart; // Zero 2742 } 2743 } 2744 2745 2746 // Computes the upper and lower bounds for level K 2747 // using the < direction. Records them in Bound. 2748 // Wolfe gives the equations 2749 // 2750 // LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2751 // UB^<_k = (A^+_k - B_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2752 // 2753 // Since we normalize loops, we can simplify these equations to 2754 // 2755 // LB^<_k = (A^-_k - B_k)^- (U_k - 1) - B_k 2756 // UB^<_k = (A^+_k - B_k)^+ (U_k - 1) - B_k 2757 // 2758 // We must be careful to handle the case where the upper bound is unknown. 2759 void DependenceInfo::findBoundsLT(CoefficientInfo *A, CoefficientInfo *B, 2760 BoundInfo *Bound, unsigned K) const { 2761 Bound[K].Lower[Dependence::DVEntry::LT] = nullptr; // Default value = -infinity. 2762 Bound[K].Upper[Dependence::DVEntry::LT] = nullptr; // Default value = +infinity. 2763 if (Bound[K].Iterations) { 2764 const SCEV *Iter_1 = SE->getMinusSCEV( 2765 Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType())); 2766 const SCEV *NegPart = 2767 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); 2768 Bound[K].Lower[Dependence::DVEntry::LT] = 2769 SE->getMinusSCEV(SE->getMulExpr(NegPart, Iter_1), B[K].Coeff); 2770 const SCEV *PosPart = 2771 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); 2772 Bound[K].Upper[Dependence::DVEntry::LT] = 2773 SE->getMinusSCEV(SE->getMulExpr(PosPart, Iter_1), B[K].Coeff); 2774 } 2775 else { 2776 // If the positive/negative part of the difference is 0, 2777 // we won't need to know the number of iterations. 2778 const SCEV *NegPart = 2779 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); 2780 if (NegPart->isZero()) 2781 Bound[K].Lower[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); 2782 const SCEV *PosPart = 2783 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); 2784 if (PosPart->isZero()) 2785 Bound[K].Upper[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); 2786 } 2787 } 2788 2789 2790 // Computes the upper and lower bounds for level K 2791 // using the > direction. Records them in Bound. 2792 // Wolfe gives the equations 2793 // 2794 // LB^>_k = (A_k - B^+_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k 2795 // UB^>_k = (A_k - B^-_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k 2796 // 2797 // Since we normalize loops, we can simplify these equations to 2798 // 2799 // LB^>_k = (A_k - B^+_k)^- (U_k - 1) + A_k 2800 // UB^>_k = (A_k - B^-_k)^+ (U_k - 1) + A_k 2801 // 2802 // We must be careful to handle the case where the upper bound is unknown. 2803 void DependenceInfo::findBoundsGT(CoefficientInfo *A, CoefficientInfo *B, 2804 BoundInfo *Bound, unsigned K) const { 2805 Bound[K].Lower[Dependence::DVEntry::GT] = nullptr; // Default value = -infinity. 2806 Bound[K].Upper[Dependence::DVEntry::GT] = nullptr; // Default value = +infinity. 2807 if (Bound[K].Iterations) { 2808 const SCEV *Iter_1 = SE->getMinusSCEV( 2809 Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType())); 2810 const SCEV *NegPart = 2811 getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); 2812 Bound[K].Lower[Dependence::DVEntry::GT] = 2813 SE->getAddExpr(SE->getMulExpr(NegPart, Iter_1), A[K].Coeff); 2814 const SCEV *PosPart = 2815 getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); 2816 Bound[K].Upper[Dependence::DVEntry::GT] = 2817 SE->getAddExpr(SE->getMulExpr(PosPart, Iter_1), A[K].Coeff); 2818 } 2819 else { 2820 // If the positive/negative part of the difference is 0, 2821 // we won't need to know the number of iterations. 2822 const SCEV *NegPart = getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); 2823 if (NegPart->isZero()) 2824 Bound[K].Lower[Dependence::DVEntry::GT] = A[K].Coeff; 2825 const SCEV *PosPart = getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); 2826 if (PosPart->isZero()) 2827 Bound[K].Upper[Dependence::DVEntry::GT] = A[K].Coeff; 2828 } 2829 } 2830 2831 2832 // X^+ = max(X, 0) 2833 const SCEV *DependenceInfo::getPositivePart(const SCEV *X) const { 2834 return SE->getSMaxExpr(X, SE->getZero(X->getType())); 2835 } 2836 2837 2838 // X^- = min(X, 0) 2839 const SCEV *DependenceInfo::getNegativePart(const SCEV *X) const { 2840 return SE->getSMinExpr(X, SE->getZero(X->getType())); 2841 } 2842 2843 2844 // Walks through the subscript, 2845 // collecting each coefficient, the associated loop bounds, 2846 // and recording its positive and negative parts for later use. 2847 DependenceInfo::CoefficientInfo * 2848 DependenceInfo::collectCoeffInfo(const SCEV *Subscript, bool SrcFlag, 2849 const SCEV *&Constant) const { 2850 const SCEV *Zero = SE->getZero(Subscript->getType()); 2851 CoefficientInfo *CI = new CoefficientInfo[MaxLevels + 1]; 2852 for (unsigned K = 1; K <= MaxLevels; ++K) { 2853 CI[K].Coeff = Zero; 2854 CI[K].PosPart = Zero; 2855 CI[K].NegPart = Zero; 2856 CI[K].Iterations = nullptr; 2857 } 2858 while (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Subscript)) { 2859 const Loop *L = AddRec->getLoop(); 2860 unsigned K = SrcFlag ? mapSrcLoop(L) : mapDstLoop(L); 2861 CI[K].Coeff = AddRec->getStepRecurrence(*SE); 2862 CI[K].PosPart = getPositivePart(CI[K].Coeff); 2863 CI[K].NegPart = getNegativePart(CI[K].Coeff); 2864 CI[K].Iterations = collectUpperBound(L, Subscript->getType()); 2865 Subscript = AddRec->getStart(); 2866 } 2867 Constant = Subscript; 2868 #ifndef NDEBUG 2869 LLVM_DEBUG(dbgs() << "\tCoefficient Info\n"); 2870 for (unsigned K = 1; K <= MaxLevels; ++K) { 2871 LLVM_DEBUG(dbgs() << "\t " << K << "\t" << *CI[K].Coeff); 2872 LLVM_DEBUG(dbgs() << "\tPos Part = "); 2873 LLVM_DEBUG(dbgs() << *CI[K].PosPart); 2874 LLVM_DEBUG(dbgs() << "\tNeg Part = "); 2875 LLVM_DEBUG(dbgs() << *CI[K].NegPart); 2876 LLVM_DEBUG(dbgs() << "\tUpper Bound = "); 2877 if (CI[K].Iterations) 2878 LLVM_DEBUG(dbgs() << *CI[K].Iterations); 2879 else 2880 LLVM_DEBUG(dbgs() << "+inf"); 2881 LLVM_DEBUG(dbgs() << '\n'); 2882 } 2883 LLVM_DEBUG(dbgs() << "\t Constant = " << *Subscript << '\n'); 2884 #endif 2885 return CI; 2886 } 2887 2888 2889 // Looks through all the bounds info and 2890 // computes the lower bound given the current direction settings 2891 // at each level. If the lower bound for any level is -inf, 2892 // the result is -inf. 2893 const SCEV *DependenceInfo::getLowerBound(BoundInfo *Bound) const { 2894 const SCEV *Sum = Bound[1].Lower[Bound[1].Direction]; 2895 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { 2896 if (Bound[K].Lower[Bound[K].Direction]) 2897 Sum = SE->getAddExpr(Sum, Bound[K].Lower[Bound[K].Direction]); 2898 else 2899 Sum = nullptr; 2900 } 2901 return Sum; 2902 } 2903 2904 2905 // Looks through all the bounds info and 2906 // computes the upper bound given the current direction settings 2907 // at each level. If the upper bound at any level is +inf, 2908 // the result is +inf. 2909 const SCEV *DependenceInfo::getUpperBound(BoundInfo *Bound) const { 2910 const SCEV *Sum = Bound[1].Upper[Bound[1].Direction]; 2911 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { 2912 if (Bound[K].Upper[Bound[K].Direction]) 2913 Sum = SE->getAddExpr(Sum, Bound[K].Upper[Bound[K].Direction]); 2914 else 2915 Sum = nullptr; 2916 } 2917 return Sum; 2918 } 2919 2920 2921 //===----------------------------------------------------------------------===// 2922 // Constraint manipulation for Delta test. 2923 2924 // Given a linear SCEV, 2925 // return the coefficient (the step) 2926 // corresponding to the specified loop. 2927 // If there isn't one, return 0. 2928 // For example, given a*i + b*j + c*k, finding the coefficient 2929 // corresponding to the j loop would yield b. 2930 const SCEV *DependenceInfo::findCoefficient(const SCEV *Expr, 2931 const Loop *TargetLoop) const { 2932 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 2933 if (!AddRec) 2934 return SE->getZero(Expr->getType()); 2935 if (AddRec->getLoop() == TargetLoop) 2936 return AddRec->getStepRecurrence(*SE); 2937 return findCoefficient(AddRec->getStart(), TargetLoop); 2938 } 2939 2940 2941 // Given a linear SCEV, 2942 // return the SCEV given by zeroing out the coefficient 2943 // corresponding to the specified loop. 2944 // For example, given a*i + b*j + c*k, zeroing the coefficient 2945 // corresponding to the j loop would yield a*i + c*k. 2946 const SCEV *DependenceInfo::zeroCoefficient(const SCEV *Expr, 2947 const Loop *TargetLoop) const { 2948 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 2949 if (!AddRec) 2950 return Expr; // ignore 2951 if (AddRec->getLoop() == TargetLoop) 2952 return AddRec->getStart(); 2953 return SE->getAddRecExpr(zeroCoefficient(AddRec->getStart(), TargetLoop), 2954 AddRec->getStepRecurrence(*SE), 2955 AddRec->getLoop(), 2956 AddRec->getNoWrapFlags()); 2957 } 2958 2959 2960 // Given a linear SCEV Expr, 2961 // return the SCEV given by adding some Value to the 2962 // coefficient corresponding to the specified TargetLoop. 2963 // For example, given a*i + b*j + c*k, adding 1 to the coefficient 2964 // corresponding to the j loop would yield a*i + (b+1)*j + c*k. 2965 const SCEV *DependenceInfo::addToCoefficient(const SCEV *Expr, 2966 const Loop *TargetLoop, 2967 const SCEV *Value) const { 2968 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 2969 if (!AddRec) // create a new addRec 2970 return SE->getAddRecExpr(Expr, 2971 Value, 2972 TargetLoop, 2973 SCEV::FlagAnyWrap); // Worst case, with no info. 2974 if (AddRec->getLoop() == TargetLoop) { 2975 const SCEV *Sum = SE->getAddExpr(AddRec->getStepRecurrence(*SE), Value); 2976 if (Sum->isZero()) 2977 return AddRec->getStart(); 2978 return SE->getAddRecExpr(AddRec->getStart(), 2979 Sum, 2980 AddRec->getLoop(), 2981 AddRec->getNoWrapFlags()); 2982 } 2983 if (SE->isLoopInvariant(AddRec, TargetLoop)) 2984 return SE->getAddRecExpr(AddRec, Value, TargetLoop, SCEV::FlagAnyWrap); 2985 return SE->getAddRecExpr( 2986 addToCoefficient(AddRec->getStart(), TargetLoop, Value), 2987 AddRec->getStepRecurrence(*SE), AddRec->getLoop(), 2988 AddRec->getNoWrapFlags()); 2989 } 2990 2991 2992 // Review the constraints, looking for opportunities 2993 // to simplify a subscript pair (Src and Dst). 2994 // Return true if some simplification occurs. 2995 // If the simplification isn't exact (that is, if it is conservative 2996 // in terms of dependence), set consistent to false. 2997 // Corresponds to Figure 5 from the paper 2998 // 2999 // Practical Dependence Testing 3000 // Goff, Kennedy, Tseng 3001 // PLDI 1991 3002 bool DependenceInfo::propagate(const SCEV *&Src, const SCEV *&Dst, 3003 SmallBitVector &Loops, 3004 SmallVectorImpl<Constraint> &Constraints, 3005 bool &Consistent) { 3006 bool Result = false; 3007 for (unsigned LI : Loops.set_bits()) { 3008 LLVM_DEBUG(dbgs() << "\t Constraint[" << LI << "] is"); 3009 LLVM_DEBUG(Constraints[LI].dump(dbgs())); 3010 if (Constraints[LI].isDistance()) 3011 Result |= propagateDistance(Src, Dst, Constraints[LI], Consistent); 3012 else if (Constraints[LI].isLine()) 3013 Result |= propagateLine(Src, Dst, Constraints[LI], Consistent); 3014 else if (Constraints[LI].isPoint()) 3015 Result |= propagatePoint(Src, Dst, Constraints[LI]); 3016 } 3017 return Result; 3018 } 3019 3020 3021 // Attempt to propagate a distance 3022 // constraint into a subscript pair (Src and Dst). 3023 // Return true if some simplification occurs. 3024 // If the simplification isn't exact (that is, if it is conservative 3025 // in terms of dependence), set consistent to false. 3026 bool DependenceInfo::propagateDistance(const SCEV *&Src, const SCEV *&Dst, 3027 Constraint &CurConstraint, 3028 bool &Consistent) { 3029 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3030 LLVM_DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); 3031 const SCEV *A_K = findCoefficient(Src, CurLoop); 3032 if (A_K->isZero()) 3033 return false; 3034 const SCEV *DA_K = SE->getMulExpr(A_K, CurConstraint.getD()); 3035 Src = SE->getMinusSCEV(Src, DA_K); 3036 Src = zeroCoefficient(Src, CurLoop); 3037 LLVM_DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); 3038 LLVM_DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); 3039 Dst = addToCoefficient(Dst, CurLoop, SE->getNegativeSCEV(A_K)); 3040 LLVM_DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); 3041 if (!findCoefficient(Dst, CurLoop)->isZero()) 3042 Consistent = false; 3043 return true; 3044 } 3045 3046 3047 // Attempt to propagate a line 3048 // constraint into a subscript pair (Src and Dst). 3049 // Return true if some simplification occurs. 3050 // If the simplification isn't exact (that is, if it is conservative 3051 // in terms of dependence), set consistent to false. 3052 bool DependenceInfo::propagateLine(const SCEV *&Src, const SCEV *&Dst, 3053 Constraint &CurConstraint, 3054 bool &Consistent) { 3055 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3056 const SCEV *A = CurConstraint.getA(); 3057 const SCEV *B = CurConstraint.getB(); 3058 const SCEV *C = CurConstraint.getC(); 3059 LLVM_DEBUG(dbgs() << "\t\tA = " << *A << ", B = " << *B << ", C = " << *C 3060 << "\n"); 3061 LLVM_DEBUG(dbgs() << "\t\tSrc = " << *Src << "\n"); 3062 LLVM_DEBUG(dbgs() << "\t\tDst = " << *Dst << "\n"); 3063 if (A->isZero()) { 3064 const SCEVConstant *Bconst = dyn_cast<SCEVConstant>(B); 3065 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3066 if (!Bconst || !Cconst) return false; 3067 APInt Beta = Bconst->getAPInt(); 3068 APInt Charlie = Cconst->getAPInt(); 3069 APInt CdivB = Charlie.sdiv(Beta); 3070 assert(Charlie.srem(Beta) == 0 && "C should be evenly divisible by B"); 3071 const SCEV *AP_K = findCoefficient(Dst, CurLoop); 3072 // Src = SE->getAddExpr(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); 3073 Src = SE->getMinusSCEV(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); 3074 Dst = zeroCoefficient(Dst, CurLoop); 3075 if (!findCoefficient(Src, CurLoop)->isZero()) 3076 Consistent = false; 3077 } 3078 else if (B->isZero()) { 3079 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); 3080 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3081 if (!Aconst || !Cconst) return false; 3082 APInt Alpha = Aconst->getAPInt(); 3083 APInt Charlie = Cconst->getAPInt(); 3084 APInt CdivA = Charlie.sdiv(Alpha); 3085 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); 3086 const SCEV *A_K = findCoefficient(Src, CurLoop); 3087 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); 3088 Src = zeroCoefficient(Src, CurLoop); 3089 if (!findCoefficient(Dst, CurLoop)->isZero()) 3090 Consistent = false; 3091 } 3092 else if (isKnownPredicate(CmpInst::ICMP_EQ, A, B)) { 3093 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); 3094 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3095 if (!Aconst || !Cconst) return false; 3096 APInt Alpha = Aconst->getAPInt(); 3097 APInt Charlie = Cconst->getAPInt(); 3098 APInt CdivA = Charlie.sdiv(Alpha); 3099 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); 3100 const SCEV *A_K = findCoefficient(Src, CurLoop); 3101 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); 3102 Src = zeroCoefficient(Src, CurLoop); 3103 Dst = addToCoefficient(Dst, CurLoop, A_K); 3104 if (!findCoefficient(Dst, CurLoop)->isZero()) 3105 Consistent = false; 3106 } 3107 else { 3108 // paper is incorrect here, or perhaps just misleading 3109 const SCEV *A_K = findCoefficient(Src, CurLoop); 3110 Src = SE->getMulExpr(Src, A); 3111 Dst = SE->getMulExpr(Dst, A); 3112 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, C)); 3113 Src = zeroCoefficient(Src, CurLoop); 3114 Dst = addToCoefficient(Dst, CurLoop, SE->getMulExpr(A_K, B)); 3115 if (!findCoefficient(Dst, CurLoop)->isZero()) 3116 Consistent = false; 3117 } 3118 LLVM_DEBUG(dbgs() << "\t\tnew Src = " << *Src << "\n"); 3119 LLVM_DEBUG(dbgs() << "\t\tnew Dst = " << *Dst << "\n"); 3120 return true; 3121 } 3122 3123 3124 // Attempt to propagate a point 3125 // constraint into a subscript pair (Src and Dst). 3126 // Return true if some simplification occurs. 3127 bool DependenceInfo::propagatePoint(const SCEV *&Src, const SCEV *&Dst, 3128 Constraint &CurConstraint) { 3129 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3130 const SCEV *A_K = findCoefficient(Src, CurLoop); 3131 const SCEV *AP_K = findCoefficient(Dst, CurLoop); 3132 const SCEV *XA_K = SE->getMulExpr(A_K, CurConstraint.getX()); 3133 const SCEV *YAP_K = SE->getMulExpr(AP_K, CurConstraint.getY()); 3134 LLVM_DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); 3135 Src = SE->getAddExpr(Src, SE->getMinusSCEV(XA_K, YAP_K)); 3136 Src = zeroCoefficient(Src, CurLoop); 3137 LLVM_DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); 3138 LLVM_DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); 3139 Dst = zeroCoefficient(Dst, CurLoop); 3140 LLVM_DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); 3141 return true; 3142 } 3143 3144 3145 // Update direction vector entry based on the current constraint. 3146 void DependenceInfo::updateDirection(Dependence::DVEntry &Level, 3147 const Constraint &CurConstraint) const { 3148 LLVM_DEBUG(dbgs() << "\tUpdate direction, constraint ="); 3149 LLVM_DEBUG(CurConstraint.dump(dbgs())); 3150 if (CurConstraint.isAny()) 3151 ; // use defaults 3152 else if (CurConstraint.isDistance()) { 3153 // this one is consistent, the others aren't 3154 Level.Scalar = false; 3155 Level.Distance = CurConstraint.getD(); 3156 unsigned NewDirection = Dependence::DVEntry::NONE; 3157 if (!SE->isKnownNonZero(Level.Distance)) // if may be zero 3158 NewDirection = Dependence::DVEntry::EQ; 3159 if (!SE->isKnownNonPositive(Level.Distance)) // if may be positive 3160 NewDirection |= Dependence::DVEntry::LT; 3161 if (!SE->isKnownNonNegative(Level.Distance)) // if may be negative 3162 NewDirection |= Dependence::DVEntry::GT; 3163 Level.Direction &= NewDirection; 3164 } 3165 else if (CurConstraint.isLine()) { 3166 Level.Scalar = false; 3167 Level.Distance = nullptr; 3168 // direction should be accurate 3169 } 3170 else if (CurConstraint.isPoint()) { 3171 Level.Scalar = false; 3172 Level.Distance = nullptr; 3173 unsigned NewDirection = Dependence::DVEntry::NONE; 3174 if (!isKnownPredicate(CmpInst::ICMP_NE, 3175 CurConstraint.getY(), 3176 CurConstraint.getX())) 3177 // if X may be = Y 3178 NewDirection |= Dependence::DVEntry::EQ; 3179 if (!isKnownPredicate(CmpInst::ICMP_SLE, 3180 CurConstraint.getY(), 3181 CurConstraint.getX())) 3182 // if Y may be > X 3183 NewDirection |= Dependence::DVEntry::LT; 3184 if (!isKnownPredicate(CmpInst::ICMP_SGE, 3185 CurConstraint.getY(), 3186 CurConstraint.getX())) 3187 // if Y may be < X 3188 NewDirection |= Dependence::DVEntry::GT; 3189 Level.Direction &= NewDirection; 3190 } 3191 else 3192 llvm_unreachable("constraint has unexpected kind"); 3193 } 3194 3195 /// Check if we can delinearize the subscripts. If the SCEVs representing the 3196 /// source and destination array references are recurrences on a nested loop, 3197 /// this function flattens the nested recurrences into separate recurrences 3198 /// for each loop level. 3199 bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst, 3200 SmallVectorImpl<Subscript> &Pair) { 3201 assert(isLoadOrStore(Src) && "instruction is not load or store"); 3202 assert(isLoadOrStore(Dst) && "instruction is not load or store"); 3203 Value *SrcPtr = getLoadStorePointerOperand(Src); 3204 Value *DstPtr = getLoadStorePointerOperand(Dst); 3205 3206 Loop *SrcLoop = LI->getLoopFor(Src->getParent()); 3207 Loop *DstLoop = LI->getLoopFor(Dst->getParent()); 3208 3209 // Below code mimics the code in Delinearization.cpp 3210 const SCEV *SrcAccessFn = 3211 SE->getSCEVAtScope(SrcPtr, SrcLoop); 3212 const SCEV *DstAccessFn = 3213 SE->getSCEVAtScope(DstPtr, DstLoop); 3214 3215 const SCEVUnknown *SrcBase = 3216 dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn)); 3217 const SCEVUnknown *DstBase = 3218 dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn)); 3219 3220 if (!SrcBase || !DstBase || SrcBase != DstBase) 3221 return false; 3222 3223 const SCEV *ElementSize = SE->getElementSize(Src); 3224 if (ElementSize != SE->getElementSize(Dst)) 3225 return false; 3226 3227 const SCEV *SrcSCEV = SE->getMinusSCEV(SrcAccessFn, SrcBase); 3228 const SCEV *DstSCEV = SE->getMinusSCEV(DstAccessFn, DstBase); 3229 3230 const SCEVAddRecExpr *SrcAR = dyn_cast<SCEVAddRecExpr>(SrcSCEV); 3231 const SCEVAddRecExpr *DstAR = dyn_cast<SCEVAddRecExpr>(DstSCEV); 3232 if (!SrcAR || !DstAR || !SrcAR->isAffine() || !DstAR->isAffine()) 3233 return false; 3234 3235 // First step: collect parametric terms in both array references. 3236 SmallVector<const SCEV *, 4> Terms; 3237 SE->collectParametricTerms(SrcAR, Terms); 3238 SE->collectParametricTerms(DstAR, Terms); 3239 3240 // Second step: find subscript sizes. 3241 SmallVector<const SCEV *, 4> Sizes; 3242 SE->findArrayDimensions(Terms, Sizes, ElementSize); 3243 3244 // Third step: compute the access functions for each subscript. 3245 SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts; 3246 SE->computeAccessFunctions(SrcAR, SrcSubscripts, Sizes); 3247 SE->computeAccessFunctions(DstAR, DstSubscripts, Sizes); 3248 3249 // Fail when there is only a subscript: that's a linearized access function. 3250 if (SrcSubscripts.size() < 2 || DstSubscripts.size() < 2 || 3251 SrcSubscripts.size() != DstSubscripts.size()) 3252 return false; 3253 3254 int size = SrcSubscripts.size(); 3255 3256 LLVM_DEBUG({ 3257 dbgs() << "\nSrcSubscripts: "; 3258 for (int i = 0; i < size; i++) 3259 dbgs() << *SrcSubscripts[i]; 3260 dbgs() << "\nDstSubscripts: "; 3261 for (int i = 0; i < size; i++) 3262 dbgs() << *DstSubscripts[i]; 3263 }); 3264 3265 // The delinearization transforms a single-subscript MIV dependence test into 3266 // a multi-subscript SIV dependence test that is easier to compute. So we 3267 // resize Pair to contain as many pairs of subscripts as the delinearization 3268 // has found, and then initialize the pairs following the delinearization. 3269 Pair.resize(size); 3270 for (int i = 0; i < size; ++i) { 3271 Pair[i].Src = SrcSubscripts[i]; 3272 Pair[i].Dst = DstSubscripts[i]; 3273 unifySubscriptType(&Pair[i]); 3274 3275 // FIXME: we should record the bounds SrcSizes[i] and DstSizes[i] that the 3276 // delinearization has found, and add these constraints to the dependence 3277 // check to avoid memory accesses overflow from one dimension into another. 3278 // This is related to the problem of determining the existence of data 3279 // dependences in array accesses using a different number of subscripts: in 3280 // C one can access an array A[100][100]; as A[0][9999], *A[9999], etc. 3281 } 3282 3283 return true; 3284 } 3285 3286 //===----------------------------------------------------------------------===// 3287 3288 #ifndef NDEBUG 3289 // For debugging purposes, dump a small bit vector to dbgs(). 3290 static void dumpSmallBitVector(SmallBitVector &BV) { 3291 dbgs() << "{"; 3292 for (unsigned VI : BV.set_bits()) { 3293 dbgs() << VI; 3294 if (BV.find_next(VI) >= 0) 3295 dbgs() << ' '; 3296 } 3297 dbgs() << "}\n"; 3298 } 3299 #endif 3300 3301 // depends - 3302 // Returns NULL if there is no dependence. 3303 // Otherwise, return a Dependence with as many details as possible. 3304 // Corresponds to Section 3.1 in the paper 3305 // 3306 // Practical Dependence Testing 3307 // Goff, Kennedy, Tseng 3308 // PLDI 1991 3309 // 3310 // Care is required to keep the routine below, getSplitIteration(), 3311 // up to date with respect to this routine. 3312 std::unique_ptr<Dependence> 3313 DependenceInfo::depends(Instruction *Src, Instruction *Dst, 3314 bool PossiblyLoopIndependent) { 3315 if (Src == Dst) 3316 PossiblyLoopIndependent = false; 3317 3318 if ((!Src->mayReadFromMemory() && !Src->mayWriteToMemory()) || 3319 (!Dst->mayReadFromMemory() && !Dst->mayWriteToMemory())) 3320 // if both instructions don't reference memory, there's no dependence 3321 return nullptr; 3322 3323 if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) { 3324 // can only analyze simple loads and stores, i.e., no calls, invokes, etc. 3325 LLVM_DEBUG(dbgs() << "can only handle simple loads and stores\n"); 3326 return make_unique<Dependence>(Src, Dst); 3327 } 3328 3329 assert(isLoadOrStore(Src) && "instruction is not load or store"); 3330 assert(isLoadOrStore(Dst) && "instruction is not load or store"); 3331 Value *SrcPtr = getLoadStorePointerOperand(Src); 3332 Value *DstPtr = getLoadStorePointerOperand(Dst); 3333 3334 switch (underlyingObjectsAlias(AA, F->getParent()->getDataLayout(), 3335 MemoryLocation::get(Dst), 3336 MemoryLocation::get(Src))) { 3337 case MayAlias: 3338 case PartialAlias: 3339 // cannot analyse objects if we don't understand their aliasing. 3340 LLVM_DEBUG(dbgs() << "can't analyze may or partial alias\n"); 3341 return make_unique<Dependence>(Src, Dst); 3342 case NoAlias: 3343 // If the objects noalias, they are distinct, accesses are independent. 3344 LLVM_DEBUG(dbgs() << "no alias\n"); 3345 return nullptr; 3346 case MustAlias: 3347 break; // The underlying objects alias; test accesses for dependence. 3348 } 3349 3350 // establish loop nesting levels 3351 establishNestingLevels(Src, Dst); 3352 LLVM_DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n"); 3353 LLVM_DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n"); 3354 3355 FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels); 3356 ++TotalArrayPairs; 3357 3358 unsigned Pairs = 1; 3359 SmallVector<Subscript, 2> Pair(Pairs); 3360 const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); 3361 const SCEV *DstSCEV = SE->getSCEV(DstPtr); 3362 LLVM_DEBUG(dbgs() << " SrcSCEV = " << *SrcSCEV << "\n"); 3363 LLVM_DEBUG(dbgs() << " DstSCEV = " << *DstSCEV << "\n"); 3364 Pair[0].Src = SrcSCEV; 3365 Pair[0].Dst = DstSCEV; 3366 3367 if (Delinearize) { 3368 if (tryDelinearize(Src, Dst, Pair)) { 3369 LLVM_DEBUG(dbgs() << " delinearized\n"); 3370 Pairs = Pair.size(); 3371 } 3372 } 3373 3374 for (unsigned P = 0; P < Pairs; ++P) { 3375 Pair[P].Loops.resize(MaxLevels + 1); 3376 Pair[P].GroupLoops.resize(MaxLevels + 1); 3377 Pair[P].Group.resize(Pairs); 3378 removeMatchingExtensions(&Pair[P]); 3379 Pair[P].Classification = 3380 classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), 3381 Pair[P].Dst, LI->getLoopFor(Dst->getParent()), 3382 Pair[P].Loops); 3383 Pair[P].GroupLoops = Pair[P].Loops; 3384 Pair[P].Group.set(P); 3385 LLVM_DEBUG(dbgs() << " subscript " << P << "\n"); 3386 LLVM_DEBUG(dbgs() << "\tsrc = " << *Pair[P].Src << "\n"); 3387 LLVM_DEBUG(dbgs() << "\tdst = " << *Pair[P].Dst << "\n"); 3388 LLVM_DEBUG(dbgs() << "\tclass = " << Pair[P].Classification << "\n"); 3389 LLVM_DEBUG(dbgs() << "\tloops = "); 3390 LLVM_DEBUG(dumpSmallBitVector(Pair[P].Loops)); 3391 } 3392 3393 SmallBitVector Separable(Pairs); 3394 SmallBitVector Coupled(Pairs); 3395 3396 // Partition subscripts into separable and minimally-coupled groups 3397 // Algorithm in paper is algorithmically better; 3398 // this may be faster in practice. Check someday. 3399 // 3400 // Here's an example of how it works. Consider this code: 3401 // 3402 // for (i = ...) { 3403 // for (j = ...) { 3404 // for (k = ...) { 3405 // for (l = ...) { 3406 // for (m = ...) { 3407 // A[i][j][k][m] = ...; 3408 // ... = A[0][j][l][i + j]; 3409 // } 3410 // } 3411 // } 3412 // } 3413 // } 3414 // 3415 // There are 4 subscripts here: 3416 // 0 [i] and [0] 3417 // 1 [j] and [j] 3418 // 2 [k] and [l] 3419 // 3 [m] and [i + j] 3420 // 3421 // We've already classified each subscript pair as ZIV, SIV, etc., 3422 // and collected all the loops mentioned by pair P in Pair[P].Loops. 3423 // In addition, we've initialized Pair[P].GroupLoops to Pair[P].Loops 3424 // and set Pair[P].Group = {P}. 3425 // 3426 // Src Dst Classification Loops GroupLoops Group 3427 // 0 [i] [0] SIV {1} {1} {0} 3428 // 1 [j] [j] SIV {2} {2} {1} 3429 // 2 [k] [l] RDIV {3,4} {3,4} {2} 3430 // 3 [m] [i + j] MIV {1,2,5} {1,2,5} {3} 3431 // 3432 // For each subscript SI 0 .. 3, we consider each remaining subscript, SJ. 3433 // So, 0 is compared against 1, 2, and 3; 1 is compared against 2 and 3, etc. 3434 // 3435 // We begin by comparing 0 and 1. The intersection of the GroupLoops is empty. 3436 // Next, 0 and 2. Again, the intersection of their GroupLoops is empty. 3437 // Next 0 and 3. The intersection of their GroupLoop = {1}, not empty, 3438 // so Pair[3].Group = {0,3} and Done = false (that is, 0 will not be added 3439 // to either Separable or Coupled). 3440 // 3441 // Next, we consider 1 and 2. The intersection of the GroupLoops is empty. 3442 // Next, 1 and 3. The intersectionof their GroupLoops = {2}, not empty, 3443 // so Pair[3].Group = {0, 1, 3} and Done = false. 3444 // 3445 // Next, we compare 2 against 3. The intersection of the GroupLoops is empty. 3446 // Since Done remains true, we add 2 to the set of Separable pairs. 3447 // 3448 // Finally, we consider 3. There's nothing to compare it with, 3449 // so Done remains true and we add it to the Coupled set. 3450 // Pair[3].Group = {0, 1, 3} and GroupLoops = {1, 2, 5}. 3451 // 3452 // In the end, we've got 1 separable subscript and 1 coupled group. 3453 for (unsigned SI = 0; SI < Pairs; ++SI) { 3454 if (Pair[SI].Classification == Subscript::NonLinear) { 3455 // ignore these, but collect loops for later 3456 ++NonlinearSubscriptPairs; 3457 collectCommonLoops(Pair[SI].Src, 3458 LI->getLoopFor(Src->getParent()), 3459 Pair[SI].Loops); 3460 collectCommonLoops(Pair[SI].Dst, 3461 LI->getLoopFor(Dst->getParent()), 3462 Pair[SI].Loops); 3463 Result.Consistent = false; 3464 } else if (Pair[SI].Classification == Subscript::ZIV) { 3465 // always separable 3466 Separable.set(SI); 3467 } 3468 else { 3469 // SIV, RDIV, or MIV, so check for coupled group 3470 bool Done = true; 3471 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { 3472 SmallBitVector Intersection = Pair[SI].GroupLoops; 3473 Intersection &= Pair[SJ].GroupLoops; 3474 if (Intersection.any()) { 3475 // accumulate set of all the loops in group 3476 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; 3477 // accumulate set of all subscripts in group 3478 Pair[SJ].Group |= Pair[SI].Group; 3479 Done = false; 3480 } 3481 } 3482 if (Done) { 3483 if (Pair[SI].Group.count() == 1) { 3484 Separable.set(SI); 3485 ++SeparableSubscriptPairs; 3486 } 3487 else { 3488 Coupled.set(SI); 3489 ++CoupledSubscriptPairs; 3490 } 3491 } 3492 } 3493 } 3494 3495 LLVM_DEBUG(dbgs() << " Separable = "); 3496 LLVM_DEBUG(dumpSmallBitVector(Separable)); 3497 LLVM_DEBUG(dbgs() << " Coupled = "); 3498 LLVM_DEBUG(dumpSmallBitVector(Coupled)); 3499 3500 Constraint NewConstraint; 3501 NewConstraint.setAny(SE); 3502 3503 // test separable subscripts 3504 for (unsigned SI : Separable.set_bits()) { 3505 LLVM_DEBUG(dbgs() << "testing subscript " << SI); 3506 switch (Pair[SI].Classification) { 3507 case Subscript::ZIV: 3508 LLVM_DEBUG(dbgs() << ", ZIV\n"); 3509 if (testZIV(Pair[SI].Src, Pair[SI].Dst, Result)) 3510 return nullptr; 3511 break; 3512 case Subscript::SIV: { 3513 LLVM_DEBUG(dbgs() << ", SIV\n"); 3514 unsigned Level; 3515 const SCEV *SplitIter = nullptr; 3516 if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level, Result, NewConstraint, 3517 SplitIter)) 3518 return nullptr; 3519 break; 3520 } 3521 case Subscript::RDIV: 3522 LLVM_DEBUG(dbgs() << ", RDIV\n"); 3523 if (testRDIV(Pair[SI].Src, Pair[SI].Dst, Result)) 3524 return nullptr; 3525 break; 3526 case Subscript::MIV: 3527 LLVM_DEBUG(dbgs() << ", MIV\n"); 3528 if (testMIV(Pair[SI].Src, Pair[SI].Dst, Pair[SI].Loops, Result)) 3529 return nullptr; 3530 break; 3531 default: 3532 llvm_unreachable("subscript has unexpected classification"); 3533 } 3534 } 3535 3536 if (Coupled.count()) { 3537 // test coupled subscript groups 3538 LLVM_DEBUG(dbgs() << "starting on coupled subscripts\n"); 3539 LLVM_DEBUG(dbgs() << "MaxLevels + 1 = " << MaxLevels + 1 << "\n"); 3540 SmallVector<Constraint, 4> Constraints(MaxLevels + 1); 3541 for (unsigned II = 0; II <= MaxLevels; ++II) 3542 Constraints[II].setAny(SE); 3543 for (unsigned SI : Coupled.set_bits()) { 3544 LLVM_DEBUG(dbgs() << "testing subscript group " << SI << " { "); 3545 SmallBitVector Group(Pair[SI].Group); 3546 SmallBitVector Sivs(Pairs); 3547 SmallBitVector Mivs(Pairs); 3548 SmallBitVector ConstrainedLevels(MaxLevels + 1); 3549 SmallVector<Subscript *, 4> PairsInGroup; 3550 for (unsigned SJ : Group.set_bits()) { 3551 LLVM_DEBUG(dbgs() << SJ << " "); 3552 if (Pair[SJ].Classification == Subscript::SIV) 3553 Sivs.set(SJ); 3554 else 3555 Mivs.set(SJ); 3556 PairsInGroup.push_back(&Pair[SJ]); 3557 } 3558 unifySubscriptType(PairsInGroup); 3559 LLVM_DEBUG(dbgs() << "}\n"); 3560 while (Sivs.any()) { 3561 bool Changed = false; 3562 for (unsigned SJ : Sivs.set_bits()) { 3563 LLVM_DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n"); 3564 // SJ is an SIV subscript that's part of the current coupled group 3565 unsigned Level; 3566 const SCEV *SplitIter = nullptr; 3567 LLVM_DEBUG(dbgs() << "SIV\n"); 3568 if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, Result, NewConstraint, 3569 SplitIter)) 3570 return nullptr; 3571 ConstrainedLevels.set(Level); 3572 if (intersectConstraints(&Constraints[Level], &NewConstraint)) { 3573 if (Constraints[Level].isEmpty()) { 3574 ++DeltaIndependence; 3575 return nullptr; 3576 } 3577 Changed = true; 3578 } 3579 Sivs.reset(SJ); 3580 } 3581 if (Changed) { 3582 // propagate, possibly creating new SIVs and ZIVs 3583 LLVM_DEBUG(dbgs() << " propagating\n"); 3584 LLVM_DEBUG(dbgs() << "\tMivs = "); 3585 LLVM_DEBUG(dumpSmallBitVector(Mivs)); 3586 for (unsigned SJ : Mivs.set_bits()) { 3587 // SJ is an MIV subscript that's part of the current coupled group 3588 LLVM_DEBUG(dbgs() << "\tSJ = " << SJ << "\n"); 3589 if (propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, 3590 Constraints, Result.Consistent)) { 3591 LLVM_DEBUG(dbgs() << "\t Changed\n"); 3592 ++DeltaPropagations; 3593 Pair[SJ].Classification = 3594 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), 3595 Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), 3596 Pair[SJ].Loops); 3597 switch (Pair[SJ].Classification) { 3598 case Subscript::ZIV: 3599 LLVM_DEBUG(dbgs() << "ZIV\n"); 3600 if (testZIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) 3601 return nullptr; 3602 Mivs.reset(SJ); 3603 break; 3604 case Subscript::SIV: 3605 Sivs.set(SJ); 3606 Mivs.reset(SJ); 3607 break; 3608 case Subscript::RDIV: 3609 case Subscript::MIV: 3610 break; 3611 default: 3612 llvm_unreachable("bad subscript classification"); 3613 } 3614 } 3615 } 3616 } 3617 } 3618 3619 // test & propagate remaining RDIVs 3620 for (unsigned SJ : Mivs.set_bits()) { 3621 if (Pair[SJ].Classification == Subscript::RDIV) { 3622 LLVM_DEBUG(dbgs() << "RDIV test\n"); 3623 if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) 3624 return nullptr; 3625 // I don't yet understand how to propagate RDIV results 3626 Mivs.reset(SJ); 3627 } 3628 } 3629 3630 // test remaining MIVs 3631 // This code is temporary. 3632 // Better to somehow test all remaining subscripts simultaneously. 3633 for (unsigned SJ : Mivs.set_bits()) { 3634 if (Pair[SJ].Classification == Subscript::MIV) { 3635 LLVM_DEBUG(dbgs() << "MIV test\n"); 3636 if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Result)) 3637 return nullptr; 3638 } 3639 else 3640 llvm_unreachable("expected only MIV subscripts at this point"); 3641 } 3642 3643 // update Result.DV from constraint vector 3644 LLVM_DEBUG(dbgs() << " updating\n"); 3645 for (unsigned SJ : ConstrainedLevels.set_bits()) { 3646 if (SJ > CommonLevels) 3647 break; 3648 updateDirection(Result.DV[SJ - 1], Constraints[SJ]); 3649 if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE) 3650 return nullptr; 3651 } 3652 } 3653 } 3654 3655 // Make sure the Scalar flags are set correctly. 3656 SmallBitVector CompleteLoops(MaxLevels + 1); 3657 for (unsigned SI = 0; SI < Pairs; ++SI) 3658 CompleteLoops |= Pair[SI].Loops; 3659 for (unsigned II = 1; II <= CommonLevels; ++II) 3660 if (CompleteLoops[II]) 3661 Result.DV[II - 1].Scalar = false; 3662 3663 if (PossiblyLoopIndependent) { 3664 // Make sure the LoopIndependent flag is set correctly. 3665 // All directions must include equal, otherwise no 3666 // loop-independent dependence is possible. 3667 for (unsigned II = 1; II <= CommonLevels; ++II) { 3668 if (!(Result.getDirection(II) & Dependence::DVEntry::EQ)) { 3669 Result.LoopIndependent = false; 3670 break; 3671 } 3672 } 3673 } 3674 else { 3675 // On the other hand, if all directions are equal and there's no 3676 // loop-independent dependence possible, then no dependence exists. 3677 bool AllEqual = true; 3678 for (unsigned II = 1; II <= CommonLevels; ++II) { 3679 if (Result.getDirection(II) != Dependence::DVEntry::EQ) { 3680 AllEqual = false; 3681 break; 3682 } 3683 } 3684 if (AllEqual) 3685 return nullptr; 3686 } 3687 3688 return make_unique<FullDependence>(std::move(Result)); 3689 } 3690 3691 3692 3693 //===----------------------------------------------------------------------===// 3694 // getSplitIteration - 3695 // Rather than spend rarely-used space recording the splitting iteration 3696 // during the Weak-Crossing SIV test, we re-compute it on demand. 3697 // The re-computation is basically a repeat of the entire dependence test, 3698 // though simplified since we know that the dependence exists. 3699 // It's tedious, since we must go through all propagations, etc. 3700 // 3701 // Care is required to keep this code up to date with respect to the routine 3702 // above, depends(). 3703 // 3704 // Generally, the dependence analyzer will be used to build 3705 // a dependence graph for a function (basically a map from instructions 3706 // to dependences). Looking for cycles in the graph shows us loops 3707 // that cannot be trivially vectorized/parallelized. 3708 // 3709 // We can try to improve the situation by examining all the dependences 3710 // that make up the cycle, looking for ones we can break. 3711 // Sometimes, peeling the first or last iteration of a loop will break 3712 // dependences, and we've got flags for those possibilities. 3713 // Sometimes, splitting a loop at some other iteration will do the trick, 3714 // and we've got a flag for that case. Rather than waste the space to 3715 // record the exact iteration (since we rarely know), we provide 3716 // a method that calculates the iteration. It's a drag that it must work 3717 // from scratch, but wonderful in that it's possible. 3718 // 3719 // Here's an example: 3720 // 3721 // for (i = 0; i < 10; i++) 3722 // A[i] = ... 3723 // ... = A[11 - i] 3724 // 3725 // There's a loop-carried flow dependence from the store to the load, 3726 // found by the weak-crossing SIV test. The dependence will have a flag, 3727 // indicating that the dependence can be broken by splitting the loop. 3728 // Calling getSplitIteration will return 5. 3729 // Splitting the loop breaks the dependence, like so: 3730 // 3731 // for (i = 0; i <= 5; i++) 3732 // A[i] = ... 3733 // ... = A[11 - i] 3734 // for (i = 6; i < 10; i++) 3735 // A[i] = ... 3736 // ... = A[11 - i] 3737 // 3738 // breaks the dependence and allows us to vectorize/parallelize 3739 // both loops. 3740 const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep, 3741 unsigned SplitLevel) { 3742 assert(Dep.isSplitable(SplitLevel) && 3743 "Dep should be splitable at SplitLevel"); 3744 Instruction *Src = Dep.getSrc(); 3745 Instruction *Dst = Dep.getDst(); 3746 assert(Src->mayReadFromMemory() || Src->mayWriteToMemory()); 3747 assert(Dst->mayReadFromMemory() || Dst->mayWriteToMemory()); 3748 assert(isLoadOrStore(Src)); 3749 assert(isLoadOrStore(Dst)); 3750 Value *SrcPtr = getLoadStorePointerOperand(Src); 3751 Value *DstPtr = getLoadStorePointerOperand(Dst); 3752 assert(underlyingObjectsAlias(AA, F->getParent()->getDataLayout(), 3753 MemoryLocation::get(Dst), 3754 MemoryLocation::get(Src)) == MustAlias); 3755 3756 // establish loop nesting levels 3757 establishNestingLevels(Src, Dst); 3758 3759 FullDependence Result(Src, Dst, false, CommonLevels); 3760 3761 unsigned Pairs = 1; 3762 SmallVector<Subscript, 2> Pair(Pairs); 3763 const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); 3764 const SCEV *DstSCEV = SE->getSCEV(DstPtr); 3765 Pair[0].Src = SrcSCEV; 3766 Pair[0].Dst = DstSCEV; 3767 3768 if (Delinearize) { 3769 if (tryDelinearize(Src, Dst, Pair)) { 3770 LLVM_DEBUG(dbgs() << " delinearized\n"); 3771 Pairs = Pair.size(); 3772 } 3773 } 3774 3775 for (unsigned P = 0; P < Pairs; ++P) { 3776 Pair[P].Loops.resize(MaxLevels + 1); 3777 Pair[P].GroupLoops.resize(MaxLevels + 1); 3778 Pair[P].Group.resize(Pairs); 3779 removeMatchingExtensions(&Pair[P]); 3780 Pair[P].Classification = 3781 classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), 3782 Pair[P].Dst, LI->getLoopFor(Dst->getParent()), 3783 Pair[P].Loops); 3784 Pair[P].GroupLoops = Pair[P].Loops; 3785 Pair[P].Group.set(P); 3786 } 3787 3788 SmallBitVector Separable(Pairs); 3789 SmallBitVector Coupled(Pairs); 3790 3791 // partition subscripts into separable and minimally-coupled groups 3792 for (unsigned SI = 0; SI < Pairs; ++SI) { 3793 if (Pair[SI].Classification == Subscript::NonLinear) { 3794 // ignore these, but collect loops for later 3795 collectCommonLoops(Pair[SI].Src, 3796 LI->getLoopFor(Src->getParent()), 3797 Pair[SI].Loops); 3798 collectCommonLoops(Pair[SI].Dst, 3799 LI->getLoopFor(Dst->getParent()), 3800 Pair[SI].Loops); 3801 Result.Consistent = false; 3802 } 3803 else if (Pair[SI].Classification == Subscript::ZIV) 3804 Separable.set(SI); 3805 else { 3806 // SIV, RDIV, or MIV, so check for coupled group 3807 bool Done = true; 3808 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { 3809 SmallBitVector Intersection = Pair[SI].GroupLoops; 3810 Intersection &= Pair[SJ].GroupLoops; 3811 if (Intersection.any()) { 3812 // accumulate set of all the loops in group 3813 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; 3814 // accumulate set of all subscripts in group 3815 Pair[SJ].Group |= Pair[SI].Group; 3816 Done = false; 3817 } 3818 } 3819 if (Done) { 3820 if (Pair[SI].Group.count() == 1) 3821 Separable.set(SI); 3822 else 3823 Coupled.set(SI); 3824 } 3825 } 3826 } 3827 3828 Constraint NewConstraint; 3829 NewConstraint.setAny(SE); 3830 3831 // test separable subscripts 3832 for (unsigned SI : Separable.set_bits()) { 3833 switch (Pair[SI].Classification) { 3834 case Subscript::SIV: { 3835 unsigned Level; 3836 const SCEV *SplitIter = nullptr; 3837 (void) testSIV(Pair[SI].Src, Pair[SI].Dst, Level, 3838 Result, NewConstraint, SplitIter); 3839 if (Level == SplitLevel) { 3840 assert(SplitIter != nullptr); 3841 return SplitIter; 3842 } 3843 break; 3844 } 3845 case Subscript::ZIV: 3846 case Subscript::RDIV: 3847 case Subscript::MIV: 3848 break; 3849 default: 3850 llvm_unreachable("subscript has unexpected classification"); 3851 } 3852 } 3853 3854 if (Coupled.count()) { 3855 // test coupled subscript groups 3856 SmallVector<Constraint, 4> Constraints(MaxLevels + 1); 3857 for (unsigned II = 0; II <= MaxLevels; ++II) 3858 Constraints[II].setAny(SE); 3859 for (unsigned SI : Coupled.set_bits()) { 3860 SmallBitVector Group(Pair[SI].Group); 3861 SmallBitVector Sivs(Pairs); 3862 SmallBitVector Mivs(Pairs); 3863 SmallBitVector ConstrainedLevels(MaxLevels + 1); 3864 for (unsigned SJ : Group.set_bits()) { 3865 if (Pair[SJ].Classification == Subscript::SIV) 3866 Sivs.set(SJ); 3867 else 3868 Mivs.set(SJ); 3869 } 3870 while (Sivs.any()) { 3871 bool Changed = false; 3872 for (unsigned SJ : Sivs.set_bits()) { 3873 // SJ is an SIV subscript that's part of the current coupled group 3874 unsigned Level; 3875 const SCEV *SplitIter = nullptr; 3876 (void) testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, 3877 Result, NewConstraint, SplitIter); 3878 if (Level == SplitLevel && SplitIter) 3879 return SplitIter; 3880 ConstrainedLevels.set(Level); 3881 if (intersectConstraints(&Constraints[Level], &NewConstraint)) 3882 Changed = true; 3883 Sivs.reset(SJ); 3884 } 3885 if (Changed) { 3886 // propagate, possibly creating new SIVs and ZIVs 3887 for (unsigned SJ : Mivs.set_bits()) { 3888 // SJ is an MIV subscript that's part of the current coupled group 3889 if (propagate(Pair[SJ].Src, Pair[SJ].Dst, 3890 Pair[SJ].Loops, Constraints, Result.Consistent)) { 3891 Pair[SJ].Classification = 3892 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), 3893 Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), 3894 Pair[SJ].Loops); 3895 switch (Pair[SJ].Classification) { 3896 case Subscript::ZIV: 3897 Mivs.reset(SJ); 3898 break; 3899 case Subscript::SIV: 3900 Sivs.set(SJ); 3901 Mivs.reset(SJ); 3902 break; 3903 case Subscript::RDIV: 3904 case Subscript::MIV: 3905 break; 3906 default: 3907 llvm_unreachable("bad subscript classification"); 3908 } 3909 } 3910 } 3911 } 3912 } 3913 } 3914 } 3915 llvm_unreachable("somehow reached end of routine"); 3916 return nullptr; 3917 } 3918