1 //===-- DependenceAnalysis.cpp - DA Implementation --------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // DependenceAnalysis is an LLVM pass that analyses dependences between memory 11 // accesses. Currently, it is an (incomplete) implementation of the approach 12 // described in 13 // 14 // Practical Dependence Testing 15 // Goff, Kennedy, Tseng 16 // PLDI 1991 17 // 18 // There's a single entry point that analyzes the dependence between a pair 19 // of memory references in a function, returning either NULL, for no dependence, 20 // or a more-or-less detailed description of the dependence between them. 21 // 22 // Currently, the implementation cannot propagate constraints between 23 // coupled RDIV subscripts and lacks a multi-subscript MIV test. 24 // Both of these are conservative weaknesses; 25 // that is, not a source of correctness problems. 26 // 27 // The implementation depends on the GEP instruction to differentiate 28 // subscripts. Since Clang linearizes some array subscripts, the dependence 29 // analysis is using SCEV->delinearize to recover the representation of multiple 30 // subscripts, and thus avoid the more expensive and less precise MIV tests. The 31 // delinearization is controlled by the flag -da-delinearize. 32 // 33 // We should pay some careful attention to the possibility of integer overflow 34 // in the implementation of the various tests. This could happen with Add, 35 // Subtract, or Multiply, with both APInt's and SCEV's. 36 // 37 // Some non-linear subscript pairs can be handled by the GCD test 38 // (and perhaps other tests). 39 // Should explore how often these things occur. 40 // 41 // Finally, it seems like certain test cases expose weaknesses in the SCEV 42 // simplification, especially in the handling of sign and zero extensions. 43 // It could be useful to spend time exploring these. 44 // 45 // Please note that this is work in progress and the interface is subject to 46 // change. 47 // 48 //===----------------------------------------------------------------------===// 49 // // 50 // In memory of Ken Kennedy, 1945 - 2007 // 51 // // 52 //===----------------------------------------------------------------------===// 53 54 #include "llvm/Analysis/DependenceAnalysis.h" 55 #include "llvm/ADT/STLExtras.h" 56 #include "llvm/ADT/Statistic.h" 57 #include "llvm/Analysis/AliasAnalysis.h" 58 #include "llvm/Analysis/LoopInfo.h" 59 #include "llvm/Analysis/ScalarEvolution.h" 60 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 61 #include "llvm/Analysis/ValueTracking.h" 62 #include "llvm/IR/InstIterator.h" 63 #include "llvm/IR/Module.h" 64 #include "llvm/IR/Operator.h" 65 #include "llvm/Support/CommandLine.h" 66 #include "llvm/Support/Debug.h" 67 #include "llvm/Support/ErrorHandling.h" 68 #include "llvm/Support/raw_ostream.h" 69 70 using namespace llvm; 71 72 #define DEBUG_TYPE "da" 73 74 //===----------------------------------------------------------------------===// 75 // statistics 76 77 STATISTIC(TotalArrayPairs, "Array pairs tested"); 78 STATISTIC(SeparableSubscriptPairs, "Separable subscript pairs"); 79 STATISTIC(CoupledSubscriptPairs, "Coupled subscript pairs"); 80 STATISTIC(NonlinearSubscriptPairs, "Nonlinear subscript pairs"); 81 STATISTIC(ZIVapplications, "ZIV applications"); 82 STATISTIC(ZIVindependence, "ZIV independence"); 83 STATISTIC(StrongSIVapplications, "Strong SIV applications"); 84 STATISTIC(StrongSIVsuccesses, "Strong SIV successes"); 85 STATISTIC(StrongSIVindependence, "Strong SIV independence"); 86 STATISTIC(WeakCrossingSIVapplications, "Weak-Crossing SIV applications"); 87 STATISTIC(WeakCrossingSIVsuccesses, "Weak-Crossing SIV successes"); 88 STATISTIC(WeakCrossingSIVindependence, "Weak-Crossing SIV independence"); 89 STATISTIC(ExactSIVapplications, "Exact SIV applications"); 90 STATISTIC(ExactSIVsuccesses, "Exact SIV successes"); 91 STATISTIC(ExactSIVindependence, "Exact SIV independence"); 92 STATISTIC(WeakZeroSIVapplications, "Weak-Zero SIV applications"); 93 STATISTIC(WeakZeroSIVsuccesses, "Weak-Zero SIV successes"); 94 STATISTIC(WeakZeroSIVindependence, "Weak-Zero SIV independence"); 95 STATISTIC(ExactRDIVapplications, "Exact RDIV applications"); 96 STATISTIC(ExactRDIVindependence, "Exact RDIV independence"); 97 STATISTIC(SymbolicRDIVapplications, "Symbolic RDIV applications"); 98 STATISTIC(SymbolicRDIVindependence, "Symbolic RDIV independence"); 99 STATISTIC(DeltaApplications, "Delta applications"); 100 STATISTIC(DeltaSuccesses, "Delta successes"); 101 STATISTIC(DeltaIndependence, "Delta independence"); 102 STATISTIC(DeltaPropagations, "Delta propagations"); 103 STATISTIC(GCDapplications, "GCD applications"); 104 STATISTIC(GCDsuccesses, "GCD successes"); 105 STATISTIC(GCDindependence, "GCD independence"); 106 STATISTIC(BanerjeeApplications, "Banerjee applications"); 107 STATISTIC(BanerjeeIndependence, "Banerjee independence"); 108 STATISTIC(BanerjeeSuccesses, "Banerjee successes"); 109 110 static cl::opt<bool> 111 Delinearize("da-delinearize", cl::init(false), cl::Hidden, cl::ZeroOrMore, 112 cl::desc("Try to delinearize array references.")); 113 114 //===----------------------------------------------------------------------===// 115 // basics 116 117 DependenceAnalysis::Result 118 DependenceAnalysis::run(Function &F, FunctionAnalysisManager &FAM) { 119 auto &AA = FAM.getResult<AAManager>(F); 120 auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F); 121 auto &LI = FAM.getResult<LoopAnalysis>(F); 122 return DependenceInfo(&F, &AA, &SE, &LI); 123 } 124 125 AnalysisKey DependenceAnalysis::Key; 126 127 INITIALIZE_PASS_BEGIN(DependenceAnalysisWrapperPass, "da", 128 "Dependence Analysis", true, true) 129 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 130 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) 131 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 132 INITIALIZE_PASS_END(DependenceAnalysisWrapperPass, "da", "Dependence Analysis", 133 true, true) 134 135 char DependenceAnalysisWrapperPass::ID = 0; 136 137 FunctionPass *llvm::createDependenceAnalysisWrapperPass() { 138 return new DependenceAnalysisWrapperPass(); 139 } 140 141 bool DependenceAnalysisWrapperPass::runOnFunction(Function &F) { 142 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); 143 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 144 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 145 info.reset(new DependenceInfo(&F, &AA, &SE, &LI)); 146 return false; 147 } 148 149 DependenceInfo &DependenceAnalysisWrapperPass::getDI() const { return *info; } 150 151 void DependenceAnalysisWrapperPass::releaseMemory() { info.reset(); } 152 153 void DependenceAnalysisWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { 154 AU.setPreservesAll(); 155 AU.addRequiredTransitive<AAResultsWrapperPass>(); 156 AU.addRequiredTransitive<ScalarEvolutionWrapperPass>(); 157 AU.addRequiredTransitive<LoopInfoWrapperPass>(); 158 } 159 160 161 // Used to test the dependence analyzer. 162 // Looks through the function, noting loads and stores. 163 // Calls depends() on every possible pair and prints out the result. 164 // Ignores all other instructions. 165 static void dumpExampleDependence(raw_ostream &OS, DependenceInfo *DA) { 166 auto *F = DA->getFunction(); 167 for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F); SrcI != SrcE; 168 ++SrcI) { 169 if (isa<StoreInst>(*SrcI) || isa<LoadInst>(*SrcI)) { 170 for (inst_iterator DstI = SrcI, DstE = inst_end(F); 171 DstI != DstE; ++DstI) { 172 if (isa<StoreInst>(*DstI) || isa<LoadInst>(*DstI)) { 173 OS << "da analyze - "; 174 if (auto D = DA->depends(&*SrcI, &*DstI, true)) { 175 D->dump(OS); 176 for (unsigned Level = 1; Level <= D->getLevels(); Level++) { 177 if (D->isSplitable(Level)) { 178 OS << "da analyze - split level = " << Level; 179 OS << ", iteration = " << *DA->getSplitIteration(*D, Level); 180 OS << "!\n"; 181 } 182 } 183 } 184 else 185 OS << "none!\n"; 186 } 187 } 188 } 189 } 190 } 191 192 void DependenceAnalysisWrapperPass::print(raw_ostream &OS, 193 const Module *) const { 194 dumpExampleDependence(OS, info.get()); 195 } 196 197 //===----------------------------------------------------------------------===// 198 // Dependence methods 199 200 // Returns true if this is an input dependence. 201 bool Dependence::isInput() const { 202 return Src->mayReadFromMemory() && Dst->mayReadFromMemory(); 203 } 204 205 206 // Returns true if this is an output dependence. 207 bool Dependence::isOutput() const { 208 return Src->mayWriteToMemory() && Dst->mayWriteToMemory(); 209 } 210 211 212 // Returns true if this is an flow (aka true) dependence. 213 bool Dependence::isFlow() const { 214 return Src->mayWriteToMemory() && Dst->mayReadFromMemory(); 215 } 216 217 218 // Returns true if this is an anti dependence. 219 bool Dependence::isAnti() const { 220 return Src->mayReadFromMemory() && Dst->mayWriteToMemory(); 221 } 222 223 224 // Returns true if a particular level is scalar; that is, 225 // if no subscript in the source or destination mention the induction 226 // variable associated with the loop at this level. 227 // Leave this out of line, so it will serve as a virtual method anchor 228 bool Dependence::isScalar(unsigned level) const { 229 return false; 230 } 231 232 233 //===----------------------------------------------------------------------===// 234 // FullDependence methods 235 236 FullDependence::FullDependence(Instruction *Source, Instruction *Destination, 237 bool PossiblyLoopIndependent, 238 unsigned CommonLevels) 239 : Dependence(Source, Destination), Levels(CommonLevels), 240 LoopIndependent(PossiblyLoopIndependent) { 241 Consistent = true; 242 if (CommonLevels) 243 DV = make_unique<DVEntry[]>(CommonLevels); 244 } 245 246 // The rest are simple getters that hide the implementation. 247 248 // getDirection - Returns the direction associated with a particular level. 249 unsigned FullDependence::getDirection(unsigned Level) const { 250 assert(0 < Level && Level <= Levels && "Level out of range"); 251 return DV[Level - 1].Direction; 252 } 253 254 255 // Returns the distance (or NULL) associated with a particular level. 256 const SCEV *FullDependence::getDistance(unsigned Level) const { 257 assert(0 < Level && Level <= Levels && "Level out of range"); 258 return DV[Level - 1].Distance; 259 } 260 261 262 // Returns true if a particular level is scalar; that is, 263 // if no subscript in the source or destination mention the induction 264 // variable associated with the loop at this level. 265 bool FullDependence::isScalar(unsigned Level) const { 266 assert(0 < Level && Level <= Levels && "Level out of range"); 267 return DV[Level - 1].Scalar; 268 } 269 270 271 // Returns true if peeling the first iteration from this loop 272 // will break this dependence. 273 bool FullDependence::isPeelFirst(unsigned Level) const { 274 assert(0 < Level && Level <= Levels && "Level out of range"); 275 return DV[Level - 1].PeelFirst; 276 } 277 278 279 // Returns true if peeling the last iteration from this loop 280 // will break this dependence. 281 bool FullDependence::isPeelLast(unsigned Level) const { 282 assert(0 < Level && Level <= Levels && "Level out of range"); 283 return DV[Level - 1].PeelLast; 284 } 285 286 287 // Returns true if splitting this loop will break the dependence. 288 bool FullDependence::isSplitable(unsigned Level) const { 289 assert(0 < Level && Level <= Levels && "Level out of range"); 290 return DV[Level - 1].Splitable; 291 } 292 293 294 //===----------------------------------------------------------------------===// 295 // DependenceInfo::Constraint methods 296 297 // If constraint is a point <X, Y>, returns X. 298 // Otherwise assert. 299 const SCEV *DependenceInfo::Constraint::getX() const { 300 assert(Kind == Point && "Kind should be Point"); 301 return A; 302 } 303 304 305 // If constraint is a point <X, Y>, returns Y. 306 // Otherwise assert. 307 const SCEV *DependenceInfo::Constraint::getY() const { 308 assert(Kind == Point && "Kind should be Point"); 309 return B; 310 } 311 312 313 // If constraint is a line AX + BY = C, returns A. 314 // Otherwise assert. 315 const SCEV *DependenceInfo::Constraint::getA() const { 316 assert((Kind == Line || Kind == Distance) && 317 "Kind should be Line (or Distance)"); 318 return A; 319 } 320 321 322 // If constraint is a line AX + BY = C, returns B. 323 // Otherwise assert. 324 const SCEV *DependenceInfo::Constraint::getB() const { 325 assert((Kind == Line || Kind == Distance) && 326 "Kind should be Line (or Distance)"); 327 return B; 328 } 329 330 331 // If constraint is a line AX + BY = C, returns C. 332 // Otherwise assert. 333 const SCEV *DependenceInfo::Constraint::getC() const { 334 assert((Kind == Line || Kind == Distance) && 335 "Kind should be Line (or Distance)"); 336 return C; 337 } 338 339 340 // If constraint is a distance, returns D. 341 // Otherwise assert. 342 const SCEV *DependenceInfo::Constraint::getD() const { 343 assert(Kind == Distance && "Kind should be Distance"); 344 return SE->getNegativeSCEV(C); 345 } 346 347 348 // Returns the loop associated with this constraint. 349 const Loop *DependenceInfo::Constraint::getAssociatedLoop() const { 350 assert((Kind == Distance || Kind == Line || Kind == Point) && 351 "Kind should be Distance, Line, or Point"); 352 return AssociatedLoop; 353 } 354 355 void DependenceInfo::Constraint::setPoint(const SCEV *X, const SCEV *Y, 356 const Loop *CurLoop) { 357 Kind = Point; 358 A = X; 359 B = Y; 360 AssociatedLoop = CurLoop; 361 } 362 363 void DependenceInfo::Constraint::setLine(const SCEV *AA, const SCEV *BB, 364 const SCEV *CC, const Loop *CurLoop) { 365 Kind = Line; 366 A = AA; 367 B = BB; 368 C = CC; 369 AssociatedLoop = CurLoop; 370 } 371 372 void DependenceInfo::Constraint::setDistance(const SCEV *D, 373 const Loop *CurLoop) { 374 Kind = Distance; 375 A = SE->getOne(D->getType()); 376 B = SE->getNegativeSCEV(A); 377 C = SE->getNegativeSCEV(D); 378 AssociatedLoop = CurLoop; 379 } 380 381 void DependenceInfo::Constraint::setEmpty() { Kind = Empty; } 382 383 void DependenceInfo::Constraint::setAny(ScalarEvolution *NewSE) { 384 SE = NewSE; 385 Kind = Any; 386 } 387 388 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 389 // For debugging purposes. Dumps the constraint out to OS. 390 LLVM_DUMP_METHOD void DependenceInfo::Constraint::dump(raw_ostream &OS) const { 391 if (isEmpty()) 392 OS << " Empty\n"; 393 else if (isAny()) 394 OS << " Any\n"; 395 else if (isPoint()) 396 OS << " Point is <" << *getX() << ", " << *getY() << ">\n"; 397 else if (isDistance()) 398 OS << " Distance is " << *getD() << 399 " (" << *getA() << "*X + " << *getB() << "*Y = " << *getC() << ")\n"; 400 else if (isLine()) 401 OS << " Line is " << *getA() << "*X + " << 402 *getB() << "*Y = " << *getC() << "\n"; 403 else 404 llvm_unreachable("unknown constraint type in Constraint::dump"); 405 } 406 #endif 407 408 409 // Updates X with the intersection 410 // of the Constraints X and Y. Returns true if X has changed. 411 // Corresponds to Figure 4 from the paper 412 // 413 // Practical Dependence Testing 414 // Goff, Kennedy, Tseng 415 // PLDI 1991 416 bool DependenceInfo::intersectConstraints(Constraint *X, const Constraint *Y) { 417 ++DeltaApplications; 418 DEBUG(dbgs() << "\tintersect constraints\n"); 419 DEBUG(dbgs() << "\t X ="; X->dump(dbgs())); 420 DEBUG(dbgs() << "\t Y ="; Y->dump(dbgs())); 421 assert(!Y->isPoint() && "Y must not be a Point"); 422 if (X->isAny()) { 423 if (Y->isAny()) 424 return false; 425 *X = *Y; 426 return true; 427 } 428 if (X->isEmpty()) 429 return false; 430 if (Y->isEmpty()) { 431 X->setEmpty(); 432 return true; 433 } 434 435 if (X->isDistance() && Y->isDistance()) { 436 DEBUG(dbgs() << "\t intersect 2 distances\n"); 437 if (isKnownPredicate(CmpInst::ICMP_EQ, X->getD(), Y->getD())) 438 return false; 439 if (isKnownPredicate(CmpInst::ICMP_NE, X->getD(), Y->getD())) { 440 X->setEmpty(); 441 ++DeltaSuccesses; 442 return true; 443 } 444 // Hmmm, interesting situation. 445 // I guess if either is constant, keep it and ignore the other. 446 if (isa<SCEVConstant>(Y->getD())) { 447 *X = *Y; 448 return true; 449 } 450 return false; 451 } 452 453 // At this point, the pseudo-code in Figure 4 of the paper 454 // checks if (X->isPoint() && Y->isPoint()). 455 // This case can't occur in our implementation, 456 // since a Point can only arise as the result of intersecting 457 // two Line constraints, and the right-hand value, Y, is never 458 // the result of an intersection. 459 assert(!(X->isPoint() && Y->isPoint()) && 460 "We shouldn't ever see X->isPoint() && Y->isPoint()"); 461 462 if (X->isLine() && Y->isLine()) { 463 DEBUG(dbgs() << "\t intersect 2 lines\n"); 464 const SCEV *Prod1 = SE->getMulExpr(X->getA(), Y->getB()); 465 const SCEV *Prod2 = SE->getMulExpr(X->getB(), Y->getA()); 466 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) { 467 // slopes are equal, so lines are parallel 468 DEBUG(dbgs() << "\t\tsame slope\n"); 469 Prod1 = SE->getMulExpr(X->getC(), Y->getB()); 470 Prod2 = SE->getMulExpr(X->getB(), Y->getC()); 471 if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) 472 return false; 473 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { 474 X->setEmpty(); 475 ++DeltaSuccesses; 476 return true; 477 } 478 return false; 479 } 480 if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) { 481 // slopes differ, so lines intersect 482 DEBUG(dbgs() << "\t\tdifferent slopes\n"); 483 const SCEV *C1B2 = SE->getMulExpr(X->getC(), Y->getB()); 484 const SCEV *C1A2 = SE->getMulExpr(X->getC(), Y->getA()); 485 const SCEV *C2B1 = SE->getMulExpr(Y->getC(), X->getB()); 486 const SCEV *C2A1 = SE->getMulExpr(Y->getC(), X->getA()); 487 const SCEV *A1B2 = SE->getMulExpr(X->getA(), Y->getB()); 488 const SCEV *A2B1 = SE->getMulExpr(Y->getA(), X->getB()); 489 const SCEVConstant *C1A2_C2A1 = 490 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1A2, C2A1)); 491 const SCEVConstant *C1B2_C2B1 = 492 dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1B2, C2B1)); 493 const SCEVConstant *A1B2_A2B1 = 494 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A1B2, A2B1)); 495 const SCEVConstant *A2B1_A1B2 = 496 dyn_cast<SCEVConstant>(SE->getMinusSCEV(A2B1, A1B2)); 497 if (!C1B2_C2B1 || !C1A2_C2A1 || 498 !A1B2_A2B1 || !A2B1_A1B2) 499 return false; 500 APInt Xtop = C1B2_C2B1->getAPInt(); 501 APInt Xbot = A1B2_A2B1->getAPInt(); 502 APInt Ytop = C1A2_C2A1->getAPInt(); 503 APInt Ybot = A2B1_A1B2->getAPInt(); 504 DEBUG(dbgs() << "\t\tXtop = " << Xtop << "\n"); 505 DEBUG(dbgs() << "\t\tXbot = " << Xbot << "\n"); 506 DEBUG(dbgs() << "\t\tYtop = " << Ytop << "\n"); 507 DEBUG(dbgs() << "\t\tYbot = " << Ybot << "\n"); 508 APInt Xq = Xtop; // these need to be initialized, even 509 APInt Xr = Xtop; // though they're just going to be overwritten 510 APInt::sdivrem(Xtop, Xbot, Xq, Xr); 511 APInt Yq = Ytop; 512 APInt Yr = Ytop; 513 APInt::sdivrem(Ytop, Ybot, Yq, Yr); 514 if (Xr != 0 || Yr != 0) { 515 X->setEmpty(); 516 ++DeltaSuccesses; 517 return true; 518 } 519 DEBUG(dbgs() << "\t\tX = " << Xq << ", Y = " << Yq << "\n"); 520 if (Xq.slt(0) || Yq.slt(0)) { 521 X->setEmpty(); 522 ++DeltaSuccesses; 523 return true; 524 } 525 if (const SCEVConstant *CUB = 526 collectConstantUpperBound(X->getAssociatedLoop(), Prod1->getType())) { 527 const APInt &UpperBound = CUB->getAPInt(); 528 DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n"); 529 if (Xq.sgt(UpperBound) || Yq.sgt(UpperBound)) { 530 X->setEmpty(); 531 ++DeltaSuccesses; 532 return true; 533 } 534 } 535 X->setPoint(SE->getConstant(Xq), 536 SE->getConstant(Yq), 537 X->getAssociatedLoop()); 538 ++DeltaSuccesses; 539 return true; 540 } 541 return false; 542 } 543 544 // if (X->isLine() && Y->isPoint()) This case can't occur. 545 assert(!(X->isLine() && Y->isPoint()) && "This case should never occur"); 546 547 if (X->isPoint() && Y->isLine()) { 548 DEBUG(dbgs() << "\t intersect Point and Line\n"); 549 const SCEV *A1X1 = SE->getMulExpr(Y->getA(), X->getX()); 550 const SCEV *B1Y1 = SE->getMulExpr(Y->getB(), X->getY()); 551 const SCEV *Sum = SE->getAddExpr(A1X1, B1Y1); 552 if (isKnownPredicate(CmpInst::ICMP_EQ, Sum, Y->getC())) 553 return false; 554 if (isKnownPredicate(CmpInst::ICMP_NE, Sum, Y->getC())) { 555 X->setEmpty(); 556 ++DeltaSuccesses; 557 return true; 558 } 559 return false; 560 } 561 562 llvm_unreachable("shouldn't reach the end of Constraint intersection"); 563 return false; 564 } 565 566 567 //===----------------------------------------------------------------------===// 568 // DependenceInfo methods 569 570 // For debugging purposes. Dumps a dependence to OS. 571 void Dependence::dump(raw_ostream &OS) const { 572 bool Splitable = false; 573 if (isConfused()) 574 OS << "confused"; 575 else { 576 if (isConsistent()) 577 OS << "consistent "; 578 if (isFlow()) 579 OS << "flow"; 580 else if (isOutput()) 581 OS << "output"; 582 else if (isAnti()) 583 OS << "anti"; 584 else if (isInput()) 585 OS << "input"; 586 unsigned Levels = getLevels(); 587 OS << " ["; 588 for (unsigned II = 1; II <= Levels; ++II) { 589 if (isSplitable(II)) 590 Splitable = true; 591 if (isPeelFirst(II)) 592 OS << 'p'; 593 const SCEV *Distance = getDistance(II); 594 if (Distance) 595 OS << *Distance; 596 else if (isScalar(II)) 597 OS << "S"; 598 else { 599 unsigned Direction = getDirection(II); 600 if (Direction == DVEntry::ALL) 601 OS << "*"; 602 else { 603 if (Direction & DVEntry::LT) 604 OS << "<"; 605 if (Direction & DVEntry::EQ) 606 OS << "="; 607 if (Direction & DVEntry::GT) 608 OS << ">"; 609 } 610 } 611 if (isPeelLast(II)) 612 OS << 'p'; 613 if (II < Levels) 614 OS << " "; 615 } 616 if (isLoopIndependent()) 617 OS << "|<"; 618 OS << "]"; 619 if (Splitable) 620 OS << " splitable"; 621 } 622 OS << "!\n"; 623 } 624 625 static AliasResult underlyingObjectsAlias(AliasAnalysis *AA, 626 const DataLayout &DL, const Value *A, 627 const Value *B) { 628 const Value *AObj = GetUnderlyingObject(A, DL); 629 const Value *BObj = GetUnderlyingObject(B, DL); 630 return AA->alias(AObj, DL.getTypeStoreSize(AObj->getType()), 631 BObj, DL.getTypeStoreSize(BObj->getType())); 632 } 633 634 635 // Returns true if the load or store can be analyzed. Atomic and volatile 636 // operations have properties which this analysis does not understand. 637 static 638 bool isLoadOrStore(const Instruction *I) { 639 if (const LoadInst *LI = dyn_cast<LoadInst>(I)) 640 return LI->isUnordered(); 641 else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) 642 return SI->isUnordered(); 643 return false; 644 } 645 646 647 static 648 Value *getPointerOperand(Instruction *I) { 649 if (LoadInst *LI = dyn_cast<LoadInst>(I)) 650 return LI->getPointerOperand(); 651 if (StoreInst *SI = dyn_cast<StoreInst>(I)) 652 return SI->getPointerOperand(); 653 llvm_unreachable("Value is not load or store instruction"); 654 return nullptr; 655 } 656 657 658 // Examines the loop nesting of the Src and Dst 659 // instructions and establishes their shared loops. Sets the variables 660 // CommonLevels, SrcLevels, and MaxLevels. 661 // The source and destination instructions needn't be contained in the same 662 // loop. The routine establishNestingLevels finds the level of most deeply 663 // nested loop that contains them both, CommonLevels. An instruction that's 664 // not contained in a loop is at level = 0. MaxLevels is equal to the level 665 // of the source plus the level of the destination, minus CommonLevels. 666 // This lets us allocate vectors MaxLevels in length, with room for every 667 // distinct loop referenced in both the source and destination subscripts. 668 // The variable SrcLevels is the nesting depth of the source instruction. 669 // It's used to help calculate distinct loops referenced by the destination. 670 // Here's the map from loops to levels: 671 // 0 - unused 672 // 1 - outermost common loop 673 // ... - other common loops 674 // CommonLevels - innermost common loop 675 // ... - loops containing Src but not Dst 676 // SrcLevels - innermost loop containing Src but not Dst 677 // ... - loops containing Dst but not Src 678 // MaxLevels - innermost loops containing Dst but not Src 679 // Consider the follow code fragment: 680 // for (a = ...) { 681 // for (b = ...) { 682 // for (c = ...) { 683 // for (d = ...) { 684 // A[] = ...; 685 // } 686 // } 687 // for (e = ...) { 688 // for (f = ...) { 689 // for (g = ...) { 690 // ... = A[]; 691 // } 692 // } 693 // } 694 // } 695 // } 696 // If we're looking at the possibility of a dependence between the store 697 // to A (the Src) and the load from A (the Dst), we'll note that they 698 // have 2 loops in common, so CommonLevels will equal 2 and the direction 699 // vector for Result will have 2 entries. SrcLevels = 4 and MaxLevels = 7. 700 // A map from loop names to loop numbers would look like 701 // a - 1 702 // b - 2 = CommonLevels 703 // c - 3 704 // d - 4 = SrcLevels 705 // e - 5 706 // f - 6 707 // g - 7 = MaxLevels 708 void DependenceInfo::establishNestingLevels(const Instruction *Src, 709 const Instruction *Dst) { 710 const BasicBlock *SrcBlock = Src->getParent(); 711 const BasicBlock *DstBlock = Dst->getParent(); 712 unsigned SrcLevel = LI->getLoopDepth(SrcBlock); 713 unsigned DstLevel = LI->getLoopDepth(DstBlock); 714 const Loop *SrcLoop = LI->getLoopFor(SrcBlock); 715 const Loop *DstLoop = LI->getLoopFor(DstBlock); 716 SrcLevels = SrcLevel; 717 MaxLevels = SrcLevel + DstLevel; 718 while (SrcLevel > DstLevel) { 719 SrcLoop = SrcLoop->getParentLoop(); 720 SrcLevel--; 721 } 722 while (DstLevel > SrcLevel) { 723 DstLoop = DstLoop->getParentLoop(); 724 DstLevel--; 725 } 726 while (SrcLoop != DstLoop) { 727 SrcLoop = SrcLoop->getParentLoop(); 728 DstLoop = DstLoop->getParentLoop(); 729 SrcLevel--; 730 } 731 CommonLevels = SrcLevel; 732 MaxLevels -= CommonLevels; 733 } 734 735 736 // Given one of the loops containing the source, return 737 // its level index in our numbering scheme. 738 unsigned DependenceInfo::mapSrcLoop(const Loop *SrcLoop) const { 739 return SrcLoop->getLoopDepth(); 740 } 741 742 743 // Given one of the loops containing the destination, 744 // return its level index in our numbering scheme. 745 unsigned DependenceInfo::mapDstLoop(const Loop *DstLoop) const { 746 unsigned D = DstLoop->getLoopDepth(); 747 if (D > CommonLevels) 748 return D - CommonLevels + SrcLevels; 749 else 750 return D; 751 } 752 753 754 // Returns true if Expression is loop invariant in LoopNest. 755 bool DependenceInfo::isLoopInvariant(const SCEV *Expression, 756 const Loop *LoopNest) const { 757 if (!LoopNest) 758 return true; 759 return SE->isLoopInvariant(Expression, LoopNest) && 760 isLoopInvariant(Expression, LoopNest->getParentLoop()); 761 } 762 763 764 765 // Finds the set of loops from the LoopNest that 766 // have a level <= CommonLevels and are referred to by the SCEV Expression. 767 void DependenceInfo::collectCommonLoops(const SCEV *Expression, 768 const Loop *LoopNest, 769 SmallBitVector &Loops) const { 770 while (LoopNest) { 771 unsigned Level = LoopNest->getLoopDepth(); 772 if (Level <= CommonLevels && !SE->isLoopInvariant(Expression, LoopNest)) 773 Loops.set(Level); 774 LoopNest = LoopNest->getParentLoop(); 775 } 776 } 777 778 void DependenceInfo::unifySubscriptType(ArrayRef<Subscript *> Pairs) { 779 780 unsigned widestWidthSeen = 0; 781 Type *widestType; 782 783 // Go through each pair and find the widest bit to which we need 784 // to extend all of them. 785 for (Subscript *Pair : Pairs) { 786 const SCEV *Src = Pair->Src; 787 const SCEV *Dst = Pair->Dst; 788 IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType()); 789 IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType()); 790 if (SrcTy == nullptr || DstTy == nullptr) { 791 assert(SrcTy == DstTy && "This function only unify integer types and " 792 "expect Src and Dst share the same type " 793 "otherwise."); 794 continue; 795 } 796 if (SrcTy->getBitWidth() > widestWidthSeen) { 797 widestWidthSeen = SrcTy->getBitWidth(); 798 widestType = SrcTy; 799 } 800 if (DstTy->getBitWidth() > widestWidthSeen) { 801 widestWidthSeen = DstTy->getBitWidth(); 802 widestType = DstTy; 803 } 804 } 805 806 807 assert(widestWidthSeen > 0); 808 809 // Now extend each pair to the widest seen. 810 for (Subscript *Pair : Pairs) { 811 const SCEV *Src = Pair->Src; 812 const SCEV *Dst = Pair->Dst; 813 IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType()); 814 IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType()); 815 if (SrcTy == nullptr || DstTy == nullptr) { 816 assert(SrcTy == DstTy && "This function only unify integer types and " 817 "expect Src and Dst share the same type " 818 "otherwise."); 819 continue; 820 } 821 if (SrcTy->getBitWidth() < widestWidthSeen) 822 // Sign-extend Src to widestType 823 Pair->Src = SE->getSignExtendExpr(Src, widestType); 824 if (DstTy->getBitWidth() < widestWidthSeen) { 825 // Sign-extend Dst to widestType 826 Pair->Dst = SE->getSignExtendExpr(Dst, widestType); 827 } 828 } 829 } 830 831 // removeMatchingExtensions - Examines a subscript pair. 832 // If the source and destination are identically sign (or zero) 833 // extended, it strips off the extension in an effect to simplify 834 // the actual analysis. 835 void DependenceInfo::removeMatchingExtensions(Subscript *Pair) { 836 const SCEV *Src = Pair->Src; 837 const SCEV *Dst = Pair->Dst; 838 if ((isa<SCEVZeroExtendExpr>(Src) && isa<SCEVZeroExtendExpr>(Dst)) || 839 (isa<SCEVSignExtendExpr>(Src) && isa<SCEVSignExtendExpr>(Dst))) { 840 const SCEVCastExpr *SrcCast = cast<SCEVCastExpr>(Src); 841 const SCEVCastExpr *DstCast = cast<SCEVCastExpr>(Dst); 842 const SCEV *SrcCastOp = SrcCast->getOperand(); 843 const SCEV *DstCastOp = DstCast->getOperand(); 844 if (SrcCastOp->getType() == DstCastOp->getType()) { 845 Pair->Src = SrcCastOp; 846 Pair->Dst = DstCastOp; 847 } 848 } 849 } 850 851 852 // Examine the scev and return true iff it's linear. 853 // Collect any loops mentioned in the set of "Loops". 854 bool DependenceInfo::checkSrcSubscript(const SCEV *Src, const Loop *LoopNest, 855 SmallBitVector &Loops) { 856 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Src); 857 if (!AddRec) 858 return isLoopInvariant(Src, LoopNest); 859 const SCEV *Start = AddRec->getStart(); 860 const SCEV *Step = AddRec->getStepRecurrence(*SE); 861 const SCEV *UB = SE->getBackedgeTakenCount(AddRec->getLoop()); 862 if (!isa<SCEVCouldNotCompute>(UB)) { 863 if (SE->getTypeSizeInBits(Start->getType()) < 864 SE->getTypeSizeInBits(UB->getType())) { 865 if (!AddRec->getNoWrapFlags()) 866 return false; 867 } 868 } 869 if (!isLoopInvariant(Step, LoopNest)) 870 return false; 871 Loops.set(mapSrcLoop(AddRec->getLoop())); 872 return checkSrcSubscript(Start, LoopNest, Loops); 873 } 874 875 876 877 // Examine the scev and return true iff it's linear. 878 // Collect any loops mentioned in the set of "Loops". 879 bool DependenceInfo::checkDstSubscript(const SCEV *Dst, const Loop *LoopNest, 880 SmallBitVector &Loops) { 881 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Dst); 882 if (!AddRec) 883 return isLoopInvariant(Dst, LoopNest); 884 const SCEV *Start = AddRec->getStart(); 885 const SCEV *Step = AddRec->getStepRecurrence(*SE); 886 const SCEV *UB = SE->getBackedgeTakenCount(AddRec->getLoop()); 887 if (!isa<SCEVCouldNotCompute>(UB)) { 888 if (SE->getTypeSizeInBits(Start->getType()) < 889 SE->getTypeSizeInBits(UB->getType())) { 890 if (!AddRec->getNoWrapFlags()) 891 return false; 892 } 893 } 894 if (!isLoopInvariant(Step, LoopNest)) 895 return false; 896 Loops.set(mapDstLoop(AddRec->getLoop())); 897 return checkDstSubscript(Start, LoopNest, Loops); 898 } 899 900 901 // Examines the subscript pair (the Src and Dst SCEVs) 902 // and classifies it as either ZIV, SIV, RDIV, MIV, or Nonlinear. 903 // Collects the associated loops in a set. 904 DependenceInfo::Subscript::ClassificationKind 905 DependenceInfo::classifyPair(const SCEV *Src, const Loop *SrcLoopNest, 906 const SCEV *Dst, const Loop *DstLoopNest, 907 SmallBitVector &Loops) { 908 SmallBitVector SrcLoops(MaxLevels + 1); 909 SmallBitVector DstLoops(MaxLevels + 1); 910 if (!checkSrcSubscript(Src, SrcLoopNest, SrcLoops)) 911 return Subscript::NonLinear; 912 if (!checkDstSubscript(Dst, DstLoopNest, DstLoops)) 913 return Subscript::NonLinear; 914 Loops = SrcLoops; 915 Loops |= DstLoops; 916 unsigned N = Loops.count(); 917 if (N == 0) 918 return Subscript::ZIV; 919 if (N == 1) 920 return Subscript::SIV; 921 if (N == 2 && (SrcLoops.count() == 0 || 922 DstLoops.count() == 0 || 923 (SrcLoops.count() == 1 && DstLoops.count() == 1))) 924 return Subscript::RDIV; 925 return Subscript::MIV; 926 } 927 928 929 // A wrapper around SCEV::isKnownPredicate. 930 // Looks for cases where we're interested in comparing for equality. 931 // If both X and Y have been identically sign or zero extended, 932 // it strips off the (confusing) extensions before invoking 933 // SCEV::isKnownPredicate. Perhaps, someday, the ScalarEvolution package 934 // will be similarly updated. 935 // 936 // If SCEV::isKnownPredicate can't prove the predicate, 937 // we try simple subtraction, which seems to help in some cases 938 // involving symbolics. 939 bool DependenceInfo::isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *X, 940 const SCEV *Y) const { 941 if (Pred == CmpInst::ICMP_EQ || 942 Pred == CmpInst::ICMP_NE) { 943 if ((isa<SCEVSignExtendExpr>(X) && 944 isa<SCEVSignExtendExpr>(Y)) || 945 (isa<SCEVZeroExtendExpr>(X) && 946 isa<SCEVZeroExtendExpr>(Y))) { 947 const SCEVCastExpr *CX = cast<SCEVCastExpr>(X); 948 const SCEVCastExpr *CY = cast<SCEVCastExpr>(Y); 949 const SCEV *Xop = CX->getOperand(); 950 const SCEV *Yop = CY->getOperand(); 951 if (Xop->getType() == Yop->getType()) { 952 X = Xop; 953 Y = Yop; 954 } 955 } 956 } 957 if (SE->isKnownPredicate(Pred, X, Y)) 958 return true; 959 // If SE->isKnownPredicate can't prove the condition, 960 // we try the brute-force approach of subtracting 961 // and testing the difference. 962 // By testing with SE->isKnownPredicate first, we avoid 963 // the possibility of overflow when the arguments are constants. 964 const SCEV *Delta = SE->getMinusSCEV(X, Y); 965 switch (Pred) { 966 case CmpInst::ICMP_EQ: 967 return Delta->isZero(); 968 case CmpInst::ICMP_NE: 969 return SE->isKnownNonZero(Delta); 970 case CmpInst::ICMP_SGE: 971 return SE->isKnownNonNegative(Delta); 972 case CmpInst::ICMP_SLE: 973 return SE->isKnownNonPositive(Delta); 974 case CmpInst::ICMP_SGT: 975 return SE->isKnownPositive(Delta); 976 case CmpInst::ICMP_SLT: 977 return SE->isKnownNegative(Delta); 978 default: 979 llvm_unreachable("unexpected predicate in isKnownPredicate"); 980 } 981 } 982 983 984 // All subscripts are all the same type. 985 // Loop bound may be smaller (e.g., a char). 986 // Should zero extend loop bound, since it's always >= 0. 987 // This routine collects upper bound and extends or truncates if needed. 988 // Truncating is safe when subscripts are known not to wrap. Cases without 989 // nowrap flags should have been rejected earlier. 990 // Return null if no bound available. 991 const SCEV *DependenceInfo::collectUpperBound(const Loop *L, Type *T) const { 992 if (SE->hasLoopInvariantBackedgeTakenCount(L)) { 993 const SCEV *UB = SE->getBackedgeTakenCount(L); 994 return SE->getTruncateOrZeroExtend(UB, T); 995 } 996 return nullptr; 997 } 998 999 1000 // Calls collectUpperBound(), then attempts to cast it to SCEVConstant. 1001 // If the cast fails, returns NULL. 1002 const SCEVConstant *DependenceInfo::collectConstantUpperBound(const Loop *L, 1003 Type *T) const { 1004 if (const SCEV *UB = collectUpperBound(L, T)) 1005 return dyn_cast<SCEVConstant>(UB); 1006 return nullptr; 1007 } 1008 1009 1010 // testZIV - 1011 // When we have a pair of subscripts of the form [c1] and [c2], 1012 // where c1 and c2 are both loop invariant, we attack it using 1013 // the ZIV test. Basically, we test by comparing the two values, 1014 // but there are actually three possible results: 1015 // 1) the values are equal, so there's a dependence 1016 // 2) the values are different, so there's no dependence 1017 // 3) the values might be equal, so we have to assume a dependence. 1018 // 1019 // Return true if dependence disproved. 1020 bool DependenceInfo::testZIV(const SCEV *Src, const SCEV *Dst, 1021 FullDependence &Result) const { 1022 DEBUG(dbgs() << " src = " << *Src << "\n"); 1023 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 1024 ++ZIVapplications; 1025 if (isKnownPredicate(CmpInst::ICMP_EQ, Src, Dst)) { 1026 DEBUG(dbgs() << " provably dependent\n"); 1027 return false; // provably dependent 1028 } 1029 if (isKnownPredicate(CmpInst::ICMP_NE, Src, Dst)) { 1030 DEBUG(dbgs() << " provably independent\n"); 1031 ++ZIVindependence; 1032 return true; // provably independent 1033 } 1034 DEBUG(dbgs() << " possibly dependent\n"); 1035 Result.Consistent = false; 1036 return false; // possibly dependent 1037 } 1038 1039 1040 // strongSIVtest - 1041 // From the paper, Practical Dependence Testing, Section 4.2.1 1042 // 1043 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 + a*i], 1044 // where i is an induction variable, c1 and c2 are loop invariant, 1045 // and a is a constant, we can solve it exactly using the Strong SIV test. 1046 // 1047 // Can prove independence. Failing that, can compute distance (and direction). 1048 // In the presence of symbolic terms, we can sometimes make progress. 1049 // 1050 // If there's a dependence, 1051 // 1052 // c1 + a*i = c2 + a*i' 1053 // 1054 // The dependence distance is 1055 // 1056 // d = i' - i = (c1 - c2)/a 1057 // 1058 // A dependence only exists if d is an integer and abs(d) <= U, where U is the 1059 // loop's upper bound. If a dependence exists, the dependence direction is 1060 // defined as 1061 // 1062 // { < if d > 0 1063 // direction = { = if d = 0 1064 // { > if d < 0 1065 // 1066 // Return true if dependence disproved. 1067 bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst, 1068 const SCEV *DstConst, const Loop *CurLoop, 1069 unsigned Level, FullDependence &Result, 1070 Constraint &NewConstraint) const { 1071 DEBUG(dbgs() << "\tStrong SIV test\n"); 1072 DEBUG(dbgs() << "\t Coeff = " << *Coeff); 1073 DEBUG(dbgs() << ", " << *Coeff->getType() << "\n"); 1074 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst); 1075 DEBUG(dbgs() << ", " << *SrcConst->getType() << "\n"); 1076 DEBUG(dbgs() << "\t DstConst = " << *DstConst); 1077 DEBUG(dbgs() << ", " << *DstConst->getType() << "\n"); 1078 ++StrongSIVapplications; 1079 assert(0 < Level && Level <= CommonLevels && "level out of range"); 1080 Level--; 1081 1082 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); 1083 DEBUG(dbgs() << "\t Delta = " << *Delta); 1084 DEBUG(dbgs() << ", " << *Delta->getType() << "\n"); 1085 1086 // check that |Delta| < iteration count 1087 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1088 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound); 1089 DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n"); 1090 const SCEV *AbsDelta = 1091 SE->isKnownNonNegative(Delta) ? Delta : SE->getNegativeSCEV(Delta); 1092 const SCEV *AbsCoeff = 1093 SE->isKnownNonNegative(Coeff) ? Coeff : SE->getNegativeSCEV(Coeff); 1094 const SCEV *Product = SE->getMulExpr(UpperBound, AbsCoeff); 1095 if (isKnownPredicate(CmpInst::ICMP_SGT, AbsDelta, Product)) { 1096 // Distance greater than trip count - no dependence 1097 ++StrongSIVindependence; 1098 ++StrongSIVsuccesses; 1099 return true; 1100 } 1101 } 1102 1103 // Can we compute distance? 1104 if (isa<SCEVConstant>(Delta) && isa<SCEVConstant>(Coeff)) { 1105 APInt ConstDelta = cast<SCEVConstant>(Delta)->getAPInt(); 1106 APInt ConstCoeff = cast<SCEVConstant>(Coeff)->getAPInt(); 1107 APInt Distance = ConstDelta; // these need to be initialized 1108 APInt Remainder = ConstDelta; 1109 APInt::sdivrem(ConstDelta, ConstCoeff, Distance, Remainder); 1110 DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); 1111 DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1112 // Make sure Coeff divides Delta exactly 1113 if (Remainder != 0) { 1114 // Coeff doesn't divide Distance, no dependence 1115 ++StrongSIVindependence; 1116 ++StrongSIVsuccesses; 1117 return true; 1118 } 1119 Result.DV[Level].Distance = SE->getConstant(Distance); 1120 NewConstraint.setDistance(SE->getConstant(Distance), CurLoop); 1121 if (Distance.sgt(0)) 1122 Result.DV[Level].Direction &= Dependence::DVEntry::LT; 1123 else if (Distance.slt(0)) 1124 Result.DV[Level].Direction &= Dependence::DVEntry::GT; 1125 else 1126 Result.DV[Level].Direction &= Dependence::DVEntry::EQ; 1127 ++StrongSIVsuccesses; 1128 } 1129 else if (Delta->isZero()) { 1130 // since 0/X == 0 1131 Result.DV[Level].Distance = Delta; 1132 NewConstraint.setDistance(Delta, CurLoop); 1133 Result.DV[Level].Direction &= Dependence::DVEntry::EQ; 1134 ++StrongSIVsuccesses; 1135 } 1136 else { 1137 if (Coeff->isOne()) { 1138 DEBUG(dbgs() << "\t Distance = " << *Delta << "\n"); 1139 Result.DV[Level].Distance = Delta; // since X/1 == X 1140 NewConstraint.setDistance(Delta, CurLoop); 1141 } 1142 else { 1143 Result.Consistent = false; 1144 NewConstraint.setLine(Coeff, 1145 SE->getNegativeSCEV(Coeff), 1146 SE->getNegativeSCEV(Delta), CurLoop); 1147 } 1148 1149 // maybe we can get a useful direction 1150 bool DeltaMaybeZero = !SE->isKnownNonZero(Delta); 1151 bool DeltaMaybePositive = !SE->isKnownNonPositive(Delta); 1152 bool DeltaMaybeNegative = !SE->isKnownNonNegative(Delta); 1153 bool CoeffMaybePositive = !SE->isKnownNonPositive(Coeff); 1154 bool CoeffMaybeNegative = !SE->isKnownNonNegative(Coeff); 1155 // The double negatives above are confusing. 1156 // It helps to read !SE->isKnownNonZero(Delta) 1157 // as "Delta might be Zero" 1158 unsigned NewDirection = Dependence::DVEntry::NONE; 1159 if ((DeltaMaybePositive && CoeffMaybePositive) || 1160 (DeltaMaybeNegative && CoeffMaybeNegative)) 1161 NewDirection = Dependence::DVEntry::LT; 1162 if (DeltaMaybeZero) 1163 NewDirection |= Dependence::DVEntry::EQ; 1164 if ((DeltaMaybeNegative && CoeffMaybePositive) || 1165 (DeltaMaybePositive && CoeffMaybeNegative)) 1166 NewDirection |= Dependence::DVEntry::GT; 1167 if (NewDirection < Result.DV[Level].Direction) 1168 ++StrongSIVsuccesses; 1169 Result.DV[Level].Direction &= NewDirection; 1170 } 1171 return false; 1172 } 1173 1174 1175 // weakCrossingSIVtest - 1176 // From the paper, Practical Dependence Testing, Section 4.2.2 1177 // 1178 // When we have a pair of subscripts of the form [c1 + a*i] and [c2 - a*i], 1179 // where i is an induction variable, c1 and c2 are loop invariant, 1180 // and a is a constant, we can solve it exactly using the 1181 // Weak-Crossing SIV test. 1182 // 1183 // Given c1 + a*i = c2 - a*i', we can look for the intersection of 1184 // the two lines, where i = i', yielding 1185 // 1186 // c1 + a*i = c2 - a*i 1187 // 2a*i = c2 - c1 1188 // i = (c2 - c1)/2a 1189 // 1190 // If i < 0, there is no dependence. 1191 // If i > upperbound, there is no dependence. 1192 // If i = 0 (i.e., if c1 = c2), there's a dependence with distance = 0. 1193 // If i = upperbound, there's a dependence with distance = 0. 1194 // If i is integral, there's a dependence (all directions). 1195 // If the non-integer part = 1/2, there's a dependence (<> directions). 1196 // Otherwise, there's no dependence. 1197 // 1198 // Can prove independence. Failing that, 1199 // can sometimes refine the directions. 1200 // Can determine iteration for splitting. 1201 // 1202 // Return true if dependence disproved. 1203 bool DependenceInfo::weakCrossingSIVtest( 1204 const SCEV *Coeff, const SCEV *SrcConst, const SCEV *DstConst, 1205 const Loop *CurLoop, unsigned Level, FullDependence &Result, 1206 Constraint &NewConstraint, const SCEV *&SplitIter) const { 1207 DEBUG(dbgs() << "\tWeak-Crossing SIV test\n"); 1208 DEBUG(dbgs() << "\t Coeff = " << *Coeff << "\n"); 1209 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1210 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1211 ++WeakCrossingSIVapplications; 1212 assert(0 < Level && Level <= CommonLevels && "Level out of range"); 1213 Level--; 1214 Result.Consistent = false; 1215 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1216 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1217 NewConstraint.setLine(Coeff, Coeff, Delta, CurLoop); 1218 if (Delta->isZero()) { 1219 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT); 1220 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT); 1221 ++WeakCrossingSIVsuccesses; 1222 if (!Result.DV[Level].Direction) { 1223 ++WeakCrossingSIVindependence; 1224 return true; 1225 } 1226 Result.DV[Level].Distance = Delta; // = 0 1227 return false; 1228 } 1229 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Coeff); 1230 if (!ConstCoeff) 1231 return false; 1232 1233 Result.DV[Level].Splitable = true; 1234 if (SE->isKnownNegative(ConstCoeff)) { 1235 ConstCoeff = dyn_cast<SCEVConstant>(SE->getNegativeSCEV(ConstCoeff)); 1236 assert(ConstCoeff && 1237 "dynamic cast of negative of ConstCoeff should yield constant"); 1238 Delta = SE->getNegativeSCEV(Delta); 1239 } 1240 assert(SE->isKnownPositive(ConstCoeff) && "ConstCoeff should be positive"); 1241 1242 // compute SplitIter for use by DependenceInfo::getSplitIteration() 1243 SplitIter = SE->getUDivExpr( 1244 SE->getSMaxExpr(SE->getZero(Delta->getType()), Delta), 1245 SE->getMulExpr(SE->getConstant(Delta->getType(), 2), ConstCoeff)); 1246 DEBUG(dbgs() << "\t Split iter = " << *SplitIter << "\n"); 1247 1248 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1249 if (!ConstDelta) 1250 return false; 1251 1252 // We're certain that ConstCoeff > 0; therefore, 1253 // if Delta < 0, then no dependence. 1254 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1255 DEBUG(dbgs() << "\t ConstCoeff = " << *ConstCoeff << "\n"); 1256 if (SE->isKnownNegative(Delta)) { 1257 // No dependence, Delta < 0 1258 ++WeakCrossingSIVindependence; 1259 ++WeakCrossingSIVsuccesses; 1260 return true; 1261 } 1262 1263 // We're certain that Delta > 0 and ConstCoeff > 0. 1264 // Check Delta/(2*ConstCoeff) against upper loop bound 1265 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1266 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1267 const SCEV *ConstantTwo = SE->getConstant(UpperBound->getType(), 2); 1268 const SCEV *ML = SE->getMulExpr(SE->getMulExpr(ConstCoeff, UpperBound), 1269 ConstantTwo); 1270 DEBUG(dbgs() << "\t ML = " << *ML << "\n"); 1271 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, ML)) { 1272 // Delta too big, no dependence 1273 ++WeakCrossingSIVindependence; 1274 ++WeakCrossingSIVsuccesses; 1275 return true; 1276 } 1277 if (isKnownPredicate(CmpInst::ICMP_EQ, Delta, ML)) { 1278 // i = i' = UB 1279 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT); 1280 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT); 1281 ++WeakCrossingSIVsuccesses; 1282 if (!Result.DV[Level].Direction) { 1283 ++WeakCrossingSIVindependence; 1284 return true; 1285 } 1286 Result.DV[Level].Splitable = false; 1287 Result.DV[Level].Distance = SE->getZero(Delta->getType()); 1288 return false; 1289 } 1290 } 1291 1292 // check that Coeff divides Delta 1293 APInt APDelta = ConstDelta->getAPInt(); 1294 APInt APCoeff = ConstCoeff->getAPInt(); 1295 APInt Distance = APDelta; // these need to be initialzed 1296 APInt Remainder = APDelta; 1297 APInt::sdivrem(APDelta, APCoeff, Distance, Remainder); 1298 DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1299 if (Remainder != 0) { 1300 // Coeff doesn't divide Delta, no dependence 1301 ++WeakCrossingSIVindependence; 1302 ++WeakCrossingSIVsuccesses; 1303 return true; 1304 } 1305 DEBUG(dbgs() << "\t Distance = " << Distance << "\n"); 1306 1307 // if 2*Coeff doesn't divide Delta, then the equal direction isn't possible 1308 APInt Two = APInt(Distance.getBitWidth(), 2, true); 1309 Remainder = Distance.srem(Two); 1310 DEBUG(dbgs() << "\t Remainder = " << Remainder << "\n"); 1311 if (Remainder != 0) { 1312 // Equal direction isn't possible 1313 Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::EQ); 1314 ++WeakCrossingSIVsuccesses; 1315 } 1316 return false; 1317 } 1318 1319 1320 // Kirch's algorithm, from 1321 // 1322 // Optimizing Supercompilers for Supercomputers 1323 // Michael Wolfe 1324 // MIT Press, 1989 1325 // 1326 // Program 2.1, page 29. 1327 // Computes the GCD of AM and BM. 1328 // Also finds a solution to the equation ax - by = gcd(a, b). 1329 // Returns true if dependence disproved; i.e., gcd does not divide Delta. 1330 static bool findGCD(unsigned Bits, const APInt &AM, const APInt &BM, 1331 const APInt &Delta, APInt &G, APInt &X, APInt &Y) { 1332 APInt A0(Bits, 1, true), A1(Bits, 0, true); 1333 APInt B0(Bits, 0, true), B1(Bits, 1, true); 1334 APInt G0 = AM.abs(); 1335 APInt G1 = BM.abs(); 1336 APInt Q = G0; // these need to be initialized 1337 APInt R = G0; 1338 APInt::sdivrem(G0, G1, Q, R); 1339 while (R != 0) { 1340 APInt A2 = A0 - Q*A1; A0 = A1; A1 = A2; 1341 APInt B2 = B0 - Q*B1; B0 = B1; B1 = B2; 1342 G0 = G1; G1 = R; 1343 APInt::sdivrem(G0, G1, Q, R); 1344 } 1345 G = G1; 1346 DEBUG(dbgs() << "\t GCD = " << G << "\n"); 1347 X = AM.slt(0) ? -A1 : A1; 1348 Y = BM.slt(0) ? B1 : -B1; 1349 1350 // make sure gcd divides Delta 1351 R = Delta.srem(G); 1352 if (R != 0) 1353 return true; // gcd doesn't divide Delta, no dependence 1354 Q = Delta.sdiv(G); 1355 X *= Q; 1356 Y *= Q; 1357 return false; 1358 } 1359 1360 static APInt floorOfQuotient(const APInt &A, const APInt &B) { 1361 APInt Q = A; // these need to be initialized 1362 APInt R = A; 1363 APInt::sdivrem(A, B, Q, R); 1364 if (R == 0) 1365 return Q; 1366 if ((A.sgt(0) && B.sgt(0)) || 1367 (A.slt(0) && B.slt(0))) 1368 return Q; 1369 else 1370 return Q - 1; 1371 } 1372 1373 static APInt ceilingOfQuotient(const APInt &A, const APInt &B) { 1374 APInt Q = A; // these need to be initialized 1375 APInt R = A; 1376 APInt::sdivrem(A, B, Q, R); 1377 if (R == 0) 1378 return Q; 1379 if ((A.sgt(0) && B.sgt(0)) || 1380 (A.slt(0) && B.slt(0))) 1381 return Q + 1; 1382 else 1383 return Q; 1384 } 1385 1386 1387 static 1388 APInt maxAPInt(APInt A, APInt B) { 1389 return A.sgt(B) ? A : B; 1390 } 1391 1392 1393 static 1394 APInt minAPInt(APInt A, APInt B) { 1395 return A.slt(B) ? A : B; 1396 } 1397 1398 1399 // exactSIVtest - 1400 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i], 1401 // where i is an induction variable, c1 and c2 are loop invariant, and a1 1402 // and a2 are constant, we can solve it exactly using an algorithm developed 1403 // by Banerjee and Wolfe. See Section 2.5.3 in 1404 // 1405 // Optimizing Supercompilers for Supercomputers 1406 // Michael Wolfe 1407 // MIT Press, 1989 1408 // 1409 // It's slower than the specialized tests (strong SIV, weak-zero SIV, etc), 1410 // so use them if possible. They're also a bit better with symbolics and, 1411 // in the case of the strong SIV test, can compute Distances. 1412 // 1413 // Return true if dependence disproved. 1414 bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, 1415 const SCEV *SrcConst, const SCEV *DstConst, 1416 const Loop *CurLoop, unsigned Level, 1417 FullDependence &Result, 1418 Constraint &NewConstraint) const { 1419 DEBUG(dbgs() << "\tExact SIV test\n"); 1420 DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); 1421 DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); 1422 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1423 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1424 ++ExactSIVapplications; 1425 assert(0 < Level && Level <= CommonLevels && "Level out of range"); 1426 Level--; 1427 Result.Consistent = false; 1428 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1429 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1430 NewConstraint.setLine(SrcCoeff, SE->getNegativeSCEV(DstCoeff), 1431 Delta, CurLoop); 1432 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1433 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1434 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1435 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) 1436 return false; 1437 1438 // find gcd 1439 APInt G, X, Y; 1440 APInt AM = ConstSrcCoeff->getAPInt(); 1441 APInt BM = ConstDstCoeff->getAPInt(); 1442 unsigned Bits = AM.getBitWidth(); 1443 if (findGCD(Bits, AM, BM, ConstDelta->getAPInt(), G, X, Y)) { 1444 // gcd doesn't divide Delta, no dependence 1445 ++ExactSIVindependence; 1446 ++ExactSIVsuccesses; 1447 return true; 1448 } 1449 1450 DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); 1451 1452 // since SCEV construction normalizes, LM = 0 1453 APInt UM(Bits, 1, true); 1454 bool UMvalid = false; 1455 // UM is perhaps unavailable, let's check 1456 if (const SCEVConstant *CUB = 1457 collectConstantUpperBound(CurLoop, Delta->getType())) { 1458 UM = CUB->getAPInt(); 1459 DEBUG(dbgs() << "\t UM = " << UM << "\n"); 1460 UMvalid = true; 1461 } 1462 1463 APInt TU(APInt::getSignedMaxValue(Bits)); 1464 APInt TL(APInt::getSignedMinValue(Bits)); 1465 1466 // test(BM/G, LM-X) and test(-BM/G, X-UM) 1467 APInt TMUL = BM.sdiv(G); 1468 if (TMUL.sgt(0)) { 1469 TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL)); 1470 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1471 if (UMvalid) { 1472 TU = minAPInt(TU, floorOfQuotient(UM - X, TMUL)); 1473 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1474 } 1475 } 1476 else { 1477 TU = minAPInt(TU, floorOfQuotient(-X, TMUL)); 1478 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1479 if (UMvalid) { 1480 TL = maxAPInt(TL, ceilingOfQuotient(UM - X, TMUL)); 1481 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1482 } 1483 } 1484 1485 // test(AM/G, LM-Y) and test(-AM/G, Y-UM) 1486 TMUL = AM.sdiv(G); 1487 if (TMUL.sgt(0)) { 1488 TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL)); 1489 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1490 if (UMvalid) { 1491 TU = minAPInt(TU, floorOfQuotient(UM - Y, TMUL)); 1492 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1493 } 1494 } 1495 else { 1496 TU = minAPInt(TU, floorOfQuotient(-Y, TMUL)); 1497 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1498 if (UMvalid) { 1499 TL = maxAPInt(TL, ceilingOfQuotient(UM - Y, TMUL)); 1500 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1501 } 1502 } 1503 if (TL.sgt(TU)) { 1504 ++ExactSIVindependence; 1505 ++ExactSIVsuccesses; 1506 return true; 1507 } 1508 1509 // explore directions 1510 unsigned NewDirection = Dependence::DVEntry::NONE; 1511 1512 // less than 1513 APInt SaveTU(TU); // save these 1514 APInt SaveTL(TL); 1515 DEBUG(dbgs() << "\t exploring LT direction\n"); 1516 TMUL = AM - BM; 1517 if (TMUL.sgt(0)) { 1518 TL = maxAPInt(TL, ceilingOfQuotient(X - Y + 1, TMUL)); 1519 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1520 } 1521 else { 1522 TU = minAPInt(TU, floorOfQuotient(X - Y + 1, TMUL)); 1523 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1524 } 1525 if (TL.sle(TU)) { 1526 NewDirection |= Dependence::DVEntry::LT; 1527 ++ExactSIVsuccesses; 1528 } 1529 1530 // equal 1531 TU = SaveTU; // restore 1532 TL = SaveTL; 1533 DEBUG(dbgs() << "\t exploring EQ direction\n"); 1534 if (TMUL.sgt(0)) { 1535 TL = maxAPInt(TL, ceilingOfQuotient(X - Y, TMUL)); 1536 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1537 } 1538 else { 1539 TU = minAPInt(TU, floorOfQuotient(X - Y, TMUL)); 1540 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1541 } 1542 TMUL = BM - AM; 1543 if (TMUL.sgt(0)) { 1544 TL = maxAPInt(TL, ceilingOfQuotient(Y - X, TMUL)); 1545 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1546 } 1547 else { 1548 TU = minAPInt(TU, floorOfQuotient(Y - X, TMUL)); 1549 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1550 } 1551 if (TL.sle(TU)) { 1552 NewDirection |= Dependence::DVEntry::EQ; 1553 ++ExactSIVsuccesses; 1554 } 1555 1556 // greater than 1557 TU = SaveTU; // restore 1558 TL = SaveTL; 1559 DEBUG(dbgs() << "\t exploring GT direction\n"); 1560 if (TMUL.sgt(0)) { 1561 TL = maxAPInt(TL, ceilingOfQuotient(Y - X + 1, TMUL)); 1562 DEBUG(dbgs() << "\t\t TL = " << TL << "\n"); 1563 } 1564 else { 1565 TU = minAPInt(TU, floorOfQuotient(Y - X + 1, TMUL)); 1566 DEBUG(dbgs() << "\t\t TU = " << TU << "\n"); 1567 } 1568 if (TL.sle(TU)) { 1569 NewDirection |= Dependence::DVEntry::GT; 1570 ++ExactSIVsuccesses; 1571 } 1572 1573 // finished 1574 Result.DV[Level].Direction &= NewDirection; 1575 if (Result.DV[Level].Direction == Dependence::DVEntry::NONE) 1576 ++ExactSIVindependence; 1577 return Result.DV[Level].Direction == Dependence::DVEntry::NONE; 1578 } 1579 1580 1581 1582 // Return true if the divisor evenly divides the dividend. 1583 static 1584 bool isRemainderZero(const SCEVConstant *Dividend, 1585 const SCEVConstant *Divisor) { 1586 const APInt &ConstDividend = Dividend->getAPInt(); 1587 const APInt &ConstDivisor = Divisor->getAPInt(); 1588 return ConstDividend.srem(ConstDivisor) == 0; 1589 } 1590 1591 1592 // weakZeroSrcSIVtest - 1593 // From the paper, Practical Dependence Testing, Section 4.2.2 1594 // 1595 // When we have a pair of subscripts of the form [c1] and [c2 + a*i], 1596 // where i is an induction variable, c1 and c2 are loop invariant, 1597 // and a is a constant, we can solve it exactly using the 1598 // Weak-Zero SIV test. 1599 // 1600 // Given 1601 // 1602 // c1 = c2 + a*i 1603 // 1604 // we get 1605 // 1606 // (c1 - c2)/a = i 1607 // 1608 // If i is not an integer, there's no dependence. 1609 // If i < 0 or > UB, there's no dependence. 1610 // If i = 0, the direction is <= and peeling the 1611 // 1st iteration will break the dependence. 1612 // If i = UB, the direction is >= and peeling the 1613 // last iteration will break the dependence. 1614 // Otherwise, the direction is *. 1615 // 1616 // Can prove independence. Failing that, we can sometimes refine 1617 // the directions. Can sometimes show that first or last 1618 // iteration carries all the dependences (so worth peeling). 1619 // 1620 // (see also weakZeroDstSIVtest) 1621 // 1622 // Return true if dependence disproved. 1623 bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *DstCoeff, 1624 const SCEV *SrcConst, 1625 const SCEV *DstConst, 1626 const Loop *CurLoop, unsigned Level, 1627 FullDependence &Result, 1628 Constraint &NewConstraint) const { 1629 // For the WeakSIV test, it's possible the loop isn't common to 1630 // the Src and Dst loops. If it isn't, then there's no need to 1631 // record a direction. 1632 DEBUG(dbgs() << "\tWeak-Zero (src) SIV test\n"); 1633 DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << "\n"); 1634 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1635 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1636 ++WeakZeroSIVapplications; 1637 assert(0 < Level && Level <= MaxLevels && "Level out of range"); 1638 Level--; 1639 Result.Consistent = false; 1640 const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); 1641 NewConstraint.setLine(SE->getZero(Delta->getType()), DstCoeff, Delta, 1642 CurLoop); 1643 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1644 if (isKnownPredicate(CmpInst::ICMP_EQ, SrcConst, DstConst)) { 1645 if (Level < CommonLevels) { 1646 Result.DV[Level].Direction &= Dependence::DVEntry::LE; 1647 Result.DV[Level].PeelFirst = true; 1648 ++WeakZeroSIVsuccesses; 1649 } 1650 return false; // dependences caused by first iteration 1651 } 1652 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1653 if (!ConstCoeff) 1654 return false; 1655 const SCEV *AbsCoeff = 1656 SE->isKnownNegative(ConstCoeff) ? 1657 SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; 1658 const SCEV *NewDelta = 1659 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta; 1660 1661 // check that Delta/SrcCoeff < iteration count 1662 // really check NewDelta < count*AbsCoeff 1663 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1664 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1665 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); 1666 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { 1667 ++WeakZeroSIVindependence; 1668 ++WeakZeroSIVsuccesses; 1669 return true; 1670 } 1671 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { 1672 // dependences caused by last iteration 1673 if (Level < CommonLevels) { 1674 Result.DV[Level].Direction &= Dependence::DVEntry::GE; 1675 Result.DV[Level].PeelLast = true; 1676 ++WeakZeroSIVsuccesses; 1677 } 1678 return false; 1679 } 1680 } 1681 1682 // check that Delta/SrcCoeff >= 0 1683 // really check that NewDelta >= 0 1684 if (SE->isKnownNegative(NewDelta)) { 1685 // No dependence, newDelta < 0 1686 ++WeakZeroSIVindependence; 1687 ++WeakZeroSIVsuccesses; 1688 return true; 1689 } 1690 1691 // if SrcCoeff doesn't divide Delta, then no dependence 1692 if (isa<SCEVConstant>(Delta) && 1693 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { 1694 ++WeakZeroSIVindependence; 1695 ++WeakZeroSIVsuccesses; 1696 return true; 1697 } 1698 return false; 1699 } 1700 1701 1702 // weakZeroDstSIVtest - 1703 // From the paper, Practical Dependence Testing, Section 4.2.2 1704 // 1705 // When we have a pair of subscripts of the form [c1 + a*i] and [c2], 1706 // where i is an induction variable, c1 and c2 are loop invariant, 1707 // and a is a constant, we can solve it exactly using the 1708 // Weak-Zero SIV test. 1709 // 1710 // Given 1711 // 1712 // c1 + a*i = c2 1713 // 1714 // we get 1715 // 1716 // i = (c2 - c1)/a 1717 // 1718 // If i is not an integer, there's no dependence. 1719 // If i < 0 or > UB, there's no dependence. 1720 // If i = 0, the direction is <= and peeling the 1721 // 1st iteration will break the dependence. 1722 // If i = UB, the direction is >= and peeling the 1723 // last iteration will break the dependence. 1724 // Otherwise, the direction is *. 1725 // 1726 // Can prove independence. Failing that, we can sometimes refine 1727 // the directions. Can sometimes show that first or last 1728 // iteration carries all the dependences (so worth peeling). 1729 // 1730 // (see also weakZeroSrcSIVtest) 1731 // 1732 // Return true if dependence disproved. 1733 bool DependenceInfo::weakZeroDstSIVtest(const SCEV *SrcCoeff, 1734 const SCEV *SrcConst, 1735 const SCEV *DstConst, 1736 const Loop *CurLoop, unsigned Level, 1737 FullDependence &Result, 1738 Constraint &NewConstraint) const { 1739 // For the WeakSIV test, it's possible the loop isn't common to the 1740 // Src and Dst loops. If it isn't, then there's no need to record a direction. 1741 DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n"); 1742 DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << "\n"); 1743 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1744 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1745 ++WeakZeroSIVapplications; 1746 assert(0 < Level && Level <= SrcLevels && "Level out of range"); 1747 Level--; 1748 Result.Consistent = false; 1749 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1750 NewConstraint.setLine(SrcCoeff, SE->getZero(Delta->getType()), Delta, 1751 CurLoop); 1752 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1753 if (isKnownPredicate(CmpInst::ICMP_EQ, DstConst, SrcConst)) { 1754 if (Level < CommonLevels) { 1755 Result.DV[Level].Direction &= Dependence::DVEntry::LE; 1756 Result.DV[Level].PeelFirst = true; 1757 ++WeakZeroSIVsuccesses; 1758 } 1759 return false; // dependences caused by first iteration 1760 } 1761 const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1762 if (!ConstCoeff) 1763 return false; 1764 const SCEV *AbsCoeff = 1765 SE->isKnownNegative(ConstCoeff) ? 1766 SE->getNegativeSCEV(ConstCoeff) : ConstCoeff; 1767 const SCEV *NewDelta = 1768 SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta; 1769 1770 // check that Delta/SrcCoeff < iteration count 1771 // really check NewDelta < count*AbsCoeff 1772 if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) { 1773 DEBUG(dbgs() << "\t UpperBound = " << *UpperBound << "\n"); 1774 const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound); 1775 if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) { 1776 ++WeakZeroSIVindependence; 1777 ++WeakZeroSIVsuccesses; 1778 return true; 1779 } 1780 if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) { 1781 // dependences caused by last iteration 1782 if (Level < CommonLevels) { 1783 Result.DV[Level].Direction &= Dependence::DVEntry::GE; 1784 Result.DV[Level].PeelLast = true; 1785 ++WeakZeroSIVsuccesses; 1786 } 1787 return false; 1788 } 1789 } 1790 1791 // check that Delta/SrcCoeff >= 0 1792 // really check that NewDelta >= 0 1793 if (SE->isKnownNegative(NewDelta)) { 1794 // No dependence, newDelta < 0 1795 ++WeakZeroSIVindependence; 1796 ++WeakZeroSIVsuccesses; 1797 return true; 1798 } 1799 1800 // if SrcCoeff doesn't divide Delta, then no dependence 1801 if (isa<SCEVConstant>(Delta) && 1802 !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) { 1803 ++WeakZeroSIVindependence; 1804 ++WeakZeroSIVsuccesses; 1805 return true; 1806 } 1807 return false; 1808 } 1809 1810 1811 // exactRDIVtest - Tests the RDIV subscript pair for dependence. 1812 // Things of the form [c1 + a*i] and [c2 + b*j], 1813 // where i and j are induction variable, c1 and c2 are loop invariant, 1814 // and a and b are constants. 1815 // Returns true if any possible dependence is disproved. 1816 // Marks the result as inconsistent. 1817 // Works in some cases that symbolicRDIVtest doesn't, and vice versa. 1818 bool DependenceInfo::exactRDIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff, 1819 const SCEV *SrcConst, const SCEV *DstConst, 1820 const Loop *SrcLoop, const Loop *DstLoop, 1821 FullDependence &Result) const { 1822 DEBUG(dbgs() << "\tExact RDIV test\n"); 1823 DEBUG(dbgs() << "\t SrcCoeff = " << *SrcCoeff << " = AM\n"); 1824 DEBUG(dbgs() << "\t DstCoeff = " << *DstCoeff << " = BM\n"); 1825 DEBUG(dbgs() << "\t SrcConst = " << *SrcConst << "\n"); 1826 DEBUG(dbgs() << "\t DstConst = " << *DstConst << "\n"); 1827 ++ExactRDIVapplications; 1828 Result.Consistent = false; 1829 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 1830 DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); 1831 const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); 1832 const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff); 1833 const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff); 1834 if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff) 1835 return false; 1836 1837 // find gcd 1838 APInt G, X, Y; 1839 APInt AM = ConstSrcCoeff->getAPInt(); 1840 APInt BM = ConstDstCoeff->getAPInt(); 1841 unsigned Bits = AM.getBitWidth(); 1842 if (findGCD(Bits, AM, BM, ConstDelta->getAPInt(), G, X, Y)) { 1843 // gcd doesn't divide Delta, no dependence 1844 ++ExactRDIVindependence; 1845 return true; 1846 } 1847 1848 DEBUG(dbgs() << "\t X = " << X << ", Y = " << Y << "\n"); 1849 1850 // since SCEV construction seems to normalize, LM = 0 1851 APInt SrcUM(Bits, 1, true); 1852 bool SrcUMvalid = false; 1853 // SrcUM is perhaps unavailable, let's check 1854 if (const SCEVConstant *UpperBound = 1855 collectConstantUpperBound(SrcLoop, Delta->getType())) { 1856 SrcUM = UpperBound->getAPInt(); 1857 DEBUG(dbgs() << "\t SrcUM = " << SrcUM << "\n"); 1858 SrcUMvalid = true; 1859 } 1860 1861 APInt DstUM(Bits, 1, true); 1862 bool DstUMvalid = false; 1863 // UM is perhaps unavailable, let's check 1864 if (const SCEVConstant *UpperBound = 1865 collectConstantUpperBound(DstLoop, Delta->getType())) { 1866 DstUM = UpperBound->getAPInt(); 1867 DEBUG(dbgs() << "\t DstUM = " << DstUM << "\n"); 1868 DstUMvalid = true; 1869 } 1870 1871 APInt TU(APInt::getSignedMaxValue(Bits)); 1872 APInt TL(APInt::getSignedMinValue(Bits)); 1873 1874 // test(BM/G, LM-X) and test(-BM/G, X-UM) 1875 APInt TMUL = BM.sdiv(G); 1876 if (TMUL.sgt(0)) { 1877 TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL)); 1878 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1879 if (SrcUMvalid) { 1880 TU = minAPInt(TU, floorOfQuotient(SrcUM - X, TMUL)); 1881 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1882 } 1883 } 1884 else { 1885 TU = minAPInt(TU, floorOfQuotient(-X, TMUL)); 1886 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1887 if (SrcUMvalid) { 1888 TL = maxAPInt(TL, ceilingOfQuotient(SrcUM - X, TMUL)); 1889 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1890 } 1891 } 1892 1893 // test(AM/G, LM-Y) and test(-AM/G, Y-UM) 1894 TMUL = AM.sdiv(G); 1895 if (TMUL.sgt(0)) { 1896 TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL)); 1897 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1898 if (DstUMvalid) { 1899 TU = minAPInt(TU, floorOfQuotient(DstUM - Y, TMUL)); 1900 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1901 } 1902 } 1903 else { 1904 TU = minAPInt(TU, floorOfQuotient(-Y, TMUL)); 1905 DEBUG(dbgs() << "\t TU = " << TU << "\n"); 1906 if (DstUMvalid) { 1907 TL = maxAPInt(TL, ceilingOfQuotient(DstUM - Y, TMUL)); 1908 DEBUG(dbgs() << "\t TL = " << TL << "\n"); 1909 } 1910 } 1911 if (TL.sgt(TU)) 1912 ++ExactRDIVindependence; 1913 return TL.sgt(TU); 1914 } 1915 1916 1917 // symbolicRDIVtest - 1918 // In Section 4.5 of the Practical Dependence Testing paper,the authors 1919 // introduce a special case of Banerjee's Inequalities (also called the 1920 // Extreme-Value Test) that can handle some of the SIV and RDIV cases, 1921 // particularly cases with symbolics. Since it's only able to disprove 1922 // dependence (not compute distances or directions), we'll use it as a 1923 // fall back for the other tests. 1924 // 1925 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] 1926 // where i and j are induction variables and c1 and c2 are loop invariants, 1927 // we can use the symbolic tests to disprove some dependences, serving as a 1928 // backup for the RDIV test. Note that i and j can be the same variable, 1929 // letting this test serve as a backup for the various SIV tests. 1930 // 1931 // For a dependence to exist, c1 + a1*i must equal c2 + a2*j for some 1932 // 0 <= i <= N1 and some 0 <= j <= N2, where N1 and N2 are the (normalized) 1933 // loop bounds for the i and j loops, respectively. So, ... 1934 // 1935 // c1 + a1*i = c2 + a2*j 1936 // a1*i - a2*j = c2 - c1 1937 // 1938 // To test for a dependence, we compute c2 - c1 and make sure it's in the 1939 // range of the maximum and minimum possible values of a1*i - a2*j. 1940 // Considering the signs of a1 and a2, we have 4 possible cases: 1941 // 1942 // 1) If a1 >= 0 and a2 >= 0, then 1943 // a1*0 - a2*N2 <= c2 - c1 <= a1*N1 - a2*0 1944 // -a2*N2 <= c2 - c1 <= a1*N1 1945 // 1946 // 2) If a1 >= 0 and a2 <= 0, then 1947 // a1*0 - a2*0 <= c2 - c1 <= a1*N1 - a2*N2 1948 // 0 <= c2 - c1 <= a1*N1 - a2*N2 1949 // 1950 // 3) If a1 <= 0 and a2 >= 0, then 1951 // a1*N1 - a2*N2 <= c2 - c1 <= a1*0 - a2*0 1952 // a1*N1 - a2*N2 <= c2 - c1 <= 0 1953 // 1954 // 4) If a1 <= 0 and a2 <= 0, then 1955 // a1*N1 - a2*0 <= c2 - c1 <= a1*0 - a2*N2 1956 // a1*N1 <= c2 - c1 <= -a2*N2 1957 // 1958 // return true if dependence disproved 1959 bool DependenceInfo::symbolicRDIVtest(const SCEV *A1, const SCEV *A2, 1960 const SCEV *C1, const SCEV *C2, 1961 const Loop *Loop1, 1962 const Loop *Loop2) const { 1963 ++SymbolicRDIVapplications; 1964 DEBUG(dbgs() << "\ttry symbolic RDIV test\n"); 1965 DEBUG(dbgs() << "\t A1 = " << *A1); 1966 DEBUG(dbgs() << ", type = " << *A1->getType() << "\n"); 1967 DEBUG(dbgs() << "\t A2 = " << *A2 << "\n"); 1968 DEBUG(dbgs() << "\t C1 = " << *C1 << "\n"); 1969 DEBUG(dbgs() << "\t C2 = " << *C2 << "\n"); 1970 const SCEV *N1 = collectUpperBound(Loop1, A1->getType()); 1971 const SCEV *N2 = collectUpperBound(Loop2, A1->getType()); 1972 DEBUG(if (N1) dbgs() << "\t N1 = " << *N1 << "\n"); 1973 DEBUG(if (N2) dbgs() << "\t N2 = " << *N2 << "\n"); 1974 const SCEV *C2_C1 = SE->getMinusSCEV(C2, C1); 1975 const SCEV *C1_C2 = SE->getMinusSCEV(C1, C2); 1976 DEBUG(dbgs() << "\t C2 - C1 = " << *C2_C1 << "\n"); 1977 DEBUG(dbgs() << "\t C1 - C2 = " << *C1_C2 << "\n"); 1978 if (SE->isKnownNonNegative(A1)) { 1979 if (SE->isKnownNonNegative(A2)) { 1980 // A1 >= 0 && A2 >= 0 1981 if (N1) { 1982 // make sure that c2 - c1 <= a1*N1 1983 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 1984 DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); 1985 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1)) { 1986 ++SymbolicRDIVindependence; 1987 return true; 1988 } 1989 } 1990 if (N2) { 1991 // make sure that -a2*N2 <= c2 - c1, or a2*N2 >= c1 - c2 1992 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 1993 DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); 1994 if (isKnownPredicate(CmpInst::ICMP_SLT, A2N2, C1_C2)) { 1995 ++SymbolicRDIVindependence; 1996 return true; 1997 } 1998 } 1999 } 2000 else if (SE->isKnownNonPositive(A2)) { 2001 // a1 >= 0 && a2 <= 0 2002 if (N1 && N2) { 2003 // make sure that c2 - c1 <= a1*N1 - a2*N2 2004 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2005 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2006 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); 2007 DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); 2008 if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1_A2N2)) { 2009 ++SymbolicRDIVindependence; 2010 return true; 2011 } 2012 } 2013 // make sure that 0 <= c2 - c1 2014 if (SE->isKnownNegative(C2_C1)) { 2015 ++SymbolicRDIVindependence; 2016 return true; 2017 } 2018 } 2019 } 2020 else if (SE->isKnownNonPositive(A1)) { 2021 if (SE->isKnownNonNegative(A2)) { 2022 // a1 <= 0 && a2 >= 0 2023 if (N1 && N2) { 2024 // make sure that a1*N1 - a2*N2 <= c2 - c1 2025 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2026 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2027 const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2); 2028 DEBUG(dbgs() << "\t A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n"); 2029 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1_A2N2, C2_C1)) { 2030 ++SymbolicRDIVindependence; 2031 return true; 2032 } 2033 } 2034 // make sure that c2 - c1 <= 0 2035 if (SE->isKnownPositive(C2_C1)) { 2036 ++SymbolicRDIVindependence; 2037 return true; 2038 } 2039 } 2040 else if (SE->isKnownNonPositive(A2)) { 2041 // a1 <= 0 && a2 <= 0 2042 if (N1) { 2043 // make sure that a1*N1 <= c2 - c1 2044 const SCEV *A1N1 = SE->getMulExpr(A1, N1); 2045 DEBUG(dbgs() << "\t A1*N1 = " << *A1N1 << "\n"); 2046 if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1, C2_C1)) { 2047 ++SymbolicRDIVindependence; 2048 return true; 2049 } 2050 } 2051 if (N2) { 2052 // make sure that c2 - c1 <= -a2*N2, or c1 - c2 >= a2*N2 2053 const SCEV *A2N2 = SE->getMulExpr(A2, N2); 2054 DEBUG(dbgs() << "\t A2*N2 = " << *A2N2 << "\n"); 2055 if (isKnownPredicate(CmpInst::ICMP_SLT, C1_C2, A2N2)) { 2056 ++SymbolicRDIVindependence; 2057 return true; 2058 } 2059 } 2060 } 2061 } 2062 return false; 2063 } 2064 2065 2066 // testSIV - 2067 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 - a2*i] 2068 // where i is an induction variable, c1 and c2 are loop invariant, and a1 and 2069 // a2 are constant, we attack it with an SIV test. While they can all be 2070 // solved with the Exact SIV test, it's worthwhile to use simpler tests when 2071 // they apply; they're cheaper and sometimes more precise. 2072 // 2073 // Return true if dependence disproved. 2074 bool DependenceInfo::testSIV(const SCEV *Src, const SCEV *Dst, unsigned &Level, 2075 FullDependence &Result, Constraint &NewConstraint, 2076 const SCEV *&SplitIter) const { 2077 DEBUG(dbgs() << " src = " << *Src << "\n"); 2078 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2079 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); 2080 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); 2081 if (SrcAddRec && DstAddRec) { 2082 const SCEV *SrcConst = SrcAddRec->getStart(); 2083 const SCEV *DstConst = DstAddRec->getStart(); 2084 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2085 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); 2086 const Loop *CurLoop = SrcAddRec->getLoop(); 2087 assert(CurLoop == DstAddRec->getLoop() && 2088 "both loops in SIV should be same"); 2089 Level = mapSrcLoop(CurLoop); 2090 bool disproven; 2091 if (SrcCoeff == DstCoeff) 2092 disproven = strongSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2093 Level, Result, NewConstraint); 2094 else if (SrcCoeff == SE->getNegativeSCEV(DstCoeff)) 2095 disproven = weakCrossingSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2096 Level, Result, NewConstraint, SplitIter); 2097 else 2098 disproven = exactSIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, 2099 Level, Result, NewConstraint); 2100 return disproven || 2101 gcdMIVtest(Src, Dst, Result) || 2102 symbolicRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, CurLoop); 2103 } 2104 if (SrcAddRec) { 2105 const SCEV *SrcConst = SrcAddRec->getStart(); 2106 const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2107 const SCEV *DstConst = Dst; 2108 const Loop *CurLoop = SrcAddRec->getLoop(); 2109 Level = mapSrcLoop(CurLoop); 2110 return weakZeroDstSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, 2111 Level, Result, NewConstraint) || 2112 gcdMIVtest(Src, Dst, Result); 2113 } 2114 if (DstAddRec) { 2115 const SCEV *DstConst = DstAddRec->getStart(); 2116 const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE); 2117 const SCEV *SrcConst = Src; 2118 const Loop *CurLoop = DstAddRec->getLoop(); 2119 Level = mapDstLoop(CurLoop); 2120 return weakZeroSrcSIVtest(DstCoeff, SrcConst, DstConst, 2121 CurLoop, Level, Result, NewConstraint) || 2122 gcdMIVtest(Src, Dst, Result); 2123 } 2124 llvm_unreachable("SIV test expected at least one AddRec"); 2125 return false; 2126 } 2127 2128 2129 // testRDIV - 2130 // When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j] 2131 // where i and j are induction variables, c1 and c2 are loop invariant, 2132 // and a1 and a2 are constant, we can solve it exactly with an easy adaptation 2133 // of the Exact SIV test, the Restricted Double Index Variable (RDIV) test. 2134 // It doesn't make sense to talk about distance or direction in this case, 2135 // so there's no point in making special versions of the Strong SIV test or 2136 // the Weak-crossing SIV test. 2137 // 2138 // With minor algebra, this test can also be used for things like 2139 // [c1 + a1*i + a2*j][c2]. 2140 // 2141 // Return true if dependence disproved. 2142 bool DependenceInfo::testRDIV(const SCEV *Src, const SCEV *Dst, 2143 FullDependence &Result) const { 2144 // we have 3 possible situations here: 2145 // 1) [a*i + b] and [c*j + d] 2146 // 2) [a*i + c*j + b] and [d] 2147 // 3) [b] and [a*i + c*j + d] 2148 // We need to find what we've got and get organized 2149 2150 const SCEV *SrcConst, *DstConst; 2151 const SCEV *SrcCoeff, *DstCoeff; 2152 const Loop *SrcLoop, *DstLoop; 2153 2154 DEBUG(dbgs() << " src = " << *Src << "\n"); 2155 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2156 const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src); 2157 const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst); 2158 if (SrcAddRec && DstAddRec) { 2159 SrcConst = SrcAddRec->getStart(); 2160 SrcCoeff = SrcAddRec->getStepRecurrence(*SE); 2161 SrcLoop = SrcAddRec->getLoop(); 2162 DstConst = DstAddRec->getStart(); 2163 DstCoeff = DstAddRec->getStepRecurrence(*SE); 2164 DstLoop = DstAddRec->getLoop(); 2165 } 2166 else if (SrcAddRec) { 2167 if (const SCEVAddRecExpr *tmpAddRec = 2168 dyn_cast<SCEVAddRecExpr>(SrcAddRec->getStart())) { 2169 SrcConst = tmpAddRec->getStart(); 2170 SrcCoeff = tmpAddRec->getStepRecurrence(*SE); 2171 SrcLoop = tmpAddRec->getLoop(); 2172 DstConst = Dst; 2173 DstCoeff = SE->getNegativeSCEV(SrcAddRec->getStepRecurrence(*SE)); 2174 DstLoop = SrcAddRec->getLoop(); 2175 } 2176 else 2177 llvm_unreachable("RDIV reached by surprising SCEVs"); 2178 } 2179 else if (DstAddRec) { 2180 if (const SCEVAddRecExpr *tmpAddRec = 2181 dyn_cast<SCEVAddRecExpr>(DstAddRec->getStart())) { 2182 DstConst = tmpAddRec->getStart(); 2183 DstCoeff = tmpAddRec->getStepRecurrence(*SE); 2184 DstLoop = tmpAddRec->getLoop(); 2185 SrcConst = Src; 2186 SrcCoeff = SE->getNegativeSCEV(DstAddRec->getStepRecurrence(*SE)); 2187 SrcLoop = DstAddRec->getLoop(); 2188 } 2189 else 2190 llvm_unreachable("RDIV reached by surprising SCEVs"); 2191 } 2192 else 2193 llvm_unreachable("RDIV expected at least one AddRec"); 2194 return exactRDIVtest(SrcCoeff, DstCoeff, 2195 SrcConst, DstConst, 2196 SrcLoop, DstLoop, 2197 Result) || 2198 gcdMIVtest(Src, Dst, Result) || 2199 symbolicRDIVtest(SrcCoeff, DstCoeff, 2200 SrcConst, DstConst, 2201 SrcLoop, DstLoop); 2202 } 2203 2204 2205 // Tests the single-subscript MIV pair (Src and Dst) for dependence. 2206 // Return true if dependence disproved. 2207 // Can sometimes refine direction vectors. 2208 bool DependenceInfo::testMIV(const SCEV *Src, const SCEV *Dst, 2209 const SmallBitVector &Loops, 2210 FullDependence &Result) const { 2211 DEBUG(dbgs() << " src = " << *Src << "\n"); 2212 DEBUG(dbgs() << " dst = " << *Dst << "\n"); 2213 Result.Consistent = false; 2214 return gcdMIVtest(Src, Dst, Result) || 2215 banerjeeMIVtest(Src, Dst, Loops, Result); 2216 } 2217 2218 2219 // Given a product, e.g., 10*X*Y, returns the first constant operand, 2220 // in this case 10. If there is no constant part, returns NULL. 2221 static 2222 const SCEVConstant *getConstantPart(const SCEV *Expr) { 2223 if (const auto *Constant = dyn_cast<SCEVConstant>(Expr)) 2224 return Constant; 2225 else if (const auto *Product = dyn_cast<SCEVMulExpr>(Expr)) 2226 if (const auto *Constant = dyn_cast<SCEVConstant>(Product->getOperand(0))) 2227 return Constant; 2228 return nullptr; 2229 } 2230 2231 2232 //===----------------------------------------------------------------------===// 2233 // gcdMIVtest - 2234 // Tests an MIV subscript pair for dependence. 2235 // Returns true if any possible dependence is disproved. 2236 // Marks the result as inconsistent. 2237 // Can sometimes disprove the equal direction for 1 or more loops, 2238 // as discussed in Michael Wolfe's book, 2239 // High Performance Compilers for Parallel Computing, page 235. 2240 // 2241 // We spend some effort (code!) to handle cases like 2242 // [10*i + 5*N*j + 15*M + 6], where i and j are induction variables, 2243 // but M and N are just loop-invariant variables. 2244 // This should help us handle linearized subscripts; 2245 // also makes this test a useful backup to the various SIV tests. 2246 // 2247 // It occurs to me that the presence of loop-invariant variables 2248 // changes the nature of the test from "greatest common divisor" 2249 // to "a common divisor". 2250 bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst, 2251 FullDependence &Result) const { 2252 DEBUG(dbgs() << "starting gcd\n"); 2253 ++GCDapplications; 2254 unsigned BitWidth = SE->getTypeSizeInBits(Src->getType()); 2255 APInt RunningGCD = APInt::getNullValue(BitWidth); 2256 2257 // Examine Src coefficients. 2258 // Compute running GCD and record source constant. 2259 // Because we're looking for the constant at the end of the chain, 2260 // we can't quit the loop just because the GCD == 1. 2261 const SCEV *Coefficients = Src; 2262 while (const SCEVAddRecExpr *AddRec = 2263 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2264 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2265 // If the coefficient is the product of a constant and other stuff, 2266 // we can use the constant in the GCD computation. 2267 const auto *Constant = getConstantPart(Coeff); 2268 if (!Constant) 2269 return false; 2270 APInt ConstCoeff = Constant->getAPInt(); 2271 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2272 Coefficients = AddRec->getStart(); 2273 } 2274 const SCEV *SrcConst = Coefficients; 2275 2276 // Examine Dst coefficients. 2277 // Compute running GCD and record destination constant. 2278 // Because we're looking for the constant at the end of the chain, 2279 // we can't quit the loop just because the GCD == 1. 2280 Coefficients = Dst; 2281 while (const SCEVAddRecExpr *AddRec = 2282 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2283 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2284 // If the coefficient is the product of a constant and other stuff, 2285 // we can use the constant in the GCD computation. 2286 const auto *Constant = getConstantPart(Coeff); 2287 if (!Constant) 2288 return false; 2289 APInt ConstCoeff = Constant->getAPInt(); 2290 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2291 Coefficients = AddRec->getStart(); 2292 } 2293 const SCEV *DstConst = Coefficients; 2294 2295 APInt ExtraGCD = APInt::getNullValue(BitWidth); 2296 const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); 2297 DEBUG(dbgs() << " Delta = " << *Delta << "\n"); 2298 const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Delta); 2299 if (const SCEVAddExpr *Sum = dyn_cast<SCEVAddExpr>(Delta)) { 2300 // If Delta is a sum of products, we may be able to make further progress. 2301 for (unsigned Op = 0, Ops = Sum->getNumOperands(); Op < Ops; Op++) { 2302 const SCEV *Operand = Sum->getOperand(Op); 2303 if (isa<SCEVConstant>(Operand)) { 2304 assert(!Constant && "Surprised to find multiple constants"); 2305 Constant = cast<SCEVConstant>(Operand); 2306 } 2307 else if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Operand)) { 2308 // Search for constant operand to participate in GCD; 2309 // If none found; return false. 2310 const SCEVConstant *ConstOp = getConstantPart(Product); 2311 if (!ConstOp) 2312 return false; 2313 APInt ConstOpValue = ConstOp->getAPInt(); 2314 ExtraGCD = APIntOps::GreatestCommonDivisor(ExtraGCD, 2315 ConstOpValue.abs()); 2316 } 2317 else 2318 return false; 2319 } 2320 } 2321 if (!Constant) 2322 return false; 2323 APInt ConstDelta = cast<SCEVConstant>(Constant)->getAPInt(); 2324 DEBUG(dbgs() << " ConstDelta = " << ConstDelta << "\n"); 2325 if (ConstDelta == 0) 2326 return false; 2327 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ExtraGCD); 2328 DEBUG(dbgs() << " RunningGCD = " << RunningGCD << "\n"); 2329 APInt Remainder = ConstDelta.srem(RunningGCD); 2330 if (Remainder != 0) { 2331 ++GCDindependence; 2332 return true; 2333 } 2334 2335 // Try to disprove equal directions. 2336 // For example, given a subscript pair [3*i + 2*j] and [i' + 2*j' - 1], 2337 // the code above can't disprove the dependence because the GCD = 1. 2338 // So we consider what happen if i = i' and what happens if j = j'. 2339 // If i = i', we can simplify the subscript to [2*i + 2*j] and [2*j' - 1], 2340 // which is infeasible, so we can disallow the = direction for the i level. 2341 // Setting j = j' doesn't help matters, so we end up with a direction vector 2342 // of [<>, *] 2343 // 2344 // Given A[5*i + 10*j*M + 9*M*N] and A[15*i + 20*j*M - 21*N*M + 5], 2345 // we need to remember that the constant part is 5 and the RunningGCD should 2346 // be initialized to ExtraGCD = 30. 2347 DEBUG(dbgs() << " ExtraGCD = " << ExtraGCD << '\n'); 2348 2349 bool Improved = false; 2350 Coefficients = Src; 2351 while (const SCEVAddRecExpr *AddRec = 2352 dyn_cast<SCEVAddRecExpr>(Coefficients)) { 2353 Coefficients = AddRec->getStart(); 2354 const Loop *CurLoop = AddRec->getLoop(); 2355 RunningGCD = ExtraGCD; 2356 const SCEV *SrcCoeff = AddRec->getStepRecurrence(*SE); 2357 const SCEV *DstCoeff = SE->getMinusSCEV(SrcCoeff, SrcCoeff); 2358 const SCEV *Inner = Src; 2359 while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { 2360 AddRec = cast<SCEVAddRecExpr>(Inner); 2361 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2362 if (CurLoop == AddRec->getLoop()) 2363 ; // SrcCoeff == Coeff 2364 else { 2365 // If the coefficient is the product of a constant and other stuff, 2366 // we can use the constant in the GCD computation. 2367 Constant = getConstantPart(Coeff); 2368 if (!Constant) 2369 return false; 2370 APInt ConstCoeff = Constant->getAPInt(); 2371 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2372 } 2373 Inner = AddRec->getStart(); 2374 } 2375 Inner = Dst; 2376 while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) { 2377 AddRec = cast<SCEVAddRecExpr>(Inner); 2378 const SCEV *Coeff = AddRec->getStepRecurrence(*SE); 2379 if (CurLoop == AddRec->getLoop()) 2380 DstCoeff = Coeff; 2381 else { 2382 // If the coefficient is the product of a constant and other stuff, 2383 // we can use the constant in the GCD computation. 2384 Constant = getConstantPart(Coeff); 2385 if (!Constant) 2386 return false; 2387 APInt ConstCoeff = Constant->getAPInt(); 2388 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2389 } 2390 Inner = AddRec->getStart(); 2391 } 2392 Delta = SE->getMinusSCEV(SrcCoeff, DstCoeff); 2393 // If the coefficient is the product of a constant and other stuff, 2394 // we can use the constant in the GCD computation. 2395 Constant = getConstantPart(Delta); 2396 if (!Constant) 2397 // The difference of the two coefficients might not be a product 2398 // or constant, in which case we give up on this direction. 2399 continue; 2400 APInt ConstCoeff = Constant->getAPInt(); 2401 RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); 2402 DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n"); 2403 if (RunningGCD != 0) { 2404 Remainder = ConstDelta.srem(RunningGCD); 2405 DEBUG(dbgs() << "\tRemainder = " << Remainder << "\n"); 2406 if (Remainder != 0) { 2407 unsigned Level = mapSrcLoop(CurLoop); 2408 Result.DV[Level - 1].Direction &= unsigned(~Dependence::DVEntry::EQ); 2409 Improved = true; 2410 } 2411 } 2412 } 2413 if (Improved) 2414 ++GCDsuccesses; 2415 DEBUG(dbgs() << "all done\n"); 2416 return false; 2417 } 2418 2419 2420 //===----------------------------------------------------------------------===// 2421 // banerjeeMIVtest - 2422 // Use Banerjee's Inequalities to test an MIV subscript pair. 2423 // (Wolfe, in the race-car book, calls this the Extreme Value Test.) 2424 // Generally follows the discussion in Section 2.5.2 of 2425 // 2426 // Optimizing Supercompilers for Supercomputers 2427 // Michael Wolfe 2428 // 2429 // The inequalities given on page 25 are simplified in that loops are 2430 // normalized so that the lower bound is always 0 and the stride is always 1. 2431 // For example, Wolfe gives 2432 // 2433 // LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2434 // 2435 // where A_k is the coefficient of the kth index in the source subscript, 2436 // B_k is the coefficient of the kth index in the destination subscript, 2437 // U_k is the upper bound of the kth index, L_k is the lower bound of the Kth 2438 // index, and N_k is the stride of the kth index. Since all loops are normalized 2439 // by the SCEV package, N_k = 1 and L_k = 0, allowing us to simplify the 2440 // equation to 2441 // 2442 // LB^<_k = (A^-_k - B_k)^- (U_k - 0 - 1) + (A_k - B_k)0 - B_k 1 2443 // = (A^-_k - B_k)^- (U_k - 1) - B_k 2444 // 2445 // Similar simplifications are possible for the other equations. 2446 // 2447 // When we can't determine the number of iterations for a loop, 2448 // we use NULL as an indicator for the worst case, infinity. 2449 // When computing the upper bound, NULL denotes +inf; 2450 // for the lower bound, NULL denotes -inf. 2451 // 2452 // Return true if dependence disproved. 2453 bool DependenceInfo::banerjeeMIVtest(const SCEV *Src, const SCEV *Dst, 2454 const SmallBitVector &Loops, 2455 FullDependence &Result) const { 2456 DEBUG(dbgs() << "starting Banerjee\n"); 2457 ++BanerjeeApplications; 2458 DEBUG(dbgs() << " Src = " << *Src << '\n'); 2459 const SCEV *A0; 2460 CoefficientInfo *A = collectCoeffInfo(Src, true, A0); 2461 DEBUG(dbgs() << " Dst = " << *Dst << '\n'); 2462 const SCEV *B0; 2463 CoefficientInfo *B = collectCoeffInfo(Dst, false, B0); 2464 BoundInfo *Bound = new BoundInfo[MaxLevels + 1]; 2465 const SCEV *Delta = SE->getMinusSCEV(B0, A0); 2466 DEBUG(dbgs() << "\tDelta = " << *Delta << '\n'); 2467 2468 // Compute bounds for all the * directions. 2469 DEBUG(dbgs() << "\tBounds[*]\n"); 2470 for (unsigned K = 1; K <= MaxLevels; ++K) { 2471 Bound[K].Iterations = A[K].Iterations ? A[K].Iterations : B[K].Iterations; 2472 Bound[K].Direction = Dependence::DVEntry::ALL; 2473 Bound[K].DirSet = Dependence::DVEntry::NONE; 2474 findBoundsALL(A, B, Bound, K); 2475 #ifndef NDEBUG 2476 DEBUG(dbgs() << "\t " << K << '\t'); 2477 if (Bound[K].Lower[Dependence::DVEntry::ALL]) 2478 DEBUG(dbgs() << *Bound[K].Lower[Dependence::DVEntry::ALL] << '\t'); 2479 else 2480 DEBUG(dbgs() << "-inf\t"); 2481 if (Bound[K].Upper[Dependence::DVEntry::ALL]) 2482 DEBUG(dbgs() << *Bound[K].Upper[Dependence::DVEntry::ALL] << '\n'); 2483 else 2484 DEBUG(dbgs() << "+inf\n"); 2485 #endif 2486 } 2487 2488 // Test the *, *, *, ... case. 2489 bool Disproved = false; 2490 if (testBounds(Dependence::DVEntry::ALL, 0, Bound, Delta)) { 2491 // Explore the direction vector hierarchy. 2492 unsigned DepthExpanded = 0; 2493 unsigned NewDeps = exploreDirections(1, A, B, Bound, 2494 Loops, DepthExpanded, Delta); 2495 if (NewDeps > 0) { 2496 bool Improved = false; 2497 for (unsigned K = 1; K <= CommonLevels; ++K) { 2498 if (Loops[K]) { 2499 unsigned Old = Result.DV[K - 1].Direction; 2500 Result.DV[K - 1].Direction = Old & Bound[K].DirSet; 2501 Improved |= Old != Result.DV[K - 1].Direction; 2502 if (!Result.DV[K - 1].Direction) { 2503 Improved = false; 2504 Disproved = true; 2505 break; 2506 } 2507 } 2508 } 2509 if (Improved) 2510 ++BanerjeeSuccesses; 2511 } 2512 else { 2513 ++BanerjeeIndependence; 2514 Disproved = true; 2515 } 2516 } 2517 else { 2518 ++BanerjeeIndependence; 2519 Disproved = true; 2520 } 2521 delete [] Bound; 2522 delete [] A; 2523 delete [] B; 2524 return Disproved; 2525 } 2526 2527 2528 // Hierarchically expands the direction vector 2529 // search space, combining the directions of discovered dependences 2530 // in the DirSet field of Bound. Returns the number of distinct 2531 // dependences discovered. If the dependence is disproved, 2532 // it will return 0. 2533 unsigned DependenceInfo::exploreDirections(unsigned Level, CoefficientInfo *A, 2534 CoefficientInfo *B, BoundInfo *Bound, 2535 const SmallBitVector &Loops, 2536 unsigned &DepthExpanded, 2537 const SCEV *Delta) const { 2538 if (Level > CommonLevels) { 2539 // record result 2540 DEBUG(dbgs() << "\t["); 2541 for (unsigned K = 1; K <= CommonLevels; ++K) { 2542 if (Loops[K]) { 2543 Bound[K].DirSet |= Bound[K].Direction; 2544 #ifndef NDEBUG 2545 switch (Bound[K].Direction) { 2546 case Dependence::DVEntry::LT: 2547 DEBUG(dbgs() << " <"); 2548 break; 2549 case Dependence::DVEntry::EQ: 2550 DEBUG(dbgs() << " ="); 2551 break; 2552 case Dependence::DVEntry::GT: 2553 DEBUG(dbgs() << " >"); 2554 break; 2555 case Dependence::DVEntry::ALL: 2556 DEBUG(dbgs() << " *"); 2557 break; 2558 default: 2559 llvm_unreachable("unexpected Bound[K].Direction"); 2560 } 2561 #endif 2562 } 2563 } 2564 DEBUG(dbgs() << " ]\n"); 2565 return 1; 2566 } 2567 if (Loops[Level]) { 2568 if (Level > DepthExpanded) { 2569 DepthExpanded = Level; 2570 // compute bounds for <, =, > at current level 2571 findBoundsLT(A, B, Bound, Level); 2572 findBoundsGT(A, B, Bound, Level); 2573 findBoundsEQ(A, B, Bound, Level); 2574 #ifndef NDEBUG 2575 DEBUG(dbgs() << "\tBound for level = " << Level << '\n'); 2576 DEBUG(dbgs() << "\t <\t"); 2577 if (Bound[Level].Lower[Dependence::DVEntry::LT]) 2578 DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::LT] << '\t'); 2579 else 2580 DEBUG(dbgs() << "-inf\t"); 2581 if (Bound[Level].Upper[Dependence::DVEntry::LT]) 2582 DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::LT] << '\n'); 2583 else 2584 DEBUG(dbgs() << "+inf\n"); 2585 DEBUG(dbgs() << "\t =\t"); 2586 if (Bound[Level].Lower[Dependence::DVEntry::EQ]) 2587 DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::EQ] << '\t'); 2588 else 2589 DEBUG(dbgs() << "-inf\t"); 2590 if (Bound[Level].Upper[Dependence::DVEntry::EQ]) 2591 DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::EQ] << '\n'); 2592 else 2593 DEBUG(dbgs() << "+inf\n"); 2594 DEBUG(dbgs() << "\t >\t"); 2595 if (Bound[Level].Lower[Dependence::DVEntry::GT]) 2596 DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::GT] << '\t'); 2597 else 2598 DEBUG(dbgs() << "-inf\t"); 2599 if (Bound[Level].Upper[Dependence::DVEntry::GT]) 2600 DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::GT] << '\n'); 2601 else 2602 DEBUG(dbgs() << "+inf\n"); 2603 #endif 2604 } 2605 2606 unsigned NewDeps = 0; 2607 2608 // test bounds for <, *, *, ... 2609 if (testBounds(Dependence::DVEntry::LT, Level, Bound, Delta)) 2610 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2611 Loops, DepthExpanded, Delta); 2612 2613 // Test bounds for =, *, *, ... 2614 if (testBounds(Dependence::DVEntry::EQ, Level, Bound, Delta)) 2615 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2616 Loops, DepthExpanded, Delta); 2617 2618 // test bounds for >, *, *, ... 2619 if (testBounds(Dependence::DVEntry::GT, Level, Bound, Delta)) 2620 NewDeps += exploreDirections(Level + 1, A, B, Bound, 2621 Loops, DepthExpanded, Delta); 2622 2623 Bound[Level].Direction = Dependence::DVEntry::ALL; 2624 return NewDeps; 2625 } 2626 else 2627 return exploreDirections(Level + 1, A, B, Bound, Loops, DepthExpanded, Delta); 2628 } 2629 2630 2631 // Returns true iff the current bounds are plausible. 2632 bool DependenceInfo::testBounds(unsigned char DirKind, unsigned Level, 2633 BoundInfo *Bound, const SCEV *Delta) const { 2634 Bound[Level].Direction = DirKind; 2635 if (const SCEV *LowerBound = getLowerBound(Bound)) 2636 if (isKnownPredicate(CmpInst::ICMP_SGT, LowerBound, Delta)) 2637 return false; 2638 if (const SCEV *UpperBound = getUpperBound(Bound)) 2639 if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, UpperBound)) 2640 return false; 2641 return true; 2642 } 2643 2644 2645 // Computes the upper and lower bounds for level K 2646 // using the * direction. Records them in Bound. 2647 // Wolfe gives the equations 2648 // 2649 // LB^*_k = (A^-_k - B^+_k)(U_k - L_k) + (A_k - B_k)L_k 2650 // UB^*_k = (A^+_k - B^-_k)(U_k - L_k) + (A_k - B_k)L_k 2651 // 2652 // Since we normalize loops, we can simplify these equations to 2653 // 2654 // LB^*_k = (A^-_k - B^+_k)U_k 2655 // UB^*_k = (A^+_k - B^-_k)U_k 2656 // 2657 // We must be careful to handle the case where the upper bound is unknown. 2658 // Note that the lower bound is always <= 0 2659 // and the upper bound is always >= 0. 2660 void DependenceInfo::findBoundsALL(CoefficientInfo *A, CoefficientInfo *B, 2661 BoundInfo *Bound, unsigned K) const { 2662 Bound[K].Lower[Dependence::DVEntry::ALL] = nullptr; // Default value = -infinity. 2663 Bound[K].Upper[Dependence::DVEntry::ALL] = nullptr; // Default value = +infinity. 2664 if (Bound[K].Iterations) { 2665 Bound[K].Lower[Dependence::DVEntry::ALL] = 2666 SE->getMulExpr(SE->getMinusSCEV(A[K].NegPart, B[K].PosPart), 2667 Bound[K].Iterations); 2668 Bound[K].Upper[Dependence::DVEntry::ALL] = 2669 SE->getMulExpr(SE->getMinusSCEV(A[K].PosPart, B[K].NegPart), 2670 Bound[K].Iterations); 2671 } 2672 else { 2673 // If the difference is 0, we won't need to know the number of iterations. 2674 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].NegPart, B[K].PosPart)) 2675 Bound[K].Lower[Dependence::DVEntry::ALL] = 2676 SE->getZero(A[K].Coeff->getType()); 2677 if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].PosPart, B[K].NegPart)) 2678 Bound[K].Upper[Dependence::DVEntry::ALL] = 2679 SE->getZero(A[K].Coeff->getType()); 2680 } 2681 } 2682 2683 2684 // Computes the upper and lower bounds for level K 2685 // using the = direction. Records them in Bound. 2686 // Wolfe gives the equations 2687 // 2688 // LB^=_k = (A_k - B_k)^- (U_k - L_k) + (A_k - B_k)L_k 2689 // UB^=_k = (A_k - B_k)^+ (U_k - L_k) + (A_k - B_k)L_k 2690 // 2691 // Since we normalize loops, we can simplify these equations to 2692 // 2693 // LB^=_k = (A_k - B_k)^- U_k 2694 // UB^=_k = (A_k - B_k)^+ U_k 2695 // 2696 // We must be careful to handle the case where the upper bound is unknown. 2697 // Note that the lower bound is always <= 0 2698 // and the upper bound is always >= 0. 2699 void DependenceInfo::findBoundsEQ(CoefficientInfo *A, CoefficientInfo *B, 2700 BoundInfo *Bound, unsigned K) const { 2701 Bound[K].Lower[Dependence::DVEntry::EQ] = nullptr; // Default value = -infinity. 2702 Bound[K].Upper[Dependence::DVEntry::EQ] = nullptr; // Default value = +infinity. 2703 if (Bound[K].Iterations) { 2704 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); 2705 const SCEV *NegativePart = getNegativePart(Delta); 2706 Bound[K].Lower[Dependence::DVEntry::EQ] = 2707 SE->getMulExpr(NegativePart, Bound[K].Iterations); 2708 const SCEV *PositivePart = getPositivePart(Delta); 2709 Bound[K].Upper[Dependence::DVEntry::EQ] = 2710 SE->getMulExpr(PositivePart, Bound[K].Iterations); 2711 } 2712 else { 2713 // If the positive/negative part of the difference is 0, 2714 // we won't need to know the number of iterations. 2715 const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); 2716 const SCEV *NegativePart = getNegativePart(Delta); 2717 if (NegativePart->isZero()) 2718 Bound[K].Lower[Dependence::DVEntry::EQ] = NegativePart; // Zero 2719 const SCEV *PositivePart = getPositivePart(Delta); 2720 if (PositivePart->isZero()) 2721 Bound[K].Upper[Dependence::DVEntry::EQ] = PositivePart; // Zero 2722 } 2723 } 2724 2725 2726 // Computes the upper and lower bounds for level K 2727 // using the < direction. Records them in Bound. 2728 // Wolfe gives the equations 2729 // 2730 // LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2731 // UB^<_k = (A^+_k - B_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k 2732 // 2733 // Since we normalize loops, we can simplify these equations to 2734 // 2735 // LB^<_k = (A^-_k - B_k)^- (U_k - 1) - B_k 2736 // UB^<_k = (A^+_k - B_k)^+ (U_k - 1) - B_k 2737 // 2738 // We must be careful to handle the case where the upper bound is unknown. 2739 void DependenceInfo::findBoundsLT(CoefficientInfo *A, CoefficientInfo *B, 2740 BoundInfo *Bound, unsigned K) const { 2741 Bound[K].Lower[Dependence::DVEntry::LT] = nullptr; // Default value = -infinity. 2742 Bound[K].Upper[Dependence::DVEntry::LT] = nullptr; // Default value = +infinity. 2743 if (Bound[K].Iterations) { 2744 const SCEV *Iter_1 = SE->getMinusSCEV( 2745 Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType())); 2746 const SCEV *NegPart = 2747 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); 2748 Bound[K].Lower[Dependence::DVEntry::LT] = 2749 SE->getMinusSCEV(SE->getMulExpr(NegPart, Iter_1), B[K].Coeff); 2750 const SCEV *PosPart = 2751 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); 2752 Bound[K].Upper[Dependence::DVEntry::LT] = 2753 SE->getMinusSCEV(SE->getMulExpr(PosPart, Iter_1), B[K].Coeff); 2754 } 2755 else { 2756 // If the positive/negative part of the difference is 0, 2757 // we won't need to know the number of iterations. 2758 const SCEV *NegPart = 2759 getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); 2760 if (NegPart->isZero()) 2761 Bound[K].Lower[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); 2762 const SCEV *PosPart = 2763 getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff)); 2764 if (PosPart->isZero()) 2765 Bound[K].Upper[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff); 2766 } 2767 } 2768 2769 2770 // Computes the upper and lower bounds for level K 2771 // using the > direction. Records them in Bound. 2772 // Wolfe gives the equations 2773 // 2774 // LB^>_k = (A_k - B^+_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k 2775 // UB^>_k = (A_k - B^-_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k 2776 // 2777 // Since we normalize loops, we can simplify these equations to 2778 // 2779 // LB^>_k = (A_k - B^+_k)^- (U_k - 1) + A_k 2780 // UB^>_k = (A_k - B^-_k)^+ (U_k - 1) + A_k 2781 // 2782 // We must be careful to handle the case where the upper bound is unknown. 2783 void DependenceInfo::findBoundsGT(CoefficientInfo *A, CoefficientInfo *B, 2784 BoundInfo *Bound, unsigned K) const { 2785 Bound[K].Lower[Dependence::DVEntry::GT] = nullptr; // Default value = -infinity. 2786 Bound[K].Upper[Dependence::DVEntry::GT] = nullptr; // Default value = +infinity. 2787 if (Bound[K].Iterations) { 2788 const SCEV *Iter_1 = SE->getMinusSCEV( 2789 Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType())); 2790 const SCEV *NegPart = 2791 getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); 2792 Bound[K].Lower[Dependence::DVEntry::GT] = 2793 SE->getAddExpr(SE->getMulExpr(NegPart, Iter_1), A[K].Coeff); 2794 const SCEV *PosPart = 2795 getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); 2796 Bound[K].Upper[Dependence::DVEntry::GT] = 2797 SE->getAddExpr(SE->getMulExpr(PosPart, Iter_1), A[K].Coeff); 2798 } 2799 else { 2800 // If the positive/negative part of the difference is 0, 2801 // we won't need to know the number of iterations. 2802 const SCEV *NegPart = getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); 2803 if (NegPart->isZero()) 2804 Bound[K].Lower[Dependence::DVEntry::GT] = A[K].Coeff; 2805 const SCEV *PosPart = getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart)); 2806 if (PosPart->isZero()) 2807 Bound[K].Upper[Dependence::DVEntry::GT] = A[K].Coeff; 2808 } 2809 } 2810 2811 2812 // X^+ = max(X, 0) 2813 const SCEV *DependenceInfo::getPositivePart(const SCEV *X) const { 2814 return SE->getSMaxExpr(X, SE->getZero(X->getType())); 2815 } 2816 2817 2818 // X^- = min(X, 0) 2819 const SCEV *DependenceInfo::getNegativePart(const SCEV *X) const { 2820 return SE->getSMinExpr(X, SE->getZero(X->getType())); 2821 } 2822 2823 2824 // Walks through the subscript, 2825 // collecting each coefficient, the associated loop bounds, 2826 // and recording its positive and negative parts for later use. 2827 DependenceInfo::CoefficientInfo * 2828 DependenceInfo::collectCoeffInfo(const SCEV *Subscript, bool SrcFlag, 2829 const SCEV *&Constant) const { 2830 const SCEV *Zero = SE->getZero(Subscript->getType()); 2831 CoefficientInfo *CI = new CoefficientInfo[MaxLevels + 1]; 2832 for (unsigned K = 1; K <= MaxLevels; ++K) { 2833 CI[K].Coeff = Zero; 2834 CI[K].PosPart = Zero; 2835 CI[K].NegPart = Zero; 2836 CI[K].Iterations = nullptr; 2837 } 2838 while (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Subscript)) { 2839 const Loop *L = AddRec->getLoop(); 2840 unsigned K = SrcFlag ? mapSrcLoop(L) : mapDstLoop(L); 2841 CI[K].Coeff = AddRec->getStepRecurrence(*SE); 2842 CI[K].PosPart = getPositivePart(CI[K].Coeff); 2843 CI[K].NegPart = getNegativePart(CI[K].Coeff); 2844 CI[K].Iterations = collectUpperBound(L, Subscript->getType()); 2845 Subscript = AddRec->getStart(); 2846 } 2847 Constant = Subscript; 2848 #ifndef NDEBUG 2849 DEBUG(dbgs() << "\tCoefficient Info\n"); 2850 for (unsigned K = 1; K <= MaxLevels; ++K) { 2851 DEBUG(dbgs() << "\t " << K << "\t" << *CI[K].Coeff); 2852 DEBUG(dbgs() << "\tPos Part = "); 2853 DEBUG(dbgs() << *CI[K].PosPart); 2854 DEBUG(dbgs() << "\tNeg Part = "); 2855 DEBUG(dbgs() << *CI[K].NegPart); 2856 DEBUG(dbgs() << "\tUpper Bound = "); 2857 if (CI[K].Iterations) 2858 DEBUG(dbgs() << *CI[K].Iterations); 2859 else 2860 DEBUG(dbgs() << "+inf"); 2861 DEBUG(dbgs() << '\n'); 2862 } 2863 DEBUG(dbgs() << "\t Constant = " << *Subscript << '\n'); 2864 #endif 2865 return CI; 2866 } 2867 2868 2869 // Looks through all the bounds info and 2870 // computes the lower bound given the current direction settings 2871 // at each level. If the lower bound for any level is -inf, 2872 // the result is -inf. 2873 const SCEV *DependenceInfo::getLowerBound(BoundInfo *Bound) const { 2874 const SCEV *Sum = Bound[1].Lower[Bound[1].Direction]; 2875 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { 2876 if (Bound[K].Lower[Bound[K].Direction]) 2877 Sum = SE->getAddExpr(Sum, Bound[K].Lower[Bound[K].Direction]); 2878 else 2879 Sum = nullptr; 2880 } 2881 return Sum; 2882 } 2883 2884 2885 // Looks through all the bounds info and 2886 // computes the upper bound given the current direction settings 2887 // at each level. If the upper bound at any level is +inf, 2888 // the result is +inf. 2889 const SCEV *DependenceInfo::getUpperBound(BoundInfo *Bound) const { 2890 const SCEV *Sum = Bound[1].Upper[Bound[1].Direction]; 2891 for (unsigned K = 2; Sum && K <= MaxLevels; ++K) { 2892 if (Bound[K].Upper[Bound[K].Direction]) 2893 Sum = SE->getAddExpr(Sum, Bound[K].Upper[Bound[K].Direction]); 2894 else 2895 Sum = nullptr; 2896 } 2897 return Sum; 2898 } 2899 2900 2901 //===----------------------------------------------------------------------===// 2902 // Constraint manipulation for Delta test. 2903 2904 // Given a linear SCEV, 2905 // return the coefficient (the step) 2906 // corresponding to the specified loop. 2907 // If there isn't one, return 0. 2908 // For example, given a*i + b*j + c*k, finding the coefficient 2909 // corresponding to the j loop would yield b. 2910 const SCEV *DependenceInfo::findCoefficient(const SCEV *Expr, 2911 const Loop *TargetLoop) const { 2912 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 2913 if (!AddRec) 2914 return SE->getZero(Expr->getType()); 2915 if (AddRec->getLoop() == TargetLoop) 2916 return AddRec->getStepRecurrence(*SE); 2917 return findCoefficient(AddRec->getStart(), TargetLoop); 2918 } 2919 2920 2921 // Given a linear SCEV, 2922 // return the SCEV given by zeroing out the coefficient 2923 // corresponding to the specified loop. 2924 // For example, given a*i + b*j + c*k, zeroing the coefficient 2925 // corresponding to the j loop would yield a*i + c*k. 2926 const SCEV *DependenceInfo::zeroCoefficient(const SCEV *Expr, 2927 const Loop *TargetLoop) const { 2928 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 2929 if (!AddRec) 2930 return Expr; // ignore 2931 if (AddRec->getLoop() == TargetLoop) 2932 return AddRec->getStart(); 2933 return SE->getAddRecExpr(zeroCoefficient(AddRec->getStart(), TargetLoop), 2934 AddRec->getStepRecurrence(*SE), 2935 AddRec->getLoop(), 2936 AddRec->getNoWrapFlags()); 2937 } 2938 2939 2940 // Given a linear SCEV Expr, 2941 // return the SCEV given by adding some Value to the 2942 // coefficient corresponding to the specified TargetLoop. 2943 // For example, given a*i + b*j + c*k, adding 1 to the coefficient 2944 // corresponding to the j loop would yield a*i + (b+1)*j + c*k. 2945 const SCEV *DependenceInfo::addToCoefficient(const SCEV *Expr, 2946 const Loop *TargetLoop, 2947 const SCEV *Value) const { 2948 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr); 2949 if (!AddRec) // create a new addRec 2950 return SE->getAddRecExpr(Expr, 2951 Value, 2952 TargetLoop, 2953 SCEV::FlagAnyWrap); // Worst case, with no info. 2954 if (AddRec->getLoop() == TargetLoop) { 2955 const SCEV *Sum = SE->getAddExpr(AddRec->getStepRecurrence(*SE), Value); 2956 if (Sum->isZero()) 2957 return AddRec->getStart(); 2958 return SE->getAddRecExpr(AddRec->getStart(), 2959 Sum, 2960 AddRec->getLoop(), 2961 AddRec->getNoWrapFlags()); 2962 } 2963 if (SE->isLoopInvariant(AddRec, TargetLoop)) 2964 return SE->getAddRecExpr(AddRec, Value, TargetLoop, SCEV::FlagAnyWrap); 2965 return SE->getAddRecExpr( 2966 addToCoefficient(AddRec->getStart(), TargetLoop, Value), 2967 AddRec->getStepRecurrence(*SE), AddRec->getLoop(), 2968 AddRec->getNoWrapFlags()); 2969 } 2970 2971 2972 // Review the constraints, looking for opportunities 2973 // to simplify a subscript pair (Src and Dst). 2974 // Return true if some simplification occurs. 2975 // If the simplification isn't exact (that is, if it is conservative 2976 // in terms of dependence), set consistent to false. 2977 // Corresponds to Figure 5 from the paper 2978 // 2979 // Practical Dependence Testing 2980 // Goff, Kennedy, Tseng 2981 // PLDI 1991 2982 bool DependenceInfo::propagate(const SCEV *&Src, const SCEV *&Dst, 2983 SmallBitVector &Loops, 2984 SmallVectorImpl<Constraint> &Constraints, 2985 bool &Consistent) { 2986 bool Result = false; 2987 for (unsigned LI : Loops.set_bits()) { 2988 DEBUG(dbgs() << "\t Constraint[" << LI << "] is"); 2989 DEBUG(Constraints[LI].dump(dbgs())); 2990 if (Constraints[LI].isDistance()) 2991 Result |= propagateDistance(Src, Dst, Constraints[LI], Consistent); 2992 else if (Constraints[LI].isLine()) 2993 Result |= propagateLine(Src, Dst, Constraints[LI], Consistent); 2994 else if (Constraints[LI].isPoint()) 2995 Result |= propagatePoint(Src, Dst, Constraints[LI]); 2996 } 2997 return Result; 2998 } 2999 3000 3001 // Attempt to propagate a distance 3002 // constraint into a subscript pair (Src and Dst). 3003 // Return true if some simplification occurs. 3004 // If the simplification isn't exact (that is, if it is conservative 3005 // in terms of dependence), set consistent to false. 3006 bool DependenceInfo::propagateDistance(const SCEV *&Src, const SCEV *&Dst, 3007 Constraint &CurConstraint, 3008 bool &Consistent) { 3009 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3010 DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); 3011 const SCEV *A_K = findCoefficient(Src, CurLoop); 3012 if (A_K->isZero()) 3013 return false; 3014 const SCEV *DA_K = SE->getMulExpr(A_K, CurConstraint.getD()); 3015 Src = SE->getMinusSCEV(Src, DA_K); 3016 Src = zeroCoefficient(Src, CurLoop); 3017 DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); 3018 DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); 3019 Dst = addToCoefficient(Dst, CurLoop, SE->getNegativeSCEV(A_K)); 3020 DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); 3021 if (!findCoefficient(Dst, CurLoop)->isZero()) 3022 Consistent = false; 3023 return true; 3024 } 3025 3026 3027 // Attempt to propagate a line 3028 // constraint into a subscript pair (Src and Dst). 3029 // Return true if some simplification occurs. 3030 // If the simplification isn't exact (that is, if it is conservative 3031 // in terms of dependence), set consistent to false. 3032 bool DependenceInfo::propagateLine(const SCEV *&Src, const SCEV *&Dst, 3033 Constraint &CurConstraint, 3034 bool &Consistent) { 3035 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3036 const SCEV *A = CurConstraint.getA(); 3037 const SCEV *B = CurConstraint.getB(); 3038 const SCEV *C = CurConstraint.getC(); 3039 DEBUG(dbgs() << "\t\tA = " << *A << ", B = " << *B << ", C = " << *C << "\n"); 3040 DEBUG(dbgs() << "\t\tSrc = " << *Src << "\n"); 3041 DEBUG(dbgs() << "\t\tDst = " << *Dst << "\n"); 3042 if (A->isZero()) { 3043 const SCEVConstant *Bconst = dyn_cast<SCEVConstant>(B); 3044 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3045 if (!Bconst || !Cconst) return false; 3046 APInt Beta = Bconst->getAPInt(); 3047 APInt Charlie = Cconst->getAPInt(); 3048 APInt CdivB = Charlie.sdiv(Beta); 3049 assert(Charlie.srem(Beta) == 0 && "C should be evenly divisible by B"); 3050 const SCEV *AP_K = findCoefficient(Dst, CurLoop); 3051 // Src = SE->getAddExpr(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); 3052 Src = SE->getMinusSCEV(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB))); 3053 Dst = zeroCoefficient(Dst, CurLoop); 3054 if (!findCoefficient(Src, CurLoop)->isZero()) 3055 Consistent = false; 3056 } 3057 else if (B->isZero()) { 3058 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); 3059 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3060 if (!Aconst || !Cconst) return false; 3061 APInt Alpha = Aconst->getAPInt(); 3062 APInt Charlie = Cconst->getAPInt(); 3063 APInt CdivA = Charlie.sdiv(Alpha); 3064 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); 3065 const SCEV *A_K = findCoefficient(Src, CurLoop); 3066 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); 3067 Src = zeroCoefficient(Src, CurLoop); 3068 if (!findCoefficient(Dst, CurLoop)->isZero()) 3069 Consistent = false; 3070 } 3071 else if (isKnownPredicate(CmpInst::ICMP_EQ, A, B)) { 3072 const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A); 3073 const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C); 3074 if (!Aconst || !Cconst) return false; 3075 APInt Alpha = Aconst->getAPInt(); 3076 APInt Charlie = Cconst->getAPInt(); 3077 APInt CdivA = Charlie.sdiv(Alpha); 3078 assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); 3079 const SCEV *A_K = findCoefficient(Src, CurLoop); 3080 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA))); 3081 Src = zeroCoefficient(Src, CurLoop); 3082 Dst = addToCoefficient(Dst, CurLoop, A_K); 3083 if (!findCoefficient(Dst, CurLoop)->isZero()) 3084 Consistent = false; 3085 } 3086 else { 3087 // paper is incorrect here, or perhaps just misleading 3088 const SCEV *A_K = findCoefficient(Src, CurLoop); 3089 Src = SE->getMulExpr(Src, A); 3090 Dst = SE->getMulExpr(Dst, A); 3091 Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, C)); 3092 Src = zeroCoefficient(Src, CurLoop); 3093 Dst = addToCoefficient(Dst, CurLoop, SE->getMulExpr(A_K, B)); 3094 if (!findCoefficient(Dst, CurLoop)->isZero()) 3095 Consistent = false; 3096 } 3097 DEBUG(dbgs() << "\t\tnew Src = " << *Src << "\n"); 3098 DEBUG(dbgs() << "\t\tnew Dst = " << *Dst << "\n"); 3099 return true; 3100 } 3101 3102 3103 // Attempt to propagate a point 3104 // constraint into a subscript pair (Src and Dst). 3105 // Return true if some simplification occurs. 3106 bool DependenceInfo::propagatePoint(const SCEV *&Src, const SCEV *&Dst, 3107 Constraint &CurConstraint) { 3108 const Loop *CurLoop = CurConstraint.getAssociatedLoop(); 3109 const SCEV *A_K = findCoefficient(Src, CurLoop); 3110 const SCEV *AP_K = findCoefficient(Dst, CurLoop); 3111 const SCEV *XA_K = SE->getMulExpr(A_K, CurConstraint.getX()); 3112 const SCEV *YAP_K = SE->getMulExpr(AP_K, CurConstraint.getY()); 3113 DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n"); 3114 Src = SE->getAddExpr(Src, SE->getMinusSCEV(XA_K, YAP_K)); 3115 Src = zeroCoefficient(Src, CurLoop); 3116 DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n"); 3117 DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n"); 3118 Dst = zeroCoefficient(Dst, CurLoop); 3119 DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n"); 3120 return true; 3121 } 3122 3123 3124 // Update direction vector entry based on the current constraint. 3125 void DependenceInfo::updateDirection(Dependence::DVEntry &Level, 3126 const Constraint &CurConstraint) const { 3127 DEBUG(dbgs() << "\tUpdate direction, constraint ="); 3128 DEBUG(CurConstraint.dump(dbgs())); 3129 if (CurConstraint.isAny()) 3130 ; // use defaults 3131 else if (CurConstraint.isDistance()) { 3132 // this one is consistent, the others aren't 3133 Level.Scalar = false; 3134 Level.Distance = CurConstraint.getD(); 3135 unsigned NewDirection = Dependence::DVEntry::NONE; 3136 if (!SE->isKnownNonZero(Level.Distance)) // if may be zero 3137 NewDirection = Dependence::DVEntry::EQ; 3138 if (!SE->isKnownNonPositive(Level.Distance)) // if may be positive 3139 NewDirection |= Dependence::DVEntry::LT; 3140 if (!SE->isKnownNonNegative(Level.Distance)) // if may be negative 3141 NewDirection |= Dependence::DVEntry::GT; 3142 Level.Direction &= NewDirection; 3143 } 3144 else if (CurConstraint.isLine()) { 3145 Level.Scalar = false; 3146 Level.Distance = nullptr; 3147 // direction should be accurate 3148 } 3149 else if (CurConstraint.isPoint()) { 3150 Level.Scalar = false; 3151 Level.Distance = nullptr; 3152 unsigned NewDirection = Dependence::DVEntry::NONE; 3153 if (!isKnownPredicate(CmpInst::ICMP_NE, 3154 CurConstraint.getY(), 3155 CurConstraint.getX())) 3156 // if X may be = Y 3157 NewDirection |= Dependence::DVEntry::EQ; 3158 if (!isKnownPredicate(CmpInst::ICMP_SLE, 3159 CurConstraint.getY(), 3160 CurConstraint.getX())) 3161 // if Y may be > X 3162 NewDirection |= Dependence::DVEntry::LT; 3163 if (!isKnownPredicate(CmpInst::ICMP_SGE, 3164 CurConstraint.getY(), 3165 CurConstraint.getX())) 3166 // if Y may be < X 3167 NewDirection |= Dependence::DVEntry::GT; 3168 Level.Direction &= NewDirection; 3169 } 3170 else 3171 llvm_unreachable("constraint has unexpected kind"); 3172 } 3173 3174 /// Check if we can delinearize the subscripts. If the SCEVs representing the 3175 /// source and destination array references are recurrences on a nested loop, 3176 /// this function flattens the nested recurrences into separate recurrences 3177 /// for each loop level. 3178 bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst, 3179 SmallVectorImpl<Subscript> &Pair) { 3180 Value *SrcPtr = getPointerOperand(Src); 3181 Value *DstPtr = getPointerOperand(Dst); 3182 3183 Loop *SrcLoop = LI->getLoopFor(Src->getParent()); 3184 Loop *DstLoop = LI->getLoopFor(Dst->getParent()); 3185 3186 // Below code mimics the code in Delinearization.cpp 3187 const SCEV *SrcAccessFn = 3188 SE->getSCEVAtScope(SrcPtr, SrcLoop); 3189 const SCEV *DstAccessFn = 3190 SE->getSCEVAtScope(DstPtr, DstLoop); 3191 3192 const SCEVUnknown *SrcBase = 3193 dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn)); 3194 const SCEVUnknown *DstBase = 3195 dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn)); 3196 3197 if (!SrcBase || !DstBase || SrcBase != DstBase) 3198 return false; 3199 3200 const SCEV *ElementSize = SE->getElementSize(Src); 3201 if (ElementSize != SE->getElementSize(Dst)) 3202 return false; 3203 3204 const SCEV *SrcSCEV = SE->getMinusSCEV(SrcAccessFn, SrcBase); 3205 const SCEV *DstSCEV = SE->getMinusSCEV(DstAccessFn, DstBase); 3206 3207 const SCEVAddRecExpr *SrcAR = dyn_cast<SCEVAddRecExpr>(SrcSCEV); 3208 const SCEVAddRecExpr *DstAR = dyn_cast<SCEVAddRecExpr>(DstSCEV); 3209 if (!SrcAR || !DstAR || !SrcAR->isAffine() || !DstAR->isAffine()) 3210 return false; 3211 3212 // First step: collect parametric terms in both array references. 3213 SmallVector<const SCEV *, 4> Terms; 3214 SE->collectParametricTerms(SrcAR, Terms); 3215 SE->collectParametricTerms(DstAR, Terms); 3216 3217 // Second step: find subscript sizes. 3218 SmallVector<const SCEV *, 4> Sizes; 3219 SE->findArrayDimensions(Terms, Sizes, ElementSize); 3220 3221 // Third step: compute the access functions for each subscript. 3222 SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts; 3223 SE->computeAccessFunctions(SrcAR, SrcSubscripts, Sizes); 3224 SE->computeAccessFunctions(DstAR, DstSubscripts, Sizes); 3225 3226 // Fail when there is only a subscript: that's a linearized access function. 3227 if (SrcSubscripts.size() < 2 || DstSubscripts.size() < 2 || 3228 SrcSubscripts.size() != DstSubscripts.size()) 3229 return false; 3230 3231 int size = SrcSubscripts.size(); 3232 3233 DEBUG({ 3234 dbgs() << "\nSrcSubscripts: "; 3235 for (int i = 0; i < size; i++) 3236 dbgs() << *SrcSubscripts[i]; 3237 dbgs() << "\nDstSubscripts: "; 3238 for (int i = 0; i < size; i++) 3239 dbgs() << *DstSubscripts[i]; 3240 }); 3241 3242 // The delinearization transforms a single-subscript MIV dependence test into 3243 // a multi-subscript SIV dependence test that is easier to compute. So we 3244 // resize Pair to contain as many pairs of subscripts as the delinearization 3245 // has found, and then initialize the pairs following the delinearization. 3246 Pair.resize(size); 3247 for (int i = 0; i < size; ++i) { 3248 Pair[i].Src = SrcSubscripts[i]; 3249 Pair[i].Dst = DstSubscripts[i]; 3250 unifySubscriptType(&Pair[i]); 3251 3252 // FIXME: we should record the bounds SrcSizes[i] and DstSizes[i] that the 3253 // delinearization has found, and add these constraints to the dependence 3254 // check to avoid memory accesses overflow from one dimension into another. 3255 // This is related to the problem of determining the existence of data 3256 // dependences in array accesses using a different number of subscripts: in 3257 // C one can access an array A[100][100]; as A[0][9999], *A[9999], etc. 3258 } 3259 3260 return true; 3261 } 3262 3263 //===----------------------------------------------------------------------===// 3264 3265 #ifndef NDEBUG 3266 // For debugging purposes, dump a small bit vector to dbgs(). 3267 static void dumpSmallBitVector(SmallBitVector &BV) { 3268 dbgs() << "{"; 3269 for (unsigned VI : BV.set_bits()) { 3270 dbgs() << VI; 3271 if (BV.find_next(VI) >= 0) 3272 dbgs() << ' '; 3273 } 3274 dbgs() << "}\n"; 3275 } 3276 #endif 3277 3278 // depends - 3279 // Returns NULL if there is no dependence. 3280 // Otherwise, return a Dependence with as many details as possible. 3281 // Corresponds to Section 3.1 in the paper 3282 // 3283 // Practical Dependence Testing 3284 // Goff, Kennedy, Tseng 3285 // PLDI 1991 3286 // 3287 // Care is required to keep the routine below, getSplitIteration(), 3288 // up to date with respect to this routine. 3289 std::unique_ptr<Dependence> 3290 DependenceInfo::depends(Instruction *Src, Instruction *Dst, 3291 bool PossiblyLoopIndependent) { 3292 if (Src == Dst) 3293 PossiblyLoopIndependent = false; 3294 3295 if ((!Src->mayReadFromMemory() && !Src->mayWriteToMemory()) || 3296 (!Dst->mayReadFromMemory() && !Dst->mayWriteToMemory())) 3297 // if both instructions don't reference memory, there's no dependence 3298 return nullptr; 3299 3300 if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) { 3301 // can only analyze simple loads and stores, i.e., no calls, invokes, etc. 3302 DEBUG(dbgs() << "can only handle simple loads and stores\n"); 3303 return make_unique<Dependence>(Src, Dst); 3304 } 3305 3306 Value *SrcPtr = getPointerOperand(Src); 3307 Value *DstPtr = getPointerOperand(Dst); 3308 3309 switch (underlyingObjectsAlias(AA, F->getParent()->getDataLayout(), DstPtr, 3310 SrcPtr)) { 3311 case MayAlias: 3312 case PartialAlias: 3313 // cannot analyse objects if we don't understand their aliasing. 3314 DEBUG(dbgs() << "can't analyze may or partial alias\n"); 3315 return make_unique<Dependence>(Src, Dst); 3316 case NoAlias: 3317 // If the objects noalias, they are distinct, accesses are independent. 3318 DEBUG(dbgs() << "no alias\n"); 3319 return nullptr; 3320 case MustAlias: 3321 break; // The underlying objects alias; test accesses for dependence. 3322 } 3323 3324 // establish loop nesting levels 3325 establishNestingLevels(Src, Dst); 3326 DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n"); 3327 DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n"); 3328 3329 FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels); 3330 ++TotalArrayPairs; 3331 3332 // See if there are GEPs we can use. 3333 bool UsefulGEP = false; 3334 GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr); 3335 GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr); 3336 if (SrcGEP && DstGEP && 3337 SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()) { 3338 const SCEV *SrcPtrSCEV = SE->getSCEV(SrcGEP->getPointerOperand()); 3339 const SCEV *DstPtrSCEV = SE->getSCEV(DstGEP->getPointerOperand()); 3340 DEBUG(dbgs() << " SrcPtrSCEV = " << *SrcPtrSCEV << "\n"); 3341 DEBUG(dbgs() << " DstPtrSCEV = " << *DstPtrSCEV << "\n"); 3342 3343 UsefulGEP = isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) && 3344 isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent())) && 3345 (SrcGEP->getNumOperands() == DstGEP->getNumOperands()); 3346 } 3347 unsigned Pairs = UsefulGEP ? SrcGEP->idx_end() - SrcGEP->idx_begin() : 1; 3348 SmallVector<Subscript, 4> Pair(Pairs); 3349 if (UsefulGEP) { 3350 DEBUG(dbgs() << " using GEPs\n"); 3351 unsigned P = 0; 3352 for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(), 3353 SrcEnd = SrcGEP->idx_end(), 3354 DstIdx = DstGEP->idx_begin(); 3355 SrcIdx != SrcEnd; 3356 ++SrcIdx, ++DstIdx, ++P) { 3357 Pair[P].Src = SE->getSCEV(*SrcIdx); 3358 Pair[P].Dst = SE->getSCEV(*DstIdx); 3359 unifySubscriptType(&Pair[P]); 3360 } 3361 } 3362 else { 3363 DEBUG(dbgs() << " ignoring GEPs\n"); 3364 const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); 3365 const SCEV *DstSCEV = SE->getSCEV(DstPtr); 3366 DEBUG(dbgs() << " SrcSCEV = " << *SrcSCEV << "\n"); 3367 DEBUG(dbgs() << " DstSCEV = " << *DstSCEV << "\n"); 3368 Pair[0].Src = SrcSCEV; 3369 Pair[0].Dst = DstSCEV; 3370 } 3371 3372 if (Delinearize && CommonLevels > 1) { 3373 if (tryDelinearize(Src, Dst, Pair)) { 3374 DEBUG(dbgs() << " delinerized GEP\n"); 3375 Pairs = Pair.size(); 3376 } 3377 } 3378 3379 for (unsigned P = 0; P < Pairs; ++P) { 3380 Pair[P].Loops.resize(MaxLevels + 1); 3381 Pair[P].GroupLoops.resize(MaxLevels + 1); 3382 Pair[P].Group.resize(Pairs); 3383 removeMatchingExtensions(&Pair[P]); 3384 Pair[P].Classification = 3385 classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), 3386 Pair[P].Dst, LI->getLoopFor(Dst->getParent()), 3387 Pair[P].Loops); 3388 Pair[P].GroupLoops = Pair[P].Loops; 3389 Pair[P].Group.set(P); 3390 DEBUG(dbgs() << " subscript " << P << "\n"); 3391 DEBUG(dbgs() << "\tsrc = " << *Pair[P].Src << "\n"); 3392 DEBUG(dbgs() << "\tdst = " << *Pair[P].Dst << "\n"); 3393 DEBUG(dbgs() << "\tclass = " << Pair[P].Classification << "\n"); 3394 DEBUG(dbgs() << "\tloops = "); 3395 DEBUG(dumpSmallBitVector(Pair[P].Loops)); 3396 } 3397 3398 SmallBitVector Separable(Pairs); 3399 SmallBitVector Coupled(Pairs); 3400 3401 // Partition subscripts into separable and minimally-coupled groups 3402 // Algorithm in paper is algorithmically better; 3403 // this may be faster in practice. Check someday. 3404 // 3405 // Here's an example of how it works. Consider this code: 3406 // 3407 // for (i = ...) { 3408 // for (j = ...) { 3409 // for (k = ...) { 3410 // for (l = ...) { 3411 // for (m = ...) { 3412 // A[i][j][k][m] = ...; 3413 // ... = A[0][j][l][i + j]; 3414 // } 3415 // } 3416 // } 3417 // } 3418 // } 3419 // 3420 // There are 4 subscripts here: 3421 // 0 [i] and [0] 3422 // 1 [j] and [j] 3423 // 2 [k] and [l] 3424 // 3 [m] and [i + j] 3425 // 3426 // We've already classified each subscript pair as ZIV, SIV, etc., 3427 // and collected all the loops mentioned by pair P in Pair[P].Loops. 3428 // In addition, we've initialized Pair[P].GroupLoops to Pair[P].Loops 3429 // and set Pair[P].Group = {P}. 3430 // 3431 // Src Dst Classification Loops GroupLoops Group 3432 // 0 [i] [0] SIV {1} {1} {0} 3433 // 1 [j] [j] SIV {2} {2} {1} 3434 // 2 [k] [l] RDIV {3,4} {3,4} {2} 3435 // 3 [m] [i + j] MIV {1,2,5} {1,2,5} {3} 3436 // 3437 // For each subscript SI 0 .. 3, we consider each remaining subscript, SJ. 3438 // So, 0 is compared against 1, 2, and 3; 1 is compared against 2 and 3, etc. 3439 // 3440 // We begin by comparing 0 and 1. The intersection of the GroupLoops is empty. 3441 // Next, 0 and 2. Again, the intersection of their GroupLoops is empty. 3442 // Next 0 and 3. The intersection of their GroupLoop = {1}, not empty, 3443 // so Pair[3].Group = {0,3} and Done = false (that is, 0 will not be added 3444 // to either Separable or Coupled). 3445 // 3446 // Next, we consider 1 and 2. The intersection of the GroupLoops is empty. 3447 // Next, 1 and 3. The intersectionof their GroupLoops = {2}, not empty, 3448 // so Pair[3].Group = {0, 1, 3} and Done = false. 3449 // 3450 // Next, we compare 2 against 3. The intersection of the GroupLoops is empty. 3451 // Since Done remains true, we add 2 to the set of Separable pairs. 3452 // 3453 // Finally, we consider 3. There's nothing to compare it with, 3454 // so Done remains true and we add it to the Coupled set. 3455 // Pair[3].Group = {0, 1, 3} and GroupLoops = {1, 2, 5}. 3456 // 3457 // In the end, we've got 1 separable subscript and 1 coupled group. 3458 for (unsigned SI = 0; SI < Pairs; ++SI) { 3459 if (Pair[SI].Classification == Subscript::NonLinear) { 3460 // ignore these, but collect loops for later 3461 ++NonlinearSubscriptPairs; 3462 collectCommonLoops(Pair[SI].Src, 3463 LI->getLoopFor(Src->getParent()), 3464 Pair[SI].Loops); 3465 collectCommonLoops(Pair[SI].Dst, 3466 LI->getLoopFor(Dst->getParent()), 3467 Pair[SI].Loops); 3468 Result.Consistent = false; 3469 } else if (Pair[SI].Classification == Subscript::ZIV) { 3470 // always separable 3471 Separable.set(SI); 3472 } 3473 else { 3474 // SIV, RDIV, or MIV, so check for coupled group 3475 bool Done = true; 3476 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { 3477 SmallBitVector Intersection = Pair[SI].GroupLoops; 3478 Intersection &= Pair[SJ].GroupLoops; 3479 if (Intersection.any()) { 3480 // accumulate set of all the loops in group 3481 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; 3482 // accumulate set of all subscripts in group 3483 Pair[SJ].Group |= Pair[SI].Group; 3484 Done = false; 3485 } 3486 } 3487 if (Done) { 3488 if (Pair[SI].Group.count() == 1) { 3489 Separable.set(SI); 3490 ++SeparableSubscriptPairs; 3491 } 3492 else { 3493 Coupled.set(SI); 3494 ++CoupledSubscriptPairs; 3495 } 3496 } 3497 } 3498 } 3499 3500 DEBUG(dbgs() << " Separable = "); 3501 DEBUG(dumpSmallBitVector(Separable)); 3502 DEBUG(dbgs() << " Coupled = "); 3503 DEBUG(dumpSmallBitVector(Coupled)); 3504 3505 Constraint NewConstraint; 3506 NewConstraint.setAny(SE); 3507 3508 // test separable subscripts 3509 for (unsigned SI : Separable.set_bits()) { 3510 DEBUG(dbgs() << "testing subscript " << SI); 3511 switch (Pair[SI].Classification) { 3512 case Subscript::ZIV: 3513 DEBUG(dbgs() << ", ZIV\n"); 3514 if (testZIV(Pair[SI].Src, Pair[SI].Dst, Result)) 3515 return nullptr; 3516 break; 3517 case Subscript::SIV: { 3518 DEBUG(dbgs() << ", SIV\n"); 3519 unsigned Level; 3520 const SCEV *SplitIter = nullptr; 3521 if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level, Result, NewConstraint, 3522 SplitIter)) 3523 return nullptr; 3524 break; 3525 } 3526 case Subscript::RDIV: 3527 DEBUG(dbgs() << ", RDIV\n"); 3528 if (testRDIV(Pair[SI].Src, Pair[SI].Dst, Result)) 3529 return nullptr; 3530 break; 3531 case Subscript::MIV: 3532 DEBUG(dbgs() << ", MIV\n"); 3533 if (testMIV(Pair[SI].Src, Pair[SI].Dst, Pair[SI].Loops, Result)) 3534 return nullptr; 3535 break; 3536 default: 3537 llvm_unreachable("subscript has unexpected classification"); 3538 } 3539 } 3540 3541 if (Coupled.count()) { 3542 // test coupled subscript groups 3543 DEBUG(dbgs() << "starting on coupled subscripts\n"); 3544 DEBUG(dbgs() << "MaxLevels + 1 = " << MaxLevels + 1 << "\n"); 3545 SmallVector<Constraint, 4> Constraints(MaxLevels + 1); 3546 for (unsigned II = 0; II <= MaxLevels; ++II) 3547 Constraints[II].setAny(SE); 3548 for (unsigned SI : Coupled.set_bits()) { 3549 DEBUG(dbgs() << "testing subscript group " << SI << " { "); 3550 SmallBitVector Group(Pair[SI].Group); 3551 SmallBitVector Sivs(Pairs); 3552 SmallBitVector Mivs(Pairs); 3553 SmallBitVector ConstrainedLevels(MaxLevels + 1); 3554 SmallVector<Subscript *, 4> PairsInGroup; 3555 for (unsigned SJ : Group.set_bits()) { 3556 DEBUG(dbgs() << SJ << " "); 3557 if (Pair[SJ].Classification == Subscript::SIV) 3558 Sivs.set(SJ); 3559 else 3560 Mivs.set(SJ); 3561 PairsInGroup.push_back(&Pair[SJ]); 3562 } 3563 unifySubscriptType(PairsInGroup); 3564 DEBUG(dbgs() << "}\n"); 3565 while (Sivs.any()) { 3566 bool Changed = false; 3567 for (unsigned SJ : Sivs.set_bits()) { 3568 DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n"); 3569 // SJ is an SIV subscript that's part of the current coupled group 3570 unsigned Level; 3571 const SCEV *SplitIter = nullptr; 3572 DEBUG(dbgs() << "SIV\n"); 3573 if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, Result, NewConstraint, 3574 SplitIter)) 3575 return nullptr; 3576 ConstrainedLevels.set(Level); 3577 if (intersectConstraints(&Constraints[Level], &NewConstraint)) { 3578 if (Constraints[Level].isEmpty()) { 3579 ++DeltaIndependence; 3580 return nullptr; 3581 } 3582 Changed = true; 3583 } 3584 Sivs.reset(SJ); 3585 } 3586 if (Changed) { 3587 // propagate, possibly creating new SIVs and ZIVs 3588 DEBUG(dbgs() << " propagating\n"); 3589 DEBUG(dbgs() << "\tMivs = "); 3590 DEBUG(dumpSmallBitVector(Mivs)); 3591 for (unsigned SJ : Mivs.set_bits()) { 3592 // SJ is an MIV subscript that's part of the current coupled group 3593 DEBUG(dbgs() << "\tSJ = " << SJ << "\n"); 3594 if (propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, 3595 Constraints, Result.Consistent)) { 3596 DEBUG(dbgs() << "\t Changed\n"); 3597 ++DeltaPropagations; 3598 Pair[SJ].Classification = 3599 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), 3600 Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), 3601 Pair[SJ].Loops); 3602 switch (Pair[SJ].Classification) { 3603 case Subscript::ZIV: 3604 DEBUG(dbgs() << "ZIV\n"); 3605 if (testZIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) 3606 return nullptr; 3607 Mivs.reset(SJ); 3608 break; 3609 case Subscript::SIV: 3610 Sivs.set(SJ); 3611 Mivs.reset(SJ); 3612 break; 3613 case Subscript::RDIV: 3614 case Subscript::MIV: 3615 break; 3616 default: 3617 llvm_unreachable("bad subscript classification"); 3618 } 3619 } 3620 } 3621 } 3622 } 3623 3624 // test & propagate remaining RDIVs 3625 for (unsigned SJ : Mivs.set_bits()) { 3626 if (Pair[SJ].Classification == Subscript::RDIV) { 3627 DEBUG(dbgs() << "RDIV test\n"); 3628 if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) 3629 return nullptr; 3630 // I don't yet understand how to propagate RDIV results 3631 Mivs.reset(SJ); 3632 } 3633 } 3634 3635 // test remaining MIVs 3636 // This code is temporary. 3637 // Better to somehow test all remaining subscripts simultaneously. 3638 for (unsigned SJ : Mivs.set_bits()) { 3639 if (Pair[SJ].Classification == Subscript::MIV) { 3640 DEBUG(dbgs() << "MIV test\n"); 3641 if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Result)) 3642 return nullptr; 3643 } 3644 else 3645 llvm_unreachable("expected only MIV subscripts at this point"); 3646 } 3647 3648 // update Result.DV from constraint vector 3649 DEBUG(dbgs() << " updating\n"); 3650 for (unsigned SJ : ConstrainedLevels.set_bits()) { 3651 if (SJ > CommonLevels) 3652 break; 3653 updateDirection(Result.DV[SJ - 1], Constraints[SJ]); 3654 if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE) 3655 return nullptr; 3656 } 3657 } 3658 } 3659 3660 // Make sure the Scalar flags are set correctly. 3661 SmallBitVector CompleteLoops(MaxLevels + 1); 3662 for (unsigned SI = 0; SI < Pairs; ++SI) 3663 CompleteLoops |= Pair[SI].Loops; 3664 for (unsigned II = 1; II <= CommonLevels; ++II) 3665 if (CompleteLoops[II]) 3666 Result.DV[II - 1].Scalar = false; 3667 3668 if (PossiblyLoopIndependent) { 3669 // Make sure the LoopIndependent flag is set correctly. 3670 // All directions must include equal, otherwise no 3671 // loop-independent dependence is possible. 3672 for (unsigned II = 1; II <= CommonLevels; ++II) { 3673 if (!(Result.getDirection(II) & Dependence::DVEntry::EQ)) { 3674 Result.LoopIndependent = false; 3675 break; 3676 } 3677 } 3678 } 3679 else { 3680 // On the other hand, if all directions are equal and there's no 3681 // loop-independent dependence possible, then no dependence exists. 3682 bool AllEqual = true; 3683 for (unsigned II = 1; II <= CommonLevels; ++II) { 3684 if (Result.getDirection(II) != Dependence::DVEntry::EQ) { 3685 AllEqual = false; 3686 break; 3687 } 3688 } 3689 if (AllEqual) 3690 return nullptr; 3691 } 3692 3693 return make_unique<FullDependence>(std::move(Result)); 3694 } 3695 3696 3697 3698 //===----------------------------------------------------------------------===// 3699 // getSplitIteration - 3700 // Rather than spend rarely-used space recording the splitting iteration 3701 // during the Weak-Crossing SIV test, we re-compute it on demand. 3702 // The re-computation is basically a repeat of the entire dependence test, 3703 // though simplified since we know that the dependence exists. 3704 // It's tedious, since we must go through all propagations, etc. 3705 // 3706 // Care is required to keep this code up to date with respect to the routine 3707 // above, depends(). 3708 // 3709 // Generally, the dependence analyzer will be used to build 3710 // a dependence graph for a function (basically a map from instructions 3711 // to dependences). Looking for cycles in the graph shows us loops 3712 // that cannot be trivially vectorized/parallelized. 3713 // 3714 // We can try to improve the situation by examining all the dependences 3715 // that make up the cycle, looking for ones we can break. 3716 // Sometimes, peeling the first or last iteration of a loop will break 3717 // dependences, and we've got flags for those possibilities. 3718 // Sometimes, splitting a loop at some other iteration will do the trick, 3719 // and we've got a flag for that case. Rather than waste the space to 3720 // record the exact iteration (since we rarely know), we provide 3721 // a method that calculates the iteration. It's a drag that it must work 3722 // from scratch, but wonderful in that it's possible. 3723 // 3724 // Here's an example: 3725 // 3726 // for (i = 0; i < 10; i++) 3727 // A[i] = ... 3728 // ... = A[11 - i] 3729 // 3730 // There's a loop-carried flow dependence from the store to the load, 3731 // found by the weak-crossing SIV test. The dependence will have a flag, 3732 // indicating that the dependence can be broken by splitting the loop. 3733 // Calling getSplitIteration will return 5. 3734 // Splitting the loop breaks the dependence, like so: 3735 // 3736 // for (i = 0; i <= 5; i++) 3737 // A[i] = ... 3738 // ... = A[11 - i] 3739 // for (i = 6; i < 10; i++) 3740 // A[i] = ... 3741 // ... = A[11 - i] 3742 // 3743 // breaks the dependence and allows us to vectorize/parallelize 3744 // both loops. 3745 const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep, 3746 unsigned SplitLevel) { 3747 assert(Dep.isSplitable(SplitLevel) && 3748 "Dep should be splitable at SplitLevel"); 3749 Instruction *Src = Dep.getSrc(); 3750 Instruction *Dst = Dep.getDst(); 3751 assert(Src->mayReadFromMemory() || Src->mayWriteToMemory()); 3752 assert(Dst->mayReadFromMemory() || Dst->mayWriteToMemory()); 3753 assert(isLoadOrStore(Src)); 3754 assert(isLoadOrStore(Dst)); 3755 Value *SrcPtr = getPointerOperand(Src); 3756 Value *DstPtr = getPointerOperand(Dst); 3757 assert(underlyingObjectsAlias(AA, F->getParent()->getDataLayout(), DstPtr, 3758 SrcPtr) == MustAlias); 3759 3760 // establish loop nesting levels 3761 establishNestingLevels(Src, Dst); 3762 3763 FullDependence Result(Src, Dst, false, CommonLevels); 3764 3765 // See if there are GEPs we can use. 3766 bool UsefulGEP = false; 3767 GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr); 3768 GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr); 3769 if (SrcGEP && DstGEP && 3770 SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()) { 3771 const SCEV *SrcPtrSCEV = SE->getSCEV(SrcGEP->getPointerOperand()); 3772 const SCEV *DstPtrSCEV = SE->getSCEV(DstGEP->getPointerOperand()); 3773 UsefulGEP = isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) && 3774 isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent())) && 3775 (SrcGEP->getNumOperands() == DstGEP->getNumOperands()); 3776 } 3777 unsigned Pairs = UsefulGEP ? SrcGEP->idx_end() - SrcGEP->idx_begin() : 1; 3778 SmallVector<Subscript, 4> Pair(Pairs); 3779 if (UsefulGEP) { 3780 unsigned P = 0; 3781 for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(), 3782 SrcEnd = SrcGEP->idx_end(), 3783 DstIdx = DstGEP->idx_begin(); 3784 SrcIdx != SrcEnd; 3785 ++SrcIdx, ++DstIdx, ++P) { 3786 Pair[P].Src = SE->getSCEV(*SrcIdx); 3787 Pair[P].Dst = SE->getSCEV(*DstIdx); 3788 } 3789 } 3790 else { 3791 const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); 3792 const SCEV *DstSCEV = SE->getSCEV(DstPtr); 3793 Pair[0].Src = SrcSCEV; 3794 Pair[0].Dst = DstSCEV; 3795 } 3796 3797 if (Delinearize && CommonLevels > 1) { 3798 if (tryDelinearize(Src, Dst, Pair)) { 3799 DEBUG(dbgs() << " delinerized GEP\n"); 3800 Pairs = Pair.size(); 3801 } 3802 } 3803 3804 for (unsigned P = 0; P < Pairs; ++P) { 3805 Pair[P].Loops.resize(MaxLevels + 1); 3806 Pair[P].GroupLoops.resize(MaxLevels + 1); 3807 Pair[P].Group.resize(Pairs); 3808 removeMatchingExtensions(&Pair[P]); 3809 Pair[P].Classification = 3810 classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), 3811 Pair[P].Dst, LI->getLoopFor(Dst->getParent()), 3812 Pair[P].Loops); 3813 Pair[P].GroupLoops = Pair[P].Loops; 3814 Pair[P].Group.set(P); 3815 } 3816 3817 SmallBitVector Separable(Pairs); 3818 SmallBitVector Coupled(Pairs); 3819 3820 // partition subscripts into separable and minimally-coupled groups 3821 for (unsigned SI = 0; SI < Pairs; ++SI) { 3822 if (Pair[SI].Classification == Subscript::NonLinear) { 3823 // ignore these, but collect loops for later 3824 collectCommonLoops(Pair[SI].Src, 3825 LI->getLoopFor(Src->getParent()), 3826 Pair[SI].Loops); 3827 collectCommonLoops(Pair[SI].Dst, 3828 LI->getLoopFor(Dst->getParent()), 3829 Pair[SI].Loops); 3830 Result.Consistent = false; 3831 } 3832 else if (Pair[SI].Classification == Subscript::ZIV) 3833 Separable.set(SI); 3834 else { 3835 // SIV, RDIV, or MIV, so check for coupled group 3836 bool Done = true; 3837 for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { 3838 SmallBitVector Intersection = Pair[SI].GroupLoops; 3839 Intersection &= Pair[SJ].GroupLoops; 3840 if (Intersection.any()) { 3841 // accumulate set of all the loops in group 3842 Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; 3843 // accumulate set of all subscripts in group 3844 Pair[SJ].Group |= Pair[SI].Group; 3845 Done = false; 3846 } 3847 } 3848 if (Done) { 3849 if (Pair[SI].Group.count() == 1) 3850 Separable.set(SI); 3851 else 3852 Coupled.set(SI); 3853 } 3854 } 3855 } 3856 3857 Constraint NewConstraint; 3858 NewConstraint.setAny(SE); 3859 3860 // test separable subscripts 3861 for (unsigned SI : Separable.set_bits()) { 3862 switch (Pair[SI].Classification) { 3863 case Subscript::SIV: { 3864 unsigned Level; 3865 const SCEV *SplitIter = nullptr; 3866 (void) testSIV(Pair[SI].Src, Pair[SI].Dst, Level, 3867 Result, NewConstraint, SplitIter); 3868 if (Level == SplitLevel) { 3869 assert(SplitIter != nullptr); 3870 return SplitIter; 3871 } 3872 break; 3873 } 3874 case Subscript::ZIV: 3875 case Subscript::RDIV: 3876 case Subscript::MIV: 3877 break; 3878 default: 3879 llvm_unreachable("subscript has unexpected classification"); 3880 } 3881 } 3882 3883 if (Coupled.count()) { 3884 // test coupled subscript groups 3885 SmallVector<Constraint, 4> Constraints(MaxLevels + 1); 3886 for (unsigned II = 0; II <= MaxLevels; ++II) 3887 Constraints[II].setAny(SE); 3888 for (unsigned SI : Coupled.set_bits()) { 3889 SmallBitVector Group(Pair[SI].Group); 3890 SmallBitVector Sivs(Pairs); 3891 SmallBitVector Mivs(Pairs); 3892 SmallBitVector ConstrainedLevels(MaxLevels + 1); 3893 for (unsigned SJ : Group.set_bits()) { 3894 if (Pair[SJ].Classification == Subscript::SIV) 3895 Sivs.set(SJ); 3896 else 3897 Mivs.set(SJ); 3898 } 3899 while (Sivs.any()) { 3900 bool Changed = false; 3901 for (unsigned SJ : Sivs.set_bits()) { 3902 // SJ is an SIV subscript that's part of the current coupled group 3903 unsigned Level; 3904 const SCEV *SplitIter = nullptr; 3905 (void) testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, 3906 Result, NewConstraint, SplitIter); 3907 if (Level == SplitLevel && SplitIter) 3908 return SplitIter; 3909 ConstrainedLevels.set(Level); 3910 if (intersectConstraints(&Constraints[Level], &NewConstraint)) 3911 Changed = true; 3912 Sivs.reset(SJ); 3913 } 3914 if (Changed) { 3915 // propagate, possibly creating new SIVs and ZIVs 3916 for (unsigned SJ : Mivs.set_bits()) { 3917 // SJ is an MIV subscript that's part of the current coupled group 3918 if (propagate(Pair[SJ].Src, Pair[SJ].Dst, 3919 Pair[SJ].Loops, Constraints, Result.Consistent)) { 3920 Pair[SJ].Classification = 3921 classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), 3922 Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), 3923 Pair[SJ].Loops); 3924 switch (Pair[SJ].Classification) { 3925 case Subscript::ZIV: 3926 Mivs.reset(SJ); 3927 break; 3928 case Subscript::SIV: 3929 Sivs.set(SJ); 3930 Mivs.reset(SJ); 3931 break; 3932 case Subscript::RDIV: 3933 case Subscript::MIV: 3934 break; 3935 default: 3936 llvm_unreachable("bad subscript classification"); 3937 } 3938 } 3939 } 3940 } 3941 } 3942 } 3943 } 3944 llvm_unreachable("somehow reached end of routine"); 3945 return nullptr; 3946 } 3947