1 //===----- ScopDetection.cpp - Detect Scops --------------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Detect the maximal Scops of a function. 11 // 12 // A static control part (Scop) is a subgraph of the control flow graph (CFG) 13 // that only has statically known control flow and can therefore be described 14 // within the polyhedral model. 15 // 16 // Every Scop fullfills these restrictions: 17 // 18 // * It is a single entry single exit region 19 // 20 // * Only affine linear bounds in the loops 21 // 22 // Every natural loop in a Scop must have a number of loop iterations that can 23 // be described as an affine linear function in surrounding loop iterators or 24 // parameters. (A parameter is a scalar that does not change its value during 25 // execution of the Scop). 26 // 27 // * Only comparisons of affine linear expressions in conditions 28 // 29 // * All loops and conditions perfectly nested 30 // 31 // The control flow needs to be structured such that it could be written using 32 // just 'for' and 'if' statements, without the need for any 'goto', 'break' or 33 // 'continue'. 34 // 35 // * Side effect free functions call 36 // 37 // Only function calls and intrinsics that do not have side effects are allowed 38 // (readnone). 39 // 40 // The Scop detection finds the largest Scops by checking if the largest 41 // region is a Scop. If this is not the case, its canonical subregions are 42 // checked until a region is a Scop. It is now tried to extend this Scop by 43 // creating a larger non canonical region. 44 // 45 //===----------------------------------------------------------------------===// 46 47 #include "polly/ScopDetection.h" 48 49 #include "polly/LinkAllPasses.h" 50 #include "polly/Support/ScopHelper.h" 51 #include "polly/Support/AffineSCEVIterator.h" 52 53 #include "llvm/LLVMContext.h" 54 #include "llvm/ADT/Statistic.h" 55 #include "llvm/Analysis/AliasAnalysis.h" 56 #include "llvm/Analysis/RegionIterator.h" 57 #include "llvm/Support/CommandLine.h" 58 #include "llvm/Assembly/Writer.h" 59 60 #define DEBUG_TYPE "polly-detect" 61 #include "llvm/Support/Debug.h" 62 63 using namespace llvm; 64 using namespace polly; 65 66 //===----------------------------------------------------------------------===// 67 // Statistics. 68 69 STATISTIC(ValidRegion, "Number of regions that a valid part of Scop"); 70 71 #define BADSCOP_STAT(NAME, DESC) STATISTIC(Bad##NAME##ForScop, \ 72 "Number of bad regions for Scop: "\ 73 DESC) 74 75 #define STATSCOP(NAME); assert(!Context.Verifying && #NAME); \ 76 if (!Context.Verifying) ++Bad##NAME##ForScop; 77 78 BADSCOP_STAT(CFG, "CFG too complex"); 79 BADSCOP_STAT(IndVar, "Non canonical induction variable in loop"); 80 BADSCOP_STAT(LoopBound, "Loop bounds can not be computed"); 81 BADSCOP_STAT(FuncCall, "Function call with side effects appeared"); 82 BADSCOP_STAT(AffFunc, "Expression not affine"); 83 BADSCOP_STAT(Scalar, "Found scalar dependency"); 84 BADSCOP_STAT(Alias, "Found base address alias"); 85 BADSCOP_STAT(SimpleRegion, "Region not simple"); 86 BADSCOP_STAT(Other, "Others"); 87 88 //===----------------------------------------------------------------------===// 89 // ScopDetection. 90 91 bool ScopDetection::isMaxRegionInScop(const Region &R) const { 92 // The Region is valid only if it could be found in the set. 93 return ValidRegions.count(&R); 94 } 95 96 bool ScopDetection::isValidAffineFunction(const SCEV *S, Region &RefRegion, 97 Value **BasePtr) const { 98 assert(S && "S must not be null!"); 99 bool isMemoryAccess = (BasePtr != 0); 100 if (isMemoryAccess) *BasePtr = 0; 101 DEBUG(dbgs() << "Checking " << *S << " ... "); 102 103 if (isa<SCEVCouldNotCompute>(S)) { 104 DEBUG(dbgs() << "Non Affine: SCEV could not be computed\n"); 105 return false; 106 } 107 108 for (AffineSCEVIterator I = affine_begin(S, SE), E = affine_end(); I != E; 109 ++I) { 110 // The constant part must be a SCEVConstant. 111 // TODO: support sizeof in coefficient. 112 if (!isa<SCEVConstant>(I->second)) { 113 DEBUG(dbgs() << "Non Affine: Right hand side is not constant\n"); 114 return false; 115 } 116 117 const SCEV *Var = I->first; 118 119 // A constant offset is affine. 120 if(isa<SCEVConstant>(Var)) 121 continue; 122 123 // Memory accesses are allowed to have a base pointer. 124 if (Var->getType()->isPointerTy()) { 125 if (!isMemoryAccess) { 126 DEBUG(dbgs() << "Non Affine: Pointer in non memory access\n"); 127 return false; 128 } 129 130 assert(I->second->isOne() && "Only one as pointer coefficient allowed.\n"); 131 const SCEVUnknown *BaseAddr = dyn_cast<SCEVUnknown>(Var); 132 133 if (!BaseAddr || isa<UndefValue>(BaseAddr->getValue())){ 134 DEBUG(dbgs() << "Cannot handle base: " << *Var << "\n"); 135 return false; 136 } 137 138 // BaseAddr must be invariant in Scop. 139 if (!isParameter(BaseAddr, RefRegion, *LI, *SE)) { 140 DEBUG(dbgs() << "Non Affine: Base address not invariant in SCoP\n"); 141 return false; 142 } 143 144 assert(*BasePtr == 0 && "Found second base pointer.\n"); 145 *BasePtr = BaseAddr->getValue(); 146 continue; 147 } 148 149 if (isParameter(Var, RefRegion, *LI, *SE) 150 || isIndVar(Var, RefRegion, *LI, *SE)) 151 continue; 152 153 DEBUG(dbgs() << "Non Affine: " ; 154 Var->print(dbgs()); 155 dbgs() << " is neither parameter nor induction variable\n"); 156 return false; 157 } 158 159 DEBUG(dbgs() << " is affine.\n"); 160 return !isMemoryAccess || (*BasePtr != 0); 161 } 162 163 bool ScopDetection::isValidCFG(BasicBlock &BB, DetectionContext &Context) const 164 { 165 Region &RefRegion = Context.CurRegion; 166 TerminatorInst *TI = BB.getTerminator(); 167 168 // Return instructions are only valid if the region is the top level region. 169 if (isa<ReturnInst>(TI) && !RefRegion.getExit() && TI->getNumOperands() == 0) 170 return true; 171 172 BranchInst *Br = dyn_cast<BranchInst>(TI); 173 174 if (!Br) { 175 DEBUG(dbgs() << "Non branch instruction as terminator of BB: "; 176 WriteAsOperand(dbgs(), &BB, false); 177 dbgs() << "\n"); 178 STATSCOP(CFG); 179 return false; 180 } 181 182 if (Br->isUnconditional()) return true; 183 184 Value *Condition = Br->getCondition(); 185 186 // UndefValue is not allowed as condition. 187 if (isa<UndefValue>(Condition)) { 188 DEBUG(dbgs() << "Undefined value in branch instruction of BB: "; 189 WriteAsOperand(dbgs(), &BB, false); 190 dbgs() << "\n"); 191 STATSCOP(AffFunc); 192 return false; 193 } 194 195 // Only Constant and ICmpInst are allowed as condition. 196 if (!(isa<Constant>(Condition) || isa<ICmpInst>(Condition))) { 197 DEBUG(dbgs() << "Non Constant and non ICmpInst instruction in BB: "; 198 WriteAsOperand(dbgs(), &BB, false); 199 dbgs() << "\n"); 200 STATSCOP(AffFunc); 201 return false; 202 } 203 204 // Allow perfectly nested conditions. 205 assert(Br->getNumSuccessors() == 2 && "Unexpected number of successors"); 206 207 if (ICmpInst *ICmp = dyn_cast<ICmpInst>(Condition)) { 208 // Unsigned comparisons are not allowed. They trigger overflow problems 209 // in the code generation. 210 // 211 // TODO: This is not sufficient and just hides bugs. However it does pretty 212 // well. 213 if(ICmp->isUnsigned()) 214 return false; 215 216 // Are both operands of the ICmp affine? 217 if (isa<UndefValue>(ICmp->getOperand(0)) 218 || isa<UndefValue>(ICmp->getOperand(1))) { 219 DEBUG(dbgs() << "Undefined operand in branch instruction of BB: "; 220 WriteAsOperand(dbgs(), &BB, false); 221 dbgs() << "\n"); 222 STATSCOP(AffFunc); 223 return false; 224 } 225 226 const SCEV *ScevLHS = SE->getSCEV(ICmp->getOperand(0)); 227 const SCEV *ScevRHS = SE->getSCEV(ICmp->getOperand(1)); 228 229 bool affineLHS = isValidAffineFunction(ScevLHS, RefRegion); 230 bool affineRHS = isValidAffineFunction(ScevRHS, RefRegion); 231 232 if (!affineLHS || !affineRHS) { 233 DEBUG(dbgs() << "Non affine branch instruction in BB: "; 234 WriteAsOperand(dbgs(), &BB, false); 235 dbgs() << "\n"); 236 STATSCOP(AffFunc); 237 return false; 238 } 239 } 240 241 // Allow loop exit conditions. 242 Loop *L = LI->getLoopFor(&BB); 243 if (L && L->getExitingBlock() == &BB) 244 return true; 245 246 // Allow perfectly nested conditions. 247 Region *R = RI->getRegionFor(&BB); 248 if (R->getEntry() != &BB) { 249 DEBUG(dbgs() << "Non well structured condition starting at BB: "; 250 WriteAsOperand(dbgs(), &BB, false); 251 dbgs() << "\n"); 252 STATSCOP(CFG); 253 return false; 254 } 255 256 return true; 257 } 258 259 bool ScopDetection::isValidCallInst(CallInst &CI) { 260 if (CI.mayHaveSideEffects() || CI.doesNotReturn()) 261 return false; 262 263 if (CI.doesNotAccessMemory()) 264 return true; 265 266 Function *CalledFunction = CI.getCalledFunction(); 267 268 // Indirect calls are not supported. 269 if (CalledFunction == 0) 270 return false; 271 272 // TODO: Intrinsics. 273 return false; 274 } 275 276 bool ScopDetection::isValidMemoryAccess(Instruction &Inst, 277 DetectionContext &Context) const { 278 Value *Ptr = getPointerOperand(Inst), *BasePtr; 279 const SCEV *AccessFunction = SE->getSCEV(Ptr); 280 281 if (!isValidAffineFunction(AccessFunction, Context.CurRegion, &BasePtr)) { 282 DEBUG(dbgs() << "Bad memory addr " << *AccessFunction << "\n"); 283 STATSCOP(AffFunc); 284 return false; 285 } 286 287 // FIXME: Alias Analysis thinks IntToPtrInst aliases with alloca instructions 288 // created by IndependentBlocks Pass. 289 if (isa<IntToPtrInst>(BasePtr)) { 290 DEBUG(dbgs() << "Find bad intoptr prt: " << *BasePtr << '\n'); 291 STATSCOP(Other); 292 return false; 293 } 294 295 // Check if the base pointer of the memory access does alias with 296 // any other pointer. This cannot be handled at the moment. 297 AliasSet &AS = 298 Context.AST.getAliasSetForPointer(BasePtr, AliasAnalysis::UnknownSize, 299 Inst.getMetadata(LLVMContext::MD_tbaa)); 300 if (!AS.isMustAlias()) { 301 DEBUG(dbgs() << "Bad pointer alias found:" << *BasePtr << "\nAS:\n" << AS); 302 303 // STATSCOP triggers an assertion if we are in verifying mode. 304 // This is generally good to check that we do not change the SCoP after we 305 // run the SCoP detection and consequently to ensure that we can still 306 // represent that SCoP. However, in case of aliasing this does not work. 307 // The independent blocks pass may create memory references which seem to 308 // alias, if -basicaa is not available. They actually do not. As we do not 309 // not know this and we would fail here if we verify it. 310 if (!Context.Verifying) { 311 STATSCOP(Alias); 312 } 313 314 return false; 315 } 316 317 return true; 318 } 319 320 321 bool ScopDetection::hasScalarDependency(Instruction &Inst, 322 Region &RefRegion) const { 323 for (Instruction::use_iterator UI = Inst.use_begin(), UE = Inst.use_end(); 324 UI != UE; ++UI) 325 if (Instruction *Use = dyn_cast<Instruction>(*UI)) 326 if (!RefRegion.contains(Use->getParent())) { 327 // DirtyHack 1: PHINode user outside the Scop is not allow, if this 328 // PHINode is induction variable, the scalar to array transform may 329 // break it and introduce a non-indvar PHINode, which is not allow in 330 // Scop. 331 // This can be fix by: 332 // Introduce a IndependentBlockPrepare pass, which translate all 333 // PHINodes not in Scop to array. 334 // The IndependentBlockPrepare pass can also split the entry block of 335 // the function to hold the alloca instruction created by scalar to 336 // array. and split the exit block of the Scop so the new create load 337 // instruction for escape users will not break other Scops. 338 if (isa<PHINode>(Use)) 339 return true; 340 } 341 342 return false; 343 } 344 345 bool ScopDetection::isValidInstruction(Instruction &Inst, 346 DetectionContext &Context) const { 347 // Only canonical IVs are allowed. 348 if (PHINode *PN = dyn_cast<PHINode>(&Inst)) 349 if (!isIndVar(PN, LI)) { 350 DEBUG(dbgs() << "Non canonical PHI node found: "; 351 WriteAsOperand(dbgs(), &Inst, false); 352 dbgs() << "\n"); 353 return false; 354 } 355 356 // Scalar dependencies are not allowed. 357 if (hasScalarDependency(Inst, Context.CurRegion)) { 358 DEBUG(dbgs() << "Scalar dependency found: "; 359 WriteAsOperand(dbgs(), &Inst, false); 360 dbgs() << "\n"); 361 STATSCOP(Scalar); 362 return false; 363 } 364 365 // We only check the call instruction but not invoke instruction. 366 if (CallInst *CI = dyn_cast<CallInst>(&Inst)) { 367 if (isValidCallInst(*CI)) 368 return true; 369 370 DEBUG(dbgs() << "Bad call Inst: "; 371 WriteAsOperand(dbgs(), &Inst, false); 372 dbgs() << "\n"); 373 STATSCOP(FuncCall); 374 return false; 375 } 376 377 if (!Inst.mayWriteToMemory() && !Inst.mayReadFromMemory()) { 378 // Handle cast instruction. 379 if (isa<IntToPtrInst>(Inst) || isa<BitCastInst>(Inst)) { 380 DEBUG(dbgs() << "Bad cast Inst!\n"); 381 STATSCOP(Other); 382 return false; 383 } 384 385 if (isa<AllocaInst>(Inst)) { 386 DEBUG(dbgs() << "AllocaInst is not allowed!!\n"); 387 STATSCOP(Other); 388 return false; 389 } 390 391 return true; 392 } 393 394 // Check the access function. 395 if (isa<LoadInst>(Inst) || isa<StoreInst>(Inst)) 396 return isValidMemoryAccess(Inst, Context); 397 398 // We do not know this instruction, therefore we assume it is invalid. 399 DEBUG(dbgs() << "Bad instruction found: "; 400 WriteAsOperand(dbgs(), &Inst, false); 401 dbgs() << "\n"); 402 STATSCOP(Other); 403 return false; 404 } 405 406 bool ScopDetection::isValidBasicBlock(BasicBlock &BB, 407 DetectionContext &Context) const { 408 if (!isValidCFG(BB, Context)) 409 return false; 410 411 // Check all instructions, except the terminator instruction. 412 for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I) 413 if (!isValidInstruction(*I, Context)) 414 return false; 415 416 Loop *L = LI->getLoopFor(&BB); 417 if (L && L->getHeader() == &BB && !isValidLoop(L, Context)) 418 return false; 419 420 return true; 421 } 422 423 bool ScopDetection::isValidLoop(Loop *L, DetectionContext &Context) const { 424 PHINode *IndVar = L->getCanonicalInductionVariable(); 425 // No canonical induction variable. 426 if (!IndVar) { 427 DEBUG(dbgs() << "No canonical iv for loop: "; 428 WriteAsOperand(dbgs(), L->getHeader(), false); 429 dbgs() << "\n"); 430 STATSCOP(IndVar); 431 return false; 432 } 433 434 // Is the loop count affine? 435 const SCEV *LoopCount = SE->getBackedgeTakenCount(L); 436 if (!isValidAffineFunction(LoopCount, Context.CurRegion)) { 437 DEBUG(dbgs() << "Non affine loop bound for loop: "; 438 WriteAsOperand(dbgs(), L->getHeader(), false); 439 dbgs() << "\n"); 440 STATSCOP(LoopBound); 441 return false; 442 } 443 444 return true; 445 } 446 447 Region *ScopDetection::expandRegion(Region &R) { 448 Region *CurrentRegion = &R; 449 Region *TmpRegion = R.getExpandedRegion(); 450 451 DEBUG(dbgs() << "\tExpanding " << R.getNameStr() << "\n"); 452 453 while (TmpRegion) { 454 DetectionContext Context(*TmpRegion, *AA, false /*verifying*/); 455 DEBUG(dbgs() << "\t\tTrying " << TmpRegion->getNameStr() << "\n"); 456 457 if (!allBlocksValid(Context)) 458 break; 459 460 if (isValidExit(Context)) { 461 if (CurrentRegion != &R) 462 delete CurrentRegion; 463 464 CurrentRegion = TmpRegion; 465 } 466 467 Region *TmpRegion2 = TmpRegion->getExpandedRegion(); 468 469 if (TmpRegion != &R && TmpRegion != CurrentRegion) 470 delete TmpRegion; 471 472 TmpRegion = TmpRegion2; 473 } 474 475 if (&R == CurrentRegion) 476 return NULL; 477 478 DEBUG(dbgs() << "\tto " << CurrentRegion->getNameStr() << "\n"); 479 480 return CurrentRegion; 481 } 482 483 484 void ScopDetection::findScops(Region &R) { 485 DetectionContext Context(R, *AA, false /*verifying*/); 486 487 if (isValidRegion(Context)) { 488 ++ValidRegion; 489 ValidRegions.insert(&R); 490 return; 491 } 492 493 for (Region::iterator I = R.begin(), E = R.end(); I != E; ++I) 494 findScops(**I); 495 496 // Try to expand regions. 497 // 498 // As the region tree normally only contains canonical regions, non canonical 499 // regions that form a Scop are not found. Therefore, those non canonical 500 // regions are checked by expanding the canonical ones. 501 502 std::vector<Region*> ToExpand; 503 504 for (Region::iterator I = R.begin(), E = R.end(); I != E; ++I) 505 ToExpand.push_back(*I); 506 507 for (std::vector<Region*>::iterator RI = ToExpand.begin(), 508 RE = ToExpand.end(); RI != RE; ++RI) { 509 Region *CurrentRegion = *RI; 510 511 // Skip invalid regions. Regions may become invalid, if they are element of 512 // an already expanded region. 513 if (ValidRegions.find(CurrentRegion) == ValidRegions.end()) 514 continue; 515 516 Region *ExpandedR = expandRegion(*CurrentRegion); 517 518 if (!ExpandedR) 519 continue; 520 521 R.addSubRegion(ExpandedR, true); 522 ValidRegions.insert(ExpandedR); 523 ValidRegions.erase(CurrentRegion); 524 525 for (Region::iterator I = ExpandedR->begin(), E = ExpandedR->end(); I != E; 526 ++I) 527 ValidRegions.erase(*I); 528 } 529 } 530 531 bool ScopDetection::allBlocksValid(DetectionContext &Context) const { 532 Region &R = Context.CurRegion; 533 534 for (Region::block_iterator I = R.block_begin(), E = R.block_end(); I != E; 535 ++I) 536 if (!isValidBasicBlock(*(I->getNodeAs<BasicBlock>()), Context)) 537 return false; 538 539 return true; 540 } 541 542 bool ScopDetection::isValidExit(DetectionContext &Context) const { 543 Region &R = Context.CurRegion; 544 545 // PHI nodes are not allowed in the exit basic block. 546 if (BasicBlock *Exit = R.getExit()) { 547 BasicBlock::iterator I = Exit->begin(); 548 if (I != Exit->end() && isa<PHINode> (*I)) { 549 DEBUG(dbgs() << "PHI node in exit"; 550 dbgs() << "\n"); 551 STATSCOP(Other); 552 return false; 553 } 554 } 555 556 return true; 557 } 558 559 bool ScopDetection::isValidRegion(DetectionContext &Context) const { 560 Region &R = Context.CurRegion; 561 562 DEBUG(dbgs() << "Checking region: " << R.getNameStr() << "\n\t"); 563 564 // The toplevel region is no valid region. 565 if (!R.getParent()) { 566 DEBUG(dbgs() << "Top level region is invalid"; 567 dbgs() << "\n"); 568 return false; 569 } 570 571 // SCoP can not contains the entry block of the function, because we need 572 // to insert alloca instruction there when translate scalar to array. 573 if (R.getEntry() == &(R.getEntry()->getParent()->getEntryBlock())) { 574 DEBUG(dbgs() << "Region containing entry block of function is invalid!\n"); 575 STATSCOP(Other); 576 return false; 577 } 578 579 // Only a simple region is allowed. 580 if (!R.isSimple()) { 581 DEBUG(dbgs() << "Region not simple: " << R.getNameStr() << '\n'); 582 STATSCOP(SimpleRegion); 583 return false; 584 } 585 586 if (!allBlocksValid(Context)) 587 return false; 588 589 if (!isValidExit(Context)) 590 return false; 591 592 DEBUG(dbgs() << "OK\n"); 593 return true; 594 } 595 596 bool ScopDetection::isValidFunction(llvm::Function &F) { 597 return !InvalidFunctions.count(&F); 598 } 599 600 bool ScopDetection::runOnFunction(llvm::Function &F) { 601 AA = &getAnalysis<AliasAnalysis>(); 602 SE = &getAnalysis<ScalarEvolution>(); 603 LI = &getAnalysis<LoopInfo>(); 604 RI = &getAnalysis<RegionInfo>(); 605 Region *TopRegion = RI->getTopLevelRegion(); 606 607 if(!isValidFunction(F)) 608 return false; 609 610 findScops(*TopRegion); 611 return false; 612 } 613 614 615 void polly::ScopDetection::verifyRegion(const Region &R) const { 616 assert(isMaxRegionInScop(R) && "Expect R is a valid region."); 617 DetectionContext Context(const_cast<Region&>(R), *AA, true /*verifying*/); 618 isValidRegion(Context); 619 } 620 621 void polly::ScopDetection::verifyAnalysis() const { 622 for (RegionSet::const_iterator I = ValidRegions.begin(), 623 E = ValidRegions.end(); I != E; ++I) 624 verifyRegion(**I); 625 } 626 627 void ScopDetection::getAnalysisUsage(AnalysisUsage &AU) const { 628 AU.addRequired<DominatorTree>(); 629 AU.addRequired<PostDominatorTree>(); 630 AU.addRequired<LoopInfo>(); 631 AU.addRequired<ScalarEvolution>(); 632 // We also need AA and RegionInfo when we are verifying analysis. 633 AU.addRequiredTransitive<AliasAnalysis>(); 634 AU.addRequiredTransitive<RegionInfo>(); 635 AU.setPreservesAll(); 636 } 637 638 void ScopDetection::print(raw_ostream &OS, const Module *) const { 639 for (RegionSet::const_iterator I = ValidRegions.begin(), 640 E = ValidRegions.end(); I != E; ++I) 641 OS << "Valid Region for Scop: " << (*I)->getNameStr() << '\n'; 642 643 OS << "\n"; 644 } 645 646 void ScopDetection::releaseMemory() { 647 ValidRegions.clear(); 648 // Do not clear the invalid function set. 649 } 650 651 char ScopDetection::ID = 0; 652 653 static RegisterPass<ScopDetection> 654 X("polly-detect", "Polly - Detect Scops in functions"); 655 656 Pass *polly::createScopDetectionPass() { 657 return new ScopDetection(); 658 } 659