1 //===------ CodeGeneration.cpp - Code generate the Scops using ISL. ----======// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // The CodeGeneration pass takes a Scop created by ScopInfo and translates it 11 // back to LLVM-IR using the ISL code generator. 12 // 13 // The Scop describes the high level memory behavior of a control flow region. 14 // Transformation passes can update the schedule (execution order) of statements 15 // in the Scop. ISL is used to generate an abstract syntax tree that reflects 16 // the updated execution order. This clast is used to create new LLVM-IR that is 17 // computationally equivalent to the original control flow region, but executes 18 // its code in the new execution order defined by the changed schedule. 19 // 20 //===----------------------------------------------------------------------===// 21 22 #include "polly/CodeGen/CodeGeneration.h" 23 #include "polly/CodeGen/IslAst.h" 24 #include "polly/CodeGen/IslNodeBuilder.h" 25 #include "polly/CodeGen/PerfMonitor.h" 26 #include "polly/CodeGen/Utils.h" 27 #include "polly/DependenceInfo.h" 28 #include "polly/LinkAllPasses.h" 29 #include "polly/Options.h" 30 #include "polly/ScopInfo.h" 31 #include "polly/Support/ScopHelper.h" 32 #include "llvm/Analysis/AliasAnalysis.h" 33 #include "llvm/Analysis/BasicAliasAnalysis.h" 34 #include "llvm/Analysis/GlobalsModRef.h" 35 #include "llvm/Analysis/LoopInfo.h" 36 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" 37 #include "llvm/IR/Module.h" 38 #include "llvm/IR/PassManager.h" 39 #include "llvm/IR/Verifier.h" 40 #include "llvm/Support/Debug.h" 41 42 using namespace polly; 43 using namespace llvm; 44 45 #define DEBUG_TYPE "polly-codegen" 46 47 static cl::opt<bool> Verify("polly-codegen-verify", 48 cl::desc("Verify the function generated by Polly"), 49 cl::Hidden, cl::init(false), cl::ZeroOrMore, 50 cl::cat(PollyCategory)); 51 52 bool polly::PerfMonitoring; 53 static cl::opt<bool, true> 54 XPerfMonitoring("polly-codegen-perf-monitoring", 55 cl::desc("Add run-time performance monitoring"), cl::Hidden, 56 cl::location(polly::PerfMonitoring), cl::init(false), 57 cl::ZeroOrMore, cl::cat(PollyCategory)); 58 59 STATISTIC(ScopsProcessed, "Number of SCoP processed"); 60 STATISTIC(CodegenedScops, "Number of successfully generated SCoPs"); 61 STATISTIC(CodegenedAffineLoops, 62 "Number of original affine loops in SCoPs that have been generated"); 63 STATISTIC(CodegenedBoxedLoops, 64 "Number of original boxed loops in SCoPs that have been generated"); 65 66 namespace polly { 67 /// Mark a basic block unreachable. 68 /// 69 /// Marks the basic block @p Block unreachable by equipping it with an 70 /// UnreachableInst. 71 void markBlockUnreachable(BasicBlock &Block, PollyIRBuilder &Builder) { 72 auto *OrigTerminator = Block.getTerminator(); 73 Builder.SetInsertPoint(OrigTerminator); 74 Builder.CreateUnreachable(); 75 OrigTerminator->eraseFromParent(); 76 } 77 78 } // namespace polly 79 80 namespace { 81 82 static void verifyGeneratedFunction(Scop &S, Function &F, IslAstInfo &AI) { 83 if (!Verify || !verifyFunction(F, &errs())) 84 return; 85 86 DEBUG({ 87 errs() << "== ISL Codegen created an invalid function ==\n\n== The " 88 "SCoP ==\n"; 89 errs() << S; 90 errs() << "\n== The isl AST ==\n"; 91 AI.print(errs()); 92 errs() << "\n== The invalid function ==\n"; 93 F.print(errs()); 94 }); 95 96 llvm_unreachable("Polly generated function could not be verified. Add " 97 "-polly-codegen-verify=false to disable this assertion."); 98 } 99 100 // CodeGeneration adds a lot of BBs without updating the RegionInfo 101 // We make all created BBs belong to the scop's parent region without any 102 // nested structure to keep the RegionInfo verifier happy. 103 static void fixRegionInfo(Function &F, Region &ParentRegion, RegionInfo &RI) { 104 for (BasicBlock &BB : F) { 105 if (RI.getRegionFor(&BB)) 106 continue; 107 108 RI.setRegionFor(&BB, &ParentRegion); 109 } 110 } 111 112 /// Remove all lifetime markers (llvm.lifetime.start, llvm.lifetime.end) from 113 /// @R. 114 /// 115 /// CodeGeneration does not copy lifetime markers into the optimized SCoP, 116 /// which would leave the them only in the original path. This can transform 117 /// code such as 118 /// 119 /// llvm.lifetime.start(%p) 120 /// llvm.lifetime.end(%p) 121 /// 122 /// into 123 /// 124 /// if (RTC) { 125 /// // generated code 126 /// } else { 127 /// // original code 128 /// llvm.lifetime.start(%p) 129 /// } 130 /// llvm.lifetime.end(%p) 131 /// 132 /// The current StackColoring algorithm cannot handle if some, but not all, 133 /// paths from the end marker to the entry block cross the start marker. Same 134 /// for start markers that do not always cross the end markers. We avoid any 135 /// issues by removing all lifetime markers, even from the original code. 136 /// 137 /// A better solution could be to hoist all llvm.lifetime.start to the split 138 /// node and all llvm.lifetime.end to the merge node, which should be 139 /// conservatively correct. 140 static void removeLifetimeMarkers(Region *R) { 141 for (auto *BB : R->blocks()) { 142 auto InstIt = BB->begin(); 143 auto InstEnd = BB->end(); 144 145 while (InstIt != InstEnd) { 146 auto NextIt = InstIt; 147 ++NextIt; 148 149 if (auto *IT = dyn_cast<IntrinsicInst>(&*InstIt)) { 150 switch (IT->getIntrinsicID()) { 151 case llvm::Intrinsic::lifetime_start: 152 case llvm::Intrinsic::lifetime_end: 153 BB->getInstList().erase(InstIt); 154 break; 155 default: 156 break; 157 } 158 } 159 160 InstIt = NextIt; 161 } 162 } 163 } 164 165 static bool CodeGen(Scop &S, IslAstInfo &AI, LoopInfo &LI, DominatorTree &DT, 166 ScalarEvolution &SE, RegionInfo &RI) { 167 // Check if we created an isl_ast root node, otherwise exit. 168 isl_ast_node *AstRoot = AI.getAst(); 169 if (!AstRoot) 170 return false; 171 172 // Collect statistics. Do it before we modify the IR to avoid having it any 173 // influence on the result. 174 auto ScopStats = S.getStatistics(); 175 ScopsProcessed++; 176 177 auto &DL = S.getFunction().getParent()->getDataLayout(); 178 Region *R = &S.getRegion(); 179 assert(!R->isTopLevelRegion() && "Top level regions are not supported"); 180 181 ScopAnnotator Annotator; 182 183 simplifyRegion(R, &DT, &LI, &RI); 184 assert(R->isSimple()); 185 BasicBlock *EnteringBB = S.getEnteringBlock(); 186 assert(EnteringBB); 187 PollyIRBuilder Builder = createPollyIRBuilder(EnteringBB, Annotator); 188 189 // Only build the run-time condition and parameters _after_ having 190 // introduced the conditional branch. This is important as the conditional 191 // branch will guard the original scop from new induction variables that 192 // the SCEVExpander may introduce while code generating the parameters and 193 // which may introduce scalar dependences that prevent us from correctly 194 // code generating this scop. 195 BBPair StartExitBlocks = 196 std::get<0>(executeScopConditionally(S, Builder.getTrue(), DT, RI, LI)); 197 BasicBlock *StartBlock = std::get<0>(StartExitBlocks); 198 BasicBlock *ExitBlock = std::get<1>(StartExitBlocks); 199 200 removeLifetimeMarkers(R); 201 auto *SplitBlock = StartBlock->getSinglePredecessor(); 202 203 IslNodeBuilder NodeBuilder(Builder, Annotator, DL, LI, SE, DT, S, StartBlock); 204 205 // All arrays must have their base pointers known before 206 // ScopAnnotator::buildAliasScopes. 207 NodeBuilder.allocateNewArrays(StartExitBlocks); 208 Annotator.buildAliasScopes(S); 209 210 if (PerfMonitoring) { 211 PerfMonitor P(S, EnteringBB->getParent()->getParent()); 212 P.initialize(); 213 P.insertRegionStart(SplitBlock->getTerminator()); 214 215 BasicBlock *MergeBlock = ExitBlock->getUniqueSuccessor(); 216 P.insertRegionEnd(MergeBlock->getTerminator()); 217 } 218 219 // First generate code for the hoisted invariant loads and transitively the 220 // parameters they reference. Afterwards, for the remaining parameters that 221 // might reference the hoisted loads. Finally, build the runtime check 222 // that might reference both hoisted loads as well as parameters. 223 // If the hoisting fails we have to bail and execute the original code. 224 Builder.SetInsertPoint(SplitBlock->getTerminator()); 225 if (!NodeBuilder.preloadInvariantLoads()) { 226 227 // Patch the introduced branch condition to ensure that we always execute 228 // the original SCoP. 229 auto *FalseI1 = Builder.getFalse(); 230 auto *SplitBBTerm = Builder.GetInsertBlock()->getTerminator(); 231 SplitBBTerm->setOperand(0, FalseI1); 232 233 // Since the other branch is hence ignored we mark it as unreachable and 234 // adjust the dominator tree accordingly. 235 auto *ExitingBlock = StartBlock->getUniqueSuccessor(); 236 assert(ExitingBlock); 237 auto *MergeBlock = ExitingBlock->getUniqueSuccessor(); 238 assert(MergeBlock); 239 markBlockUnreachable(*StartBlock, Builder); 240 markBlockUnreachable(*ExitingBlock, Builder); 241 auto *ExitingBB = S.getExitingBlock(); 242 assert(ExitingBB); 243 DT.changeImmediateDominator(MergeBlock, ExitingBB); 244 DT.eraseNode(ExitingBlock); 245 246 isl_ast_node_free(AstRoot); 247 } else { 248 NodeBuilder.addParameters(S.getContext().release()); 249 Value *RTC = NodeBuilder.createRTC(AI.getRunCondition()); 250 251 Builder.GetInsertBlock()->getTerminator()->setOperand(0, RTC); 252 253 // Explicitly set the insert point to the end of the block to avoid that a 254 // split at the builder's current 255 // insert position would move the malloc calls to the wrong BasicBlock. 256 // Ideally we would just split the block during allocation of the new 257 // arrays, but this would break the assumption that there are no blocks 258 // between polly.start and polly.exiting (at this point). 259 Builder.SetInsertPoint(StartBlock->getTerminator()); 260 261 NodeBuilder.create(AstRoot); 262 NodeBuilder.finalize(); 263 fixRegionInfo(*EnteringBB->getParent(), *R->getParent(), RI); 264 265 CodegenedScops++; 266 CodegenedAffineLoops += ScopStats.NumAffineLoops; 267 CodegenedBoxedLoops += ScopStats.NumBoxedLoops; 268 } 269 270 Function *F = EnteringBB->getParent(); 271 verifyGeneratedFunction(S, *F, AI); 272 for (auto *SubF : NodeBuilder.getParallelSubfunctions()) 273 verifyGeneratedFunction(S, *SubF, AI); 274 275 // Mark the function such that we run additional cleanup passes on this 276 // function (e.g. mem2reg to rediscover phi nodes). 277 F->addFnAttr("polly-optimized"); 278 return true; 279 } 280 281 class CodeGeneration : public ScopPass { 282 public: 283 static char ID; 284 285 CodeGeneration() : ScopPass(ID) {} 286 287 /// The data layout used. 288 const DataLayout *DL; 289 290 /// @name The analysis passes we need to generate code. 291 /// 292 ///{ 293 LoopInfo *LI; 294 IslAstInfo *AI; 295 DominatorTree *DT; 296 ScalarEvolution *SE; 297 RegionInfo *RI; 298 ///} 299 300 /// Generate LLVM-IR for the SCoP @p S. 301 bool runOnScop(Scop &S) override { 302 // Skip SCoPs in case they're already code-generated by PPCGCodeGeneration. 303 if (S.isToBeSkipped()) 304 return false; 305 306 AI = &getAnalysis<IslAstInfoWrapperPass>().getAI(); 307 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 308 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 309 SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 310 DL = &S.getFunction().getParent()->getDataLayout(); 311 RI = &getAnalysis<RegionInfoPass>().getRegionInfo(); 312 return CodeGen(S, *AI, *LI, *DT, *SE, *RI); 313 } 314 315 /// Register all analyses and transformation required. 316 void getAnalysisUsage(AnalysisUsage &AU) const override { 317 AU.addRequired<DominatorTreeWrapperPass>(); 318 AU.addRequired<IslAstInfoWrapperPass>(); 319 AU.addRequired<RegionInfoPass>(); 320 AU.addRequired<ScalarEvolutionWrapperPass>(); 321 AU.addRequired<ScopDetectionWrapperPass>(); 322 AU.addRequired<ScopInfoRegionPass>(); 323 AU.addRequired<LoopInfoWrapperPass>(); 324 325 AU.addPreserved<DependenceInfo>(); 326 327 AU.addPreserved<AAResultsWrapperPass>(); 328 AU.addPreserved<BasicAAWrapperPass>(); 329 AU.addPreserved<LoopInfoWrapperPass>(); 330 AU.addPreserved<DominatorTreeWrapperPass>(); 331 AU.addPreserved<GlobalsAAWrapperPass>(); 332 AU.addPreserved<IslAstInfoWrapperPass>(); 333 AU.addPreserved<ScopDetectionWrapperPass>(); 334 AU.addPreserved<ScalarEvolutionWrapperPass>(); 335 AU.addPreserved<SCEVAAWrapperPass>(); 336 337 // FIXME: We do not yet add regions for the newly generated code to the 338 // region tree. 339 AU.addPreserved<RegionInfoPass>(); 340 AU.addPreserved<ScopInfoRegionPass>(); 341 } 342 }; 343 } // namespace 344 345 PreservedAnalyses 346 polly::CodeGenerationPass::run(Scop &S, ScopAnalysisManager &SAM, 347 ScopStandardAnalysisResults &AR, SPMUpdater &U) { 348 auto &AI = SAM.getResult<IslAstAnalysis>(S, AR); 349 if (CodeGen(S, AI, AR.LI, AR.DT, AR.SE, AR.RI)) { 350 U.invalidateScop(S); 351 return PreservedAnalyses::none(); 352 } 353 354 return PreservedAnalyses::all(); 355 } 356 357 char CodeGeneration::ID = 1; 358 359 Pass *polly::createCodeGenerationPass() { return new CodeGeneration(); } 360 361 INITIALIZE_PASS_BEGIN(CodeGeneration, "polly-codegen", 362 "Polly - Create LLVM-IR from SCoPs", false, false); 363 INITIALIZE_PASS_DEPENDENCY(DependenceInfo); 364 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass); 365 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass); 366 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass); 367 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass); 368 INITIALIZE_PASS_DEPENDENCY(ScopDetectionWrapperPass); 369 INITIALIZE_PASS_END(CodeGeneration, "polly-codegen", 370 "Polly - Create LLVM-IR from SCoPs", false, false) 371