1 //===------ CodeGeneration.cpp - Code generate the Scops using ISL. ----======// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // The CodeGeneration pass takes a Scop created by ScopInfo and translates it 11 // back to LLVM-IR using the ISL code generator. 12 // 13 // The Scop describes the high level memory behavior of a control flow region. 14 // Transformation passes can update the schedule (execution order) of statements 15 // in the Scop. ISL is used to generate an abstract syntax tree that reflects 16 // the updated execution order. This clast is used to create new LLVM-IR that is 17 // computationally equivalent to the original control flow region, but executes 18 // its code in the new execution order defined by the changed schedule. 19 // 20 //===----------------------------------------------------------------------===// 21 22 #include "polly/CodeGen/CodeGeneration.h" 23 #include "polly/CodeGen/IslAst.h" 24 #include "polly/CodeGen/IslNodeBuilder.h" 25 #include "polly/CodeGen/PerfMonitor.h" 26 #include "polly/CodeGen/Utils.h" 27 #include "polly/DependenceInfo.h" 28 #include "polly/LinkAllPasses.h" 29 #include "polly/Options.h" 30 #include "polly/ScopInfo.h" 31 #include "polly/Support/ScopHelper.h" 32 #include "llvm/Analysis/AliasAnalysis.h" 33 #include "llvm/Analysis/BasicAliasAnalysis.h" 34 #include "llvm/Analysis/GlobalsModRef.h" 35 #include "llvm/Analysis/LoopInfo.h" 36 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" 37 #include "llvm/IR/Module.h" 38 #include "llvm/IR/PassManager.h" 39 #include "llvm/IR/Verifier.h" 40 #include "llvm/Support/Debug.h" 41 42 using namespace polly; 43 using namespace llvm; 44 45 #define DEBUG_TYPE "polly-codegen" 46 47 static cl::opt<bool> Verify("polly-codegen-verify", 48 cl::desc("Verify the function generated by Polly"), 49 cl::Hidden, cl::init(false), cl::ZeroOrMore, 50 cl::cat(PollyCategory)); 51 52 bool polly::PerfMonitoring; 53 static cl::opt<bool, true> 54 XPerfMonitoring("polly-codegen-perf-monitoring", 55 cl::desc("Add run-time performance monitoring"), cl::Hidden, 56 cl::location(polly::PerfMonitoring), cl::init(false), 57 cl::ZeroOrMore, cl::cat(PollyCategory)); 58 59 namespace polly { 60 /// Mark a basic block unreachable. 61 /// 62 /// Marks the basic block @p Block unreachable by equipping it with an 63 /// UnreachableInst. 64 void markBlockUnreachable(BasicBlock &Block, PollyIRBuilder &Builder) { 65 auto *OrigTerminator = Block.getTerminator(); 66 Builder.SetInsertPoint(OrigTerminator); 67 Builder.CreateUnreachable(); 68 OrigTerminator->eraseFromParent(); 69 } 70 71 } // namespace polly 72 73 namespace { 74 75 static void verifyGeneratedFunction(Scop &S, Function &F, IslAstInfo &AI) { 76 if (!Verify || !verifyFunction(F, &errs())) 77 return; 78 79 DEBUG({ 80 errs() << "== ISL Codegen created an invalid function ==\n\n== The " 81 "SCoP ==\n"; 82 errs() << S; 83 errs() << "\n== The isl AST ==\n"; 84 AI.print(errs()); 85 errs() << "\n== The invalid function ==\n"; 86 F.print(errs()); 87 }); 88 89 llvm_unreachable("Polly generated function could not be verified. Add " 90 "-polly-codegen-verify=false to disable this assertion."); 91 } 92 93 // CodeGeneration adds a lot of BBs without updating the RegionInfo 94 // We make all created BBs belong to the scop's parent region without any 95 // nested structure to keep the RegionInfo verifier happy. 96 static void fixRegionInfo(Function &F, Region &ParentRegion, RegionInfo &RI) { 97 for (BasicBlock &BB : F) { 98 if (RI.getRegionFor(&BB)) 99 continue; 100 101 RI.setRegionFor(&BB, &ParentRegion); 102 } 103 } 104 105 /// Remove all lifetime markers (llvm.lifetime.start, llvm.lifetime.end) from 106 /// @R. 107 /// 108 /// CodeGeneration does not copy lifetime markers into the optimized SCoP, 109 /// which would leave the them only in the original path. This can transform 110 /// code such as 111 /// 112 /// llvm.lifetime.start(%p) 113 /// llvm.lifetime.end(%p) 114 /// 115 /// into 116 /// 117 /// if (RTC) { 118 /// // generated code 119 /// } else { 120 /// // original code 121 /// llvm.lifetime.start(%p) 122 /// } 123 /// llvm.lifetime.end(%p) 124 /// 125 /// The current StackColoring algorithm cannot handle if some, but not all, 126 /// paths from the end marker to the entry block cross the start marker. Same 127 /// for start markers that do not always cross the end markers. We avoid any 128 /// issues by removing all lifetime markers, even from the original code. 129 /// 130 /// A better solution could be to hoist all llvm.lifetime.start to the split 131 /// node and all llvm.lifetime.end to the merge node, which should be 132 /// conservatively correct. 133 static void removeLifetimeMarkers(Region *R) { 134 for (auto *BB : R->blocks()) { 135 auto InstIt = BB->begin(); 136 auto InstEnd = BB->end(); 137 138 while (InstIt != InstEnd) { 139 auto NextIt = InstIt; 140 ++NextIt; 141 142 if (auto *IT = dyn_cast<IntrinsicInst>(&*InstIt)) { 143 switch (IT->getIntrinsicID()) { 144 case llvm::Intrinsic::lifetime_start: 145 case llvm::Intrinsic::lifetime_end: 146 BB->getInstList().erase(InstIt); 147 break; 148 default: 149 break; 150 } 151 } 152 153 InstIt = NextIt; 154 } 155 } 156 } 157 158 static bool CodeGen(Scop &S, IslAstInfo &AI, LoopInfo &LI, DominatorTree &DT, 159 ScalarEvolution &SE, RegionInfo &RI) { 160 // Check if we created an isl_ast root node, otherwise exit. 161 isl_ast_node *AstRoot = AI.getAst(); 162 if (!AstRoot) 163 return false; 164 165 auto &DL = S.getFunction().getParent()->getDataLayout(); 166 Region *R = &S.getRegion(); 167 assert(!R->isTopLevelRegion() && "Top level regions are not supported"); 168 169 ScopAnnotator Annotator; 170 171 simplifyRegion(R, &DT, &LI, &RI); 172 assert(R->isSimple()); 173 BasicBlock *EnteringBB = S.getEnteringBlock(); 174 assert(EnteringBB); 175 PollyIRBuilder Builder = createPollyIRBuilder(EnteringBB, Annotator); 176 177 // Only build the run-time condition and parameters _after_ having 178 // introduced the conditional branch. This is important as the conditional 179 // branch will guard the original scop from new induction variables that 180 // the SCEVExpander may introduce while code generating the parameters and 181 // which may introduce scalar dependences that prevent us from correctly 182 // code generating this scop. 183 BBPair StartExitBlocks = 184 std::get<0>(executeScopConditionally(S, Builder.getTrue(), DT, RI, LI)); 185 BasicBlock *StartBlock = std::get<0>(StartExitBlocks); 186 BasicBlock *ExitBlock = std::get<1>(StartExitBlocks); 187 188 removeLifetimeMarkers(R); 189 auto *SplitBlock = StartBlock->getSinglePredecessor(); 190 191 IslNodeBuilder NodeBuilder(Builder, Annotator, DL, LI, SE, DT, S, StartBlock); 192 193 // All arrays must have their base pointers known before 194 // ScopAnnotator::buildAliasScopes. 195 NodeBuilder.allocateNewArrays(StartExitBlocks); 196 Annotator.buildAliasScopes(S); 197 198 if (PerfMonitoring) { 199 PerfMonitor P(S, EnteringBB->getParent()->getParent()); 200 P.initialize(); 201 P.insertRegionStart(SplitBlock->getTerminator()); 202 203 BasicBlock *MergeBlock = ExitBlock->getUniqueSuccessor(); 204 P.insertRegionEnd(MergeBlock->getTerminator()); 205 } 206 207 // First generate code for the hoisted invariant loads and transitively the 208 // parameters they reference. Afterwards, for the remaining parameters that 209 // might reference the hoisted loads. Finally, build the runtime check 210 // that might reference both hoisted loads as well as parameters. 211 // If the hoisting fails we have to bail and execute the original code. 212 Builder.SetInsertPoint(SplitBlock->getTerminator()); 213 if (!NodeBuilder.preloadInvariantLoads()) { 214 215 // Patch the introduced branch condition to ensure that we always execute 216 // the original SCoP. 217 auto *FalseI1 = Builder.getFalse(); 218 auto *SplitBBTerm = Builder.GetInsertBlock()->getTerminator(); 219 SplitBBTerm->setOperand(0, FalseI1); 220 221 // Since the other branch is hence ignored we mark it as unreachable and 222 // adjust the dominator tree accordingly. 223 auto *ExitingBlock = StartBlock->getUniqueSuccessor(); 224 assert(ExitingBlock); 225 auto *MergeBlock = ExitingBlock->getUniqueSuccessor(); 226 assert(MergeBlock); 227 markBlockUnreachable(*StartBlock, Builder); 228 markBlockUnreachable(*ExitingBlock, Builder); 229 auto *ExitingBB = S.getExitingBlock(); 230 assert(ExitingBB); 231 DT.changeImmediateDominator(MergeBlock, ExitingBB); 232 DT.eraseNode(ExitingBlock); 233 234 isl_ast_node_free(AstRoot); 235 } else { 236 NodeBuilder.addParameters(S.getContext().release()); 237 Value *RTC = NodeBuilder.createRTC(AI.getRunCondition()); 238 239 Builder.GetInsertBlock()->getTerminator()->setOperand(0, RTC); 240 241 // Explicitly set the insert point to the end of the block to avoid that a 242 // split at the builder's current 243 // insert position would move the malloc calls to the wrong BasicBlock. 244 // Ideally we would just split the block during allocation of the new 245 // arrays, but this would break the assumption that there are no blocks 246 // between polly.start and polly.exiting (at this point). 247 Builder.SetInsertPoint(StartBlock->getTerminator()); 248 249 NodeBuilder.create(AstRoot); 250 NodeBuilder.finalize(); 251 fixRegionInfo(*EnteringBB->getParent(), *R->getParent(), RI); 252 } 253 254 Function *F = EnteringBB->getParent(); 255 verifyGeneratedFunction(S, *F, AI); 256 for (auto *SubF : NodeBuilder.getParallelSubfunctions()) 257 verifyGeneratedFunction(S, *SubF, AI); 258 259 // Mark the function such that we run additional cleanup passes on this 260 // function (e.g. mem2reg to rediscover phi nodes). 261 F->addFnAttr("polly-optimized"); 262 return true; 263 } 264 265 class CodeGeneration : public ScopPass { 266 public: 267 static char ID; 268 269 CodeGeneration() : ScopPass(ID) {} 270 271 /// The data layout used. 272 const DataLayout *DL; 273 274 /// @name The analysis passes we need to generate code. 275 /// 276 ///{ 277 LoopInfo *LI; 278 IslAstInfo *AI; 279 DominatorTree *DT; 280 ScalarEvolution *SE; 281 RegionInfo *RI; 282 ///} 283 284 /// Generate LLVM-IR for the SCoP @p S. 285 bool runOnScop(Scop &S) override { 286 // Skip SCoPs in case they're already code-generated by PPCGCodeGeneration. 287 if (S.isToBeSkipped()) 288 return false; 289 290 AI = &getAnalysis<IslAstInfoWrapperPass>().getAI(); 291 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 292 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 293 SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 294 DL = &S.getFunction().getParent()->getDataLayout(); 295 RI = &getAnalysis<RegionInfoPass>().getRegionInfo(); 296 return CodeGen(S, *AI, *LI, *DT, *SE, *RI); 297 } 298 299 /// Register all analyses and transformation required. 300 void getAnalysisUsage(AnalysisUsage &AU) const override { 301 AU.addRequired<DominatorTreeWrapperPass>(); 302 AU.addRequired<IslAstInfoWrapperPass>(); 303 AU.addRequired<RegionInfoPass>(); 304 AU.addRequired<ScalarEvolutionWrapperPass>(); 305 AU.addRequired<ScopDetectionWrapperPass>(); 306 AU.addRequired<ScopInfoRegionPass>(); 307 AU.addRequired<LoopInfoWrapperPass>(); 308 309 AU.addPreserved<DependenceInfo>(); 310 311 AU.addPreserved<AAResultsWrapperPass>(); 312 AU.addPreserved<BasicAAWrapperPass>(); 313 AU.addPreserved<LoopInfoWrapperPass>(); 314 AU.addPreserved<DominatorTreeWrapperPass>(); 315 AU.addPreserved<GlobalsAAWrapperPass>(); 316 AU.addPreserved<IslAstInfoWrapperPass>(); 317 AU.addPreserved<ScopDetectionWrapperPass>(); 318 AU.addPreserved<ScalarEvolutionWrapperPass>(); 319 AU.addPreserved<SCEVAAWrapperPass>(); 320 321 // FIXME: We do not yet add regions for the newly generated code to the 322 // region tree. 323 AU.addPreserved<RegionInfoPass>(); 324 AU.addPreserved<ScopInfoRegionPass>(); 325 } 326 }; 327 } // namespace 328 329 PreservedAnalyses 330 polly::CodeGenerationPass::run(Scop &S, ScopAnalysisManager &SAM, 331 ScopStandardAnalysisResults &AR, SPMUpdater &U) { 332 auto &AI = SAM.getResult<IslAstAnalysis>(S, AR); 333 if (CodeGen(S, AI, AR.LI, AR.DT, AR.SE, AR.RI)) { 334 U.invalidateScop(S); 335 return PreservedAnalyses::none(); 336 } 337 338 return PreservedAnalyses::all(); 339 } 340 341 char CodeGeneration::ID = 1; 342 343 Pass *polly::createCodeGenerationPass() { return new CodeGeneration(); } 344 345 INITIALIZE_PASS_BEGIN(CodeGeneration, "polly-codegen", 346 "Polly - Create LLVM-IR from SCoPs", false, false); 347 INITIALIZE_PASS_DEPENDENCY(DependenceInfo); 348 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass); 349 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass); 350 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass); 351 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass); 352 INITIALIZE_PASS_DEPENDENCY(ScopDetectionWrapperPass); 353 INITIALIZE_PASS_END(CodeGeneration, "polly-codegen", 354 "Polly - Create LLVM-IR from SCoPs", false, false) 355