1 //===------ CodeGeneration.cpp - Code generate the Scops using ISL. ----======// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // The CodeGeneration pass takes a Scop created by ScopInfo and translates it 11 // back to LLVM-IR using the ISL code generator. 12 // 13 // The Scop describes the high level memory behaviour of a control flow region. 14 // Transformation passes can update the schedule (execution order) of statements 15 // in the Scop. ISL is used to generate an abstract syntax tree that reflects 16 // the updated execution order. This clast is used to create new LLVM-IR that is 17 // computationally equivalent to the original control flow region, but executes 18 // its code in the new execution order defined by the changed schedule. 19 // 20 //===----------------------------------------------------------------------===// 21 22 #include "polly/CodeGen/IslAst.h" 23 #include "polly/CodeGen/IslNodeBuilder.h" 24 #include "polly/CodeGen/PerfMonitor.h" 25 #include "polly/CodeGen/Utils.h" 26 #include "polly/DependenceInfo.h" 27 #include "polly/LinkAllPasses.h" 28 #include "polly/Options.h" 29 #include "polly/ScopInfo.h" 30 #include "polly/Support/ScopHelper.h" 31 #include "llvm/Analysis/AliasAnalysis.h" 32 #include "llvm/Analysis/BasicAliasAnalysis.h" 33 #include "llvm/Analysis/GlobalsModRef.h" 34 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" 35 #include "llvm/IR/Module.h" 36 #include "llvm/IR/Verifier.h" 37 #include "llvm/Support/Debug.h" 38 39 using namespace polly; 40 using namespace llvm; 41 42 #define DEBUG_TYPE "polly-codegen" 43 44 static cl::opt<bool> Verify("polly-codegen-verify", 45 cl::desc("Verify the function generated by Polly"), 46 cl::Hidden, cl::init(false), cl::ZeroOrMore, 47 cl::cat(PollyCategory)); 48 49 static cl::opt<bool> 50 PerfMonitoring("polly-codegen-perf-monitoring", 51 cl::desc("Add run-time performance monitoring"), cl::Hidden, 52 cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); 53 54 namespace { 55 class CodeGeneration : public ScopPass { 56 public: 57 static char ID; 58 59 CodeGeneration() : ScopPass(ID) {} 60 61 /// The datalayout used 62 const DataLayout *DL; 63 64 /// @name The analysis passes we need to generate code. 65 /// 66 ///{ 67 LoopInfo *LI; 68 IslAstInfo *AI; 69 DominatorTree *DT; 70 ScalarEvolution *SE; 71 RegionInfo *RI; 72 ///} 73 74 void verifyGeneratedFunction(Scop &S, Function &F) { 75 if (!Verify || !verifyFunction(F, &errs())) 76 return; 77 78 DEBUG({ 79 errs() << "== ISL Codegen created an invalid function ==\n\n== The " 80 "SCoP ==\n"; 81 S.print(errs()); 82 errs() << "\n== The isl AST ==\n"; 83 AI->printScop(errs(), S); 84 errs() << "\n== The invalid function ==\n"; 85 F.print(errs()); 86 }); 87 88 llvm_unreachable("Polly generated function could not be verified. Add " 89 "-polly-codegen-verify=false to disable this assertion."); 90 } 91 92 // CodeGeneration adds a lot of BBs without updating the RegionInfo 93 // We make all created BBs belong to the scop's parent region without any 94 // nested structure to keep the RegionInfo verifier happy. 95 void fixRegionInfo(Function *F, Region *ParentRegion) { 96 for (BasicBlock &BB : *F) { 97 if (RI->getRegionFor(&BB)) 98 continue; 99 100 RI->setRegionFor(&BB, ParentRegion); 101 } 102 } 103 104 /// Mark a basic block unreachable. 105 /// 106 /// Marks the basic block @p Block unreachable by equipping it with an 107 /// UnreachableInst. 108 void markBlockUnreachable(BasicBlock &Block, PollyIRBuilder &Builder) { 109 auto *OrigTerminator = Block.getTerminator(); 110 Builder.SetInsertPoint(OrigTerminator); 111 Builder.CreateUnreachable(); 112 OrigTerminator->eraseFromParent(); 113 } 114 115 /// Remove all lifetime markers (llvm.lifetime.start, llvm.lifetime.end) from 116 /// @R. 117 /// 118 /// CodeGeneration does not copy lifetime markers into the optimized SCoP, 119 /// which would leave the them only in the original path. This can transform 120 /// code such as 121 /// 122 /// llvm.lifetime.start(%p) 123 /// llvm.lifetime.end(%p) 124 /// 125 /// into 126 /// 127 /// if (RTC) { 128 /// // generated code 129 /// } else { 130 /// // original code 131 /// llvm.lifetime.start(%p) 132 /// } 133 /// llvm.lifetime.end(%p) 134 /// 135 /// The current StackColoring algorithm cannot handle if some, but not all, 136 /// paths from the end marker to the entry block cross the start marker. Same 137 /// for start markers that do not always cross the end markers. We avoid any 138 /// issues by removing all lifetime markers, even from the original code. 139 /// 140 /// A better solution could be to hoist all llvm.lifetime.start to the split 141 /// node and all llvm.lifetime.end to the merge node, which should be 142 /// conservatively correct. 143 void removeLifetimeMarkers(Region *R) { 144 for (auto *BB : R->blocks()) { 145 auto InstIt = BB->begin(); 146 auto InstEnd = BB->end(); 147 148 while (InstIt != InstEnd) { 149 auto NextIt = InstIt; 150 ++NextIt; 151 152 if (auto *IT = dyn_cast<IntrinsicInst>(&*InstIt)) { 153 switch (IT->getIntrinsicID()) { 154 case llvm::Intrinsic::lifetime_start: 155 case llvm::Intrinsic::lifetime_end: 156 BB->getInstList().erase(InstIt); 157 break; 158 default: 159 break; 160 } 161 } 162 163 InstIt = NextIt; 164 } 165 } 166 } 167 168 /// Generate LLVM-IR for the SCoP @p S. 169 bool runOnScop(Scop &S) override { 170 AI = &getAnalysis<IslAstInfo>(); 171 172 // Check if we created an isl_ast root node, otherwise exit. 173 isl_ast_node *AstRoot = AI->getAst(); 174 if (!AstRoot) 175 return false; 176 177 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 178 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 179 SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 180 DL = &S.getFunction().getParent()->getDataLayout(); 181 RI = &getAnalysis<RegionInfoPass>().getRegionInfo(); 182 Region *R = &S.getRegion(); 183 assert(!R->isTopLevelRegion() && "Top level regions are not supported"); 184 185 ScopAnnotator Annotator; 186 Annotator.buildAliasScopes(S); 187 188 simplifyRegion(R, DT, LI, RI); 189 assert(R->isSimple()); 190 BasicBlock *EnteringBB = S.getEnteringBlock(); 191 assert(EnteringBB); 192 PollyIRBuilder Builder = createPollyIRBuilder(EnteringBB, Annotator); 193 194 // Only build the run-time condition and parameters _after_ having 195 // introduced the conditional branch. This is important as the conditional 196 // branch will guard the original scop from new induction variables that 197 // the SCEVExpander may introduce while code generating the parameters and 198 // which may introduce scalar dependences that prevent us from correctly 199 // code generating this scop. 200 BasicBlock *StartBlock = 201 executeScopConditionally(S, Builder.getTrue(), *DT, *RI, *LI); 202 removeLifetimeMarkers(R); 203 auto *SplitBlock = StartBlock->getSinglePredecessor(); 204 205 IslNodeBuilder NodeBuilder(Builder, Annotator, *DL, *LI, *SE, *DT, S, 206 StartBlock); 207 208 if (PerfMonitoring) { 209 PerfMonitor P(EnteringBB->getParent()->getParent()); 210 P.initialize(); 211 P.insertRegionStart(SplitBlock->getTerminator()); 212 213 BasicBlock *MergeBlock = SplitBlock->getTerminator() 214 ->getSuccessor(0) 215 ->getUniqueSuccessor() 216 ->getUniqueSuccessor(); 217 P.insertRegionEnd(MergeBlock->getTerminator()); 218 } 219 220 // First generate code for the hoisted invariant loads and transitively the 221 // parameters they reference. Afterwards, for the remaining parameters that 222 // might reference the hoisted loads. Finally, build the runtime check 223 // that might reference both hoisted loads as well as parameters. 224 // If the hoisting fails we have to bail and execute the original code. 225 Builder.SetInsertPoint(SplitBlock->getTerminator()); 226 if (!NodeBuilder.preloadInvariantLoads()) { 227 228 // Patch the introduced branch condition to ensure that we always execute 229 // the original SCoP. 230 auto *FalseI1 = Builder.getFalse(); 231 auto *SplitBBTerm = Builder.GetInsertBlock()->getTerminator(); 232 SplitBBTerm->setOperand(0, FalseI1); 233 234 // Since the other branch is hence ignored we mark it as unreachable and 235 // adjust the dominator tree accordingly. 236 auto *ExitingBlock = StartBlock->getUniqueSuccessor(); 237 assert(ExitingBlock); 238 auto *MergeBlock = ExitingBlock->getUniqueSuccessor(); 239 assert(MergeBlock); 240 markBlockUnreachable(*StartBlock, Builder); 241 markBlockUnreachable(*ExitingBlock, Builder); 242 auto *ExitingBB = S.getExitingBlock(); 243 assert(ExitingBB); 244 DT->changeImmediateDominator(MergeBlock, ExitingBB); 245 DT->eraseNode(ExitingBlock); 246 247 isl_ast_node_free(AstRoot); 248 } else { 249 NodeBuilder.allocateNewArrays(); 250 NodeBuilder.addParameters(S.getContext()); 251 Value *RTC = NodeBuilder.createRTC(AI->getRunCondition()); 252 253 Builder.GetInsertBlock()->getTerminator()->setOperand(0, RTC); 254 Builder.SetInsertPoint(&StartBlock->front()); 255 256 NodeBuilder.create(AstRoot); 257 NodeBuilder.finalize(); 258 fixRegionInfo(EnteringBB->getParent(), R->getParent()); 259 } 260 261 Function *F = EnteringBB->getParent(); 262 verifyGeneratedFunction(S, *F); 263 for (auto *SubF : NodeBuilder.getParallelSubfunctions()) 264 verifyGeneratedFunction(S, *SubF); 265 266 // Mark the function such that we run additional cleanup passes on this 267 // function (e.g. mem2reg to rediscover phi nodes). 268 F->addFnAttr("polly-optimized"); 269 270 return true; 271 } 272 273 /// Register all analyses and transformation required. 274 void getAnalysisUsage(AnalysisUsage &AU) const override { 275 AU.addRequired<DominatorTreeWrapperPass>(); 276 AU.addRequired<IslAstInfo>(); 277 AU.addRequired<RegionInfoPass>(); 278 AU.addRequired<ScalarEvolutionWrapperPass>(); 279 AU.addRequired<ScopDetection>(); 280 AU.addRequired<ScopInfoRegionPass>(); 281 AU.addRequired<LoopInfoWrapperPass>(); 282 283 AU.addPreserved<DependenceInfo>(); 284 285 AU.addPreserved<AAResultsWrapperPass>(); 286 AU.addPreserved<BasicAAWrapperPass>(); 287 AU.addPreserved<LoopInfoWrapperPass>(); 288 AU.addPreserved<DominatorTreeWrapperPass>(); 289 AU.addPreserved<GlobalsAAWrapperPass>(); 290 AU.addPreserved<IslAstInfo>(); 291 AU.addPreserved<ScopDetection>(); 292 AU.addPreserved<ScalarEvolutionWrapperPass>(); 293 AU.addPreserved<SCEVAAWrapperPass>(); 294 295 // FIXME: We do not yet add regions for the newly generated code to the 296 // region tree. 297 AU.addPreserved<RegionInfoPass>(); 298 AU.addPreserved<ScopInfoRegionPass>(); 299 } 300 }; 301 } // namespace 302 303 char CodeGeneration::ID = 1; 304 305 Pass *polly::createCodeGenerationPass() { return new CodeGeneration(); } 306 307 INITIALIZE_PASS_BEGIN(CodeGeneration, "polly-codegen", 308 "Polly - Create LLVM-IR from SCoPs", false, false); 309 INITIALIZE_PASS_DEPENDENCY(DependenceInfo); 310 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass); 311 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass); 312 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass); 313 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass); 314 INITIALIZE_PASS_DEPENDENCY(ScopDetection); 315 INITIALIZE_PASS_END(CodeGeneration, "polly-codegen", 316 "Polly - Create LLVM-IR from SCoPs", false, false) 317