1 //===------ CodeGeneration.cpp - Code generate the Scops. -----------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // The CodeGeneration pass takes a Scop created by ScopInfo and translates it 11 // back to LLVM-IR using Cloog. 12 // 13 // The Scop describes the high level memory behaviour of a control flow region. 14 // Transformation passes can update the schedule (execution order) of statements 15 // in the Scop. Cloog is used to generate an abstract syntax tree (clast) that 16 // reflects the updated execution order. This clast is used to create new 17 // LLVM-IR that is computational equivalent to the original control flow region, 18 // but executes its code in the new execution order defined by the changed 19 // scattering. 20 // 21 //===----------------------------------------------------------------------===// 22 23 #define DEBUG_TYPE "polly-codegen" 24 25 #include "polly/Cloog.h" 26 #include "polly/Dependences.h" 27 #include "polly/LinkAllPasses.h" 28 #include "polly/ScopInfo.h" 29 #include "polly/TempScopInfo.h" 30 #include "polly/CodeGen/CodeGeneration.h" 31 #include "polly/CodeGen/BlockGenerators.h" 32 #include "polly/CodeGen/LoopGenerators.h" 33 #include "polly/Support/GICHelper.h" 34 35 #include "llvm/Module.h" 36 #include "llvm/ADT/SetVector.h" 37 #include "llvm/ADT/PostOrderIterator.h" 38 #include "llvm/Analysis/LoopInfo.h" 39 #include "llvm/Analysis/ScalarEvolutionExpander.h" 40 #include "llvm/Support/CommandLine.h" 41 #include "llvm/Support/Debug.h" 42 #include "llvm/Target/TargetData.h" 43 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 44 45 #define CLOOG_INT_GMP 1 46 #include "cloog/cloog.h" 47 #include "cloog/isl/cloog.h" 48 49 #include "isl/aff.h" 50 51 #include <vector> 52 #include <utility> 53 54 using namespace polly; 55 using namespace llvm; 56 57 struct isl_set; 58 59 namespace polly { 60 61 bool EnablePollyVector; 62 63 static cl::opt<bool, true> 64 Vector("enable-polly-vector", 65 cl::desc("Enable polly vector code generation"), cl::Hidden, 66 cl::location(EnablePollyVector), cl::init(false), cl::ZeroOrMore); 67 68 static cl::opt<bool> 69 OpenMP("enable-polly-openmp", 70 cl::desc("Generate OpenMP parallel code"), cl::Hidden, 71 cl::value_desc("OpenMP code generation enabled if true"), 72 cl::init(false), cl::ZeroOrMore); 73 74 static cl::opt<bool> 75 AtLeastOnce("enable-polly-atLeastOnce", 76 cl::desc("Give polly the hint, that every loop is executed at least" 77 "once"), cl::Hidden, 78 cl::value_desc("OpenMP code generation enabled if true"), 79 cl::init(false), cl::ZeroOrMore); 80 81 typedef DenseMap<const char*, Value*> CharMapT; 82 83 /// Class to generate LLVM-IR that calculates the value of a clast_expr. 84 class ClastExpCodeGen { 85 IRBuilder<> &Builder; 86 const CharMapT &IVS; 87 88 Value *codegen(const clast_name *e, Type *Ty); 89 Value *codegen(const clast_term *e, Type *Ty); 90 Value *codegen(const clast_binary *e, Type *Ty); 91 Value *codegen(const clast_reduction *r, Type *Ty); 92 public: 93 94 // A generator for clast expressions. 95 // 96 // @param B The IRBuilder that defines where the code to calculate the 97 // clast expressions should be inserted. 98 // @param IVMAP A Map that translates strings describing the induction 99 // variables to the Values* that represent these variables 100 // on the LLVM side. 101 ClastExpCodeGen(IRBuilder<> &B, CharMapT &IVMap); 102 103 // Generates code to calculate a given clast expression. 104 // 105 // @param e The expression to calculate. 106 // @return The Value that holds the result. 107 Value *codegen(const clast_expr *e, Type *Ty); 108 }; 109 110 Value *ClastExpCodeGen::codegen(const clast_name *e, Type *Ty) { 111 CharMapT::const_iterator I = IVS.find(e->name); 112 113 assert(I != IVS.end() && "Clast name not found"); 114 115 return Builder.CreateSExtOrBitCast(I->second, Ty); 116 } 117 118 Value *ClastExpCodeGen::codegen(const clast_term *e, Type *Ty) { 119 APInt a = APInt_from_MPZ(e->val); 120 121 Value *ConstOne = ConstantInt::get(Builder.getContext(), a); 122 ConstOne = Builder.CreateSExtOrBitCast(ConstOne, Ty); 123 124 if (!e->var) 125 return ConstOne; 126 127 Value *var = codegen(e->var, Ty); 128 return Builder.CreateMul(ConstOne, var); 129 } 130 131 Value *ClastExpCodeGen::codegen(const clast_binary *e, Type *Ty) { 132 Value *LHS = codegen(e->LHS, Ty); 133 134 APInt RHS_AP = APInt_from_MPZ(e->RHS); 135 136 Value *RHS = ConstantInt::get(Builder.getContext(), RHS_AP); 137 RHS = Builder.CreateSExtOrBitCast(RHS, Ty); 138 139 switch (e->type) { 140 case clast_bin_mod: 141 return Builder.CreateSRem(LHS, RHS); 142 case clast_bin_fdiv: 143 { 144 // floord(n,d) ((n < 0) ? (n - d + 1) : n) / d 145 Value *One = ConstantInt::get(Ty, 1); 146 Value *Zero = ConstantInt::get(Ty, 0); 147 Value *Sum1 = Builder.CreateSub(LHS, RHS); 148 Value *Sum2 = Builder.CreateAdd(Sum1, One); 149 Value *isNegative = Builder.CreateICmpSLT(LHS, Zero); 150 Value *Dividend = Builder.CreateSelect(isNegative, Sum2, LHS); 151 return Builder.CreateSDiv(Dividend, RHS); 152 } 153 case clast_bin_cdiv: 154 { 155 // ceild(n,d) ((n < 0) ? n : (n + d - 1)) / d 156 Value *One = ConstantInt::get(Ty, 1); 157 Value *Zero = ConstantInt::get(Ty, 0); 158 Value *Sum1 = Builder.CreateAdd(LHS, RHS); 159 Value *Sum2 = Builder.CreateSub(Sum1, One); 160 Value *isNegative = Builder.CreateICmpSLT(LHS, Zero); 161 Value *Dividend = Builder.CreateSelect(isNegative, LHS, Sum2); 162 return Builder.CreateSDiv(Dividend, RHS); 163 } 164 case clast_bin_div: 165 return Builder.CreateSDiv(LHS, RHS); 166 }; 167 168 llvm_unreachable("Unknown clast binary expression type"); 169 } 170 171 Value *ClastExpCodeGen::codegen(const clast_reduction *r, Type *Ty) { 172 assert(( r->type == clast_red_min 173 || r->type == clast_red_max 174 || r->type == clast_red_sum) 175 && "Clast reduction type not supported"); 176 Value *old = codegen(r->elts[0], Ty); 177 178 for (int i=1; i < r->n; ++i) { 179 Value *exprValue = codegen(r->elts[i], Ty); 180 181 switch (r->type) { 182 case clast_red_min: 183 { 184 Value *cmp = Builder.CreateICmpSLT(old, exprValue); 185 old = Builder.CreateSelect(cmp, old, exprValue); 186 break; 187 } 188 case clast_red_max: 189 { 190 Value *cmp = Builder.CreateICmpSGT(old, exprValue); 191 old = Builder.CreateSelect(cmp, old, exprValue); 192 break; 193 } 194 case clast_red_sum: 195 old = Builder.CreateAdd(old, exprValue); 196 break; 197 } 198 } 199 200 return old; 201 } 202 203 ClastExpCodeGen::ClastExpCodeGen(IRBuilder<> &B, CharMapT &IVMap) 204 : Builder(B), IVS(IVMap) {} 205 206 Value *ClastExpCodeGen::codegen(const clast_expr *e, Type *Ty) { 207 switch(e->type) { 208 case clast_expr_name: 209 return codegen((const clast_name *)e, Ty); 210 case clast_expr_term: 211 return codegen((const clast_term *)e, Ty); 212 case clast_expr_bin: 213 return codegen((const clast_binary *)e, Ty); 214 case clast_expr_red: 215 return codegen((const clast_reduction *)e, Ty); 216 } 217 218 llvm_unreachable("Unknown clast expression!"); 219 } 220 221 class ClastStmtCodeGen { 222 public: 223 const std::vector<std::string> &getParallelLoops(); 224 225 private: 226 // The Scop we code generate. 227 Scop *S; 228 Pass *P; 229 230 // The Builder specifies the current location to code generate at. 231 IRBuilder<> &Builder; 232 233 // Map the Values from the old code to their counterparts in the new code. 234 ValueMapT ValueMap; 235 236 // clastVars maps from the textual representation of a clast variable to its 237 // current *Value. clast variables are scheduling variables, original 238 // induction variables or parameters. They are used either in loop bounds or 239 // to define the statement instance that is executed. 240 // 241 // for (s = 0; s < n + 3; ++i) 242 // for (t = s; t < m; ++j) 243 // Stmt(i = s + 3 * m, j = t); 244 // 245 // {s,t,i,j,n,m} is the set of clast variables in this clast. 246 CharMapT ClastVars; 247 248 // Codegenerator for clast expressions. 249 ClastExpCodeGen ExpGen; 250 251 // Do we currently generate parallel code? 252 bool parallelCodeGeneration; 253 254 std::vector<std::string> parallelLoops; 255 256 void codegen(const clast_assignment *a); 257 258 void codegen(const clast_assignment *a, ScopStmt *Statement, 259 unsigned Dimension, int vectorDim, 260 std::vector<ValueMapT> *VectorVMap = 0); 261 262 void codegenSubstitutions(const clast_stmt *Assignment, 263 ScopStmt *Statement, int vectorDim = 0, 264 std::vector<ValueMapT> *VectorVMap = 0); 265 266 void codegen(const clast_user_stmt *u, std::vector<Value*> *IVS = NULL, 267 const char *iterator = NULL, isl_set *scatteringDomain = 0); 268 269 void codegen(const clast_block *b); 270 271 /// @brief Create a classical sequential loop. 272 void codegenForSequential(const clast_for *f); 273 274 /// @brief Create OpenMP structure values. 275 /// 276 /// Create a list of values that has to be stored into the OpenMP subfuncition 277 /// structure. 278 SetVector<Value*> getOMPValues(); 279 280 /// @brief Update the internal structures according to a Value Map. 281 /// 282 /// @param VMap A map from old to new values. 283 /// @param Reverse If true, we assume the update should be reversed. 284 void updateWithValueMap(OMPGenerator::ValueToValueMapTy &VMap, 285 bool Reverse); 286 287 /// @brief Create an OpenMP parallel for loop. 288 /// 289 /// This loop reflects a loop as if it would have been created by an OpenMP 290 /// statement. 291 void codegenForOpenMP(const clast_for *f); 292 293 bool isInnermostLoop(const clast_for *f); 294 295 /// @brief Get the number of loop iterations for this loop. 296 /// @param f The clast for loop to check. 297 int getNumberOfIterations(const clast_for *f); 298 299 /// @brief Create vector instructions for this loop. 300 void codegenForVector(const clast_for *f); 301 302 void codegen(const clast_for *f); 303 304 Value *codegen(const clast_equation *eq); 305 306 void codegen(const clast_guard *g); 307 308 void codegen(const clast_stmt *stmt); 309 310 void addParameters(const CloogNames *names); 311 312 IntegerType *getIntPtrTy(); 313 314 public: 315 void codegen(const clast_root *r); 316 317 ClastStmtCodeGen(Scop *scop, IRBuilder<> &B, Pass *P); 318 }; 319 } 320 321 IntegerType *ClastStmtCodeGen::getIntPtrTy() { 322 return P->getAnalysis<TargetData>().getIntPtrType(Builder.getContext()); 323 } 324 325 const std::vector<std::string> &ClastStmtCodeGen::getParallelLoops() { 326 return parallelLoops; 327 } 328 329 void ClastStmtCodeGen::codegen(const clast_assignment *a) { 330 Value *V= ExpGen.codegen(a->RHS, getIntPtrTy()); 331 ClastVars[a->LHS] = V; 332 } 333 334 void ClastStmtCodeGen::codegen(const clast_assignment *A, ScopStmt *Stmt, 335 unsigned Dim, int VectorDim, 336 std::vector<ValueMapT> *VectorVMap) { 337 const PHINode *PN; 338 Value *RHS; 339 340 assert(!A->LHS && "Statement assignments do not have left hand side"); 341 342 PN = Stmt->getInductionVariableForDimension(Dim); 343 RHS = ExpGen.codegen(A->RHS, Builder.getInt64Ty()); 344 RHS = Builder.CreateTruncOrBitCast(RHS, PN->getType()); 345 346 if (VectorVMap) 347 (*VectorVMap)[VectorDim][PN] = RHS; 348 349 ValueMap[PN] = RHS; 350 } 351 352 void ClastStmtCodeGen::codegenSubstitutions(const clast_stmt *Assignment, 353 ScopStmt *Statement, int vectorDim, 354 std::vector<ValueMapT> *VectorVMap) { 355 int Dimension = 0; 356 357 while (Assignment) { 358 assert(CLAST_STMT_IS_A(Assignment, stmt_ass) 359 && "Substitions are expected to be assignments"); 360 codegen((const clast_assignment *)Assignment, Statement, Dimension, 361 vectorDim, VectorVMap); 362 Assignment = Assignment->next; 363 Dimension++; 364 } 365 } 366 367 void ClastStmtCodeGen::codegen(const clast_user_stmt *u, 368 std::vector<Value*> *IVS , const char *iterator, 369 isl_set *Domain) { 370 ScopStmt *Statement = (ScopStmt *)u->statement->usr; 371 372 if (u->substitutions) 373 codegenSubstitutions(u->substitutions, Statement); 374 375 int VectorDimensions = IVS ? IVS->size() : 1; 376 377 if (VectorDimensions == 1) { 378 BlockGenerator::generate(Builder, *Statement, ValueMap, P); 379 return; 380 } 381 382 VectorValueMapT VectorMap(VectorDimensions); 383 384 if (IVS) { 385 assert (u->substitutions && "Substitutions expected!"); 386 int i = 0; 387 for (std::vector<Value*>::iterator II = IVS->begin(), IE = IVS->end(); 388 II != IE; ++II) { 389 ClastVars[iterator] = *II; 390 codegenSubstitutions(u->substitutions, Statement, i, &VectorMap); 391 i++; 392 } 393 } 394 395 VectorBlockGenerator::generate(Builder, *Statement, VectorMap, Domain, P); 396 } 397 398 void ClastStmtCodeGen::codegen(const clast_block *b) { 399 if (b->body) 400 codegen(b->body); 401 } 402 403 void ClastStmtCodeGen::codegenForSequential(const clast_for *f) { 404 Value *LowerBound, *UpperBound, *IV, *Stride; 405 BasicBlock *AfterBB; 406 Type *IntPtrTy = getIntPtrTy(); 407 408 LowerBound = ExpGen.codegen(f->LB, IntPtrTy); 409 UpperBound = ExpGen.codegen(f->UB, IntPtrTy); 410 Stride = Builder.getInt(APInt_from_MPZ(f->stride)); 411 412 IV = createLoop(LowerBound, UpperBound, Stride, Builder, P, AfterBB); 413 414 // Add loop iv to symbols. 415 ClastVars[f->iterator] = IV; 416 417 if (f->body) 418 codegen(f->body); 419 420 // Loop is finished, so remove its iv from the live symbols. 421 ClastVars.erase(f->iterator); 422 Builder.SetInsertPoint(AfterBB->begin()); 423 } 424 425 SetVector<Value*> ClastStmtCodeGen::getOMPValues() { 426 SetVector<Value*> Values; 427 428 // The clast variables 429 for (CharMapT::iterator I = ClastVars.begin(), E = ClastVars.end(); 430 I != E; I++) 431 Values.insert(I->second); 432 433 // The memory reference base addresses 434 for (Scop::iterator SI = S->begin(), SE = S->end(); SI != SE; ++SI) { 435 ScopStmt *Stmt = *SI; 436 for (SmallVector<MemoryAccess*, 8>::iterator I = Stmt->memacc_begin(), 437 E = Stmt->memacc_end(); I != E; ++I) { 438 Value *BaseAddr = const_cast<Value*>((*I)->getBaseAddr()); 439 Values.insert((BaseAddr)); 440 } 441 } 442 443 return Values; 444 } 445 446 void ClastStmtCodeGen::updateWithValueMap(OMPGenerator::ValueToValueMapTy &VMap, 447 bool Reverse) { 448 std::set<Value*> Inserted; 449 450 if (Reverse) { 451 OMPGenerator::ValueToValueMapTy ReverseMap; 452 453 for (std::map<Value*, Value*>::iterator I = VMap.begin(), E = VMap.end(); 454 I != E; ++I) 455 ReverseMap.insert(std::make_pair(I->second, I->first)); 456 457 for (CharMapT::iterator I = ClastVars.begin(), E = ClastVars.end(); 458 I != E; I++) { 459 ClastVars[I->first] = ReverseMap[I->second]; 460 Inserted.insert(I->second); 461 } 462 463 /// FIXME: At the moment we do not reverse the update of the ValueMap. 464 /// This is incomplet, but the failure should be obvious, such that 465 /// we can fix this later. 466 return; 467 } 468 469 for (CharMapT::iterator I = ClastVars.begin(), E = ClastVars.end(); 470 I != E; I++) { 471 ClastVars[I->first] = VMap[I->second]; 472 Inserted.insert(I->second); 473 } 474 475 for (std::map<Value*, Value*>::iterator I = VMap.begin(), E = VMap.end(); 476 I != E; ++I) { 477 if (Inserted.count(I->first)) 478 continue; 479 480 ValueMap[I->first] = I->second; 481 } 482 } 483 484 static void clearDomtree(Function *F, DominatorTree &DT) { 485 DomTreeNode *N = DT.getNode(&F->getEntryBlock()); 486 std::vector<BasicBlock*> Nodes; 487 for (po_iterator<DomTreeNode*> I = po_begin(N), E = po_end(N); I != E; ++I) 488 Nodes.push_back(I->getBlock()); 489 490 for (std::vector<BasicBlock*>::iterator I = Nodes.begin(), E = Nodes.end(); 491 I != E; ++I) 492 DT.eraseNode(*I); 493 } 494 495 void ClastStmtCodeGen::codegenForOpenMP(const clast_for *For) { 496 Value *Stride, *LB, *UB, *IV; 497 BasicBlock::iterator LoopBody; 498 IntegerType *IntPtrTy = getIntPtrTy(); 499 SetVector<Value*> Values; 500 OMPGenerator::ValueToValueMapTy VMap; 501 OMPGenerator OMPGen(Builder, P); 502 503 Stride = Builder.getInt(APInt_from_MPZ(For->stride)); 504 Stride = Builder.CreateSExtOrBitCast(Stride, IntPtrTy); 505 LB = ExpGen.codegen(For->LB, IntPtrTy); 506 UB = ExpGen.codegen(For->UB, IntPtrTy); 507 508 Values = getOMPValues(); 509 510 IV = OMPGen.createParallelLoop(LB, UB, Stride, Values, VMap, &LoopBody); 511 BasicBlock::iterator AfterLoop = Builder.GetInsertPoint(); 512 Builder.SetInsertPoint(LoopBody); 513 514 updateWithValueMap(VMap, /* reverse */ false); 515 ClastVars[For->iterator] = IV; 516 517 if (For->body) 518 codegen(For->body); 519 520 ClastVars.erase(For->iterator); 521 updateWithValueMap(VMap, /* reverse */ true); 522 523 clearDomtree((*LoopBody).getParent()->getParent(), 524 P->getAnalysis<DominatorTree>()); 525 526 Builder.SetInsertPoint(AfterLoop); 527 } 528 529 bool ClastStmtCodeGen::isInnermostLoop(const clast_for *f) { 530 const clast_stmt *stmt = f->body; 531 532 while (stmt) { 533 if (!CLAST_STMT_IS_A(stmt, stmt_user)) 534 return false; 535 536 stmt = stmt->next; 537 } 538 539 return true; 540 } 541 542 int ClastStmtCodeGen::getNumberOfIterations(const clast_for *f) { 543 isl_set *loopDomain = isl_set_copy(isl_set_from_cloog_domain(f->domain)); 544 isl_set *tmp = isl_set_copy(loopDomain); 545 546 // Calculate a map similar to the identity map, but with the last input 547 // and output dimension not related. 548 // [i0, i1, i2, i3] -> [i0, i1, i2, o0] 549 isl_space *Space = isl_set_get_space(loopDomain); 550 Space = isl_space_drop_outputs(Space, 551 isl_set_dim(loopDomain, isl_dim_set) - 2, 1); 552 Space = isl_space_map_from_set(Space); 553 isl_map *identity = isl_map_identity(Space); 554 identity = isl_map_add_dims(identity, isl_dim_in, 1); 555 identity = isl_map_add_dims(identity, isl_dim_out, 1); 556 557 isl_map *map = isl_map_from_domain_and_range(tmp, loopDomain); 558 map = isl_map_intersect(map, identity); 559 560 isl_map *lexmax = isl_map_lexmax(isl_map_copy(map)); 561 isl_map *lexmin = isl_map_lexmin(map); 562 isl_map *sub = isl_map_sum(lexmax, isl_map_neg(lexmin)); 563 564 isl_set *elements = isl_map_range(sub); 565 566 if (!isl_set_is_singleton(elements)) { 567 isl_set_free(elements); 568 return -1; 569 } 570 571 isl_point *p = isl_set_sample_point(elements); 572 573 isl_int v; 574 isl_int_init(v); 575 isl_point_get_coordinate(p, isl_dim_set, isl_set_n_dim(loopDomain) - 1, &v); 576 int numberIterations = isl_int_get_si(v); 577 isl_int_clear(v); 578 isl_point_free(p); 579 580 return (numberIterations) / isl_int_get_si(f->stride) + 1; 581 } 582 583 void ClastStmtCodeGen::codegenForVector(const clast_for *F) { 584 DEBUG(dbgs() << "Vectorizing loop '" << F->iterator << "'\n";); 585 int VectorWidth = getNumberOfIterations(F); 586 587 Value *LB = ExpGen.codegen(F->LB, getIntPtrTy()); 588 589 APInt Stride = APInt_from_MPZ(F->stride); 590 IntegerType *LoopIVType = dyn_cast<IntegerType>(LB->getType()); 591 Stride = Stride.zext(LoopIVType->getBitWidth()); 592 Value *StrideValue = ConstantInt::get(LoopIVType, Stride); 593 594 std::vector<Value*> IVS(VectorWidth); 595 IVS[0] = LB; 596 597 for (int i = 1; i < VectorWidth; i++) 598 IVS[i] = Builder.CreateAdd(IVS[i-1], StrideValue, "p_vector_iv"); 599 600 isl_set *Domain = isl_set_from_cloog_domain(F->domain); 601 602 // Add loop iv to symbols. 603 ClastVars[F->iterator] = LB; 604 605 const clast_stmt *Stmt = F->body; 606 607 while (Stmt) { 608 codegen((const clast_user_stmt *)Stmt, &IVS, F->iterator, 609 isl_set_copy(Domain)); 610 Stmt = Stmt->next; 611 } 612 613 // Loop is finished, so remove its iv from the live symbols. 614 isl_set_free(Domain); 615 ClastVars.erase(F->iterator); 616 } 617 618 void ClastStmtCodeGen::codegen(const clast_for *f) { 619 if ((Vector || OpenMP) && P->getAnalysis<Dependences>().isParallelFor(f)) { 620 if (Vector && isInnermostLoop(f) && (-1 != getNumberOfIterations(f)) 621 && (getNumberOfIterations(f) <= 16)) { 622 codegenForVector(f); 623 return; 624 } 625 626 if (OpenMP && !parallelCodeGeneration) { 627 parallelCodeGeneration = true; 628 parallelLoops.push_back(f->iterator); 629 codegenForOpenMP(f); 630 parallelCodeGeneration = false; 631 return; 632 } 633 } 634 635 codegenForSequential(f); 636 } 637 638 Value *ClastStmtCodeGen::codegen(const clast_equation *eq) { 639 Value *LHS = ExpGen.codegen(eq->LHS, getIntPtrTy()); 640 Value *RHS = ExpGen.codegen(eq->RHS, getIntPtrTy()); 641 CmpInst::Predicate P; 642 643 if (eq->sign == 0) 644 P = ICmpInst::ICMP_EQ; 645 else if (eq->sign > 0) 646 P = ICmpInst::ICMP_SGE; 647 else 648 P = ICmpInst::ICMP_SLE; 649 650 return Builder.CreateICmp(P, LHS, RHS); 651 } 652 653 void ClastStmtCodeGen::codegen(const clast_guard *g) { 654 Function *F = Builder.GetInsertBlock()->getParent(); 655 LLVMContext &Context = F->getContext(); 656 657 BasicBlock *CondBB = SplitBlock(Builder.GetInsertBlock(), 658 Builder.GetInsertPoint(), P); 659 CondBB->setName("polly.cond"); 660 BasicBlock *MergeBB = SplitBlock(CondBB, CondBB->begin(), P); 661 MergeBB->setName("polly.merge"); 662 BasicBlock *ThenBB = BasicBlock::Create(Context, "polly.then", F); 663 664 DominatorTree &DT = P->getAnalysis<DominatorTree>(); 665 DT.addNewBlock(ThenBB, CondBB); 666 DT.changeImmediateDominator(MergeBB, CondBB); 667 668 CondBB->getTerminator()->eraseFromParent(); 669 670 Builder.SetInsertPoint(CondBB); 671 672 Value *Predicate = codegen(&(g->eq[0])); 673 674 for (int i = 1; i < g->n; ++i) { 675 Value *TmpPredicate = codegen(&(g->eq[i])); 676 Predicate = Builder.CreateAnd(Predicate, TmpPredicate); 677 } 678 679 Builder.CreateCondBr(Predicate, ThenBB, MergeBB); 680 Builder.SetInsertPoint(ThenBB); 681 Builder.CreateBr(MergeBB); 682 Builder.SetInsertPoint(ThenBB->begin()); 683 684 codegen(g->then); 685 686 Builder.SetInsertPoint(MergeBB->begin()); 687 } 688 689 void ClastStmtCodeGen::codegen(const clast_stmt *stmt) { 690 if (CLAST_STMT_IS_A(stmt, stmt_root)) 691 assert(false && "No second root statement expected"); 692 else if (CLAST_STMT_IS_A(stmt, stmt_ass)) 693 codegen((const clast_assignment *)stmt); 694 else if (CLAST_STMT_IS_A(stmt, stmt_user)) 695 codegen((const clast_user_stmt *)stmt); 696 else if (CLAST_STMT_IS_A(stmt, stmt_block)) 697 codegen((const clast_block *)stmt); 698 else if (CLAST_STMT_IS_A(stmt, stmt_for)) 699 codegen((const clast_for *)stmt); 700 else if (CLAST_STMT_IS_A(stmt, stmt_guard)) 701 codegen((const clast_guard *)stmt); 702 703 if (stmt->next) 704 codegen(stmt->next); 705 } 706 707 void ClastStmtCodeGen::addParameters(const CloogNames *names) { 708 SCEVExpander Rewriter(P->getAnalysis<ScalarEvolution>(), "polly"); 709 710 int i = 0; 711 for (Scop::param_iterator PI = S->param_begin(), PE = S->param_end(); 712 PI != PE; ++PI) { 713 assert(i < names->nb_parameters && "Not enough parameter names"); 714 715 const SCEV *Param = *PI; 716 Type *Ty = Param->getType(); 717 718 Instruction *insertLocation = --(Builder.GetInsertBlock()->end()); 719 Value *V = Rewriter.expandCodeFor(Param, Ty, insertLocation); 720 ClastVars[names->parameters[i]] = V; 721 722 ++i; 723 } 724 } 725 726 void ClastStmtCodeGen::codegen(const clast_root *r) { 727 addParameters(r->names); 728 729 parallelCodeGeneration = false; 730 731 const clast_stmt *stmt = (const clast_stmt*) r; 732 if (stmt->next) 733 codegen(stmt->next); 734 } 735 736 ClastStmtCodeGen::ClastStmtCodeGen(Scop *scop, IRBuilder<> &B, Pass *P) : 737 S(scop), P(P), Builder(B), ExpGen(Builder, ClastVars) {} 738 739 namespace { 740 class CodeGeneration : public ScopPass { 741 Region *region; 742 Scop *S; 743 DominatorTree *DT; 744 RegionInfo *RI; 745 746 std::vector<std::string> parallelLoops; 747 748 public: 749 static char ID; 750 751 CodeGeneration() : ScopPass(ID) {} 752 753 // Split the entry edge of the region and generate a new basic block on this 754 // edge. This function also updates ScopInfo and RegionInfo. 755 // 756 // @param region The region where the entry edge will be splitted. 757 BasicBlock *splitEdgeAdvanced(Region *region) { 758 BasicBlock *newBlock; 759 BasicBlock *splitBlock; 760 761 newBlock = SplitEdge(region->getEnteringBlock(), region->getEntry(), this); 762 763 if (DT->dominates(region->getEntry(), newBlock)) { 764 BasicBlock *OldBlock = region->getEntry(); 765 std::string OldName = OldBlock->getName(); 766 767 // Update ScopInfo. 768 for (Scop::iterator SI = S->begin(), SE = S->end(); SI != SE; ++SI) 769 if ((*SI)->getBasicBlock() == OldBlock) { 770 (*SI)->setBasicBlock(newBlock); 771 break; 772 } 773 774 // Update RegionInfo. 775 splitBlock = OldBlock; 776 OldBlock->setName("polly.split"); 777 newBlock->setName(OldName); 778 region->replaceEntry(newBlock); 779 RI->setRegionFor(newBlock, region); 780 } else { 781 RI->setRegionFor(newBlock, region->getParent()); 782 splitBlock = newBlock; 783 } 784 785 return splitBlock; 786 } 787 788 // Create a split block that branches either to the old code or to a new basic 789 // block where the new code can be inserted. 790 // 791 // @param Builder A builder that will be set to point to a basic block, where 792 // the new code can be generated. 793 // @return The split basic block. 794 BasicBlock *addSplitAndStartBlock(IRBuilder<> *Builder) { 795 BasicBlock *StartBlock, *SplitBlock; 796 797 SplitBlock = splitEdgeAdvanced(region); 798 SplitBlock->setName("polly.split_new_and_old"); 799 Function *F = SplitBlock->getParent(); 800 StartBlock = BasicBlock::Create(F->getContext(), "polly.start", F); 801 SplitBlock->getTerminator()->eraseFromParent(); 802 Builder->SetInsertPoint(SplitBlock); 803 Builder->CreateCondBr(Builder->getTrue(), StartBlock, region->getEntry()); 804 DT->addNewBlock(StartBlock, SplitBlock); 805 Builder->SetInsertPoint(StartBlock); 806 return SplitBlock; 807 } 808 809 // Merge the control flow of the newly generated code with the existing code. 810 // 811 // @param SplitBlock The basic block where the control flow was split between 812 // old and new version of the Scop. 813 // @param Builder An IRBuilder that points to the last instruction of the 814 // newly generated code. 815 void mergeControlFlow(BasicBlock *SplitBlock, IRBuilder<> *Builder) { 816 BasicBlock *MergeBlock; 817 Region *R = region; 818 819 if (R->getExit()->getSinglePredecessor()) 820 // No splitEdge required. A block with a single predecessor cannot have 821 // PHI nodes that would complicate life. 822 MergeBlock = R->getExit(); 823 else { 824 MergeBlock = SplitEdge(R->getExitingBlock(), R->getExit(), this); 825 // SplitEdge will never split R->getExit(), as R->getExit() has more than 826 // one predecessor. Hence, mergeBlock is always a newly generated block. 827 R->replaceExit(MergeBlock); 828 } 829 830 Builder->CreateBr(MergeBlock); 831 MergeBlock->setName("polly.merge_new_and_old"); 832 833 if (DT->dominates(SplitBlock, MergeBlock)) 834 DT->changeImmediateDominator(MergeBlock, SplitBlock); 835 } 836 837 bool runOnScop(Scop &scop) { 838 S = &scop; 839 region = &S->getRegion(); 840 DT = &getAnalysis<DominatorTree>(); 841 RI = &getAnalysis<RegionInfo>(); 842 843 parallelLoops.clear(); 844 845 assert(region->isSimple() && "Only simple regions are supported"); 846 847 // In the CFG the optimized code of the SCoP is generated next to the 848 // original code. Both the new and the original version of the code remain 849 // in the CFG. A branch statement decides which version is executed. 850 // For now, we always execute the new version (the old one is dead code 851 // eliminated by the cleanup passes). In the future we may decide to execute 852 // the new version only if certain run time checks succeed. This will be 853 // useful to support constructs for which we cannot prove all assumptions at 854 // compile time. 855 // 856 // Before transformation: 857 // 858 // bb0 859 // | 860 // orig_scop 861 // | 862 // bb1 863 // 864 // After transformation: 865 // bb0 866 // | 867 // polly.splitBlock 868 // / \. 869 // | startBlock 870 // | | 871 // orig_scop new_scop 872 // \ / 873 // \ / 874 // bb1 (joinBlock) 875 IRBuilder<> builder(region->getEntry()); 876 877 // The builder will be set to startBlock. 878 BasicBlock *splitBlock = addSplitAndStartBlock(&builder); 879 BasicBlock *StartBlock = builder.GetInsertBlock(); 880 881 mergeControlFlow(splitBlock, &builder); 882 builder.SetInsertPoint(StartBlock->begin()); 883 884 ClastStmtCodeGen CodeGen(S, builder, this); 885 CloogInfo &C = getAnalysis<CloogInfo>(); 886 CodeGen.codegen(C.getClast()); 887 888 parallelLoops.insert(parallelLoops.begin(), 889 CodeGen.getParallelLoops().begin(), 890 CodeGen.getParallelLoops().end()); 891 892 return true; 893 } 894 895 virtual void printScop(raw_ostream &OS) const { 896 for (std::vector<std::string>::const_iterator PI = parallelLoops.begin(), 897 PE = parallelLoops.end(); PI != PE; ++PI) 898 OS << "Parallel loop with iterator '" << *PI << "' generated\n"; 899 } 900 901 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 902 AU.addRequired<CloogInfo>(); 903 AU.addRequired<Dependences>(); 904 AU.addRequired<DominatorTree>(); 905 AU.addRequired<RegionInfo>(); 906 AU.addRequired<ScalarEvolution>(); 907 AU.addRequired<ScopDetection>(); 908 AU.addRequired<ScopInfo>(); 909 AU.addRequired<TargetData>(); 910 911 AU.addPreserved<CloogInfo>(); 912 AU.addPreserved<Dependences>(); 913 914 // FIXME: We do not create LoopInfo for the newly generated loops. 915 AU.addPreserved<LoopInfo>(); 916 AU.addPreserved<DominatorTree>(); 917 AU.addPreserved<ScopDetection>(); 918 AU.addPreserved<ScalarEvolution>(); 919 920 // FIXME: We do not yet add regions for the newly generated code to the 921 // region tree. 922 AU.addPreserved<RegionInfo>(); 923 AU.addPreserved<TempScopInfo>(); 924 AU.addPreserved<ScopInfo>(); 925 AU.addPreservedID(IndependentBlocksID); 926 } 927 }; 928 } 929 930 char CodeGeneration::ID = 1; 931 932 INITIALIZE_PASS_BEGIN(CodeGeneration, "polly-codegen", 933 "Polly - Create LLVM-IR from SCoPs", false, false) 934 INITIALIZE_PASS_DEPENDENCY(CloogInfo) 935 INITIALIZE_PASS_DEPENDENCY(Dependences) 936 INITIALIZE_PASS_DEPENDENCY(DominatorTree) 937 INITIALIZE_PASS_DEPENDENCY(RegionInfo) 938 INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) 939 INITIALIZE_PASS_DEPENDENCY(ScopDetection) 940 INITIALIZE_PASS_DEPENDENCY(TargetData) 941 INITIALIZE_PASS_END(CodeGeneration, "polly-codegen", 942 "Polly - Create LLVM-IR from SCoPs", false, false) 943 944 Pass *polly::createCodeGenerationPass() { 945 return new CodeGeneration(); 946 } 947