1 //===------ CodeGeneration.cpp - Code generate the Scops. -----------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // The CodeGeneration pass takes a Scop created by ScopInfo and translates it
11 // back to LLVM-IR using Cloog.
12 //
13 // The Scop describes the high level memory behaviour of a control flow region.
14 // Transformation passes can update the schedule (execution order) of statements
15 // in the Scop. Cloog is used to generate an abstract syntax tree (clast) that
16 // reflects the updated execution order. This clast is used to create new
17 // LLVM-IR that is computational equivalent to the original control flow region,
18 // but executes its code in the new execution order defined by the changed
19 // scattering.
20 //
21 //===----------------------------------------------------------------------===//
22 
23 #define DEBUG_TYPE "polly-codegen"
24 
25 #include "polly/Cloog.h"
26 #include "polly/CodeGeneration.h"
27 #include "polly/Dependences.h"
28 #include "polly/LinkAllPasses.h"
29 #include "polly/ScopInfo.h"
30 #include "polly/TempScopInfo.h"
31 #include "polly/Support/GICHelper.h"
32 #include "polly/LoopGenerators.h"
33 
34 #include "llvm/Module.h"
35 #include "llvm/ADT/SetVector.h"
36 #include "llvm/ADT/PostOrderIterator.h"
37 #include "llvm/Analysis/LoopInfo.h"
38 #include "llvm/Analysis/ScalarEvolutionExpander.h"
39 #include "llvm/Support/CommandLine.h"
40 #include "llvm/Support/Debug.h"
41 #include "llvm/Support/IRBuilder.h"
42 #include "llvm/Target/TargetData.h"
43 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
44 
45 #define CLOOG_INT_GMP 1
46 #include "cloog/cloog.h"
47 #include "cloog/isl/cloog.h"
48 
49 #include "isl/aff.h"
50 
51 #include <vector>
52 #include <utility>
53 
54 using namespace polly;
55 using namespace llvm;
56 
57 struct isl_set;
58 
59 namespace polly {
60 
61 bool EnablePollyVector;
62 
63 static cl::opt<bool, true>
64 Vector("enable-polly-vector",
65        cl::desc("Enable polly vector code generation"), cl::Hidden,
66        cl::location(EnablePollyVector), cl::init(false), cl::ZeroOrMore);
67 
68 static cl::opt<bool>
69 OpenMP("enable-polly-openmp",
70        cl::desc("Generate OpenMP parallel code"), cl::Hidden,
71        cl::value_desc("OpenMP code generation enabled if true"),
72        cl::init(false), cl::ZeroOrMore);
73 
74 static cl::opt<bool>
75 AtLeastOnce("enable-polly-atLeastOnce",
76        cl::desc("Give polly the hint, that every loop is executed at least"
77                 "once"), cl::Hidden,
78        cl::value_desc("OpenMP code generation enabled if true"),
79        cl::init(false), cl::ZeroOrMore);
80 
81 static cl::opt<bool>
82 Aligned("enable-polly-aligned",
83        cl::desc("Assumed aligned memory accesses."), cl::Hidden,
84        cl::value_desc("OpenMP code generation enabled if true"),
85        cl::init(false), cl::ZeroOrMore);
86 
87 typedef DenseMap<const Value*, Value*> ValueMapT;
88 typedef DenseMap<const char*, Value*> CharMapT;
89 typedef std::vector<ValueMapT> VectorValueMapT;
90 
91 class IslGenerator {
92 public:
93   IslGenerator(IRBuilder<> &Builder, std::vector<Value *> &IVS) :
94     Builder(Builder), IVS(IVS) {}
95   Value *generateIslInt(__isl_take isl_int Int);
96   Value *generateIslAff(__isl_take isl_aff *Aff);
97   Value *generateIslPwAff(__isl_take isl_pw_aff *PwAff);
98 
99 private:
100   typedef struct {
101     Value *Result;
102     class IslGenerator *Generator;
103   } IslGenInfo;
104 
105   IRBuilder<> &Builder;
106   std::vector<Value *> &IVS;
107   static int mergeIslAffValues(__isl_take isl_set *Set,
108                                __isl_take isl_aff *Aff, void *User);
109 };
110 
111 Value *IslGenerator::generateIslInt(isl_int Int) {
112   mpz_t IntMPZ;
113   mpz_init(IntMPZ);
114   isl_int_get_gmp(Int, IntMPZ);
115   Value *IntValue = Builder.getInt(APInt_from_MPZ(IntMPZ));
116   mpz_clear(IntMPZ);
117   return IntValue;
118 }
119 
120 Value *IslGenerator::generateIslAff(__isl_take isl_aff *Aff) {
121   Value *Result;
122   Value *ConstValue;
123   isl_int ConstIsl;
124 
125   isl_int_init(ConstIsl);
126   isl_aff_get_constant(Aff, &ConstIsl);
127   ConstValue = generateIslInt(ConstIsl);
128   Type *Ty = Builder.getInt64Ty();
129 
130   // FIXME: We should give the constant and coefficients the right type. Here
131   // we force it into i64.
132   Result = Builder.CreateSExtOrBitCast(ConstValue, Ty);
133 
134   unsigned int NbInputDims = isl_aff_dim(Aff, isl_dim_in);
135 
136   assert((IVS.size() == NbInputDims) && "The Dimension of Induction Variables"
137          "must match the dimension of the affine space.");
138 
139   isl_int CoefficientIsl;
140   isl_int_init(CoefficientIsl);
141 
142   for (unsigned int i = 0; i < NbInputDims; ++i) {
143     Value *CoefficientValue;
144     isl_aff_get_coefficient(Aff, isl_dim_in, i, &CoefficientIsl);
145 
146     if (isl_int_is_zero(CoefficientIsl))
147       continue;
148 
149     CoefficientValue = generateIslInt(CoefficientIsl);
150     CoefficientValue = Builder.CreateIntCast(CoefficientValue, Ty, true);
151     Value *IV = Builder.CreateIntCast(IVS[i], Ty, true);
152     Value *PAdd = Builder.CreateMul(CoefficientValue, IV, "p_mul_coeff");
153     Result = Builder.CreateAdd(Result, PAdd, "p_sum_coeff");
154   }
155 
156   isl_int_clear(CoefficientIsl);
157   isl_int_clear(ConstIsl);
158   isl_aff_free(Aff);
159 
160   return Result;
161 }
162 
163 int IslGenerator::mergeIslAffValues(__isl_take isl_set *Set,
164                                     __isl_take isl_aff *Aff, void *User) {
165   IslGenInfo *GenInfo = (IslGenInfo *)User;
166 
167   assert((GenInfo->Result == NULL) && "Result is already set."
168          "Currently only single isl_aff is supported");
169   assert(isl_set_plain_is_universe(Set)
170          && "Code generation failed because the set is not universe");
171 
172   GenInfo->Result = GenInfo->Generator->generateIslAff(Aff);
173 
174   isl_set_free(Set);
175   return 0;
176 }
177 
178 Value *IslGenerator::generateIslPwAff(__isl_take isl_pw_aff *PwAff) {
179   IslGenInfo User;
180   User.Result = NULL;
181   User.Generator = this;
182   isl_pw_aff_foreach_piece(PwAff, mergeIslAffValues, &User);
183   assert(User.Result && "Code generation for isl_pw_aff failed");
184 
185   isl_pw_aff_free(PwAff);
186   return User.Result;
187 }
188 
189 /// @brief Generate a new basic block for a polyhedral statement.
190 ///
191 /// The only public function exposed is generate().
192 class BlockGenerator {
193 public:
194   /// @brief Generate a new BasicBlock for a ScopStmt.
195   ///
196   /// @param Builder   The LLVM-IR Builder used to generate the statement. The
197   ///                  code is generated at the location, the Builder points to.
198   /// @param Stmt      The statement to code generate.
199   /// @param GlobalMap A map that defines for certain Values referenced from the
200   ///                  original code new Values they should be replaced with.
201   /// @param P         A reference to the pass this function is called from.
202   ///                  The pass is needed to update other analysis.
203   static void generate(IRBuilder<> &Builder, ScopStmt &Stmt,
204                        ValueMapT &GlobalMap, Pass *P) {
205     BlockGenerator Generator(Builder, Stmt, P);
206     Generator.copyBB(GlobalMap);
207   }
208 
209 protected:
210   IRBuilder<> &Builder;
211   ScopStmt &Statement;
212   Pass *P;
213 
214   BlockGenerator(IRBuilder<> &B, ScopStmt &Stmt, Pass *P);
215 
216   /// @brief Get the new version of a Value.
217   ///
218   /// @param Old       The old Value.
219   /// @param BBMap     A mapping from old values to their new values
220   ///                  (for values recalculated within this basic block).
221   /// @param GlobalMap A mapping from old values to their new values
222   ///                  (for values recalculated in the new ScoP, but not
223   ///                   within this basic block).
224   ///
225   /// @returns  o The old value, if it is still valid.
226   ///           o The new value, if available.
227   ///           o NULL, if no value is found.
228   Value *getNewValue(const Value *Old, ValueMapT &BBMap, ValueMapT &GlobalMap);
229 
230   void copyInstScalar(const Instruction *Inst, ValueMapT &BBMap,
231                       ValueMapT &GlobalMap);
232 
233   /// @brief Get the memory access offset to be added to the base address
234   std::vector<Value*> getMemoryAccessIndex(__isl_keep isl_map *AccessRelation,
235                                            Value *BaseAddress, ValueMapT &BBMap,
236                                            ValueMapT &GlobalMap);
237 
238   /// @brief Get the new operand address according to the changed access in
239   ///        JSCOP file.
240   Value *getNewAccessOperand(__isl_keep isl_map *NewAccessRelation,
241                              Value *BaseAddress, ValueMapT &BBMap,
242                              ValueMapT &GlobalMap);
243 
244   /// @brief Generate the operand address
245   Value *generateLocationAccessed(const Instruction *Inst,
246                                   const Value *Pointer, ValueMapT &BBMap,
247                                   ValueMapT &GlobalMap);
248 
249   Value *generateScalarLoad(const LoadInst *load, ValueMapT &BBMap,
250                             ValueMapT &GlobalMap);
251 
252   Value *generateScalarStore(const StoreInst *store, ValueMapT &BBMap,
253                              ValueMapT &GlobalMap);
254 
255   /// @brief Copy a single Instruction.
256   ///
257   /// This copies a single Instruction and updates references to old values
258   /// with references to new values, as defined by GlobalMap and BBMap.
259   ///
260   /// @param BBMap     A mapping from old values to their new values
261   ///                  (for values recalculated within this basic block).
262   /// @param GlobalMap A mapping from old values to their new values
263   ///                  (for values recalculated in the new ScoP, but not
264   ///                  within this basic block).
265   void copyInstruction(const Instruction *Inst, ValueMapT &BBMap,
266                        ValueMapT &GlobalMap);
267 
268   /// @brief Copy the basic block.
269   ///
270   /// This copies the entire basic block and updates references to old values
271   /// with references to new values, as defined by GlobalMap.
272   ///
273   /// @param GlobalMap A mapping from old values to their new values
274   ///                  (for values recalculated in the new ScoP, but not
275   ///                  within this basic block).
276   void copyBB(ValueMapT &GlobalMap);
277 };
278 
279 BlockGenerator::BlockGenerator(IRBuilder<> &B, ScopStmt &Stmt, Pass *P):
280   Builder(B), Statement(Stmt), P(P) {}
281 
282 Value *BlockGenerator::getNewValue(const Value *Old, ValueMapT &BBMap,
283                                    ValueMapT &GlobalMap) {
284   // We assume constants never change.
285   // This avoids map lookups for many calls to this function.
286   if (isa<Constant>(Old))
287     return const_cast<Value*>(Old);
288 
289   if (GlobalMap.count(Old)) {
290     Value *New = GlobalMap[Old];
291 
292     if (Old->getType()->getScalarSizeInBits()
293         < New->getType()->getScalarSizeInBits())
294       New = Builder.CreateTruncOrBitCast(New, Old->getType());
295 
296     return New;
297   }
298 
299   if (BBMap.count(Old)) {
300     return BBMap[Old];
301   }
302 
303   // 'Old' is within the original SCoP, but was not rewritten.
304   //
305   // Such values appear, if they only calculate information already available in
306   // the polyhedral description (e.g.  an induction variable increment). They
307   // can be safely ignored.
308   if (const Instruction *Inst = dyn_cast<Instruction>(Old))
309     if (Statement.getParent()->getRegion().contains(Inst->getParent()))
310       return NULL;
311 
312   // Everything else is probably a scop-constant value defined as global,
313   // function parameter or an instruction not within the scop.
314   return const_cast<Value*>(Old);
315 }
316 
317 void BlockGenerator::copyInstScalar(const Instruction *Inst, ValueMapT &BBMap,
318                                     ValueMapT &GlobalMap) {
319   Instruction *NewInst = Inst->clone();
320 
321   // Replace old operands with the new ones.
322   for (Instruction::const_op_iterator OI = Inst->op_begin(),
323        OE = Inst->op_end(); OI != OE; ++OI) {
324     Value *OldOperand = *OI;
325     Value *NewOperand = getNewValue(OldOperand, BBMap, GlobalMap);
326 
327     if (!NewOperand) {
328       assert(!isa<StoreInst>(NewInst)
329              && "Store instructions are always needed!");
330       delete NewInst;
331       return;
332     }
333 
334     NewInst->replaceUsesOfWith(OldOperand, NewOperand);
335   }
336 
337   Builder.Insert(NewInst);
338   BBMap[Inst] = NewInst;
339 
340   if (!NewInst->getType()->isVoidTy())
341     NewInst->setName("p_" + Inst->getName());
342 }
343 
344 std::vector<Value*> BlockGenerator::getMemoryAccessIndex(
345   __isl_keep isl_map *AccessRelation, Value *BaseAddress,
346   ValueMapT &BBMap, ValueMapT &GlobalMap) {
347 
348   assert((isl_map_dim(AccessRelation, isl_dim_out) == 1)
349          && "Only single dimensional access functions supported");
350 
351   std::vector<Value *> IVS;
352   for (unsigned i = 0; i < Statement.getNumIterators(); ++i) {
353     const Value *OriginalIV = Statement.getInductionVariableForDimension(i);
354     Value *NewIV = getNewValue(OriginalIV, BBMap, GlobalMap);
355     IVS.push_back(NewIV);
356   }
357 
358   isl_pw_aff *PwAff = isl_map_dim_max(isl_map_copy(AccessRelation), 0);
359   IslGenerator IslGen(Builder, IVS);
360   Value *OffsetValue = IslGen.generateIslPwAff(PwAff);
361 
362   Type *Ty = Builder.getInt64Ty();
363   OffsetValue = Builder.CreateIntCast(OffsetValue, Ty, true);
364 
365   std::vector<Value*> IndexArray;
366   Value *NullValue = Constant::getNullValue(Ty);
367   IndexArray.push_back(NullValue);
368   IndexArray.push_back(OffsetValue);
369   return IndexArray;
370 }
371 
372 Value *BlockGenerator::getNewAccessOperand(
373   __isl_keep isl_map *NewAccessRelation, Value *BaseAddress,
374   ValueMapT &BBMap, ValueMapT &GlobalMap) {
375   std::vector<Value*> IndexArray = getMemoryAccessIndex(NewAccessRelation,
376                                                         BaseAddress,
377                                                         BBMap, GlobalMap);
378   Value *NewOperand = Builder.CreateGEP(BaseAddress, IndexArray,
379                                         "p_newarrayidx_");
380   return NewOperand;
381 }
382 
383 Value *BlockGenerator::generateLocationAccessed(const Instruction *Inst,
384                                                 const Value *Pointer,
385                                                 ValueMapT &BBMap,
386                                                 ValueMapT &GlobalMap) {
387   MemoryAccess &Access = Statement.getAccessFor(Inst);
388   isl_map *CurrentAccessRelation = Access.getAccessRelation();
389   isl_map *NewAccessRelation = Access.getNewAccessRelation();
390 
391   assert(isl_map_has_equal_space(CurrentAccessRelation, NewAccessRelation)
392          && "Current and new access function use different spaces");
393 
394   Value *NewPointer;
395 
396   if (!NewAccessRelation) {
397     NewPointer = getNewValue(Pointer, BBMap, GlobalMap);
398   } else {
399     Value *BaseAddress = const_cast<Value*>(Access.getBaseAddr());
400     NewPointer = getNewAccessOperand(NewAccessRelation, BaseAddress,
401                                      BBMap, GlobalMap);
402   }
403 
404   isl_map_free(CurrentAccessRelation);
405   isl_map_free(NewAccessRelation);
406   return NewPointer;
407 }
408 
409 Value *BlockGenerator::generateScalarLoad(const LoadInst *Load,
410                                           ValueMapT &BBMap,
411                                           ValueMapT &GlobalMap) {
412   const Value *Pointer = Load->getPointerOperand();
413   const Instruction *Inst = dyn_cast<Instruction>(Load);
414   Value *NewPointer = generateLocationAccessed(Inst, Pointer, BBMap, GlobalMap);
415   Value *ScalarLoad = Builder.CreateLoad(NewPointer,
416                                          Load->getName() + "_p_scalar_");
417   return ScalarLoad;
418 }
419 
420 Value *BlockGenerator::generateScalarStore(const StoreInst *Store,
421                                            ValueMapT &BBMap,
422                                            ValueMapT &GlobalMap) {
423   const Value *Pointer = Store->getPointerOperand();
424   Value *NewPointer = generateLocationAccessed(Store, Pointer, BBMap,
425                                                GlobalMap);
426   Value *ValueOperand = getNewValue(Store->getValueOperand(), BBMap, GlobalMap);
427 
428   return Builder.CreateStore(ValueOperand, NewPointer);
429 }
430 
431 void BlockGenerator::copyInstruction(const Instruction *Inst,
432                                      ValueMapT &BBMap, ValueMapT &GlobalMap) {
433   // Terminator instructions control the control flow. They are explicitly
434   // expressed in the clast and do not need to be copied.
435   if (Inst->isTerminator())
436     return;
437 
438   if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
439     BBMap[Load] = generateScalarLoad(Load, BBMap, GlobalMap);
440     return;
441   }
442 
443   if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
444     BBMap[Store] = generateScalarStore(Store, BBMap, GlobalMap);
445     return;
446   }
447 
448   copyInstScalar(Inst, BBMap, GlobalMap);
449 }
450 
451 
452 void BlockGenerator::copyBB(ValueMapT &GlobalMap) {
453   BasicBlock *BB = Statement.getBasicBlock();
454   BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(),
455                                   Builder.GetInsertPoint(), P);
456   CopyBB->setName("polly.stmt." + BB->getName());
457   Builder.SetInsertPoint(CopyBB->begin());
458 
459   ValueMapT BBMap;
460 
461   for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE;
462        ++II)
463       copyInstruction(II, BBMap, GlobalMap);
464 }
465 
466 /// @brief Generate a new vector basic block for a polyhedral statement.
467 ///
468 /// The only public function exposed is generate().
469 class VectorBlockGenerator : BlockGenerator {
470 public:
471   /// @brief Generate a new vector basic block for a ScoPStmt.
472   ///
473   /// This code generation is similar to the normal, scalar code generation,
474   /// except that each instruction is code generated for several vector lanes
475   /// at a time. If possible instructions are issued as actual vector
476   /// instructions, but e.g. for address calculation instructions we currently
477   /// generate scalar instructions for each vector lane.
478   ///
479   /// @param Builder    The LLVM-IR Builder used to generate the statement. The
480   ///                   code is generated at the location, the builder points
481   ///                   to.
482   /// @param Stmt       The statement to code generate.
483   /// @param GlobalMaps A vector of maps that define for certain Values
484   ///                   referenced from the original code new Values they should
485   ///                   be replaced with. Each map in the vector of maps is
486   ///                   used for one vector lane. The number of elements in the
487   ///                   vector defines the width of the generated vector
488   ///                   instructions.
489   /// @param P          A reference to the pass this function is called from.
490   ///                   The pass is needed to update other analysis.
491   static void generate(IRBuilder<> &B, ScopStmt &Stmt,
492                        VectorValueMapT &GlobalMaps, __isl_keep isl_set *Domain,
493                        Pass *P) {
494     VectorBlockGenerator Generator(B, GlobalMaps, Stmt, Domain, P);
495     Generator.copyBB();
496   }
497 
498 private:
499   // This is a vector of global value maps.  The first map is used for the first
500   // vector lane, ...
501   // Each map, contains information about Instructions in the old ScoP, which
502   // are recalculated in the new SCoP. When copying the basic block, we replace
503   // all referenes to the old instructions with their recalculated values.
504   VectorValueMapT &GlobalMaps;
505 
506   isl_set *Domain;
507 
508   VectorBlockGenerator(IRBuilder<> &B, VectorValueMapT &GlobalMaps,
509                        ScopStmt &Stmt, __isl_keep isl_set *Domain, Pass *P);
510 
511   int getVectorWidth();
512 
513   Value *getVectorValue(const Value *Old, ValueMapT &VectorMap,
514                         VectorValueMapT &ScalarMaps);
515 
516   Type *getVectorPtrTy(const Value *V, int Width);
517 
518   /// @brief Load a vector from a set of adjacent scalars
519   ///
520   /// In case a set of scalars is known to be next to each other in memory,
521   /// create a vector load that loads those scalars
522   ///
523   /// %vector_ptr= bitcast double* %p to <4 x double>*
524   /// %vec_full = load <4 x double>* %vector_ptr
525   ///
526   Value *generateStrideOneLoad(const LoadInst *Load, ValueMapT &BBMap);
527 
528   /// @brief Load a vector initialized from a single scalar in memory
529   ///
530   /// In case all elements of a vector are initialized to the same
531   /// scalar value, this value is loaded and shuffeled into all elements
532   /// of the vector.
533   ///
534   /// %splat_one = load <1 x double>* %p
535   /// %splat = shufflevector <1 x double> %splat_one, <1 x
536   ///       double> %splat_one, <4 x i32> zeroinitializer
537   ///
538   Value *generateStrideZeroLoad(const LoadInst *Load, ValueMapT &BBMap);
539 
540   /// @Load a vector from scalars distributed in memory
541   ///
542   /// In case some scalars a distributed randomly in memory. Create a vector
543   /// by loading each scalar and by inserting one after the other into the
544   /// vector.
545   ///
546   /// %scalar_1= load double* %p_1
547   /// %vec_1 = insertelement <2 x double> undef, double %scalar_1, i32 0
548   /// %scalar 2 = load double* %p_2
549   /// %vec_2 = insertelement <2 x double> %vec_1, double %scalar_1, i32 1
550   ///
551   Value *generateUnknownStrideLoad(const LoadInst *Load,
552                                    VectorValueMapT &ScalarMaps);
553 
554   void generateLoad(const LoadInst *Load, ValueMapT &VectorMap,
555                     VectorValueMapT &ScalarMaps);
556 
557   void copyUnaryInst(const UnaryInstruction *Inst, ValueMapT &VectorMap,
558                      VectorValueMapT &ScalarMaps);
559 
560   void copyBinaryInst(const BinaryOperator *Inst, ValueMapT &VectorMap,
561                       VectorValueMapT &ScalarMaps);
562 
563   void copyStore(const StoreInst *Store, ValueMapT &VectorMap,
564                  VectorValueMapT &ScalarMaps);
565 
566   bool hasVectorOperands(const Instruction *Inst, ValueMapT &VectorMap);
567 
568   void copyInstruction(const Instruction *Inst, ValueMapT &VectorMap,
569                        VectorValueMapT &ScalarMaps);
570 
571   void copyBB();
572 };
573 
574 VectorBlockGenerator::VectorBlockGenerator(IRBuilder<> &B,
575   VectorValueMapT &GlobalMaps, ScopStmt &Stmt, __isl_keep isl_set *Domain,
576   Pass *P) : BlockGenerator(B, Stmt, P), GlobalMaps(GlobalMaps),
577   Domain(Domain) {
578     assert(GlobalMaps.size() > 1 && "Only one vector lane found");
579     assert(Domain && "No statement domain provided");
580   }
581 
582 Value *VectorBlockGenerator::getVectorValue(const Value *Old,
583                                             ValueMapT &VectorMap,
584                                             VectorValueMapT &ScalarMaps) {
585   if (VectorMap.count(Old))
586     return VectorMap[Old];
587 
588   int Width = getVectorWidth();
589 
590   Value *Vector = UndefValue::get(VectorType::get(Old->getType(), Width));
591 
592   for (int Lane = 0; Lane < Width; Lane++)
593     Vector = Builder.CreateInsertElement(Vector,
594                                          getNewValue(Old,
595                                                      ScalarMaps[Lane],
596                                                      GlobalMaps[Lane]),
597                                          Builder.getInt32(Lane));
598 
599   VectorMap[Old] = Vector;
600 
601   return Vector;
602 }
603 
604 Type *VectorBlockGenerator::getVectorPtrTy(const Value *Val, int Width) {
605   PointerType *PointerTy = dyn_cast<PointerType>(Val->getType());
606   assert(PointerTy && "PointerType expected");
607 
608   Type *ScalarType = PointerTy->getElementType();
609   VectorType *VectorType = VectorType::get(ScalarType, Width);
610 
611   return PointerType::getUnqual(VectorType);
612 }
613 
614 Value *VectorBlockGenerator::generateStrideOneLoad(const LoadInst *Load,
615                                                    ValueMapT &BBMap) {
616   const Value *Pointer = Load->getPointerOperand();
617   Type *VectorPtrType = getVectorPtrTy(Pointer, getVectorWidth());
618   Value *NewPointer = getNewValue(Pointer, BBMap, GlobalMaps[0]);
619   Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
620                                            "vector_ptr");
621   LoadInst *VecLoad = Builder.CreateLoad(VectorPtr,
622                                          Load->getName() + "_p_vec_full");
623   if (!Aligned)
624     VecLoad->setAlignment(8);
625 
626   return VecLoad;
627 }
628 
629 Value *VectorBlockGenerator::generateStrideZeroLoad(const LoadInst *Load,
630                                                     ValueMapT &BBMap) {
631   const Value *Pointer = Load->getPointerOperand();
632   Type *VectorPtrType = getVectorPtrTy(Pointer, 1);
633   Value *NewPointer = getNewValue(Pointer, BBMap, GlobalMaps[0]);
634   Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
635                                            Load->getName() + "_p_vec_p");
636   LoadInst *ScalarLoad= Builder.CreateLoad(VectorPtr,
637                                            Load->getName() + "_p_splat_one");
638 
639   if (!Aligned)
640     ScalarLoad->setAlignment(8);
641 
642   Constant *SplatVector =
643     Constant::getNullValue(VectorType::get(Builder.getInt32Ty(),
644                                            getVectorWidth()));
645 
646   Value *VectorLoad = Builder.CreateShuffleVector(ScalarLoad, ScalarLoad,
647                                                   SplatVector,
648                                                   Load->getName()
649                                                   + "_p_splat");
650   return VectorLoad;
651 }
652 
653 Value *VectorBlockGenerator::generateUnknownStrideLoad(const LoadInst *Load,
654   VectorValueMapT &ScalarMaps) {
655   int VectorWidth = getVectorWidth();
656   const Value *Pointer = Load->getPointerOperand();
657   VectorType *VectorType = VectorType::get(
658     dyn_cast<PointerType>(Pointer->getType())->getElementType(), VectorWidth);
659 
660   Value *Vector = UndefValue::get(VectorType);
661 
662   for (int i = 0; i < VectorWidth; i++) {
663     Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i]);
664     Value *ScalarLoad = Builder.CreateLoad(NewPointer,
665                                            Load->getName() + "_p_scalar_");
666     Vector = Builder.CreateInsertElement(Vector, ScalarLoad,
667                                          Builder.getInt32(i),
668                                          Load->getName() + "_p_vec_");
669   }
670 
671   return Vector;
672 }
673 
674 void VectorBlockGenerator::generateLoad(const LoadInst *Load,
675                                         ValueMapT &VectorMap,
676                                         VectorValueMapT &ScalarMaps) {
677   Value *NewLoad;
678 
679   MemoryAccess &Access = Statement.getAccessFor(Load);
680 
681   if (Access.isStrideZero(isl_set_copy(Domain)))
682     NewLoad = generateStrideZeroLoad(Load, ScalarMaps[0]);
683   else if (Access.isStrideOne(isl_set_copy(Domain)))
684     NewLoad = generateStrideOneLoad(Load, ScalarMaps[0]);
685   else
686     NewLoad = generateUnknownStrideLoad(Load, ScalarMaps);
687 
688   VectorMap[Load] = NewLoad;
689 }
690 
691 void VectorBlockGenerator::copyUnaryInst(const UnaryInstruction *Inst,
692                                          ValueMapT &VectorMap,
693                                          VectorValueMapT &ScalarMaps) {
694   int VectorWidth = getVectorWidth();
695   Value *NewOperand = getVectorValue(Inst->getOperand(0), VectorMap,
696                                      ScalarMaps);
697 
698   assert(isa<CastInst>(Inst) && "Can not generate vector code for instruction");
699 
700   const CastInst *Cast = dyn_cast<CastInst>(Inst);
701   VectorType *DestType = VectorType::get(Inst->getType(), VectorWidth);
702   VectorMap[Inst] = Builder.CreateCast(Cast->getOpcode(), NewOperand, DestType);
703 }
704 
705 void VectorBlockGenerator::copyBinaryInst(const BinaryOperator *Inst,
706                                           ValueMapT &VectorMap,
707                                           VectorValueMapT &ScalarMaps) {
708   Value *OpZero = Inst->getOperand(0);
709   Value *OpOne = Inst->getOperand(1);
710 
711   Value *NewOpZero, *NewOpOne;
712   NewOpZero = getVectorValue(OpZero, VectorMap, ScalarMaps);
713   NewOpOne = getVectorValue(OpOne, VectorMap, ScalarMaps);
714 
715   Value *NewInst = Builder.CreateBinOp(Inst->getOpcode(), NewOpZero,
716                                        NewOpOne,
717                                        Inst->getName() + "p_vec");
718   VectorMap[Inst] = NewInst;
719 }
720 
721 void VectorBlockGenerator::copyStore(const StoreInst *Store,
722                                      ValueMapT &VectorMap,
723                                      VectorValueMapT &ScalarMaps) {
724   int VectorWidth = getVectorWidth();
725 
726   MemoryAccess &Access = Statement.getAccessFor(Store);
727 
728   const Value *Pointer = Store->getPointerOperand();
729   Value *Vector = getVectorValue(Store->getValueOperand(), VectorMap,
730                                    ScalarMaps);
731 
732   if (Access.isStrideOne(isl_set_copy(Domain))) {
733     Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth);
734     Value *NewPointer = getNewValue(Pointer, ScalarMaps[0], GlobalMaps[0]);
735 
736     Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
737                                              "vector_ptr");
738     StoreInst *Store = Builder.CreateStore(Vector, VectorPtr);
739 
740     if (!Aligned)
741       Store->setAlignment(8);
742   } else {
743     for (unsigned i = 0; i < ScalarMaps.size(); i++) {
744       Value *Scalar = Builder.CreateExtractElement(Vector,
745                                                    Builder.getInt32(i));
746       Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i]);
747       Builder.CreateStore(Scalar, NewPointer);
748     }
749   }
750 }
751 
752 bool VectorBlockGenerator::hasVectorOperands(const Instruction *Inst,
753                                              ValueMapT &VectorMap) {
754   for (Instruction::const_op_iterator OI = Inst->op_begin(),
755        OE = Inst->op_end(); OI != OE; ++OI)
756     if (VectorMap.count(*OI))
757       return true;
758   return false;
759 }
760 
761 int VectorBlockGenerator::getVectorWidth() {
762   return GlobalMaps.size();
763 }
764 
765 void VectorBlockGenerator::copyInstruction(const Instruction *Inst,
766                                            ValueMapT &VectorMap,
767                                            VectorValueMapT &ScalarMaps) {
768   // Terminator instructions control the control flow. They are explicitly
769   // expressed in the clast and do not need to be copied.
770   if (Inst->isTerminator())
771     return;
772 
773   if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
774     generateLoad(Load, VectorMap, ScalarMaps);
775     return;
776   }
777 
778   if (hasVectorOperands(Inst, VectorMap)) {
779     if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
780       copyStore(Store, VectorMap, ScalarMaps);
781       return;
782     }
783 
784     if (const UnaryInstruction *Unary = dyn_cast<UnaryInstruction>(Inst)) {
785       copyUnaryInst(Unary, VectorMap, ScalarMaps);
786       return;
787     }
788 
789     if (const BinaryOperator *Binary = dyn_cast<BinaryOperator>(Inst)) {
790       copyBinaryInst(Binary, VectorMap, ScalarMaps);
791       return;
792     }
793 
794     llvm_unreachable("Cannot issue vector code for this instruction");
795   }
796 
797   for (int VectorLane = 0; VectorLane < getVectorWidth(); VectorLane++)
798     copyInstScalar(Inst, ScalarMaps[VectorLane], GlobalMaps[VectorLane]);
799 }
800 
801 void VectorBlockGenerator::copyBB() {
802   BasicBlock *BB = Statement.getBasicBlock();
803   BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(),
804                                   Builder.GetInsertPoint(), P);
805   CopyBB->setName("polly.stmt." + BB->getName());
806   Builder.SetInsertPoint(CopyBB->begin());
807 
808   // Create two maps that store the mapping from the original instructions of
809   // the old basic block to their copies in the new basic block. Those maps
810   // are basic block local.
811   //
812   // As vector code generation is supported there is one map for scalar values
813   // and one for vector values.
814   //
815   // In case we just do scalar code generation, the vectorMap is not used and
816   // the scalarMap has just one dimension, which contains the mapping.
817   //
818   // In case vector code generation is done, an instruction may either appear
819   // in the vector map once (as it is calculating >vectorwidth< values at a
820   // time. Or (if the values are calculated using scalar operations), it
821   // appears once in every dimension of the scalarMap.
822   VectorValueMapT ScalarBlockMap(getVectorWidth());
823   ValueMapT VectorBlockMap;
824 
825   for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end();
826        II != IE; ++II)
827       copyInstruction(II, VectorBlockMap, ScalarBlockMap);
828 }
829 
830 /// Class to generate LLVM-IR that calculates the value of a clast_expr.
831 class ClastExpCodeGen {
832   IRBuilder<> &Builder;
833   const CharMapT &IVS;
834 
835   Value *codegen(const clast_name *e, Type *Ty);
836   Value *codegen(const clast_term *e, Type *Ty);
837   Value *codegen(const clast_binary *e, Type *Ty);
838   Value *codegen(const clast_reduction *r, Type *Ty);
839 public:
840 
841   // A generator for clast expressions.
842   //
843   // @param B The IRBuilder that defines where the code to calculate the
844   //          clast expressions should be inserted.
845   // @param IVMAP A Map that translates strings describing the induction
846   //              variables to the Values* that represent these variables
847   //              on the LLVM side.
848   ClastExpCodeGen(IRBuilder<> &B, CharMapT &IVMap);
849 
850   // Generates code to calculate a given clast expression.
851   //
852   // @param e The expression to calculate.
853   // @return The Value that holds the result.
854   Value *codegen(const clast_expr *e, Type *Ty);
855 };
856 
857 Value *ClastExpCodeGen::codegen(const clast_name *e, Type *Ty) {
858   CharMapT::const_iterator I = IVS.find(e->name);
859 
860   assert(I != IVS.end() && "Clast name not found");
861 
862   return Builder.CreateSExtOrBitCast(I->second, Ty);
863 }
864 
865 Value *ClastExpCodeGen::codegen(const clast_term *e, Type *Ty) {
866   APInt a = APInt_from_MPZ(e->val);
867 
868   Value *ConstOne = ConstantInt::get(Builder.getContext(), a);
869   ConstOne = Builder.CreateSExtOrBitCast(ConstOne, Ty);
870 
871   if (!e->var)
872     return ConstOne;
873 
874   Value *var = codegen(e->var, Ty);
875   return Builder.CreateMul(ConstOne, var);
876 }
877 
878 Value *ClastExpCodeGen::codegen(const clast_binary *e, Type *Ty) {
879   Value *LHS = codegen(e->LHS, Ty);
880 
881   APInt RHS_AP = APInt_from_MPZ(e->RHS);
882 
883   Value *RHS = ConstantInt::get(Builder.getContext(), RHS_AP);
884   RHS = Builder.CreateSExtOrBitCast(RHS, Ty);
885 
886   switch (e->type) {
887   case clast_bin_mod:
888     return Builder.CreateSRem(LHS, RHS);
889   case clast_bin_fdiv:
890     {
891       // floord(n,d) ((n < 0) ? (n - d + 1) : n) / d
892       Value *One = ConstantInt::get(Ty, 1);
893       Value *Zero = ConstantInt::get(Ty, 0);
894       Value *Sum1 = Builder.CreateSub(LHS, RHS);
895       Value *Sum2 = Builder.CreateAdd(Sum1, One);
896       Value *isNegative = Builder.CreateICmpSLT(LHS, Zero);
897       Value *Dividend = Builder.CreateSelect(isNegative, Sum2, LHS);
898       return Builder.CreateSDiv(Dividend, RHS);
899     }
900   case clast_bin_cdiv:
901     {
902       // ceild(n,d) ((n < 0) ? n : (n + d - 1)) / d
903       Value *One = ConstantInt::get(Ty, 1);
904       Value *Zero = ConstantInt::get(Ty, 0);
905       Value *Sum1 = Builder.CreateAdd(LHS, RHS);
906       Value *Sum2 = Builder.CreateSub(Sum1, One);
907       Value *isNegative = Builder.CreateICmpSLT(LHS, Zero);
908       Value *Dividend = Builder.CreateSelect(isNegative, LHS, Sum2);
909       return Builder.CreateSDiv(Dividend, RHS);
910     }
911   case clast_bin_div:
912     return Builder.CreateSDiv(LHS, RHS);
913   };
914 
915   llvm_unreachable("Unknown clast binary expression type");
916 }
917 
918 Value *ClastExpCodeGen::codegen(const clast_reduction *r, Type *Ty) {
919   assert((   r->type == clast_red_min
920              || r->type == clast_red_max
921              || r->type == clast_red_sum)
922          && "Clast reduction type not supported");
923   Value *old = codegen(r->elts[0], Ty);
924 
925   for (int i=1; i < r->n; ++i) {
926     Value *exprValue = codegen(r->elts[i], Ty);
927 
928     switch (r->type) {
929     case clast_red_min:
930       {
931         Value *cmp = Builder.CreateICmpSLT(old, exprValue);
932         old = Builder.CreateSelect(cmp, old, exprValue);
933         break;
934       }
935     case clast_red_max:
936       {
937         Value *cmp = Builder.CreateICmpSGT(old, exprValue);
938         old = Builder.CreateSelect(cmp, old, exprValue);
939         break;
940       }
941     case clast_red_sum:
942       old = Builder.CreateAdd(old, exprValue);
943       break;
944     }
945   }
946 
947   return old;
948 }
949 
950 ClastExpCodeGen::ClastExpCodeGen(IRBuilder<> &B, CharMapT &IVMap)
951   : Builder(B), IVS(IVMap) {}
952 
953 Value *ClastExpCodeGen::codegen(const clast_expr *e, Type *Ty) {
954   switch(e->type) {
955   case clast_expr_name:
956     return codegen((const clast_name *)e, Ty);
957   case clast_expr_term:
958     return codegen((const clast_term *)e, Ty);
959   case clast_expr_bin:
960     return codegen((const clast_binary *)e, Ty);
961   case clast_expr_red:
962     return codegen((const clast_reduction *)e, Ty);
963   }
964 
965   llvm_unreachable("Unknown clast expression!");
966 }
967 
968 class ClastStmtCodeGen {
969 public:
970   const std::vector<std::string> &getParallelLoops();
971 
972 private:
973   // The Scop we code generate.
974   Scop *S;
975   Pass *P;
976 
977   // The Builder specifies the current location to code generate at.
978   IRBuilder<> &Builder;
979 
980   // Map the Values from the old code to their counterparts in the new code.
981   ValueMapT ValueMap;
982 
983   // clastVars maps from the textual representation of a clast variable to its
984   // current *Value. clast variables are scheduling variables, original
985   // induction variables or parameters. They are used either in loop bounds or
986   // to define the statement instance that is executed.
987   //
988   //   for (s = 0; s < n + 3; ++i)
989   //     for (t = s; t < m; ++j)
990   //       Stmt(i = s + 3 * m, j = t);
991   //
992   // {s,t,i,j,n,m} is the set of clast variables in this clast.
993   CharMapT ClastVars;
994 
995   // Codegenerator for clast expressions.
996   ClastExpCodeGen ExpGen;
997 
998   // Do we currently generate parallel code?
999   bool parallelCodeGeneration;
1000 
1001   std::vector<std::string> parallelLoops;
1002 
1003   void codegen(const clast_assignment *a);
1004 
1005   void codegen(const clast_assignment *a, ScopStmt *Statement,
1006                unsigned Dimension, int vectorDim,
1007                std::vector<ValueMapT> *VectorVMap = 0);
1008 
1009   void codegenSubstitutions(const clast_stmt *Assignment,
1010                             ScopStmt *Statement, int vectorDim = 0,
1011                             std::vector<ValueMapT> *VectorVMap = 0);
1012 
1013   void codegen(const clast_user_stmt *u, std::vector<Value*> *IVS = NULL,
1014                const char *iterator = NULL, isl_set *scatteringDomain = 0);
1015 
1016   void codegen(const clast_block *b);
1017 
1018   /// @brief Create a classical sequential loop.
1019   void codegenForSequential(const clast_for *f);
1020 
1021   /// @brief Create OpenMP structure values.
1022   ///
1023   /// Create a list of values that has to be stored into the OpenMP subfuncition
1024   /// structure.
1025   SetVector<Value*> getOMPValues();
1026 
1027   /// @brief Update the internal structures according to a Value Map.
1028   ///
1029   /// @param VMap     A map from old to new values.
1030   /// @param Reverse  If true, we assume the update should be reversed.
1031   void updateWithValueMap(OMPGenerator::ValueToValueMapTy &VMap,
1032                           bool Reverse);
1033 
1034   /// @brief Create an OpenMP parallel for loop.
1035   ///
1036   /// This loop reflects a loop as if it would have been created by an OpenMP
1037   /// statement.
1038   void codegenForOpenMP(const clast_for *f);
1039 
1040   bool isInnermostLoop(const clast_for *f);
1041 
1042   /// @brief Get the number of loop iterations for this loop.
1043   /// @param f The clast for loop to check.
1044   int getNumberOfIterations(const clast_for *f);
1045 
1046   /// @brief Create vector instructions for this loop.
1047   void codegenForVector(const clast_for *f);
1048 
1049   void codegen(const clast_for *f);
1050 
1051   Value *codegen(const clast_equation *eq);
1052 
1053   void codegen(const clast_guard *g);
1054 
1055   void codegen(const clast_stmt *stmt);
1056 
1057   void addParameters(const CloogNames *names);
1058 
1059   IntegerType *getIntPtrTy();
1060 
1061   public:
1062   void codegen(const clast_root *r);
1063 
1064   ClastStmtCodeGen(Scop *scop, IRBuilder<> &B, Pass *P);
1065 };
1066 }
1067 
1068 IntegerType *ClastStmtCodeGen::getIntPtrTy() {
1069   return P->getAnalysis<TargetData>().getIntPtrType(Builder.getContext());
1070 }
1071 
1072 const std::vector<std::string> &ClastStmtCodeGen::getParallelLoops() {
1073   return parallelLoops;
1074 }
1075 
1076 void ClastStmtCodeGen::codegen(const clast_assignment *a) {
1077   Value *V= ExpGen.codegen(a->RHS, getIntPtrTy());
1078   ClastVars[a->LHS] = V;
1079 }
1080 
1081 void ClastStmtCodeGen::codegen(const clast_assignment *A, ScopStmt *Stmt,
1082                                unsigned Dim, int VectorDim,
1083                                std::vector<ValueMapT> *VectorVMap) {
1084   const PHINode *PN;
1085   Value *RHS;
1086 
1087   assert(!A->LHS && "Statement assignments do not have left hand side");
1088 
1089   PN = Stmt->getInductionVariableForDimension(Dim);
1090   RHS = ExpGen.codegen(A->RHS, Builder.getInt64Ty());
1091   RHS = Builder.CreateTruncOrBitCast(RHS, PN->getType());
1092 
1093   if (VectorVMap)
1094     (*VectorVMap)[VectorDim][PN] = RHS;
1095 
1096   ValueMap[PN] = RHS;
1097 }
1098 
1099 void ClastStmtCodeGen::codegenSubstitutions(const clast_stmt *Assignment,
1100                                              ScopStmt *Statement, int vectorDim,
1101   std::vector<ValueMapT> *VectorVMap) {
1102   int Dimension = 0;
1103 
1104   while (Assignment) {
1105     assert(CLAST_STMT_IS_A(Assignment, stmt_ass)
1106            && "Substitions are expected to be assignments");
1107     codegen((const clast_assignment *)Assignment, Statement, Dimension,
1108             vectorDim, VectorVMap);
1109     Assignment = Assignment->next;
1110     Dimension++;
1111   }
1112 }
1113 
1114 void ClastStmtCodeGen::codegen(const clast_user_stmt *u,
1115                                std::vector<Value*> *IVS , const char *iterator,
1116                                isl_set *Domain) {
1117   ScopStmt *Statement = (ScopStmt *)u->statement->usr;
1118 
1119   if (u->substitutions)
1120     codegenSubstitutions(u->substitutions, Statement);
1121 
1122   int VectorDimensions = IVS ? IVS->size() : 1;
1123 
1124   if (VectorDimensions == 1) {
1125     BlockGenerator::generate(Builder, *Statement, ValueMap, P);
1126     return;
1127   }
1128 
1129   VectorValueMapT VectorMap(VectorDimensions);
1130 
1131   if (IVS) {
1132     assert (u->substitutions && "Substitutions expected!");
1133     int i = 0;
1134     for (std::vector<Value*>::iterator II = IVS->begin(), IE = IVS->end();
1135          II != IE; ++II) {
1136       ClastVars[iterator] = *II;
1137       codegenSubstitutions(u->substitutions, Statement, i, &VectorMap);
1138       i++;
1139     }
1140   }
1141 
1142   VectorBlockGenerator::generate(Builder, *Statement, VectorMap, Domain, P);
1143 }
1144 
1145 void ClastStmtCodeGen::codegen(const clast_block *b) {
1146   if (b->body)
1147     codegen(b->body);
1148 }
1149 
1150 void ClastStmtCodeGen::codegenForSequential(const clast_for *f) {
1151   Value *LowerBound, *UpperBound, *IV, *Stride;
1152   BasicBlock *AfterBB;
1153   Type *IntPtrTy = getIntPtrTy();
1154 
1155   LowerBound = ExpGen.codegen(f->LB, IntPtrTy);
1156   UpperBound = ExpGen.codegen(f->UB, IntPtrTy);
1157   Stride = Builder.getInt(APInt_from_MPZ(f->stride));
1158 
1159   IV = createLoop(LowerBound, UpperBound, Stride, &Builder, P, &AfterBB);
1160 
1161   // Add loop iv to symbols.
1162   ClastVars[f->iterator] = IV;
1163 
1164   if (f->body)
1165     codegen(f->body);
1166 
1167   // Loop is finished, so remove its iv from the live symbols.
1168   ClastVars.erase(f->iterator);
1169   Builder.SetInsertPoint(AfterBB->begin());
1170 }
1171 
1172 SetVector<Value*> ClastStmtCodeGen::getOMPValues() {
1173   SetVector<Value*> Values;
1174 
1175   // The clast variables
1176   for (CharMapT::iterator I = ClastVars.begin(), E = ClastVars.end();
1177        I != E; I++)
1178     Values.insert(I->second);
1179 
1180   // The memory reference base addresses
1181   for (Scop::iterator SI = S->begin(), SE = S->end(); SI != SE; ++SI) {
1182     ScopStmt *Stmt = *SI;
1183     for (SmallVector<MemoryAccess*, 8>::iterator I = Stmt->memacc_begin(),
1184          E = Stmt->memacc_end(); I != E; ++I) {
1185       Value *BaseAddr = const_cast<Value*>((*I)->getBaseAddr());
1186       Values.insert((BaseAddr));
1187     }
1188   }
1189 
1190   return Values;
1191 }
1192 
1193 void ClastStmtCodeGen::updateWithValueMap(OMPGenerator::ValueToValueMapTy &VMap,
1194                                           bool Reverse) {
1195   std::set<Value*> Inserted;
1196 
1197   if (Reverse) {
1198     OMPGenerator::ValueToValueMapTy ReverseMap;
1199 
1200     for (std::map<Value*, Value*>::iterator I = VMap.begin(), E = VMap.end();
1201          I != E; ++I)
1202        ReverseMap.insert(std::make_pair(I->second, I->first));
1203 
1204     for (CharMapT::iterator I = ClastVars.begin(), E = ClastVars.end();
1205          I != E; I++) {
1206       ClastVars[I->first] = ReverseMap[I->second];
1207       Inserted.insert(I->second);
1208     }
1209 
1210     /// FIXME: At the moment we do not reverse the update of the ValueMap.
1211     ///        This is incomplet, but the failure should be obvious, such that
1212     ///        we can fix this later.
1213     return;
1214   }
1215 
1216   for (CharMapT::iterator I = ClastVars.begin(), E = ClastVars.end();
1217        I != E; I++) {
1218     ClastVars[I->first] = VMap[I->second];
1219     Inserted.insert(I->second);
1220   }
1221 
1222   for (std::map<Value*, Value*>::iterator I = VMap.begin(), E = VMap.end();
1223        I != E; ++I) {
1224     if (Inserted.count(I->first))
1225       continue;
1226 
1227     ValueMap[I->first] = I->second;
1228   }
1229 }
1230 
1231 static void clearDomtree(Function *F, DominatorTree &DT) {
1232   DomTreeNode *N = DT.getNode(&F->getEntryBlock());
1233   std::vector<BasicBlock*> Nodes;
1234   for (po_iterator<DomTreeNode*> I = po_begin(N), E = po_end(N); I != E; ++I)
1235     Nodes.push_back(I->getBlock());
1236 
1237   for (std::vector<BasicBlock*>::iterator I = Nodes.begin(), E = Nodes.end();
1238        I != E; ++I)
1239     DT.eraseNode(*I);
1240 }
1241 
1242 void ClastStmtCodeGen::codegenForOpenMP(const clast_for *For) {
1243   Value *Stride, *LB, *UB, *IV;
1244   BasicBlock::iterator LoopBody;
1245   IntegerType *IntPtrTy = getIntPtrTy();
1246   SetVector<Value*> Values;
1247   OMPGenerator::ValueToValueMapTy VMap;
1248   OMPGenerator OMPGen(Builder, P);
1249 
1250   Stride = Builder.getInt(APInt_from_MPZ(For->stride));
1251   Stride = Builder.CreateSExtOrBitCast(Stride, IntPtrTy);
1252   LB = ExpGen.codegen(For->LB, IntPtrTy);
1253   UB = ExpGen.codegen(For->UB, IntPtrTy);
1254 
1255   Values = getOMPValues();
1256 
1257   IV = OMPGen.createParallelLoop(LB, UB, Stride, Values, VMap, &LoopBody);
1258   BasicBlock::iterator AfterLoop = Builder.GetInsertPoint();
1259   Builder.SetInsertPoint(LoopBody);
1260 
1261   updateWithValueMap(VMap, /* reverse */ false);
1262   ClastVars[For->iterator] = IV;
1263 
1264   if (For->body)
1265     codegen(For->body);
1266 
1267   ClastVars.erase(For->iterator);
1268   updateWithValueMap(VMap, /* reverse */ true);
1269 
1270   clearDomtree((*LoopBody).getParent()->getParent(),
1271                P->getAnalysis<DominatorTree>());
1272 
1273   Builder.SetInsertPoint(AfterLoop);
1274 }
1275 
1276 bool ClastStmtCodeGen::isInnermostLoop(const clast_for *f) {
1277   const clast_stmt *stmt = f->body;
1278 
1279   while (stmt) {
1280     if (!CLAST_STMT_IS_A(stmt, stmt_user))
1281       return false;
1282 
1283     stmt = stmt->next;
1284   }
1285 
1286   return true;
1287 }
1288 
1289 int ClastStmtCodeGen::getNumberOfIterations(const clast_for *f) {
1290   isl_set *loopDomain = isl_set_copy(isl_set_from_cloog_domain(f->domain));
1291   isl_set *tmp = isl_set_copy(loopDomain);
1292 
1293   // Calculate a map similar to the identity map, but with the last input
1294   // and output dimension not related.
1295   //  [i0, i1, i2, i3] -> [i0, i1, i2, o0]
1296   isl_space *Space = isl_set_get_space(loopDomain);
1297   Space = isl_space_drop_outputs(Space,
1298                                  isl_set_dim(loopDomain, isl_dim_set) - 2, 1);
1299   Space = isl_space_map_from_set(Space);
1300   isl_map *identity = isl_map_identity(Space);
1301   identity = isl_map_add_dims(identity, isl_dim_in, 1);
1302   identity = isl_map_add_dims(identity, isl_dim_out, 1);
1303 
1304   isl_map *map = isl_map_from_domain_and_range(tmp, loopDomain);
1305   map = isl_map_intersect(map, identity);
1306 
1307   isl_map *lexmax = isl_map_lexmax(isl_map_copy(map));
1308   isl_map *lexmin = isl_map_lexmin(map);
1309   isl_map *sub = isl_map_sum(lexmax, isl_map_neg(lexmin));
1310 
1311   isl_set *elements = isl_map_range(sub);
1312 
1313   if (!isl_set_is_singleton(elements)) {
1314     isl_set_free(elements);
1315     return -1;
1316   }
1317 
1318   isl_point *p = isl_set_sample_point(elements);
1319 
1320   isl_int v;
1321   isl_int_init(v);
1322   isl_point_get_coordinate(p, isl_dim_set, isl_set_n_dim(loopDomain) - 1, &v);
1323   int numberIterations = isl_int_get_si(v);
1324   isl_int_clear(v);
1325   isl_point_free(p);
1326 
1327   return (numberIterations) / isl_int_get_si(f->stride) + 1;
1328 }
1329 
1330 void ClastStmtCodeGen::codegenForVector(const clast_for *F) {
1331   DEBUG(dbgs() << "Vectorizing loop '" << F->iterator << "'\n";);
1332   int VectorWidth = getNumberOfIterations(F);
1333 
1334   Value *LB = ExpGen.codegen(F->LB, getIntPtrTy());
1335 
1336   APInt Stride = APInt_from_MPZ(F->stride);
1337   IntegerType *LoopIVType = dyn_cast<IntegerType>(LB->getType());
1338   Stride =  Stride.zext(LoopIVType->getBitWidth());
1339   Value *StrideValue = ConstantInt::get(LoopIVType, Stride);
1340 
1341   std::vector<Value*> IVS(VectorWidth);
1342   IVS[0] = LB;
1343 
1344   for (int i = 1; i < VectorWidth; i++)
1345     IVS[i] = Builder.CreateAdd(IVS[i-1], StrideValue, "p_vector_iv");
1346 
1347   isl_set *Domain = isl_set_from_cloog_domain(F->domain);
1348 
1349   // Add loop iv to symbols.
1350   ClastVars[F->iterator] = LB;
1351 
1352   const clast_stmt *Stmt = F->body;
1353 
1354   while (Stmt) {
1355     codegen((const clast_user_stmt *)Stmt, &IVS, F->iterator,
1356             isl_set_copy(Domain));
1357     Stmt = Stmt->next;
1358   }
1359 
1360   // Loop is finished, so remove its iv from the live symbols.
1361   isl_set_free(Domain);
1362   ClastVars.erase(F->iterator);
1363 }
1364 
1365 void ClastStmtCodeGen::codegen(const clast_for *f) {
1366   if ((Vector || OpenMP) && P->getAnalysis<Dependences>().isParallelFor(f)) {
1367     if (Vector && isInnermostLoop(f) && (-1 != getNumberOfIterations(f))
1368         && (getNumberOfIterations(f) <= 16)) {
1369       codegenForVector(f);
1370       return;
1371     }
1372 
1373     if (OpenMP && !parallelCodeGeneration) {
1374       parallelCodeGeneration = true;
1375       parallelLoops.push_back(f->iterator);
1376       codegenForOpenMP(f);
1377       parallelCodeGeneration = false;
1378       return;
1379     }
1380   }
1381 
1382   codegenForSequential(f);
1383 }
1384 
1385 Value *ClastStmtCodeGen::codegen(const clast_equation *eq) {
1386   Value *LHS = ExpGen.codegen(eq->LHS, getIntPtrTy());
1387   Value *RHS = ExpGen.codegen(eq->RHS, getIntPtrTy());
1388   CmpInst::Predicate P;
1389 
1390   if (eq->sign == 0)
1391     P = ICmpInst::ICMP_EQ;
1392   else if (eq->sign > 0)
1393     P = ICmpInst::ICMP_SGE;
1394   else
1395     P = ICmpInst::ICMP_SLE;
1396 
1397   return Builder.CreateICmp(P, LHS, RHS);
1398 }
1399 
1400 void ClastStmtCodeGen::codegen(const clast_guard *g) {
1401   Function *F = Builder.GetInsertBlock()->getParent();
1402   LLVMContext &Context = F->getContext();
1403 
1404   BasicBlock *CondBB = SplitBlock(Builder.GetInsertBlock(),
1405                                       Builder.GetInsertPoint(), P);
1406   CondBB->setName("polly.cond");
1407   BasicBlock *MergeBB = SplitBlock(CondBB, CondBB->begin(), P);
1408   MergeBB->setName("polly.merge");
1409   BasicBlock *ThenBB = BasicBlock::Create(Context, "polly.then", F);
1410 
1411   DominatorTree &DT = P->getAnalysis<DominatorTree>();
1412   DT.addNewBlock(ThenBB, CondBB);
1413   DT.changeImmediateDominator(MergeBB, CondBB);
1414 
1415   CondBB->getTerminator()->eraseFromParent();
1416 
1417   Builder.SetInsertPoint(CondBB);
1418 
1419   Value *Predicate = codegen(&(g->eq[0]));
1420 
1421   for (int i = 1; i < g->n; ++i) {
1422     Value *TmpPredicate = codegen(&(g->eq[i]));
1423     Predicate = Builder.CreateAnd(Predicate, TmpPredicate);
1424   }
1425 
1426   Builder.CreateCondBr(Predicate, ThenBB, MergeBB);
1427   Builder.SetInsertPoint(ThenBB);
1428   Builder.CreateBr(MergeBB);
1429   Builder.SetInsertPoint(ThenBB->begin());
1430 
1431   codegen(g->then);
1432 
1433   Builder.SetInsertPoint(MergeBB->begin());
1434 }
1435 
1436 void ClastStmtCodeGen::codegen(const clast_stmt *stmt) {
1437   if	    (CLAST_STMT_IS_A(stmt, stmt_root))
1438     assert(false && "No second root statement expected");
1439   else if (CLAST_STMT_IS_A(stmt, stmt_ass))
1440     codegen((const clast_assignment *)stmt);
1441   else if (CLAST_STMT_IS_A(stmt, stmt_user))
1442     codegen((const clast_user_stmt *)stmt);
1443   else if (CLAST_STMT_IS_A(stmt, stmt_block))
1444     codegen((const clast_block *)stmt);
1445   else if (CLAST_STMT_IS_A(stmt, stmt_for))
1446     codegen((const clast_for *)stmt);
1447   else if (CLAST_STMT_IS_A(stmt, stmt_guard))
1448     codegen((const clast_guard *)stmt);
1449 
1450   if (stmt->next)
1451     codegen(stmt->next);
1452 }
1453 
1454 void ClastStmtCodeGen::addParameters(const CloogNames *names) {
1455   SCEVExpander Rewriter(P->getAnalysis<ScalarEvolution>(), "polly");
1456 
1457   int i = 0;
1458   for (Scop::param_iterator PI = S->param_begin(), PE = S->param_end();
1459        PI != PE; ++PI) {
1460     assert(i < names->nb_parameters && "Not enough parameter names");
1461 
1462     const SCEV *Param = *PI;
1463     Type *Ty = Param->getType();
1464 
1465     Instruction *insertLocation = --(Builder.GetInsertBlock()->end());
1466     Value *V = Rewriter.expandCodeFor(Param, Ty, insertLocation);
1467     ClastVars[names->parameters[i]] = V;
1468 
1469     ++i;
1470   }
1471 }
1472 
1473 void ClastStmtCodeGen::codegen(const clast_root *r) {
1474   addParameters(r->names);
1475 
1476   parallelCodeGeneration = false;
1477 
1478   const clast_stmt *stmt = (const clast_stmt*) r;
1479   if (stmt->next)
1480     codegen(stmt->next);
1481 }
1482 
1483 ClastStmtCodeGen::ClastStmtCodeGen(Scop *scop, IRBuilder<> &B, Pass *P) :
1484     S(scop), P(P), Builder(B), ExpGen(Builder, ClastVars) {}
1485 
1486 namespace {
1487 class CodeGeneration : public ScopPass {
1488   Region *region;
1489   Scop *S;
1490   DominatorTree *DT;
1491   RegionInfo *RI;
1492 
1493   std::vector<std::string> parallelLoops;
1494 
1495   public:
1496   static char ID;
1497 
1498   CodeGeneration() : ScopPass(ID) {}
1499 
1500   // Split the entry edge of the region and generate a new basic block on this
1501   // edge. This function also updates ScopInfo and RegionInfo.
1502   //
1503   // @param region The region where the entry edge will be splitted.
1504   BasicBlock *splitEdgeAdvanced(Region *region) {
1505     BasicBlock *newBlock;
1506     BasicBlock *splitBlock;
1507 
1508     newBlock = SplitEdge(region->getEnteringBlock(), region->getEntry(), this);
1509 
1510     if (DT->dominates(region->getEntry(), newBlock)) {
1511       BasicBlock *OldBlock = region->getEntry();
1512       std::string OldName = OldBlock->getName();
1513 
1514       // Update ScopInfo.
1515       for (Scop::iterator SI = S->begin(), SE = S->end(); SI != SE; ++SI)
1516         if ((*SI)->getBasicBlock() == OldBlock) {
1517           (*SI)->setBasicBlock(newBlock);
1518           break;
1519         }
1520 
1521       // Update RegionInfo.
1522       splitBlock = OldBlock;
1523       OldBlock->setName("polly.split");
1524       newBlock->setName(OldName);
1525       region->replaceEntry(newBlock);
1526       RI->setRegionFor(newBlock, region);
1527     } else {
1528       RI->setRegionFor(newBlock, region->getParent());
1529       splitBlock = newBlock;
1530     }
1531 
1532     return splitBlock;
1533   }
1534 
1535   // Create a split block that branches either to the old code or to a new basic
1536   // block where the new code can be inserted.
1537   //
1538   // @param Builder A builder that will be set to point to a basic block, where
1539   //                the new code can be generated.
1540   // @return The split basic block.
1541   BasicBlock *addSplitAndStartBlock(IRBuilder<> *Builder) {
1542     BasicBlock *StartBlock, *SplitBlock;
1543 
1544     SplitBlock = splitEdgeAdvanced(region);
1545     SplitBlock->setName("polly.split_new_and_old");
1546     Function *F = SplitBlock->getParent();
1547     StartBlock = BasicBlock::Create(F->getContext(), "polly.start", F);
1548     SplitBlock->getTerminator()->eraseFromParent();
1549     Builder->SetInsertPoint(SplitBlock);
1550     Builder->CreateCondBr(Builder->getTrue(), StartBlock, region->getEntry());
1551     DT->addNewBlock(StartBlock, SplitBlock);
1552     Builder->SetInsertPoint(StartBlock);
1553     return SplitBlock;
1554   }
1555 
1556   // Merge the control flow of the newly generated code with the existing code.
1557   //
1558   // @param SplitBlock The basic block where the control flow was split between
1559   //                   old and new version of the Scop.
1560   // @param Builder    An IRBuilder that points to the last instruction of the
1561   //                   newly generated code.
1562   void mergeControlFlow(BasicBlock *SplitBlock, IRBuilder<> *Builder) {
1563     BasicBlock *MergeBlock;
1564     Region *R = region;
1565 
1566     if (R->getExit()->getSinglePredecessor())
1567       // No splitEdge required.  A block with a single predecessor cannot have
1568       // PHI nodes that would complicate life.
1569       MergeBlock = R->getExit();
1570     else {
1571       MergeBlock = SplitEdge(R->getExitingBlock(), R->getExit(), this);
1572       // SplitEdge will never split R->getExit(), as R->getExit() has more than
1573       // one predecessor. Hence, mergeBlock is always a newly generated block.
1574       R->replaceExit(MergeBlock);
1575     }
1576 
1577     Builder->CreateBr(MergeBlock);
1578     MergeBlock->setName("polly.merge_new_and_old");
1579 
1580     if (DT->dominates(SplitBlock, MergeBlock))
1581       DT->changeImmediateDominator(MergeBlock, SplitBlock);
1582   }
1583 
1584   bool runOnScop(Scop &scop) {
1585     S = &scop;
1586     region = &S->getRegion();
1587     DT = &getAnalysis<DominatorTree>();
1588     RI = &getAnalysis<RegionInfo>();
1589 
1590     parallelLoops.clear();
1591 
1592     assert(region->isSimple() && "Only simple regions are supported");
1593 
1594     // In the CFG the optimized code of the SCoP is generated next to the
1595     // original code. Both the new and the original version of the code remain
1596     // in the CFG. A branch statement decides which version is executed.
1597     // For now, we always execute the new version (the old one is dead code
1598     // eliminated by the cleanup passes). In the future we may decide to execute
1599     // the new version only if certain run time checks succeed. This will be
1600     // useful to support constructs for which we cannot prove all assumptions at
1601     // compile time.
1602     //
1603     // Before transformation:
1604     //
1605     //                        bb0
1606     //                         |
1607     //                     orig_scop
1608     //                         |
1609     //                        bb1
1610     //
1611     // After transformation:
1612     //                        bb0
1613     //                         |
1614     //                  polly.splitBlock
1615     //                     /       \.
1616     //                     |     startBlock
1617     //                     |        |
1618     //               orig_scop   new_scop
1619     //                     \      /
1620     //                      \    /
1621     //                        bb1 (joinBlock)
1622     IRBuilder<> builder(region->getEntry());
1623 
1624     // The builder will be set to startBlock.
1625     BasicBlock *splitBlock = addSplitAndStartBlock(&builder);
1626     BasicBlock *StartBlock = builder.GetInsertBlock();
1627 
1628     mergeControlFlow(splitBlock, &builder);
1629     builder.SetInsertPoint(StartBlock->begin());
1630 
1631     ClastStmtCodeGen CodeGen(S, builder, this);
1632     CloogInfo &C = getAnalysis<CloogInfo>();
1633     CodeGen.codegen(C.getClast());
1634 
1635     parallelLoops.insert(parallelLoops.begin(),
1636                          CodeGen.getParallelLoops().begin(),
1637                          CodeGen.getParallelLoops().end());
1638 
1639     return true;
1640   }
1641 
1642   virtual void printScop(raw_ostream &OS) const {
1643     for (std::vector<std::string>::const_iterator PI = parallelLoops.begin(),
1644          PE = parallelLoops.end(); PI != PE; ++PI)
1645       OS << "Parallel loop with iterator '" << *PI << "' generated\n";
1646   }
1647 
1648   virtual void getAnalysisUsage(AnalysisUsage &AU) const {
1649     AU.addRequired<CloogInfo>();
1650     AU.addRequired<Dependences>();
1651     AU.addRequired<DominatorTree>();
1652     AU.addRequired<RegionInfo>();
1653     AU.addRequired<ScalarEvolution>();
1654     AU.addRequired<ScopDetection>();
1655     AU.addRequired<ScopInfo>();
1656     AU.addRequired<TargetData>();
1657 
1658     AU.addPreserved<CloogInfo>();
1659     AU.addPreserved<Dependences>();
1660 
1661     // FIXME: We do not create LoopInfo for the newly generated loops.
1662     AU.addPreserved<LoopInfo>();
1663     AU.addPreserved<DominatorTree>();
1664     AU.addPreserved<ScopDetection>();
1665     AU.addPreserved<ScalarEvolution>();
1666 
1667     // FIXME: We do not yet add regions for the newly generated code to the
1668     //        region tree.
1669     AU.addPreserved<RegionInfo>();
1670     AU.addPreserved<TempScopInfo>();
1671     AU.addPreserved<ScopInfo>();
1672     AU.addPreservedID(IndependentBlocksID);
1673   }
1674 };
1675 }
1676 
1677 char CodeGeneration::ID = 1;
1678 
1679 INITIALIZE_PASS_BEGIN(CodeGeneration, "polly-codegen",
1680                       "Polly - Create LLVM-IR from SCoPs", false, false)
1681 INITIALIZE_PASS_DEPENDENCY(CloogInfo)
1682 INITIALIZE_PASS_DEPENDENCY(Dependences)
1683 INITIALIZE_PASS_DEPENDENCY(DominatorTree)
1684 INITIALIZE_PASS_DEPENDENCY(RegionInfo)
1685 INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
1686 INITIALIZE_PASS_DEPENDENCY(ScopDetection)
1687 INITIALIZE_PASS_DEPENDENCY(TargetData)
1688 INITIALIZE_PASS_END(CodeGeneration, "polly-codegen",
1689                       "Polly - Create LLVM-IR from SCoPs", false, false)
1690 
1691 Pass *polly::createCodeGenerationPass() {
1692   return new CodeGeneration();
1693 }
1694