1 //===--- BlockGenerators.cpp - Generate code for statements -----*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the BlockGenerator and VectorBlockGenerator classes,
11 // which generate sequential code and vectorized code for a polyhedral
12 // statement, respectively.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "polly/ScopInfo.h"
17 #include "polly/CodeGen/BlockGenerators.h"
18 #include "polly/Support/GICHelper.h"
19 
20 #include "llvm/Analysis/LoopInfo.h"
21 #include "llvm/Analysis/ScalarEvolution.h"
22 #include "llvm/Analysis/ScalarEvolutionExpander.h"
23 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
24 #include "llvm/Support/CommandLine.h"
25 
26 #include "isl/aff.h"
27 #include "isl/set.h"
28 
29 using namespace llvm;
30 using namespace polly;
31 
32 static cl::opt<bool>
33 Aligned("enable-polly-aligned",
34        cl::desc("Assumed aligned memory accesses."), cl::Hidden,
35        cl::value_desc("OpenMP code generation enabled if true"),
36        cl::init(false), cl::ZeroOrMore);
37 
38 static cl::opt<bool>
39 GroupedUnrolling("enable-polly-grouped-unroll",
40                  cl::desc("Perform grouped unrolling, but don't generate SIMD "
41                           "instuctions"), cl::Hidden, cl::init(false),
42                  cl::ZeroOrMore);
43 
// When enabled, values are regenerated from their scalar evolution expression
// instead of relying on the independent-blocks rewriting (see SCEVRewriter).
static cl::opt<bool>
SCEVCodegen("polly-codegen-scev",
            cl::desc("Use SCEV based code generation."), cl::Hidden,
            cl::init(false), cl::ZeroOrMore);
48 
49 /// The SCEVRewriter takes a scalar evolution expression and updates the
50 /// following components:
51 ///
52 /// - SCEVUnknown
53 ///
54 ///   Values referenced in SCEVUnknown subexpressions are looked up in
55 ///   two Value to Value maps (GlobalMap and BBMap). If they are found they are
56 ///   replaced by a reference to the value they map to.
57 ///
58 /// - SCEVAddRecExpr
59 ///
60 ///   Based on a Loop -> Value map {Loop_1: %Value}, an expression
61 ///   {%Base, +, %Step}<Loop_1> is rewritten to %Base + %Value * %Step.
62 ///   AddRecExpr's with more than two operands can not be translated.
63 ///
64 ///   FIXME: The comment above is not yet reality. At the moment we derive
65 ///   %Value by looking up the canonical IV of the loop and by defining
66 ///   %Value = GlobalMap[%IV]. This needs to be changed to remove the need for
67 ///   canonical induction variables.
68 ///
69 ///
70 /// How can this be used?
71 /// ====================
72 ///
73 /// SCEVRewrite based code generation works on virtually independent blocks.
74 /// This means we do not run the independent blocks pass to rewrite scalar
75 /// instructions, but just ignore instructions that we can analyze with scalar
76 /// evolution. Virtually independent blocks are blocks that only reference the
77 /// following values:
78 ///
79 /// o Values calculated within a basic block
80 /// o Values representable by SCEV
81 ///
82 /// During code generation we can ignore all instructions:
83 ///
84 /// - Ignore all instructions except:
85 ///   - Load instructions
86 ///   - Instructions that reference operands already calculated within the
87 ///     basic block.
88 ///   - Store instructions
struct SCEVRewriter : public SCEVVisitor<SCEVRewriter, const SCEV*> {
public:
  /// Rewrite @p scev in the context of SCoP @p S: values in SCEVUnknown
  /// subexpressions are remapped through @p GlobalMap / @p BBMap, and
  /// AddRecExprs are re-expressed in terms of the new induction variables.
  static const SCEV *rewrite(const SCEV *scev, Scop &S, ScalarEvolution &SE,
                             ValueMapT &GlobalMap,  ValueMapT &BBMap) {
    SCEVRewriter Rewriter(S, SE, GlobalMap, BBMap);
    return Rewriter.visit(scev);
  }

  SCEVRewriter(Scop &S, ScalarEvolution &SE, ValueMapT &GlobalMap,
               ValueMapT &BBMap) : S(S), SE(SE), GlobalMap(GlobalMap),
               BBMap(BBMap) {}

  const SCEV *visit(const SCEV *Expr) {
    // FIXME: The parameter handling is incorrect.
    //
    // Polly does only detect parameters in Access function and loop iteration
    // counters, but it does not get parameters that are just used by
    // instructions within the basic block.
    //
    // There are two options to solve this:
    //  o Iterate over all instructions of the SCoP and find the actual
    //    parameters.
    //  o Just check within the SCEVRewriter if Values lay outside of the SCoP
    //    and detect parameters on the fly.
    //
    // This is especially important for OpenMP and GPGPU code generation, as
    // they require us to detect and possibly rewrite the corresponding
    // parameters.

    // SCoP parameters stay valid in the generated code; leave them untouched.
    if (isl_id *Id = S.getIdForParam(Expr)) {
      isl_id_free(Id);
      return Expr;
    }


    return SCEVVisitor<SCEVRewriter, const SCEV*>::visit(Expr);
  }

  // Constants never need rewriting.
  const SCEV *visitConstant(const SCEVConstant *Constant) {
    return Constant;
  }

  // The cast visitors rebuild the expression around the rewritten operand.
  const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) {
    const SCEV *Operand = visit(Expr->getOperand());
    return SE.getTruncateExpr(Operand, Expr->getType());
  }

  const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
    const SCEV *Operand = visit(Expr->getOperand());
    return SE.getZeroExtendExpr(Operand, Expr->getType());
  }

  const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
    const SCEV *Operand = visit(Expr->getOperand());
    return SE.getSignExtendExpr(Operand, Expr->getType());
  }

  // N-ary visitors rewrite each operand, then rebuild the expression.
  const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
    SmallVector<const SCEV *, 2> Operands;
    for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
      const SCEV *Operand = visit(Expr->getOperand(i));
      Operands.push_back(Operand);
    }

    return SE.getAddExpr(Operands);
  }

  const SCEV *visitMulExpr(const SCEVMulExpr *Expr) {
    SmallVector<const SCEV *, 2> Operands;
    for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
      const SCEV *Operand = visit(Expr->getOperand(i));
      Operands.push_back(Operand);
    }

    return SE.getMulExpr(Operands);
  }

  const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) {
    return SE.getUDivExpr(visit(Expr->getLHS()), visit(Expr->getRHS()));
  }

  // Return a new induction variable if the loop is within the original SCoP
  // or NULL otherwise.
  Value *getNewIV(const Loop *L) {
    Value *IV = L->getCanonicalInductionVariable();
    if (!IV)
      return NULL;

    ValueMapT::iterator NewIV = GlobalMap.find(IV);

    if (NewIV == GlobalMap.end())
      return NULL;

    return NewIV->second;
  }

  // Rewrite {Base, +, Step}<L> as Base + NewIV * Step, where NewIV is the
  // regenerated induction variable of L (see the file-level comment's FIXME).
  const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
    Value *IV;

    IV = getNewIV(Expr->getLoop());

    // The IV is not within the GlobalMaps. So do not rewrite it and also do
    // not rewrite any descendants.
    if (!IV)
      return Expr;

    assert(Expr->getNumOperands() == 2
          && "An AddRecExpr with more than two operands can not be rewritten.");

    const SCEV *Base, *Step, *IVExpr, *Product;

    Base = visit(Expr->getStart());
    Step = visit(Expr->getOperand(1));
    // Match the IV's type to the step before multiplying.
    IVExpr = SE.getUnknown(IV);
    IVExpr = SE.getTruncateOrSignExtend(IVExpr, Step->getType());
    Product = SE.getMulExpr(Step, IVExpr);

    return SE.getAddExpr(Base, Product);
  }

  const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
    SmallVector<const SCEV *, 2> Operands;
    for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
      const SCEV *Operand = visit(Expr->getOperand(i));
      Operands.push_back(Operand);
    }

    return SE.getSMaxExpr(Operands);
  }

  const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) {
    SmallVector<const SCEV *, 2> Operands;
    for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
      const SCEV *Operand = visit(Expr->getOperand(i));
      Operands.push_back(Operand);
    }

    return SE.getUMaxExpr(Operands);
  }

  // Leaf values: GlobalMap takes precedence over BBMap; unmapped values are
  // returned unchanged.
  const SCEV *visitUnknown(const SCEVUnknown *Expr) {
    Value *V = Expr->getValue();

    if (GlobalMap.count(V))
      return SE.getUnknown(GlobalMap[V]);

    if (BBMap.count(V))
      return SE.getUnknown(BBMap[V]);

    return Expr;
  }

private:
  Scop &S;                // The SCoP whose parameters are left untouched.
  ScalarEvolution &SE;    // Used to rebuild the rewritten expressions.
  ValueMapT &GlobalMap;   // Statement-global value remapping (e.g. new IVs).
  ValueMapT &BBMap;       // Basic-block-local value remapping.
};
246 
247 
// Helper class that emits LLVM IR computing the value of isl affine
// expressions, substituting the given induction variable values for the
// input dimensions.
namespace {
class IslGenerator {
public:
  IslGenerator(IRBuilder<> &Builder, std::vector<Value *> &IVS) :
    Builder(Builder), IVS(IVS) {}
  // Emit an LLVM constant holding the value of an isl integer.
  Value *generateIslInt(__isl_take isl_int Int);
  // Emit IR evaluating the affine expression with IVS as input dimensions.
  Value *generateIslAff(__isl_take isl_aff *Aff);
  // Emit IR for a piecewise affine expression (single universe piece only).
  Value *generateIslPwAff(__isl_take isl_pw_aff *PwAff);

private:
  // State threaded through the isl_pw_aff_foreach_piece C callback.
  typedef struct {
    Value *Result;
    class IslGenerator *Generator;
  } IslGenInfo;

  IRBuilder<> &Builder;
  // Values of the new induction variables, one per input dimension.
  std::vector<Value *> &IVS;
  static int mergeIslAffValues(__isl_take isl_set *Set,
                               __isl_take isl_aff *Aff, void *User);
};
}
270 
271 
// Convert an isl integer to an LLVM constant by round-tripping it through a
// temporary GMP value (isl_int -> mpz -> APInt).
Value *IslGenerator::generateIslInt(isl_int Int) {
  mpz_t IntMPZ;
  mpz_init(IntMPZ);
  isl_int_get_gmp(Int, IntMPZ);
  Value *IntValue = Builder.getInt(APInt_from_MPZ(IntMPZ));
  mpz_clear(IntMPZ);
  return IntValue;
}
280 
// Emit IR computing: constant + sum_i(coefficient_i * IVS[i]) for the given
// affine expression. Takes ownership of Aff and frees it before returning.
Value *IslGenerator::generateIslAff(__isl_take isl_aff *Aff) {
  Value *Result;
  Value *ConstValue;
  isl_int ConstIsl;

  isl_int_init(ConstIsl);
  isl_aff_get_constant(Aff, &ConstIsl);
  ConstValue = generateIslInt(ConstIsl);
  Type *Ty = Builder.getInt64Ty();

  // FIXME: We should give the constant and coefficients the right type. Here
  // we force it into i64.
  Result = Builder.CreateSExtOrBitCast(ConstValue, Ty);

  unsigned int NbInputDims = isl_aff_dim(Aff, isl_dim_in);

  assert((IVS.size() == NbInputDims) && "The Dimension of Induction Variables"
         "must match the dimension of the affine space.");

  isl_int CoefficientIsl;
  isl_int_init(CoefficientIsl);

  // Accumulate coefficient * IV for every input dimension; dimensions with a
  // zero coefficient contribute nothing and generate no code.
  for (unsigned int i = 0; i < NbInputDims; ++i) {
    Value *CoefficientValue;
    isl_aff_get_coefficient(Aff, isl_dim_in, i, &CoefficientIsl);

    if (isl_int_is_zero(CoefficientIsl))
      continue;

    CoefficientValue = generateIslInt(CoefficientIsl);
    CoefficientValue = Builder.CreateIntCast(CoefficientValue, Ty, true);
    Value *IV = Builder.CreateIntCast(IVS[i], Ty, true);
    Value *PAdd = Builder.CreateMul(CoefficientValue, IV, "p_mul_coeff");
    Result = Builder.CreateAdd(Result, PAdd, "p_sum_coeff");
  }

  // Release the temporary isl integers and the (taken) affine expression.
  isl_int_clear(CoefficientIsl);
  isl_int_clear(ConstIsl);
  isl_aff_free(Aff);

  return Result;
}
323 
// Callback for isl_pw_aff_foreach_piece: generate code for a single piece and
// store the result in the IslGenInfo passed via 'User'. Only a single piece
// whose domain is the universe set is supported.
int IslGenerator::mergeIslAffValues(__isl_take isl_set *Set,
                                    __isl_take isl_aff *Aff, void *User) {
  IslGenInfo *GenInfo = (IslGenInfo *)User;

  assert((GenInfo->Result == NULL) && "Result is already set."
         "Currently only single isl_aff is supported");
  assert(isl_set_plain_is_universe(Set)
         && "Code generation failed because the set is not universe");

  GenInfo->Result = GenInfo->Generator->generateIslAff(Aff);

  // 'Aff' is consumed by generateIslAff; only the set remains to be freed.
  isl_set_free(Set);
  return 0;
}
338 
339 Value *IslGenerator::generateIslPwAff(__isl_take isl_pw_aff *PwAff) {
340   IslGenInfo User;
341   User.Result = NULL;
342   User.Generator = this;
343   isl_pw_aff_foreach_piece(PwAff, mergeIslAffValues, &User);
344   assert(User.Result && "Code generation for isl_pw_aff failed");
345 
346   isl_pw_aff_free(PwAff);
347   return User.Result;
348 }
349 
350 
// The pass 'P' must provide ScalarEvolution; it is queried eagerly here.
BlockGenerator::BlockGenerator(IRBuilder<> &B, ScopStmt &Stmt, Pass *P):
  Builder(B), Statement(Stmt), P(P), SE(P->getAnalysis<ScalarEvolution>()) {}
353 
354 bool BlockGenerator::isSCEVIgnore(const Instruction *Inst) {
355   if (SCEVCodegen && SE.isSCEVable(Inst->getType()))
356     if (const SCEV *Scev = SE.getSCEV(const_cast<Instruction*>(Inst)))
357       if (!isa<SCEVCouldNotCompute>(Scev)) {
358         if (const SCEVUnknown *Unknown = dyn_cast<SCEVUnknown>(Scev)) {
359           if (Unknown->getValue() != Inst)
360             return true;
361         } else {
362           return true;
363         }
364       }
365 
366   return false;
367 }
368 
// Look up the code-generated counterpart of 'Old'. Lookup order matters:
// constants -> GlobalMap -> BBMap -> SCEV regeneration -> NULL (droppable)
// -> the value itself (SCoP-invariant).
Value *BlockGenerator::getNewValue(const Value *Old, ValueMapT &BBMap,
                                   ValueMapT &GlobalMap) {
  // We assume constants never change.
  // This avoids map lookups for many calls to this function.
  if (isa<Constant>(Old))
    return const_cast<Value*>(Old);

  if (GlobalMap.count(Old)) {
    Value *New = GlobalMap[Old];

    // The globally mapped value may be wider than the original (e.g. a 64-bit
    // replacement IV); truncate it back to the expected type.
    if (Old->getType()->getScalarSizeInBits()
        < New->getType()->getScalarSizeInBits())
      New = Builder.CreateTruncOrBitCast(New, Old->getType());

    return New;
  }

  if (BBMap.count(Old)) {
    return BBMap[Old];
  }

  // With SCEV-based code generation, rebuild the value from its (rewritten)
  // scalar evolution expression and cache it in the block-local map.
  if (SCEVCodegen && SE.isSCEVable(Old->getType()))
    if (const SCEV *Scev = SE.getSCEV(const_cast<Value*>(Old)))
      if (!isa<SCEVCouldNotCompute>(Scev)) {
        const SCEV *NewScev = SCEVRewriter::rewrite(Scev,
                                                    *Statement.getParent(), SE,
                                                    GlobalMap, BBMap);
        SCEVExpander Expander(SE, "polly");
        Value *Expanded = Expander.expandCodeFor(NewScev, Old->getType(),
                                                 Builder.GetInsertPoint());

        BBMap[Old] = Expanded;
        return Expanded;
      }

  // 'Old' is within the original SCoP, but was not rewritten.
  //
  // Such values appear, if they only calculate information already available in
  // the polyhedral description (e.g.  an induction variable increment). They
  // can be safely ignored.
  if (const Instruction *Inst = dyn_cast<Instruction>(Old))
    if (Statement.getParent()->getRegion().contains(Inst->getParent()))
      return NULL;

  // Everything else is probably a scop-constant value defined as global,
  // function parameter or an instruction not within the scop.
  return const_cast<Value*>(Old);
}
417 
// Clone 'Inst' at the current insert point, remapping every operand through
// getNewValue. If an operand maps to NULL the clone is discarded entirely —
// the instruction's value was deemed droppable (see getNewValue); stores must
// never hit this path.
void BlockGenerator::copyInstScalar(const Instruction *Inst, ValueMapT &BBMap,
                                    ValueMapT &GlobalMap) {
  Instruction *NewInst = Inst->clone();

  // Replace old operands with the new ones.
  for (Instruction::const_op_iterator OI = Inst->op_begin(),
       OE = Inst->op_end(); OI != OE; ++OI) {
    Value *OldOperand = *OI;
    Value *NewOperand = getNewValue(OldOperand, BBMap, GlobalMap);

    if (!NewOperand) {
      assert(!isa<StoreInst>(NewInst)
             && "Store instructions are always needed!");
      // The clone was never inserted, so plain delete is safe here.
      delete NewInst;
      return;
    }

    NewInst->replaceUsesOfWith(OldOperand, NewOperand);
  }

  Builder.Insert(NewInst);
  BBMap[Inst] = NewInst;

  if (!NewInst->getType()->isVoidTy())
    NewInst->setName("p_" + Inst->getName());
}
444 
// Build the GEP index list {0, offset} for a (single-dimensional) access
// relation, evaluating the access function with the new induction variables.
// NOTE(review): 'BaseAddress' is currently unused here — it is only consumed
// by the caller when forming the GEP; confirm before relying on it.
std::vector<Value*> BlockGenerator::getMemoryAccessIndex(
  __isl_keep isl_map *AccessRelation, Value *BaseAddress,
  ValueMapT &BBMap, ValueMapT &GlobalMap) {

  assert((isl_map_dim(AccessRelation, isl_dim_out) == 1)
         && "Only single dimensional access functions supported");

  // Gather the regenerated induction variables, one per statement dimension.
  std::vector<Value *> IVS;
  for (unsigned i = 0; i < Statement.getNumIterators(); ++i) {
    const Value *OriginalIV = Statement.getInductionVariableForDimension(i);
    Value *NewIV = getNewValue(OriginalIV, BBMap, GlobalMap);
    IVS.push_back(NewIV);
  }

  // Turn the access relation's output dimension into an affine expression
  // and emit IR computing the byte/element offset.
  isl_pw_aff *PwAff = isl_map_dim_max(isl_map_copy(AccessRelation), 0);
  IslGenerator IslGen(Builder, IVS);
  Value *OffsetValue = IslGen.generateIslPwAff(PwAff);

  Type *Ty = Builder.getInt64Ty();
  OffsetValue = Builder.CreateIntCast(OffsetValue, Ty, true);

  // Leading zero index steps through the pointer; OffsetValue indexes the
  // array element.
  std::vector<Value*> IndexArray;
  Value *NullValue = Constant::getNullValue(Ty);
  IndexArray.push_back(NullValue);
  IndexArray.push_back(OffsetValue);
  return IndexArray;
}
472 
473 Value *BlockGenerator::getNewAccessOperand(
474   __isl_keep isl_map *NewAccessRelation, Value *BaseAddress,
475   ValueMapT &BBMap, ValueMapT &GlobalMap) {
476   std::vector<Value*> IndexArray = getMemoryAccessIndex(NewAccessRelation,
477                                                         BaseAddress,
478                                                         BBMap, GlobalMap);
479   Value *NewOperand = Builder.CreateGEP(BaseAddress, IndexArray,
480                                         "p_newarrayidx_");
481   return NewOperand;
482 }
483 
// Compute the (possibly remapped) pointer an access should use: either the
// straightforward remapping of the original pointer, or — if a new access
// relation was attached to the MemoryAccess — an address computed from it.
Value *BlockGenerator::generateLocationAccessed(const Instruction *Inst,
                                                const Value *Pointer,
                                                ValueMapT &BBMap,
                                                ValueMapT &GlobalMap) {
  MemoryAccess &Access = Statement.getAccessFor(Inst);
  isl_map *CurrentAccessRelation = Access.getAccessRelation();
  isl_map *NewAccessRelation = Access.getNewAccessRelation();

  // NOTE(review): NewAccessRelation may be NULL here (see the branch below);
  // this assert then passes NULL to isl_map_has_equal_space — verify isl's
  // NULL handling is the intended behavior.
  assert(isl_map_has_equal_space(CurrentAccessRelation, NewAccessRelation)
         && "Current and new access function use different spaces");

  Value *NewPointer;

  if (!NewAccessRelation) {
    NewPointer = getNewValue(Pointer, BBMap, GlobalMap);
  } else {
    Value *BaseAddress = const_cast<Value*>(Access.getBaseAddr());
    NewPointer = getNewAccessOperand(NewAccessRelation, BaseAddress,
                                     BBMap, GlobalMap);
  }

  isl_map_free(CurrentAccessRelation);
  isl_map_free(NewAccessRelation);
  return NewPointer;
}
509 
510 Value *BlockGenerator::generateScalarLoad(const LoadInst *Load,
511                                           ValueMapT &BBMap,
512                                           ValueMapT &GlobalMap) {
513   const Value *Pointer = Load->getPointerOperand();
514   const Instruction *Inst = dyn_cast<Instruction>(Load);
515   Value *NewPointer = generateLocationAccessed(Inst, Pointer, BBMap, GlobalMap);
516   Value *ScalarLoad = Builder.CreateLoad(NewPointer,
517                                          Load->getName() + "_p_scalar_");
518   return ScalarLoad;
519 }
520 
521 Value *BlockGenerator::generateScalarStore(const StoreInst *Store,
522                                            ValueMapT &BBMap,
523                                            ValueMapT &GlobalMap) {
524   const Value *Pointer = Store->getPointerOperand();
525   Value *NewPointer = generateLocationAccessed(Store, Pointer, BBMap,
526                                                GlobalMap);
527   Value *ValueOperand = getNewValue(Store->getValueOperand(), BBMap, GlobalMap);
528 
529   return Builder.CreateStore(ValueOperand, NewPointer);
530 }
531 
532 void BlockGenerator::copyInstruction(const Instruction *Inst,
533                                      ValueMapT &BBMap, ValueMapT &GlobalMap) {
534   // Terminator instructions control the control flow. They are explicitly
535   // expressed in the clast and do not need to be copied.
536   if (Inst->isTerminator())
537     return;
538 
539   if (isSCEVIgnore(Inst))
540     return;
541 
542   if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
543     BBMap[Load] = generateScalarLoad(Load, BBMap, GlobalMap);
544     return;
545   }
546 
547   if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
548     BBMap[Store] = generateScalarStore(Store, BBMap, GlobalMap);
549     return;
550   }
551 
552   copyInstScalar(Inst, BBMap, GlobalMap);
553 }
554 
555 
556 void BlockGenerator::copyBB(ValueMapT &GlobalMap) {
557   BasicBlock *BB = Statement.getBasicBlock();
558   BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(),
559                                   Builder.GetInsertPoint(), P);
560   CopyBB->setName("polly.stmt." + BB->getName());
561   Builder.SetInsertPoint(CopyBB->begin());
562 
563   ValueMapT BBMap;
564 
565   for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE;
566        ++II)
567       copyInstruction(II, BBMap, GlobalMap);
568 }
569 
// 'GlobalMaps' holds one value map per vector lane; its size defines the
// vector width (must be > 1). 'Domain' is the statement's iteration domain,
// used for stride analysis of the memory accesses.
VectorBlockGenerator::VectorBlockGenerator(IRBuilder<> &B,
  VectorValueMapT &GlobalMaps, ScopStmt &Stmt, __isl_keep isl_set *Domain,
  Pass *P) : BlockGenerator(B, Stmt, P), GlobalMaps(GlobalMaps),
  Domain(Domain) {
    assert(GlobalMaps.size() > 1 && "Only one vector lane found");
    assert(Domain && "No statement domain provided");
  }
577 
578 Value *VectorBlockGenerator::getVectorValue(const Value *Old,
579                                             ValueMapT &VectorMap,
580                                             VectorValueMapT &ScalarMaps) {
581   if (VectorMap.count(Old))
582     return VectorMap[Old];
583 
584   int Width = getVectorWidth();
585 
586   Value *Vector = UndefValue::get(VectorType::get(Old->getType(), Width));
587 
588   for (int Lane = 0; Lane < Width; Lane++)
589     Vector = Builder.CreateInsertElement(Vector,
590                                          getNewValue(Old,
591                                                      ScalarMaps[Lane],
592                                                      GlobalMaps[Lane]),
593                                          Builder.getInt32(Lane));
594 
595   VectorMap[Old] = Vector;
596 
597   return Vector;
598 }
599 
600 Type *VectorBlockGenerator::getVectorPtrTy(const Value *Val, int Width) {
601   PointerType *PointerTy = dyn_cast<PointerType>(Val->getType());
602   assert(PointerTy && "PointerType expected");
603 
604   Type *ScalarType = PointerTy->getElementType();
605   VectorType *VectorType = VectorType::get(ScalarType, Width);
606 
607   return PointerType::getUnqual(VectorType);
608 }
609 
610 Value *VectorBlockGenerator::generateStrideOneLoad(const LoadInst *Load,
611                                                    ValueMapT &BBMap) {
612   const Value *Pointer = Load->getPointerOperand();
613   Type *VectorPtrType = getVectorPtrTy(Pointer, getVectorWidth());
614   Value *NewPointer = getNewValue(Pointer, BBMap, GlobalMaps[0]);
615   Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
616                                            "vector_ptr");
617   LoadInst *VecLoad = Builder.CreateLoad(VectorPtr,
618                                          Load->getName() + "_p_vec_full");
619   if (!Aligned)
620     VecLoad->setAlignment(8);
621 
622   return VecLoad;
623 }
624 
// Stride-zero accesses load the same location in every lane: emit one scalar
// load and splat it across the vector with a zero-mask shufflevector.
Value *VectorBlockGenerator::generateStrideZeroLoad(const LoadInst *Load,
                                                    ValueMapT &BBMap) {
  const Value *Pointer = Load->getPointerOperand();
  // Width-1 vector pointer type, so the scalar load is typed for shuffling.
  Type *VectorPtrType = getVectorPtrTy(Pointer, 1);
  Value *NewPointer = getNewValue(Pointer, BBMap, GlobalMaps[0]);
  Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
                                           Load->getName() + "_p_vec_p");
  LoadInst *ScalarLoad= Builder.CreateLoad(VectorPtr,
                                           Load->getName() + "_p_splat_one");

  if (!Aligned)
    ScalarLoad->setAlignment(8);

  // An all-zero shuffle mask replicates element 0 into every lane.
  Constant *SplatVector =
    Constant::getNullValue(VectorType::get(Builder.getInt32Ty(),
                                           getVectorWidth()));

  Value *VectorLoad = Builder.CreateShuffleVector(ScalarLoad, ScalarLoad,
                                                  SplatVector,
                                                  Load->getName()
                                                  + "_p_splat");
  return VectorLoad;
}
648 
649 Value *VectorBlockGenerator::generateUnknownStrideLoad(const LoadInst *Load,
650   VectorValueMapT &ScalarMaps) {
651   int VectorWidth = getVectorWidth();
652   const Value *Pointer = Load->getPointerOperand();
653   VectorType *VectorType = VectorType::get(
654     dyn_cast<PointerType>(Pointer->getType())->getElementType(), VectorWidth);
655 
656   Value *Vector = UndefValue::get(VectorType);
657 
658   for (int i = 0; i < VectorWidth; i++) {
659     Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i]);
660     Value *ScalarLoad = Builder.CreateLoad(NewPointer,
661                                            Load->getName() + "_p_scalar_");
662     Vector = Builder.CreateInsertElement(Vector, ScalarLoad,
663                                          Builder.getInt32(i),
664                                          Load->getName() + "_p_vec_");
665   }
666 
667   return Vector;
668 }
669 
// Generate code for a load inside a vectorized statement. Falls back to one
// scalar load per lane when grouped unrolling is requested or the loaded type
// cannot be a vector element; otherwise picks the cheapest vector form based
// on the access's stride within the statement domain.
void VectorBlockGenerator::generateLoad(const LoadInst *Load,
                                        ValueMapT &VectorMap,
                                        VectorValueMapT &ScalarMaps) {
  if (GroupedUnrolling || !VectorType::isValidElementType(Load->getType())) {
    for (int i = 0; i < getVectorWidth(); i++)
      ScalarMaps[i][Load] = generateScalarLoad(Load, ScalarMaps[i],
                                               GlobalMaps[i]);
    return;
  }

  MemoryAccess &Access = Statement.getAccessFor(Load);

  // Stride zero: splat one element. Stride one: single wide load.
  // Anything else: per-lane gather.
  Value *NewLoad;
  if (Access.isStrideZero(isl_set_copy(Domain)))
    NewLoad = generateStrideZeroLoad(Load, ScalarMaps[0]);
  else if (Access.isStrideOne(isl_set_copy(Domain)))
    NewLoad = generateStrideOneLoad(Load, ScalarMaps[0]);
  else
    NewLoad = generateUnknownStrideLoad(Load, ScalarMaps);

  VectorMap[Load] = NewLoad;
}
692 
693 void VectorBlockGenerator::copyUnaryInst(const UnaryInstruction *Inst,
694                                          ValueMapT &VectorMap,
695                                          VectorValueMapT &ScalarMaps) {
696   int VectorWidth = getVectorWidth();
697   Value *NewOperand = getVectorValue(Inst->getOperand(0), VectorMap,
698                                      ScalarMaps);
699 
700   assert(isa<CastInst>(Inst) && "Can not generate vector code for instruction");
701 
702   const CastInst *Cast = dyn_cast<CastInst>(Inst);
703   VectorType *DestType = VectorType::get(Inst->getType(), VectorWidth);
704   VectorMap[Inst] = Builder.CreateCast(Cast->getOpcode(), NewOperand, DestType);
705 }
706 
707 void VectorBlockGenerator::copyBinaryInst(const BinaryOperator *Inst,
708                                           ValueMapT &VectorMap,
709                                           VectorValueMapT &ScalarMaps) {
710   Value *OpZero = Inst->getOperand(0);
711   Value *OpOne = Inst->getOperand(1);
712 
713   Value *NewOpZero, *NewOpOne;
714   NewOpZero = getVectorValue(OpZero, VectorMap, ScalarMaps);
715   NewOpOne = getVectorValue(OpOne, VectorMap, ScalarMaps);
716 
717   Value *NewInst = Builder.CreateBinOp(Inst->getOpcode(), NewOpZero,
718                                        NewOpOne,
719                                        Inst->getName() + "p_vec");
720   VectorMap[Inst] = NewInst;
721 }
722 
723 void VectorBlockGenerator::copyStore(const StoreInst *Store,
724                                      ValueMapT &VectorMap,
725                                      VectorValueMapT &ScalarMaps) {
726   int VectorWidth = getVectorWidth();
727 
728   MemoryAccess &Access = Statement.getAccessFor(Store);
729 
730   const Value *Pointer = Store->getPointerOperand();
731   Value *Vector = getVectorValue(Store->getValueOperand(), VectorMap,
732                                    ScalarMaps);
733 
734   if (Access.isStrideOne(isl_set_copy(Domain))) {
735     Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth);
736     Value *NewPointer = getNewValue(Pointer, ScalarMaps[0], GlobalMaps[0]);
737 
738     Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
739                                              "vector_ptr");
740     StoreInst *Store = Builder.CreateStore(Vector, VectorPtr);
741 
742     if (!Aligned)
743       Store->setAlignment(8);
744   } else {
745     for (unsigned i = 0; i < ScalarMaps.size(); i++) {
746       Value *Scalar = Builder.CreateExtractElement(Vector,
747                                                    Builder.getInt32(i));
748       Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i]);
749       Builder.CreateStore(Scalar, NewPointer);
750     }
751   }
752 }
753 
754 bool VectorBlockGenerator::hasVectorOperands(const Instruction *Inst,
755                                              ValueMapT &VectorMap) {
756   for (Instruction::const_op_iterator OI = Inst->op_begin(),
757        OE = Inst->op_end(); OI != OE; ++OI)
758     if (VectorMap.count(*OI))
759       return true;
760   return false;
761 }
762 
// For every operand of 'Inst' that only exists as a vector, extract its
// per-lane scalar values into the scalar maps so the instruction can be
// executed lane-wise. Returns true if any operand had a vector version.
bool VectorBlockGenerator::extractScalarValues(const Instruction *Inst,
                                               ValueMapT &VectorMap,
                                               VectorValueMapT &ScalarMaps) {
  bool HasVectorOperand = false;
  int VectorWidth = getVectorWidth();

  for (Instruction::const_op_iterator OI = Inst->op_begin(),
       OE = Inst->op_end(); OI != OE; ++OI) {
    ValueMapT::iterator VecOp = VectorMap.find(*OI);

    if (VecOp == VectorMap.end())
      continue;

    HasVectorOperand = true;
    Value *NewVector = VecOp->second;

    for (int i = 0; i < VectorWidth; ++i) {
      ValueMapT &SM = ScalarMaps[i];

      // If there is one scalar extracted, all scalar elements should have
      // already been extracted by the code here. So no need to check for the
      // existence of all of them.
      if (SM.count(*OI))
        break;

      SM[*OI] = Builder.CreateExtractElement(NewVector, Builder.getInt32(i));
    }
  }

  return HasVectorOperand;
}
794 
795 void VectorBlockGenerator::copyInstScalarized(const Instruction *Inst,
796                                               ValueMapT &VectorMap,
797                                               VectorValueMapT &ScalarMaps) {
798   bool HasVectorOperand;
799   int VectorWidth = getVectorWidth();
800 
801   HasVectorOperand = extractScalarValues(Inst, VectorMap, ScalarMaps);
802 
803   for (int VectorLane = 0; VectorLane < getVectorWidth(); VectorLane++)
804     copyInstScalar(Inst, ScalarMaps[VectorLane], GlobalMaps[VectorLane]);
805 
806   if (!VectorType::isValidElementType(Inst->getType()) || !HasVectorOperand)
807     return;
808 
809   // Make the result available as vector value.
810   VectorType *VectorType = VectorType::get(Inst->getType(), VectorWidth);
811   Value *Vector = UndefValue::get(VectorType);
812 
813   for (int i = 0; i < VectorWidth; i++)
814     Vector = Builder.CreateInsertElement(Vector, ScalarMaps[i][Inst],
815                                          Builder.getInt32(i));
816 
817   VectorMap[Inst] = Vector;
818 }
819 
// The vector width equals the number of per-lane global value maps.
int VectorBlockGenerator::getVectorWidth() {
  return GlobalMaps.size();
}
823 
// Copy one instruction into the vectorized statement, preferring true vector
// code (loads, casts, binary ops) and falling back to scalarized per-lane
// execution for anything else.
void VectorBlockGenerator::copyInstruction(const Instruction *Inst,
                                           ValueMapT &VectorMap,
                                           VectorValueMapT &ScalarMaps) {
  // Terminator instructions control the control flow. They are explicitly
  // expressed in the clast and do not need to be copied.
  if (Inst->isTerminator())
    return;

  if (isSCEVIgnore(Inst))
    return;

  if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
    generateLoad(Load, VectorMap, ScalarMaps);
    return;
  }

  // Only instructions that consume an already-widened value are worth
  // emitting as vector code; everything else runs per lane below.
  if (hasVectorOperands(Inst, VectorMap)) {
    if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
      copyStore(Store, VectorMap, ScalarMaps);
      return;
    }

    if (const UnaryInstruction *Unary = dyn_cast<UnaryInstruction>(Inst)) {
      copyUnaryInst(Unary, VectorMap, ScalarMaps);
      return;
    }

    if (const BinaryOperator *Binary = dyn_cast<BinaryOperator>(Inst)) {
      copyBinaryInst(Binary, VectorMap, ScalarMaps);
      return;
    }

    // Fallthrough: We generate scalar instructions, if we don't know how to
    // generate vector code.
  }

  copyInstScalarized(Inst, VectorMap, ScalarMaps);
}
862 
// Emit the vectorized copy of the statement's basic block at the current
// insert point, splitting off a dedicated block for it first.
void VectorBlockGenerator::copyBB() {
  BasicBlock *BB = Statement.getBasicBlock();
  BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(),
                                  Builder.GetInsertPoint(), P);
  CopyBB->setName("polly.stmt." + BB->getName());
  Builder.SetInsertPoint(CopyBB->begin());

  // Create two maps that store the mapping from the original instructions of
  // the old basic block to their copies in the new basic block. Those maps
  // are basic block local.
  //
  // As vector code generation is supported there is one map for scalar values
  // and one for vector values.
  //
  // In case we just do scalar code generation, the vectorMap is not used and
  // the scalarMap has just one dimension, which contains the mapping.
  //
  // In case vector code generation is done, an instruction may either appear
  // in the vector map once (as it is calculating >vectorwidth< values at a
  // time. Or (if the values are calculated using scalar operations), it
  // appears once in every dimension of the scalarMap.
  VectorValueMapT ScalarBlockMap(getVectorWidth());
  ValueMapT VectorBlockMap;

  for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end();
       II != IE; ++II)
      copyInstruction(II, VectorBlockMap, ScalarBlockMap);
}
891