1 //===--- BlockGenerators.cpp - Generate code for statements -----*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the BlockGenerator and VectorBlockGenerator classes,
11 // which generate sequential code and vectorized code for a polyhedral
12 // statement, respectively.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "polly/ScopInfo.h"
17 #include "polly/CodeGen/CodeGeneration.h"
18 #include "polly/CodeGen/BlockGenerators.h"
19 #include "polly/Support/GICHelper.h"
20 
21 #include "llvm/Analysis/LoopInfo.h"
22 #include "llvm/Analysis/ScalarEvolution.h"
23 #include "llvm/Analysis/ScalarEvolutionExpander.h"
24 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
25 #include "llvm/Support/CommandLine.h"
26 
27 #include "isl/aff.h"
28 #include "isl/set.h"
29 
30 using namespace llvm;
31 using namespace polly;
32 
33 static cl::opt<bool>
34 Aligned("enable-polly-aligned",
35        cl::desc("Assumed aligned memory accesses."), cl::Hidden,
36        cl::value_desc("OpenMP code generation enabled if true"),
37        cl::init(false), cl::ZeroOrMore);
38 
// Enable SCEV based code generation: instructions that scalar evolution can
// describe are not copied but regenerated from their (rewritten) SCEV
// expression. See the SCEVRewriter comment below for details.
static cl::opt<bool>
SCEVCodegen("polly-codegen-scev",
            cl::desc("Use SCEV based code generation."), cl::Hidden,
            cl::init(false), cl::ZeroOrMore);
43 
44 /// The SCEVRewriter takes a scalar evolution expression and updates the
45 /// following components:
46 ///
47 /// - SCEVUnknown
48 ///
49 ///   Values referenced in SCEVUnknown subexpressions are looked up in
50 ///   two Value to Value maps (GlobalMap and BBMap). If they are found they are
51 ///   replaced by a reference to the value they map to.
52 ///
53 /// - SCEVAddRecExpr
54 ///
55 ///   Based on a Loop -> Value map {Loop_1: %Value}, an expression
56 ///   {%Base, +, %Step}<Loop_1> is rewritten to %Base + %Value * %Step.
57 ///   AddRecExpr's with more than two operands can not be translated.
58 ///
59 ///   FIXME: The comment above is not yet reality. At the moment we derive
60 ///   %Value by looking up the canonical IV of the loop and by defining
61 ///   %Value = GlobalMap[%IV]. This needs to be changed to remove the need for
62 ///   canonical induction variables.
63 ///
64 ///
65 /// How can this be used?
66 /// ====================
67 ///
68 /// SCEVRewrite based code generation works on virtually independent blocks.
69 /// This means we do not run the independent blocks pass to rewrite scalar
70 /// instructions, but just ignore instructions that we can analyze with scalar
71 /// evolution. Virtually independent blocks are blocks that only reference the
72 /// following values:
73 ///
74 /// o Values calculated within a basic block
75 /// o Values representable by SCEV
76 ///
77 /// During code generation we can ignore all instructions:
78 ///
79 /// - Ignore all instructions except:
80 ///   - Load instructions
81 ///   - Instructions that reference operands already calculated within the
82 ///     basic block.
83 ///   - Store instructions
84 struct SCEVRewriter : public SCEVVisitor<SCEVRewriter, const SCEV*> {
85 public:
86   static const SCEV *rewrite(const SCEV *scev, Scop &S, ScalarEvolution &SE,
87                              ValueMapT &GlobalMap, ValueMapT &BBMap) {
88     SCEVRewriter Rewriter(S, SE, GlobalMap, BBMap);
89     return Rewriter.visit(scev);
90   }
91 
92   SCEVRewriter(Scop &S, ScalarEvolution &SE, ValueMapT &GlobalMap,
93                ValueMapT &BBMap) : S(S), SE(SE), GlobalMap(GlobalMap),
94                BBMap(BBMap) {}
95 
96   const SCEV *visit(const SCEV *Expr) {
97     // FIXME: The parameter handling is incorrect.
98     //
99     // Polly does only detect parameters in Access function and loop iteration
100     // counters, but it does not get parameters that are just used by
101     // instructions within the basic block.
102     //
103     // There are two options to solve this:
104     //  o Iterate over all instructions of the SCoP and find the actual
105     //    parameters.
106     //  o Just check within the SCEVRewriter if Values lay outside of the SCoP
107     //    and detect parameters on the fly.
108     //
109     // This is especially important for OpenMP and GPGPU code generation, as
110     // they require us to detect and possibly rewrite the corresponding
111     // parameters.
112     if (isl_id *Id = S.getIdForParam(Expr)) {
113       isl_id_free(Id);
114       return Expr;
115     }
116 
117 
118     return SCEVVisitor<SCEVRewriter, const SCEV*>::visit(Expr);
119   }
120 
121   const SCEV *visitConstant(const SCEVConstant *Constant) {
122     return Constant;
123   }
124 
125   const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) {
126     const SCEV *Operand = visit(Expr->getOperand());
127     return SE.getTruncateExpr(Operand, Expr->getType());
128   }
129 
130   const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
131     const SCEV *Operand = visit(Expr->getOperand());
132     return SE.getZeroExtendExpr(Operand, Expr->getType());
133   }
134 
135   const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
136     const SCEV *Operand = visit(Expr->getOperand());
137     return SE.getSignExtendExpr(Operand, Expr->getType());
138   }
139 
140   const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
141     SmallVector<const SCEV *, 2> Operands;
142     for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
143       const SCEV *Operand = visit(Expr->getOperand(i));
144       Operands.push_back(Operand);
145     }
146 
147     return SE.getAddExpr(Operands);
148   }
149 
150   const SCEV *visitMulExpr(const SCEVMulExpr *Expr) {
151     SmallVector<const SCEV *, 2> Operands;
152     for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
153       const SCEV *Operand = visit(Expr->getOperand(i));
154       Operands.push_back(Operand);
155     }
156 
157     return SE.getMulExpr(Operands);
158   }
159 
160   const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) {
161     return SE.getUDivExpr(visit(Expr->getLHS()), visit(Expr->getRHS()));
162   }
163 
164   // Return a new induction variable if the loop is within the original SCoP
165   // or NULL otherwise.
166   Value *getNewIV(const Loop *L) {
167     Value *IV = L->getCanonicalInductionVariable();
168     if (!IV)
169       return NULL;
170 
171     ValueMapT::iterator NewIV = GlobalMap.find(IV);
172 
173     if (NewIV == GlobalMap.end())
174       return NULL;
175 
176     return NewIV->second;
177   }
178 
179   const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
180     Value *IV;
181 
182     IV = getNewIV(Expr->getLoop());
183 
184     // The IV is not within the GlobalMaps. So do not rewrite it and also do
185     // not rewrite any descendants.
186     if (!IV)
187       return Expr;
188 
189     assert(Expr->getNumOperands() == 2 &&
190            "An AddRecExpr with more than two operands can not be rewritten.");
191 
192     const SCEV *Base, *Step, *IVExpr, *Product;
193 
194     Base = visit(Expr->getStart());
195     Step = visit(Expr->getOperand(1));
196     IVExpr = SE.getUnknown(IV);
197     IVExpr = SE.getTruncateOrSignExtend(IVExpr, Step->getType());
198     Product = SE.getMulExpr(Step, IVExpr);
199 
200     return SE.getAddExpr(Base, Product);
201   }
202 
203   const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
204     SmallVector<const SCEV *, 2> Operands;
205     for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
206       const SCEV *Operand = visit(Expr->getOperand(i));
207       Operands.push_back(Operand);
208     }
209 
210     return SE.getSMaxExpr(Operands);
211   }
212 
213   const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) {
214     SmallVector<const SCEV *, 2> Operands;
215     for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
216       const SCEV *Operand = visit(Expr->getOperand(i));
217       Operands.push_back(Operand);
218     }
219 
220     return SE.getUMaxExpr(Operands);
221   }
222 
223   const SCEV *visitUnknown(const SCEVUnknown *Expr) {
224     Value *V = Expr->getValue();
225 
226     if (GlobalMap.count(V))
227       return SE.getUnknown(GlobalMap[V]);
228 
229     if (BBMap.count(V))
230       return SE.getUnknown(BBMap[V]);
231 
232     return Expr;
233   }
234 
235 private:
236   Scop &S;
237   ScalarEvolution &SE;
238   ValueMapT &GlobalMap;
239   ValueMapT &BBMap;
240 };
241 
// Helper class to generate memory locations: emits LLVM IR that evaluates an
// isl affine expression over the (new) induction variable values in IVS.
namespace {
class IslGenerator {
public:
  IslGenerator(IRBuilder<> &Builder, std::vector<Value *> &IVS) :
    Builder(Builder), IVS(IVS) {}
  // Materialize an isl integer as an LLVM constant.
  Value *generateIslInt(__isl_take isl_int Int);
  // Emit IR computing the value of the affine expression (forced to i64).
  Value *generateIslAff(__isl_take isl_aff *Aff);
  // Emit IR for a piecewise affine function. Only a single piece whose
  // domain is the universe set is supported (asserted in the callback).
  Value *generateIslPwAff(__isl_take isl_pw_aff *PwAff);

private:
  // State threaded through the isl foreach callback: the generated value
  // (NULL until the single piece is seen) and the owning generator.
  typedef struct {
    Value *Result;
    class IslGenerator *Generator;
  } IslGenInfo;

  IRBuilder<> &Builder;
  std::vector<Value *> &IVS;
  // Callback for isl_pw_aff_foreach_piece; User is an IslGenInfo*.
  static int mergeIslAffValues(__isl_take isl_set *Set, __isl_take isl_aff *Aff,
                               void *User);
};
}
264 
265 Value *IslGenerator::generateIslInt(isl_int Int) {
266   mpz_t IntMPZ;
267   mpz_init(IntMPZ);
268   isl_int_get_gmp(Int, IntMPZ);
269   Value *IntValue = Builder.getInt(APInt_from_MPZ(IntMPZ));
270   mpz_clear(IntMPZ);
271   return IntValue;
272 }
273 
274 Value *IslGenerator::generateIslAff(__isl_take isl_aff *Aff) {
275   Value *Result;
276   Value *ConstValue;
277   isl_int ConstIsl;
278 
279   isl_int_init(ConstIsl);
280   isl_aff_get_constant(Aff, &ConstIsl);
281   ConstValue = generateIslInt(ConstIsl);
282   Type *Ty = Builder.getInt64Ty();
283 
284   // FIXME: We should give the constant and coefficients the right type. Here
285   // we force it into i64.
286   Result = Builder.CreateSExtOrBitCast(ConstValue, Ty);
287 
288   unsigned int NbInputDims = isl_aff_dim(Aff, isl_dim_in);
289 
290   assert((IVS.size() == NbInputDims) && "The Dimension of Induction Variables"
291          "must match the dimension of the affine space.");
292 
293   isl_int CoefficientIsl;
294   isl_int_init(CoefficientIsl);
295 
296   for (unsigned int i = 0; i < NbInputDims; ++i) {
297     Value *CoefficientValue;
298     isl_aff_get_coefficient(Aff, isl_dim_in, i, &CoefficientIsl);
299 
300     if (isl_int_is_zero(CoefficientIsl))
301       continue;
302 
303     CoefficientValue = generateIslInt(CoefficientIsl);
304     CoefficientValue = Builder.CreateIntCast(CoefficientValue, Ty, true);
305     Value *IV = Builder.CreateIntCast(IVS[i], Ty, true);
306     Value *PAdd = Builder.CreateMul(CoefficientValue, IV, "p_mul_coeff");
307     Result = Builder.CreateAdd(Result, PAdd, "p_sum_coeff");
308   }
309 
310   isl_int_clear(CoefficientIsl);
311   isl_int_clear(ConstIsl);
312   isl_aff_free(Aff);
313 
314   return Result;
315 }
316 
317 int IslGenerator::mergeIslAffValues(__isl_take isl_set *Set,
318                                     __isl_take isl_aff *Aff, void *User) {
319   IslGenInfo *GenInfo = (IslGenInfo *)User;
320 
321   assert((GenInfo->Result == NULL) && "Result is already set."
322          "Currently only single isl_aff is supported");
323   assert(isl_set_plain_is_universe(Set)
324          && "Code generation failed because the set is not universe");
325 
326   GenInfo->Result = GenInfo->Generator->generateIslAff(Aff);
327 
328   isl_set_free(Set);
329   return 0;
330 }
331 
332 Value *IslGenerator::generateIslPwAff(__isl_take isl_pw_aff *PwAff) {
333   IslGenInfo User;
334   User.Result = NULL;
335   User.Generator = this;
336   isl_pw_aff_foreach_piece(PwAff, mergeIslAffValues, &User);
337   assert(User.Result && "Code generation for isl_pw_aff failed");
338 
339   isl_pw_aff_free(PwAff);
340   return User.Result;
341 }
342 
343 
// Create a block generator copying the instructions of statement Stmt,
// using pass P to access analyses (ScalarEvolution in particular).
BlockGenerator::BlockGenerator(IRBuilder<> &B, ScopStmt &Stmt, Pass *P):
  Builder(B), Statement(Stmt), P(P), SE(P->getAnalysis<ScalarEvolution>()) {}
346 
347 bool BlockGenerator::isSCEVIgnore(const Instruction *Inst) {
348   if (SCEVCodegen && SE.isSCEVable(Inst->getType()))
349     if (const SCEV *Scev = SE.getSCEV(const_cast<Instruction*>(Inst)))
350       if (!isa<SCEVCouldNotCompute>(Scev)) {
351         if (const SCEVUnknown *Unknown = dyn_cast<SCEVUnknown>(Scev)) {
352           if (Unknown->getValue() != Inst)
353             return true;
354         } else {
355           return true;
356         }
357       }
358 
359   return false;
360 }
361 
// Look up (or recompute) the new version of a value.
//
// Lookup order matters: constants short-circuit, then GlobalMap takes
// precedence over the block-local BBMap, then (in SCEV mode) the value is
// regenerated from its rewritten SCEV expression. Returns NULL for values
// inside the SCoP that have no new version and need none.
Value *BlockGenerator::getNewValue(const Value *Old, ValueMapT &BBMap,
                                   ValueMapT &GlobalMap) {
  // We assume constants never change.
  // This avoids map lookups for many calls to this function.
  if (isa<Constant>(Old))
    return const_cast<Value*>(Old);

  if (GlobalMap.count(Old)) {
    Value *New = GlobalMap[Old];

    // If the mapped value is wider than the original, truncate it back so
    // users see the type they expect.
    if (Old->getType()->getScalarSizeInBits()
        < New->getType()->getScalarSizeInBits())
      New = Builder.CreateTruncOrBitCast(New, Old->getType());

    return New;
  }

  if (BBMap.count(Old)) {
    return BBMap[Old];
  }

  // SCEV mode: rewrite the expression into the new environment, expand it at
  // the current insert point and cache the result in BBMap for later uses.
  if (SCEVCodegen && SE.isSCEVable(Old->getType()))
    if (const SCEV *Scev = SE.getSCEV(const_cast<Value*>(Old)))
      if (!isa<SCEVCouldNotCompute>(Scev)) {
        const SCEV *NewScev = SCEVRewriter::rewrite(Scev,
                                                    *Statement.getParent(), SE,
                                                    GlobalMap, BBMap);
        SCEVExpander Expander(SE, "polly");
        Value *Expanded = Expander.expandCodeFor(NewScev, Old->getType(),
                                                 Builder.GetInsertPoint());

        BBMap[Old] = Expanded;
        return Expanded;
      }

  // 'Old' is within the original SCoP, but was not rewritten.
  //
  // Such values appear, if they only calculate information already available in
  // the polyhedral description (e.g.  an induction variable increment). They
  // can be safely ignored.
  if (const Instruction *Inst = dyn_cast<Instruction>(Old))
    if (Statement.getParent()->getRegion().contains(Inst->getParent()))
      return NULL;

  // Everything else is probably a scop-constant value defined as global,
  // function parameter or an instruction not within the scop.
  return const_cast<Value*>(Old);
}
410 
411 void BlockGenerator::copyInstScalar(const Instruction *Inst, ValueMapT &BBMap,
412                                     ValueMapT &GlobalMap) {
413   Instruction *NewInst = Inst->clone();
414 
415   // Replace old operands with the new ones.
416   for (Instruction::const_op_iterator OI = Inst->op_begin(),
417        OE = Inst->op_end(); OI != OE; ++OI) {
418     Value *OldOperand = *OI;
419     Value *NewOperand = getNewValue(OldOperand, BBMap, GlobalMap);
420 
421     if (!NewOperand) {
422       assert(!isa<StoreInst>(NewInst)
423              && "Store instructions are always needed!");
424       delete NewInst;
425       return;
426     }
427 
428     NewInst->replaceUsesOfWith(OldOperand, NewOperand);
429   }
430 
431   Builder.Insert(NewInst);
432   BBMap[Inst] = NewInst;
433 
434   if (!NewInst->getType()->isVoidTy())
435     NewInst->setName("p_" + Inst->getName());
436 }
437 
// Compute the GEP index list ([0, offset]) for an access described by a
// single-dimensional access relation, substituting the new induction
// variable values into the affine access function.
//
// NOTE(review): BaseAddress is not referenced in this function; it appears
// to be kept only for signature symmetry with getNewAccessOperand.
std::vector<Value*> BlockGenerator::getMemoryAccessIndex(
  __isl_keep isl_map *AccessRelation, Value *BaseAddress,
  ValueMapT &BBMap, ValueMapT &GlobalMap) {

  assert((isl_map_dim(AccessRelation, isl_dim_out) == 1) &&
         "Only single dimensional access functions supported");

  // Collect the new value of every surrounding induction variable.
  std::vector<Value *> IVS;
  for (unsigned i = 0; i < Statement.getNumIterators(); ++i) {
    const Value *OriginalIV = Statement.getInductionVariableForDimension(i);
    Value *NewIV = getNewValue(OriginalIV, BBMap, GlobalMap);
    IVS.push_back(NewIV);
  }

  // Obtain the (single) output dimension as a piecewise affine function of
  // the inputs and emit IR computing it over the new IV values.
  isl_pw_aff *PwAff = isl_map_dim_max(isl_map_copy(AccessRelation), 0);
  IslGenerator IslGen(Builder, IVS);
  Value *OffsetValue = IslGen.generateIslPwAff(PwAff);

  // IslGenerator works in i64 (see its FIXME); force the offset to match.
  Type *Ty = Builder.getInt64Ty();
  OffsetValue = Builder.CreateIntCast(OffsetValue, Ty, true);

  // GEP index list: leading zero to step through the pointer, then the
  // computed element offset.
  std::vector<Value*> IndexArray;
  Value *NullValue = Constant::getNullValue(Ty);
  IndexArray.push_back(NullValue);
  IndexArray.push_back(OffsetValue);
  return IndexArray;
}
465 
466 Value *BlockGenerator::getNewAccessOperand(
467   __isl_keep isl_map *NewAccessRelation, Value *BaseAddress,
468   ValueMapT &BBMap, ValueMapT &GlobalMap) {
469   std::vector<Value*> IndexArray = getMemoryAccessIndex(NewAccessRelation,
470                                                         BaseAddress,
471                                                         BBMap, GlobalMap);
472   Value *NewOperand = Builder.CreateGEP(BaseAddress, IndexArray,
473                                         "p_newarrayidx_");
474   return NewOperand;
475 }
476 
// Compute the new pointer operand for the copy of a memory access.
//
// If the access has no new access relation, the original pointer is simply
// remapped. Otherwise the address is recomputed from the new access relation
// relative to the access' base address.
Value *BlockGenerator::generateLocationAccessed(const Instruction *Inst,
                                                const Value *Pointer,
                                                ValueMapT &BBMap,
                                                ValueMapT &GlobalMap) {
  MemoryAccess &Access = Statement.getAccessFor(Inst);
  isl_map *CurrentAccessRelation = Access.getAccessRelation();
  isl_map *NewAccessRelation = Access.getNewAccessRelation();

  // NOTE(review): NewAccessRelation may be NULL here (handled below), so
  // this assert relies on isl_map_has_equal_space returning a non-zero
  // error value for a NULL argument rather than crashing - confirm against
  // the isl version in use.
  assert(isl_map_has_equal_space(CurrentAccessRelation, NewAccessRelation) &&
         "Current and new access function use different spaces");

  Value *NewPointer;

  if (!NewAccessRelation) {
    NewPointer = getNewValue(Pointer, BBMap, GlobalMap);
  } else {
    Value *BaseAddress = const_cast<Value*>(Access.getBaseAddr());
    NewPointer = getNewAccessOperand(NewAccessRelation, BaseAddress,
                                     BBMap, GlobalMap);
  }

  // isl_map_free accepts NULL, so the possibly-NULL new relation is fine.
  isl_map_free(CurrentAccessRelation);
  isl_map_free(NewAccessRelation);
  return NewPointer;
}
502 
503 Value *BlockGenerator::generateScalarLoad(const LoadInst *Load,
504                                           ValueMapT &BBMap,
505                                           ValueMapT &GlobalMap) {
506   const Value *Pointer = Load->getPointerOperand();
507   const Instruction *Inst = dyn_cast<Instruction>(Load);
508   Value *NewPointer = generateLocationAccessed(Inst, Pointer, BBMap, GlobalMap);
509   Value *ScalarLoad = Builder.CreateLoad(NewPointer,
510                                          Load->getName() + "_p_scalar_");
511   return ScalarLoad;
512 }
513 
514 Value *BlockGenerator::generateScalarStore(const StoreInst *Store,
515                                            ValueMapT &BBMap,
516                                            ValueMapT &GlobalMap) {
517   const Value *Pointer = Store->getPointerOperand();
518   Value *NewPointer = generateLocationAccessed(Store, Pointer, BBMap,
519                                                GlobalMap);
520   Value *ValueOperand = getNewValue(Store->getValueOperand(), BBMap, GlobalMap);
521 
522   return Builder.CreateStore(ValueOperand, NewPointer);
523 }
524 
525 void BlockGenerator::copyInstruction(const Instruction *Inst, ValueMapT &BBMap,
526                                      ValueMapT &GlobalMap) {
527   // Terminator instructions control the control flow. They are explicitly
528   // expressed in the clast and do not need to be copied.
529   if (Inst->isTerminator())
530     return;
531 
532   if (isSCEVIgnore(Inst))
533     return;
534 
535   if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
536     BBMap[Load] = generateScalarLoad(Load, BBMap, GlobalMap);
537     return;
538   }
539 
540   if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
541     BBMap[Store] = generateScalarStore(Store, BBMap, GlobalMap);
542     return;
543   }
544 
545   copyInstScalar(Inst, BBMap, GlobalMap);
546 }
547 
// Copy the statement's basic block to the current insert position.
//
// The block at the builder's insert point is split, the copy gets its own
// "polly.stmt." block, and all instructions are copied using a fresh,
// block-local value map.
void BlockGenerator::copyBB(ValueMapT &GlobalMap) {
  BasicBlock *BB = Statement.getBasicBlock();
  BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(),
                                  Builder.GetInsertPoint(), P);
  CopyBB->setName("polly.stmt." + BB->getName());
  Builder.SetInsertPoint(CopyBB->begin());

  // Mapping from original instructions to their copies, local to this block.
  ValueMapT BBMap;

  for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE;
       ++II)
      copyInstruction(II, BBMap, GlobalMap);
}
561 
562 VectorBlockGenerator::VectorBlockGenerator(IRBuilder<> &B,
563                                            VectorValueMapT &GlobalMaps,
564                                            ScopStmt &Stmt,
565                                            __isl_keep isl_map *Schedule,
566                                            Pass *P)
567   : BlockGenerator(B, Stmt, P), GlobalMaps(GlobalMaps), Schedule(Schedule) {
568   assert(GlobalMaps.size() > 1 && "Only one vector lane found");
569   assert(Schedule && "No statement domain provided");
570 }
571 
572 Value *VectorBlockGenerator::getVectorValue(const Value *Old,
573                                             ValueMapT &VectorMap,
574                                             VectorValueMapT &ScalarMaps) {
575   if (VectorMap.count(Old))
576     return VectorMap[Old];
577 
578   int Width = getVectorWidth();
579 
580   Value *Vector = UndefValue::get(VectorType::get(Old->getType(), Width));
581 
582   for (int Lane = 0; Lane < Width; Lane++)
583     Vector = Builder.CreateInsertElement(Vector,
584                                          getNewValue(Old,
585                                                      ScalarMaps[Lane],
586                                                      GlobalMaps[Lane]),
587                                          Builder.getInt32(Lane));
588 
589   VectorMap[Old] = Vector;
590 
591   return Vector;
592 }
593 
594 Type *VectorBlockGenerator::getVectorPtrTy(const Value *Val, int Width) {
595   PointerType *PointerTy = dyn_cast<PointerType>(Val->getType());
596   assert(PointerTy && "PointerType expected");
597 
598   Type *ScalarType = PointerTy->getElementType();
599   VectorType *VectorType = VectorType::get(ScalarType, Width);
600 
601   return PointerType::getUnqual(VectorType);
602 }
603 
604 Value *VectorBlockGenerator::generateStrideOneLoad(const LoadInst *Load,
605                                                    ValueMapT &BBMap) {
606   const Value *Pointer = Load->getPointerOperand();
607   Type *VectorPtrType = getVectorPtrTy(Pointer, getVectorWidth());
608   Value *NewPointer = getNewValue(Pointer, BBMap, GlobalMaps[0]);
609   Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
610                                            "vector_ptr");
611   LoadInst *VecLoad = Builder.CreateLoad(VectorPtr,
612                                          Load->getName() + "_p_vec_full");
613   if (!Aligned)
614     VecLoad->setAlignment(8);
615 
616   return VecLoad;
617 }
618 
// Load a value that is identical for all vector lanes (stride-zero access):
// load one element and broadcast it into every lane with a shufflevector.
Value *VectorBlockGenerator::generateStrideZeroLoad(const LoadInst *Load,
                                                    ValueMapT &BBMap) {
  const Value *Pointer = Load->getPointerOperand();
  // A '<1 x Ty>*' type for the single-element load.
  Type *VectorPtrType = getVectorPtrTy(Pointer, 1);
  Value *NewPointer = getNewValue(Pointer, BBMap, GlobalMaps[0]);
  Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
                                           Load->getName() + "_p_vec_p");
  LoadInst *ScalarLoad= Builder.CreateLoad(VectorPtr,
                                           Load->getName() + "_p_splat_one");

  // Without a user guarantee of alignment, conservatively assume 8 bytes.
  if (!Aligned)
    ScalarLoad->setAlignment(8);

  // An all-zero shuffle mask replicates element 0 into every lane.
  Constant *SplatVector =
    Constant::getNullValue(VectorType::get(Builder.getInt32Ty(),
                                           getVectorWidth()));

  Value *VectorLoad = Builder.CreateShuffleVector(ScalarLoad, ScalarLoad,
                                                  SplatVector,
                                                  Load->getName()
                                                  + "_p_splat");
  return VectorLoad;
}
642 
643 Value *VectorBlockGenerator::generateUnknownStrideLoad(const LoadInst *Load,
644   VectorValueMapT &ScalarMaps) {
645   int VectorWidth = getVectorWidth();
646   const Value *Pointer = Load->getPointerOperand();
647   VectorType *VectorType = VectorType::get(
648     dyn_cast<PointerType>(Pointer->getType())->getElementType(), VectorWidth);
649 
650   Value *Vector = UndefValue::get(VectorType);
651 
652   for (int i = 0; i < VectorWidth; i++) {
653     Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i]);
654     Value *ScalarLoad = Builder.CreateLoad(NewPointer,
655                                            Load->getName() + "_p_scalar_");
656     Vector = Builder.CreateInsertElement(Vector, ScalarLoad,
657                                          Builder.getInt32(i),
658                                          Load->getName() + "_p_vec_");
659   }
660 
661   return Vector;
662 }
663 
// Generate the vector-code copy of a load.
//
// If grouped unrolling is requested, or the loaded type is not a valid
// vector element type, one scalar load per lane is emitted instead. Otherwise
// the access stride under the current schedule selects a splat load (stride
// zero), one wide load (stride one) or per-lane loads (unknown stride).
void VectorBlockGenerator::generateLoad(const LoadInst *Load,
                                        ValueMapT &VectorMap,
                                        VectorValueMapT &ScalarMaps) {
  if (PollyVectorizerChoice >= VECTORIZER_FIRST_NEED_GROUPED_UNROLL ||
      !VectorType::isValidElementType(Load->getType())) {
    for (int i = 0; i < getVectorWidth(); i++)
      ScalarMaps[i][Load] = generateScalarLoad(Load, ScalarMaps[i],
                                               GlobalMaps[i]);
    return;
  }

  MemoryAccess &Access = Statement.getAccessFor(Load);

  Value *NewLoad;
  if (Access.isStrideZero(isl_map_copy(Schedule)))
    NewLoad = generateStrideZeroLoad(Load, ScalarMaps[0]);
  else if (Access.isStrideOne(isl_map_copy(Schedule)))
    NewLoad = generateStrideOneLoad(Load, ScalarMaps[0]);
  else
    NewLoad = generateUnknownStrideLoad(Load, ScalarMaps);

  VectorMap[Load] = NewLoad;
}
687 
688 void VectorBlockGenerator::copyUnaryInst(const UnaryInstruction *Inst,
689                                          ValueMapT &VectorMap,
690                                          VectorValueMapT &ScalarMaps) {
691   int VectorWidth = getVectorWidth();
692   Value *NewOperand = getVectorValue(Inst->getOperand(0), VectorMap,
693                                      ScalarMaps);
694 
695   assert(isa<CastInst>(Inst) && "Can not generate vector code for instruction");
696 
697   const CastInst *Cast = dyn_cast<CastInst>(Inst);
698   VectorType *DestType = VectorType::get(Inst->getType(), VectorWidth);
699   VectorMap[Inst] = Builder.CreateCast(Cast->getOpcode(), NewOperand, DestType);
700 }
701 
702 void VectorBlockGenerator::copyBinaryInst(const BinaryOperator *Inst,
703                                           ValueMapT &VectorMap,
704                                           VectorValueMapT &ScalarMaps) {
705   Value *OpZero = Inst->getOperand(0);
706   Value *OpOne = Inst->getOperand(1);
707 
708   Value *NewOpZero, *NewOpOne;
709   NewOpZero = getVectorValue(OpZero, VectorMap, ScalarMaps);
710   NewOpOne = getVectorValue(OpOne, VectorMap, ScalarMaps);
711 
712   Value *NewInst = Builder.CreateBinOp(Inst->getOpcode(), NewOpZero, NewOpOne,
713                                        Inst->getName() + "p_vec");
714   VectorMap[Inst] = NewInst;
715 }
716 
717 void VectorBlockGenerator::copyStore(const StoreInst *Store,
718                                      ValueMapT &VectorMap,
719                                      VectorValueMapT &ScalarMaps) {
720   int VectorWidth = getVectorWidth();
721 
722   MemoryAccess &Access = Statement.getAccessFor(Store);
723 
724   const Value *Pointer = Store->getPointerOperand();
725   Value *Vector = getVectorValue(Store->getValueOperand(), VectorMap,
726                                    ScalarMaps);
727 
728   if (Access.isStrideOne(isl_map_copy(Schedule))) {
729     Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth);
730     Value *NewPointer = getNewValue(Pointer, ScalarMaps[0], GlobalMaps[0]);
731 
732     Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
733                                              "vector_ptr");
734     StoreInst *Store = Builder.CreateStore(Vector, VectorPtr);
735 
736     if (!Aligned)
737       Store->setAlignment(8);
738   } else {
739     for (unsigned i = 0; i < ScalarMaps.size(); i++) {
740       Value *Scalar = Builder.CreateExtractElement(Vector, Builder.getInt32(i));
741       Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i]);
742       Builder.CreateStore(Scalar, NewPointer);
743     }
744   }
745 }
746 
747 bool VectorBlockGenerator::hasVectorOperands(const Instruction *Inst,
748                                              ValueMapT &VectorMap) {
749   for (Instruction::const_op_iterator OI = Inst->op_begin(),
750        OE = Inst->op_end(); OI != OE; ++OI)
751     if (VectorMap.count(*OI))
752       return true;
753   return false;
754 }
755 
// For each operand of Inst that has a vector value, make its per-lane scalar
// values available in ScalarMaps by emitting extractelement instructions.
// Returns true if at least one operand had a vector value.
bool VectorBlockGenerator::extractScalarValues(const Instruction *Inst,
                                               ValueMapT &VectorMap,
                                               VectorValueMapT &ScalarMaps) {
  bool HasVectorOperand = false;
  int VectorWidth = getVectorWidth();

  for (Instruction::const_op_iterator OI = Inst->op_begin(),
       OE = Inst->op_end(); OI != OE; ++OI) {
    ValueMapT::iterator VecOp = VectorMap.find(*OI);

    if (VecOp == VectorMap.end())
      continue;

    HasVectorOperand = true;
    Value *NewVector = VecOp->second;

    for (int i = 0; i < VectorWidth; ++i) {
      ValueMapT &SM = ScalarMaps[i];

      // If one scalar was already extracted, all scalar elements were
      // extracted by the code here in an earlier call. So no need to check
      // for the existence of all of them.
      if (SM.count(*OI))
        break;

      SM[*OI] = Builder.CreateExtractElement(NewVector, Builder.getInt32(i));
    }
  }

  return HasVectorOperand;
}
787 
788 void VectorBlockGenerator::copyInstScalarized(const Instruction *Inst,
789                                               ValueMapT &VectorMap,
790                                               VectorValueMapT &ScalarMaps) {
791   bool HasVectorOperand;
792   int VectorWidth = getVectorWidth();
793 
794   HasVectorOperand = extractScalarValues(Inst, VectorMap, ScalarMaps);
795 
796   for (int VectorLane = 0; VectorLane < getVectorWidth(); VectorLane++)
797     copyInstScalar(Inst, ScalarMaps[VectorLane], GlobalMaps[VectorLane]);
798 
799   if (!VectorType::isValidElementType(Inst->getType()) || !HasVectorOperand)
800     return;
801 
802   // Make the result available as vector value.
803   VectorType *VectorType = VectorType::get(Inst->getType(), VectorWidth);
804   Value *Vector = UndefValue::get(VectorType);
805 
806   for (int i = 0; i < VectorWidth; i++)
807     Vector = Builder.CreateInsertElement(Vector, ScalarMaps[i][Inst],
808                                          Builder.getInt32(i));
809 
810   VectorMap[Inst] = Vector;
811 }
812 
// The vector width equals the number of per-lane global value maps.
int VectorBlockGenerator::getVectorWidth() {
  return GlobalMaps.size();
}
816 
// Copy a single instruction of the statement, choosing between true vector
// code (loads, casts, binary ops) and scalarized per-lane execution.
void VectorBlockGenerator::copyInstruction(const Instruction *Inst,
                                           ValueMapT &VectorMap,
                                           VectorValueMapT &ScalarMaps) {
  // Terminator instructions control the control flow. They are explicitly
  // expressed in the clast and do not need to be copied.
  if (Inst->isTerminator())
    return;

  // Instructions fully described by SCEV are regenerated on demand.
  if (isSCEVIgnore(Inst))
    return;

  if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
    generateLoad(Load, VectorMap, ScalarMaps);
    return;
  }

  // Only instructions that consume a vector value can themselves be emitted
  // as vector operations.
  if (hasVectorOperands(Inst, VectorMap)) {
    if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
      copyStore(Store, VectorMap, ScalarMaps);
      return;
    }

    if (const UnaryInstruction *Unary = dyn_cast<UnaryInstruction>(Inst)) {
      copyUnaryInst(Unary, VectorMap, ScalarMaps);
      return;
    }

    if (const BinaryOperator *Binary = dyn_cast<BinaryOperator>(Inst)) {
      copyBinaryInst(Binary, VectorMap, ScalarMaps);
      return;
    }

    // Fallthrough: We generate scalar instructions, if we don't know how to
    // generate vector code.
  }

  copyInstScalarized(Inst, VectorMap, ScalarMaps);
}
855 
// Copy the statement's basic block, generating vector code where possible.
void VectorBlockGenerator::copyBB() {
  BasicBlock *BB = Statement.getBasicBlock();
  BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(),
                                  Builder.GetInsertPoint(), P);
  CopyBB->setName("polly.stmt." + BB->getName());
  Builder.SetInsertPoint(CopyBB->begin());

  // Create two maps that store the mapping from the original instructions of
  // the old basic block to their copies in the new basic block. Those maps
  // are basic block local.
  //
  // As vector code generation is supported there is one map for scalar values
  // and one for vector values.
  //
  // In case we just do scalar code generation, the vectorMap is not used and
  // the scalarMap has just one dimension, which contains the mapping.
  //
  // In case vector code generation is done, an instruction may either appear
  // once in the vector map (as it is calculating >vectorwidth< values at a
  // time), or (if the values are calculated using scalar operations) it
  // appears once in every dimension of the scalarMap.
  VectorValueMapT ScalarBlockMap(getVectorWidth());
  ValueMapT VectorBlockMap;

  for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end();
       II != IE; ++II)
      copyInstruction(II, VectorBlockMap, ScalarBlockMap);
}
884