1 //===--- BlockGenerators.cpp - Generate code for statements -----*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the BlockGenerator and VectorBlockGenerator classes,
11 // which generate sequential code and vectorized code for a polyhedral
12 // statement, respectively.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "polly/ScopInfo.h"
17 #include "polly/CodeGen/CodeGeneration.h"
18 #include "polly/CodeGen/BlockGenerators.h"
19 #include "polly/Support/GICHelper.h"
20 
21 #include "llvm/Analysis/LoopInfo.h"
22 #include "llvm/Analysis/ScalarEvolution.h"
23 #include "llvm/Analysis/ScalarEvolutionExpander.h"
24 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
25 #include "llvm/Support/CommandLine.h"
26 
27 #include "isl/aff.h"
28 #include "isl/set.h"
29 
30 using namespace llvm;
31 using namespace polly;
32 
33 static cl::opt<bool>
34 Aligned("enable-polly-aligned",
35        cl::desc("Assumed aligned memory accesses."), cl::Hidden,
36        cl::value_desc("OpenMP code generation enabled if true"),
37        cl::init(false), cl::ZeroOrMore);
38 
39 static cl::opt<bool>
40 SCEVCodegen("polly-codegen-scev",
41             cl::desc("Use SCEV based code generation."), cl::Hidden,
42             cl::init(false), cl::ZeroOrMore);
43 
44 /// The SCEVRewriter takes a scalar evolution expression and updates the
45 /// following components:
46 ///
47 /// - SCEVUnknown
48 ///
49 ///   Values referenced in SCEVUnknown subexpressions are looked up in
50 ///   two Value to Value maps (GlobalMap and BBMap). If they are found they are
51 ///   replaced by a reference to the value they map to.
52 ///
53 /// - SCEVAddRecExpr
54 ///
55 ///   Based on a Loop -> Value map {Loop_1: %Value}, an expression
56 ///   {%Base, +, %Step}<Loop_1> is rewritten to %Base + %Value * %Step.
57 ///   AddRecExpr's with more than two operands can not be translated.
58 ///
59 ///   FIXME: The comment above is not yet reality. At the moment we derive
60 ///   %Value by looking up the canonical IV of the loop and by defining
61 ///   %Value = GlobalMap[%IV]. This needs to be changed to remove the need for
62 ///   canonical induction variables.
63 ///
64 ///
65 /// How can this be used?
66 /// ====================
67 ///
68 /// SCEVRewrite based code generation works on virtually independent blocks.
69 /// This means we do not run the independent blocks pass to rewrite scalar
70 /// instructions, but just ignore instructions that we can analyze with scalar
71 /// evolution. Virtually independent blocks are blocks that only reference the
72 /// following values:
73 ///
74 /// o Values calculated within a basic block
75 /// o Values representable by SCEV
76 ///
77 /// During code generation we can ignore all instructions:
78 ///
79 /// - Ignore all instructions except:
80 ///   - Load instructions
81 ///   - Instructions that reference operands already calculated within the
82 ///     basic block.
83 ///   - Store instructions
84 struct SCEVRewriter : public SCEVVisitor<SCEVRewriter, const SCEV*> {
85 public:
86   static const SCEV *rewrite(const SCEV *scev, Scop &S, ScalarEvolution &SE,
87                              ValueMapT &GlobalMap,  ValueMapT &BBMap) {
88     SCEVRewriter Rewriter(S, SE, GlobalMap, BBMap);
89     return Rewriter.visit(scev);
90   }
91 
92   SCEVRewriter(Scop &S, ScalarEvolution &SE, ValueMapT &GlobalMap,
93                ValueMapT &BBMap) : S(S), SE(SE), GlobalMap(GlobalMap),
94                BBMap(BBMap) {}
95 
96   const SCEV *visit(const SCEV *Expr) {
97     // FIXME: The parameter handling is incorrect.
98     //
99     // Polly does only detect parameters in Access function and loop iteration
100     // counters, but it does not get parameters that are just used by
101     // instructions within the basic block.
102     //
103     // There are two options to solve this:
104     //  o Iterate over all instructions of the SCoP and find the actual
105     //    parameters.
106     //  o Just check within the SCEVRewriter if Values lay outside of the SCoP
107     //    and detect parameters on the fly.
108     //
109     // This is especially important for OpenMP and GPGPU code generation, as
110     // they require us to detect and possibly rewrite the corresponding
111     // parameters.
112     if (isl_id *Id = S.getIdForParam(Expr)) {
113       isl_id_free(Id);
114       return Expr;
115     }
116 
117 
118     return SCEVVisitor<SCEVRewriter, const SCEV*>::visit(Expr);
119   }
120 
121   const SCEV *visitConstant(const SCEVConstant *Constant) {
122     return Constant;
123   }
124 
125   const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) {
126     const SCEV *Operand = visit(Expr->getOperand());
127     return SE.getTruncateExpr(Operand, Expr->getType());
128   }
129 
130   const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
131     const SCEV *Operand = visit(Expr->getOperand());
132     return SE.getZeroExtendExpr(Operand, Expr->getType());
133   }
134 
135   const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
136     const SCEV *Operand = visit(Expr->getOperand());
137     return SE.getSignExtendExpr(Operand, Expr->getType());
138   }
139 
140   const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
141     SmallVector<const SCEV *, 2> Operands;
142     for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
143       const SCEV *Operand = visit(Expr->getOperand(i));
144       Operands.push_back(Operand);
145     }
146 
147     return SE.getAddExpr(Operands);
148   }
149 
150   const SCEV *visitMulExpr(const SCEVMulExpr *Expr) {
151     SmallVector<const SCEV *, 2> Operands;
152     for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
153       const SCEV *Operand = visit(Expr->getOperand(i));
154       Operands.push_back(Operand);
155     }
156 
157     return SE.getMulExpr(Operands);
158   }
159 
160   const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) {
161     return SE.getUDivExpr(visit(Expr->getLHS()), visit(Expr->getRHS()));
162   }
163 
164   // Return a new induction variable if the loop is within the original SCoP
165   // or NULL otherwise.
166   Value *getNewIV(const Loop *L) {
167     Value *IV = L->getCanonicalInductionVariable();
168     if (!IV)
169       return NULL;
170 
171     ValueMapT::iterator NewIV = GlobalMap.find(IV);
172 
173     if (NewIV == GlobalMap.end())
174       return NULL;
175 
176     return NewIV->second;
177   }
178 
179   const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
180     Value *IV;
181 
182     IV = getNewIV(Expr->getLoop());
183 
184     // The IV is not within the GlobalMaps. So do not rewrite it and also do
185     // not rewrite any descendants.
186     if (!IV)
187       return Expr;
188 
189     assert(Expr->getNumOperands() == 2
190           && "An AddRecExpr with more than two operands can not be rewritten.");
191 
192     const SCEV *Base, *Step, *IVExpr, *Product;
193 
194     Base = visit(Expr->getStart());
195     Step = visit(Expr->getOperand(1));
196     IVExpr = SE.getUnknown(IV);
197     IVExpr = SE.getTruncateOrSignExtend(IVExpr, Step->getType());
198     Product = SE.getMulExpr(Step, IVExpr);
199 
200     return SE.getAddExpr(Base, Product);
201   }
202 
203   const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
204     SmallVector<const SCEV *, 2> Operands;
205     for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
206       const SCEV *Operand = visit(Expr->getOperand(i));
207       Operands.push_back(Operand);
208     }
209 
210     return SE.getSMaxExpr(Operands);
211   }
212 
213   const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) {
214     SmallVector<const SCEV *, 2> Operands;
215     for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
216       const SCEV *Operand = visit(Expr->getOperand(i));
217       Operands.push_back(Operand);
218     }
219 
220     return SE.getUMaxExpr(Operands);
221   }
222 
223   const SCEV *visitUnknown(const SCEVUnknown *Expr) {
224     Value *V = Expr->getValue();
225 
226     if (GlobalMap.count(V))
227       return SE.getUnknown(GlobalMap[V]);
228 
229     if (BBMap.count(V))
230       return SE.getUnknown(BBMap[V]);
231 
232     return Expr;
233   }
234 
235 private:
236   Scop &S;
237   ScalarEvolution &SE;
238   ValueMapT &GlobalMap;
239   ValueMapT &BBMap;
240 };
241 
// Helper class to generate memory location.
namespace {
/// Translates (piecewise) affine functions into LLVM-IR instructions,
/// evaluated at the given new induction variable values.
class IslGenerator {
public:
  /// @param Builder IRBuilder positioned where the code should be emitted.
  /// @param IVS     The new induction variable values, one per input
  ///                dimension of the affine functions passed in later.
  IslGenerator(IRBuilder<> &Builder, std::vector<Value *> &IVS) :
    Builder(Builder), IVS(IVS) {}
  /// Materialize an isl integer as an LLVM integer constant.
  Value *generateIslInt(__isl_take isl_int Int);
  /// Emit IR that computes the value of the affine function @p Aff.
  Value *generateIslAff(__isl_take isl_aff *Aff);
  /// Emit IR that computes the value of the piecewise affine function
  /// @p PwAff. Currently only a single universe piece is supported.
  Value *generateIslPwAff(__isl_take isl_pw_aff *PwAff);

private:
  // Carries the result and a back-pointer to the generator through the
  // C-style callback of isl_pw_aff_foreach_piece.
  typedef struct {
    Value *Result;
    class IslGenerator *Generator;
  } IslGenInfo;

  IRBuilder<> &Builder;
  std::vector<Value *> &IVS;
  // Callback for isl_pw_aff_foreach_piece; 'User' must be an IslGenInfo.
  static int mergeIslAffValues(__isl_take isl_set *Set,
                               __isl_take isl_aff *Aff, void *User);
};
}
264 
265 
266 Value *IslGenerator::generateIslInt(isl_int Int) {
267   mpz_t IntMPZ;
268   mpz_init(IntMPZ);
269   isl_int_get_gmp(Int, IntMPZ);
270   Value *IntValue = Builder.getInt(APInt_from_MPZ(IntMPZ));
271   mpz_clear(IntMPZ);
272   return IntValue;
273 }
274 
275 Value *IslGenerator::generateIslAff(__isl_take isl_aff *Aff) {
276   Value *Result;
277   Value *ConstValue;
278   isl_int ConstIsl;
279 
280   isl_int_init(ConstIsl);
281   isl_aff_get_constant(Aff, &ConstIsl);
282   ConstValue = generateIslInt(ConstIsl);
283   Type *Ty = Builder.getInt64Ty();
284 
285   // FIXME: We should give the constant and coefficients the right type. Here
286   // we force it into i64.
287   Result = Builder.CreateSExtOrBitCast(ConstValue, Ty);
288 
289   unsigned int NbInputDims = isl_aff_dim(Aff, isl_dim_in);
290 
291   assert((IVS.size() == NbInputDims) && "The Dimension of Induction Variables"
292          "must match the dimension of the affine space.");
293 
294   isl_int CoefficientIsl;
295   isl_int_init(CoefficientIsl);
296 
297   for (unsigned int i = 0; i < NbInputDims; ++i) {
298     Value *CoefficientValue;
299     isl_aff_get_coefficient(Aff, isl_dim_in, i, &CoefficientIsl);
300 
301     if (isl_int_is_zero(CoefficientIsl))
302       continue;
303 
304     CoefficientValue = generateIslInt(CoefficientIsl);
305     CoefficientValue = Builder.CreateIntCast(CoefficientValue, Ty, true);
306     Value *IV = Builder.CreateIntCast(IVS[i], Ty, true);
307     Value *PAdd = Builder.CreateMul(CoefficientValue, IV, "p_mul_coeff");
308     Result = Builder.CreateAdd(Result, PAdd, "p_sum_coeff");
309   }
310 
311   isl_int_clear(CoefficientIsl);
312   isl_int_clear(ConstIsl);
313   isl_aff_free(Aff);
314 
315   return Result;
316 }
317 
318 int IslGenerator::mergeIslAffValues(__isl_take isl_set *Set,
319                                     __isl_take isl_aff *Aff, void *User) {
320   IslGenInfo *GenInfo = (IslGenInfo *)User;
321 
322   assert((GenInfo->Result == NULL) && "Result is already set."
323          "Currently only single isl_aff is supported");
324   assert(isl_set_plain_is_universe(Set)
325          && "Code generation failed because the set is not universe");
326 
327   GenInfo->Result = GenInfo->Generator->generateIslAff(Aff);
328 
329   isl_set_free(Set);
330   return 0;
331 }
332 
333 Value *IslGenerator::generateIslPwAff(__isl_take isl_pw_aff *PwAff) {
334   IslGenInfo User;
335   User.Result = NULL;
336   User.Generator = this;
337   isl_pw_aff_foreach_piece(PwAff, mergeIslAffValues, &User);
338   assert(User.Result && "Code generation for isl_pw_aff failed");
339 
340   isl_pw_aff_free(PwAff);
341   return User.Result;
342 }
343 
344 
// Create a generator for the statement 'Stmt' that emits code at the current
// insert point of builder 'B'. ScalarEvolution is obtained from pass 'P'.
BlockGenerator::BlockGenerator(IRBuilder<> &B, ScopStmt &Stmt, Pass *P):
  Builder(B), Statement(Stmt), P(P), SE(P->getAnalysis<ScalarEvolution>()) {}
347 
348 bool BlockGenerator::isSCEVIgnore(const Instruction *Inst) {
349   if (SCEVCodegen && SE.isSCEVable(Inst->getType()))
350     if (const SCEV *Scev = SE.getSCEV(const_cast<Instruction*>(Inst)))
351       if (!isa<SCEVCouldNotCompute>(Scev)) {
352         if (const SCEVUnknown *Unknown = dyn_cast<SCEVUnknown>(Scev)) {
353           if (Unknown->getValue() != Inst)
354             return true;
355         } else {
356           return true;
357         }
358       }
359 
360   return false;
361 }
362 
363 Value *BlockGenerator::getNewValue(const Value *Old, ValueMapT &BBMap,
364                                    ValueMapT &GlobalMap) {
365   // We assume constants never change.
366   // This avoids map lookups for many calls to this function.
367   if (isa<Constant>(Old))
368     return const_cast<Value*>(Old);
369 
370   if (GlobalMap.count(Old)) {
371     Value *New = GlobalMap[Old];
372 
373     if (Old->getType()->getScalarSizeInBits()
374         < New->getType()->getScalarSizeInBits())
375       New = Builder.CreateTruncOrBitCast(New, Old->getType());
376 
377     return New;
378   }
379 
380   if (BBMap.count(Old)) {
381     return BBMap[Old];
382   }
383 
384   if (SCEVCodegen && SE.isSCEVable(Old->getType()))
385     if (const SCEV *Scev = SE.getSCEV(const_cast<Value*>(Old)))
386       if (!isa<SCEVCouldNotCompute>(Scev)) {
387         const SCEV *NewScev = SCEVRewriter::rewrite(Scev,
388                                                     *Statement.getParent(), SE,
389                                                     GlobalMap, BBMap);
390         SCEVExpander Expander(SE, "polly");
391         Value *Expanded = Expander.expandCodeFor(NewScev, Old->getType(),
392                                                  Builder.GetInsertPoint());
393 
394         BBMap[Old] = Expanded;
395         return Expanded;
396       }
397 
398   // 'Old' is within the original SCoP, but was not rewritten.
399   //
400   // Such values appear, if they only calculate information already available in
401   // the polyhedral description (e.g.  an induction variable increment). They
402   // can be safely ignored.
403   if (const Instruction *Inst = dyn_cast<Instruction>(Old))
404     if (Statement.getParent()->getRegion().contains(Inst->getParent()))
405       return NULL;
406 
407   // Everything else is probably a scop-constant value defined as global,
408   // function parameter or an instruction not within the scop.
409   return const_cast<Value*>(Old);
410 }
411 
412 void BlockGenerator::copyInstScalar(const Instruction *Inst, ValueMapT &BBMap,
413                                     ValueMapT &GlobalMap) {
414   Instruction *NewInst = Inst->clone();
415 
416   // Replace old operands with the new ones.
417   for (Instruction::const_op_iterator OI = Inst->op_begin(),
418        OE = Inst->op_end(); OI != OE; ++OI) {
419     Value *OldOperand = *OI;
420     Value *NewOperand = getNewValue(OldOperand, BBMap, GlobalMap);
421 
422     if (!NewOperand) {
423       assert(!isa<StoreInst>(NewInst)
424              && "Store instructions are always needed!");
425       delete NewInst;
426       return;
427     }
428 
429     NewInst->replaceUsesOfWith(OldOperand, NewOperand);
430   }
431 
432   Builder.Insert(NewInst);
433   BBMap[Inst] = NewInst;
434 
435   if (!NewInst->getType()->isVoidTy())
436     NewInst->setName("p_" + Inst->getName());
437 }
438 
439 std::vector<Value*> BlockGenerator::getMemoryAccessIndex(
440   __isl_keep isl_map *AccessRelation, Value *BaseAddress,
441   ValueMapT &BBMap, ValueMapT &GlobalMap) {
442 
443   assert((isl_map_dim(AccessRelation, isl_dim_out) == 1)
444          && "Only single dimensional access functions supported");
445 
446   std::vector<Value *> IVS;
447   for (unsigned i = 0; i < Statement.getNumIterators(); ++i) {
448     const Value *OriginalIV = Statement.getInductionVariableForDimension(i);
449     Value *NewIV = getNewValue(OriginalIV, BBMap, GlobalMap);
450     IVS.push_back(NewIV);
451   }
452 
453   isl_pw_aff *PwAff = isl_map_dim_max(isl_map_copy(AccessRelation), 0);
454   IslGenerator IslGen(Builder, IVS);
455   Value *OffsetValue = IslGen.generateIslPwAff(PwAff);
456 
457   Type *Ty = Builder.getInt64Ty();
458   OffsetValue = Builder.CreateIntCast(OffsetValue, Ty, true);
459 
460   std::vector<Value*> IndexArray;
461   Value *NullValue = Constant::getNullValue(Ty);
462   IndexArray.push_back(NullValue);
463   IndexArray.push_back(OffsetValue);
464   return IndexArray;
465 }
466 
467 Value *BlockGenerator::getNewAccessOperand(
468   __isl_keep isl_map *NewAccessRelation, Value *BaseAddress,
469   ValueMapT &BBMap, ValueMapT &GlobalMap) {
470   std::vector<Value*> IndexArray = getMemoryAccessIndex(NewAccessRelation,
471                                                         BaseAddress,
472                                                         BBMap, GlobalMap);
473   Value *NewOperand = Builder.CreateGEP(BaseAddress, IndexArray,
474                                         "p_newarrayidx_");
475   return NewOperand;
476 }
477 
478 Value *BlockGenerator::generateLocationAccessed(const Instruction *Inst,
479                                                 const Value *Pointer,
480                                                 ValueMapT &BBMap,
481                                                 ValueMapT &GlobalMap) {
482   MemoryAccess &Access = Statement.getAccessFor(Inst);
483   isl_map *CurrentAccessRelation = Access.getAccessRelation();
484   isl_map *NewAccessRelation = Access.getNewAccessRelation();
485 
486   assert(isl_map_has_equal_space(CurrentAccessRelation, NewAccessRelation)
487          && "Current and new access function use different spaces");
488 
489   Value *NewPointer;
490 
491   if (!NewAccessRelation) {
492     NewPointer = getNewValue(Pointer, BBMap, GlobalMap);
493   } else {
494     Value *BaseAddress = const_cast<Value*>(Access.getBaseAddr());
495     NewPointer = getNewAccessOperand(NewAccessRelation, BaseAddress,
496                                      BBMap, GlobalMap);
497   }
498 
499   isl_map_free(CurrentAccessRelation);
500   isl_map_free(NewAccessRelation);
501   return NewPointer;
502 }
503 
504 Value *BlockGenerator::generateScalarLoad(const LoadInst *Load,
505                                           ValueMapT &BBMap,
506                                           ValueMapT &GlobalMap) {
507   const Value *Pointer = Load->getPointerOperand();
508   const Instruction *Inst = dyn_cast<Instruction>(Load);
509   Value *NewPointer = generateLocationAccessed(Inst, Pointer, BBMap, GlobalMap);
510   Value *ScalarLoad = Builder.CreateLoad(NewPointer,
511                                          Load->getName() + "_p_scalar_");
512   return ScalarLoad;
513 }
514 
515 Value *BlockGenerator::generateScalarStore(const StoreInst *Store,
516                                            ValueMapT &BBMap,
517                                            ValueMapT &GlobalMap) {
518   const Value *Pointer = Store->getPointerOperand();
519   Value *NewPointer = generateLocationAccessed(Store, Pointer, BBMap,
520                                                GlobalMap);
521   Value *ValueOperand = getNewValue(Store->getValueOperand(), BBMap, GlobalMap);
522 
523   return Builder.CreateStore(ValueOperand, NewPointer);
524 }
525 
526 void BlockGenerator::copyInstruction(const Instruction *Inst,
527                                      ValueMapT &BBMap, ValueMapT &GlobalMap) {
528   // Terminator instructions control the control flow. They are explicitly
529   // expressed in the clast and do not need to be copied.
530   if (Inst->isTerminator())
531     return;
532 
533   if (isSCEVIgnore(Inst))
534     return;
535 
536   if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
537     BBMap[Load] = generateScalarLoad(Load, BBMap, GlobalMap);
538     return;
539   }
540 
541   if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
542     BBMap[Store] = generateScalarStore(Store, BBMap, GlobalMap);
543     return;
544   }
545 
546   copyInstScalar(Inst, BBMap, GlobalMap);
547 }
548 
549 
550 void BlockGenerator::copyBB(ValueMapT &GlobalMap) {
551   BasicBlock *BB = Statement.getBasicBlock();
552   BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(),
553                                   Builder.GetInsertPoint(), P);
554   CopyBB->setName("polly.stmt." + BB->getName());
555   Builder.SetInsertPoint(CopyBB->begin());
556 
557   ValueMapT BBMap;
558 
559   for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE;
560        ++II)
561       copyInstruction(II, BBMap, GlobalMap);
562 }
563 
// Create a generator that copies the statement 'Stmt' for several vector
// lanes at once. 'GlobalMaps' holds one value map per lane (its size defines
// the vector width, see getVectorWidth()); 'Domain' is the statement's
// iteration domain, later used for the stride queries. The isl_set is only
// kept, not taken.
VectorBlockGenerator::VectorBlockGenerator(IRBuilder<> &B,
  VectorValueMapT &GlobalMaps, ScopStmt &Stmt, __isl_keep isl_set *Domain,
  Pass *P) : BlockGenerator(B, Stmt, P), GlobalMaps(GlobalMaps),
  Domain(Domain) {
    assert(GlobalMaps.size() > 1 && "Only one vector lane found");
    assert(Domain && "No statement domain provided");
  }
571 
572 Value *VectorBlockGenerator::getVectorValue(const Value *Old,
573                                             ValueMapT &VectorMap,
574                                             VectorValueMapT &ScalarMaps) {
575   if (VectorMap.count(Old))
576     return VectorMap[Old];
577 
578   int Width = getVectorWidth();
579 
580   Value *Vector = UndefValue::get(VectorType::get(Old->getType(), Width));
581 
582   for (int Lane = 0; Lane < Width; Lane++)
583     Vector = Builder.CreateInsertElement(Vector,
584                                          getNewValue(Old,
585                                                      ScalarMaps[Lane],
586                                                      GlobalMaps[Lane]),
587                                          Builder.getInt32(Lane));
588 
589   VectorMap[Old] = Vector;
590 
591   return Vector;
592 }
593 
594 Type *VectorBlockGenerator::getVectorPtrTy(const Value *Val, int Width) {
595   PointerType *PointerTy = dyn_cast<PointerType>(Val->getType());
596   assert(PointerTy && "PointerType expected");
597 
598   Type *ScalarType = PointerTy->getElementType();
599   VectorType *VectorType = VectorType::get(ScalarType, Width);
600 
601   return PointerType::getUnqual(VectorType);
602 }
603 
604 Value *VectorBlockGenerator::generateStrideOneLoad(const LoadInst *Load,
605                                                    ValueMapT &BBMap) {
606   const Value *Pointer = Load->getPointerOperand();
607   Type *VectorPtrType = getVectorPtrTy(Pointer, getVectorWidth());
608   Value *NewPointer = getNewValue(Pointer, BBMap, GlobalMaps[0]);
609   Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
610                                            "vector_ptr");
611   LoadInst *VecLoad = Builder.CreateLoad(VectorPtr,
612                                          Load->getName() + "_p_vec_full");
613   if (!Aligned)
614     VecLoad->setAlignment(8);
615 
616   return VecLoad;
617 }
618 
619 Value *VectorBlockGenerator::generateStrideZeroLoad(const LoadInst *Load,
620                                                     ValueMapT &BBMap) {
621   const Value *Pointer = Load->getPointerOperand();
622   Type *VectorPtrType = getVectorPtrTy(Pointer, 1);
623   Value *NewPointer = getNewValue(Pointer, BBMap, GlobalMaps[0]);
624   Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
625                                            Load->getName() + "_p_vec_p");
626   LoadInst *ScalarLoad= Builder.CreateLoad(VectorPtr,
627                                            Load->getName() + "_p_splat_one");
628 
629   if (!Aligned)
630     ScalarLoad->setAlignment(8);
631 
632   Constant *SplatVector =
633     Constant::getNullValue(VectorType::get(Builder.getInt32Ty(),
634                                            getVectorWidth()));
635 
636   Value *VectorLoad = Builder.CreateShuffleVector(ScalarLoad, ScalarLoad,
637                                                   SplatVector,
638                                                   Load->getName()
639                                                   + "_p_splat");
640   return VectorLoad;
641 }
642 
643 Value *VectorBlockGenerator::generateUnknownStrideLoad(const LoadInst *Load,
644   VectorValueMapT &ScalarMaps) {
645   int VectorWidth = getVectorWidth();
646   const Value *Pointer = Load->getPointerOperand();
647   VectorType *VectorType = VectorType::get(
648     dyn_cast<PointerType>(Pointer->getType())->getElementType(), VectorWidth);
649 
650   Value *Vector = UndefValue::get(VectorType);
651 
652   for (int i = 0; i < VectorWidth; i++) {
653     Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i]);
654     Value *ScalarLoad = Builder.CreateLoad(NewPointer,
655                                            Load->getName() + "_p_scalar_");
656     Vector = Builder.CreateInsertElement(Vector, ScalarLoad,
657                                          Builder.getInt32(i),
658                                          Load->getName() + "_p_vec_");
659   }
660 
661   return Vector;
662 }
663 
664 void VectorBlockGenerator::generateLoad(const LoadInst *Load,
665                                         ValueMapT &VectorMap,
666                                         VectorValueMapT &ScalarMaps) {
667   if (PollyVectorizerChoice >= VECTORIZER_FIRST_NEED_GROUPED_UNROLL ||
668       !VectorType::isValidElementType(Load->getType())) {
669     for (int i = 0; i < getVectorWidth(); i++)
670       ScalarMaps[i][Load] = generateScalarLoad(Load, ScalarMaps[i],
671                                                GlobalMaps[i]);
672     return;
673   }
674 
675   MemoryAccess &Access = Statement.getAccessFor(Load);
676 
677   Value *NewLoad;
678   if (Access.isStrideZero(isl_set_copy(Domain)))
679     NewLoad = generateStrideZeroLoad(Load, ScalarMaps[0]);
680   else if (Access.isStrideOne(isl_set_copy(Domain)))
681     NewLoad = generateStrideOneLoad(Load, ScalarMaps[0]);
682   else
683     NewLoad = generateUnknownStrideLoad(Load, ScalarMaps);
684 
685   VectorMap[Load] = NewLoad;
686 }
687 
688 void VectorBlockGenerator::copyUnaryInst(const UnaryInstruction *Inst,
689                                          ValueMapT &VectorMap,
690                                          VectorValueMapT &ScalarMaps) {
691   int VectorWidth = getVectorWidth();
692   Value *NewOperand = getVectorValue(Inst->getOperand(0), VectorMap,
693                                      ScalarMaps);
694 
695   assert(isa<CastInst>(Inst) && "Can not generate vector code for instruction");
696 
697   const CastInst *Cast = dyn_cast<CastInst>(Inst);
698   VectorType *DestType = VectorType::get(Inst->getType(), VectorWidth);
699   VectorMap[Inst] = Builder.CreateCast(Cast->getOpcode(), NewOperand, DestType);
700 }
701 
702 void VectorBlockGenerator::copyBinaryInst(const BinaryOperator *Inst,
703                                           ValueMapT &VectorMap,
704                                           VectorValueMapT &ScalarMaps) {
705   Value *OpZero = Inst->getOperand(0);
706   Value *OpOne = Inst->getOperand(1);
707 
708   Value *NewOpZero, *NewOpOne;
709   NewOpZero = getVectorValue(OpZero, VectorMap, ScalarMaps);
710   NewOpOne = getVectorValue(OpOne, VectorMap, ScalarMaps);
711 
712   Value *NewInst = Builder.CreateBinOp(Inst->getOpcode(), NewOpZero,
713                                        NewOpOne,
714                                        Inst->getName() + "p_vec");
715   VectorMap[Inst] = NewInst;
716 }
717 
718 void VectorBlockGenerator::copyStore(const StoreInst *Store,
719                                      ValueMapT &VectorMap,
720                                      VectorValueMapT &ScalarMaps) {
721   int VectorWidth = getVectorWidth();
722 
723   MemoryAccess &Access = Statement.getAccessFor(Store);
724 
725   const Value *Pointer = Store->getPointerOperand();
726   Value *Vector = getVectorValue(Store->getValueOperand(), VectorMap,
727                                    ScalarMaps);
728 
729   if (Access.isStrideOne(isl_set_copy(Domain))) {
730     Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth);
731     Value *NewPointer = getNewValue(Pointer, ScalarMaps[0], GlobalMaps[0]);
732 
733     Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
734                                              "vector_ptr");
735     StoreInst *Store = Builder.CreateStore(Vector, VectorPtr);
736 
737     if (!Aligned)
738       Store->setAlignment(8);
739   } else {
740     for (unsigned i = 0; i < ScalarMaps.size(); i++) {
741       Value *Scalar = Builder.CreateExtractElement(Vector,
742                                                    Builder.getInt32(i));
743       Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i]);
744       Builder.CreateStore(Scalar, NewPointer);
745     }
746   }
747 }
748 
749 bool VectorBlockGenerator::hasVectorOperands(const Instruction *Inst,
750                                              ValueMapT &VectorMap) {
751   for (Instruction::const_op_iterator OI = Inst->op_begin(),
752        OE = Inst->op_end(); OI != OE; ++OI)
753     if (VectorMap.count(*OI))
754       return true;
755   return false;
756 }
757 
// Extract the per-lane scalar values of every vector operand of 'Inst' into
// the lane-local scalar maps, so the instruction can be replayed as scalar
// code. Returns true iff at least one operand was found in VectorMap.
bool VectorBlockGenerator::extractScalarValues(const Instruction *Inst,
                                               ValueMapT &VectorMap,
                                               VectorValueMapT &ScalarMaps) {
  bool HasVectorOperand = false;
  int VectorWidth = getVectorWidth();

  for (Instruction::const_op_iterator OI = Inst->op_begin(),
       OE = Inst->op_end(); OI != OE; ++OI) {
    // Operands without a vector value need no extraction.
    ValueMapT::iterator VecOp = VectorMap.find(*OI);

    if (VecOp == VectorMap.end())
      continue;

    HasVectorOperand = true;
    Value *NewVector = VecOp->second;

    for (int i = 0; i < VectorWidth; ++i) {
      ValueMapT &SM = ScalarMaps[i];

      // If there is one scalar extracted, all scalar elements should have
      // already been extracted by the code here. So no need to check for the
      // existence of all of them.
      if (SM.count(*OI))
        break;

      SM[*OI] = Builder.CreateExtractElement(NewVector, Builder.getInt32(i));
    }
  }

  return HasVectorOperand;
}
789 
790 void VectorBlockGenerator::copyInstScalarized(const Instruction *Inst,
791                                               ValueMapT &VectorMap,
792                                               VectorValueMapT &ScalarMaps) {
793   bool HasVectorOperand;
794   int VectorWidth = getVectorWidth();
795 
796   HasVectorOperand = extractScalarValues(Inst, VectorMap, ScalarMaps);
797 
798   for (int VectorLane = 0; VectorLane < getVectorWidth(); VectorLane++)
799     copyInstScalar(Inst, ScalarMaps[VectorLane], GlobalMaps[VectorLane]);
800 
801   if (!VectorType::isValidElementType(Inst->getType()) || !HasVectorOperand)
802     return;
803 
804   // Make the result available as vector value.
805   VectorType *VectorType = VectorType::get(Inst->getType(), VectorWidth);
806   Value *Vector = UndefValue::get(VectorType);
807 
808   for (int i = 0; i < VectorWidth; i++)
809     Vector = Builder.CreateInsertElement(Vector, ScalarMaps[i][Inst],
810                                          Builder.getInt32(i));
811 
812   VectorMap[Inst] = Vector;
813 }
814 
int VectorBlockGenerator::getVectorWidth() {
  // One global value map exists per vector lane, so the number of maps
  // equals the vector width.
  return GlobalMaps.size();
}
818 
819 void VectorBlockGenerator::copyInstruction(const Instruction *Inst,
820                                            ValueMapT &VectorMap,
821                                            VectorValueMapT &ScalarMaps) {
822   // Terminator instructions control the control flow. They are explicitly
823   // expressed in the clast and do not need to be copied.
824   if (Inst->isTerminator())
825     return;
826 
827   if (isSCEVIgnore(Inst))
828     return;
829 
830   if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
831     generateLoad(Load, VectorMap, ScalarMaps);
832     return;
833   }
834 
835   if (hasVectorOperands(Inst, VectorMap)) {
836     if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
837       copyStore(Store, VectorMap, ScalarMaps);
838       return;
839     }
840 
841     if (const UnaryInstruction *Unary = dyn_cast<UnaryInstruction>(Inst)) {
842       copyUnaryInst(Unary, VectorMap, ScalarMaps);
843       return;
844     }
845 
846     if (const BinaryOperator *Binary = dyn_cast<BinaryOperator>(Inst)) {
847       copyBinaryInst(Binary, VectorMap, ScalarMaps);
848       return;
849     }
850 
851     // Falltrough: We generate scalar instructions, if we don't know how to
852     // generate vector code.
853   }
854 
855   copyInstScalarized(Inst, VectorMap, ScalarMaps);
856 }
857 
858 void VectorBlockGenerator::copyBB() {
859   BasicBlock *BB = Statement.getBasicBlock();
860   BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(),
861                                   Builder.GetInsertPoint(), P);
862   CopyBB->setName("polly.stmt." + BB->getName());
863   Builder.SetInsertPoint(CopyBB->begin());
864 
865   // Create two maps that store the mapping from the original instructions of
866   // the old basic block to their copies in the new basic block. Those maps
867   // are basic block local.
868   //
869   // As vector code generation is supported there is one map for scalar values
870   // and one for vector values.
871   //
872   // In case we just do scalar code generation, the vectorMap is not used and
873   // the scalarMap has just one dimension, which contains the mapping.
874   //
875   // In case vector code generation is done, an instruction may either appear
876   // in the vector map once (as it is calculating >vectorwidth< values at a
877   // time. Or (if the values are calculated using scalar operations), it
878   // appears once in every dimension of the scalarMap.
879   VectorValueMapT ScalarBlockMap(getVectorWidth());
880   ValueMapT VectorBlockMap;
881 
882   for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end();
883        II != IE; ++II)
884       copyInstruction(II, VectorBlockMap, ScalarBlockMap);
885 }
886