//===--- BlockGenerators.cpp - Generate code for statements -----*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the BlockGenerator and VectorBlockGenerator classes,
// which generate sequential code and vectorized code for a polyhedral
// statement, respectively.
//
//===----------------------------------------------------------------------===//

#include "polly/ScopInfo.h"
#include "polly/CodeGen/CodeGeneration.h"
#include "polly/CodeGen/BlockGenerators.h"
#include "polly/Support/GICHelper.h"

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Support/CommandLine.h"

#include "isl/aff.h"
#include "isl/set.h"

using namespace llvm;
using namespace polly;

static cl::opt<bool>
Aligned("enable-polly-aligned",
        cl::desc("Assume memory accesses to be aligned."),
        cl::Hidden, cl::init(false), cl::ZeroOrMore);

static cl::opt<bool>
SCEVCodegen("polly-codegen-scev", cl::desc("Use SCEV based code generation."),
            cl::Hidden, cl::init(false), cl::ZeroOrMore);

/// The SCEVRewriter takes a scalar evolution expression and updates the
/// following components:
///
/// - SCEVUnknown
///
///   Values referenced in SCEVUnknown subexpressions are looked up in
///   two Value to Value maps (GlobalMap and BBMap). If they are found they are
///   replaced by a reference to the value they map to.
///
/// - SCEVAddRecExpr
///
///   Based on a Loop -> Value map {Loop_1: %Value}, an expression
///   {%Base, +, %Step}<Loop_1> is rewritten to %Base + %Value * %Step.
///   AddRecExprs with more than two operands cannot be translated.
///
///   FIXME: The comment above is not yet reality. At the moment we derive
///   %Value by looking up the canonical IV of the loop and by defining
///   %Value = GlobalMap[%IV]. This needs to be changed to remove the need for
///   canonical induction variables.
///
///
/// How can this be used?
/// =====================
///
/// SCEVRewriter-based code generation works on virtually independent blocks.
/// This means we do not run the independent blocks pass to rewrite scalar
/// instructions, but just ignore instructions that we can analyze with scalar
/// evolution. Virtually independent blocks are blocks that only reference the
/// following values:
///
/// o Values calculated within a basic block
/// o Values representable by SCEV
///
/// During code generation we can consequently ignore all instructions except:
///
///   - Load instructions
///   - Instructions that reference operands already calculated within the
///     basic block
///   - Store instructions
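///
/// An illustrative example (all names are assumed, not taken from real
/// input): with GlobalMap = {%iv -> %polly.iv}, where %iv is the canonical
/// IV of Loop_1, the expression {%base, +, 4}<Loop_1> is rewritten to
/// %base + %polly.iv * 4, which SCEVExpander can then expand into code at
/// the current insert point.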
struct SCEVRewriter : public SCEVVisitor<SCEVRewriter, const SCEV *> {
public:
  static const SCEV *rewrite(const SCEV *scev, Scop &S, ScalarEvolution &SE,
                             ValueMapT &GlobalMap, ValueMapT &BBMap) {
    SCEVRewriter Rewriter(S, SE, GlobalMap, BBMap);
    return Rewriter.visit(scev);
  }

  SCEVRewriter(Scop &S, ScalarEvolution &SE, ValueMapT &GlobalMap,
               ValueMapT &BBMap) : S(S), SE(SE), GlobalMap(GlobalMap),
               BBMap(BBMap) {}

  const SCEV *visit(const SCEV *Expr) {
    // FIXME: The parameter handling is incorrect.
    //
    // Polly only detects parameters in access functions and loop iteration
    // counters, but it does not find parameters that are merely used by
    // instructions within the basic block.
    //
    // There are two options to solve this:
    //  o Iterate over all instructions of the SCoP and find the actual
    //    parameters.
    //  o Just check within the SCEVRewriter if Values lie outside of the SCoP
    //    and detect parameters on the fly.
    //
    // This is especially important for OpenMP and GPGPU code generation, as
    // they require us to detect and possibly rewrite the corresponding
    // parameters.
    if (isl_id *Id = S.getIdForParam(Expr)) {
      isl_id_free(Id);
      return Expr;
    }

    return SCEVVisitor<SCEVRewriter, const SCEV *>::visit(Expr);
  }

  const SCEV *visitConstant(const SCEVConstant *Constant) { return Constant; }

  const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) {
    const SCEV *Operand = visit(Expr->getOperand());
    return SE.getTruncateExpr(Operand, Expr->getType());
  }

  const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
    const SCEV *Operand = visit(Expr->getOperand());
    return SE.getZeroExtendExpr(Operand, Expr->getType());
  }

  const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
    const SCEV *Operand = visit(Expr->getOperand());
    return SE.getSignExtendExpr(Operand, Expr->getType());
  }

  const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
    SmallVector<const SCEV *, 2> Operands;
    for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
      const SCEV *Operand = visit(Expr->getOperand(i));
      Operands.push_back(Operand);
    }

    return SE.getAddExpr(Operands);
  }

  const SCEV *visitMulExpr(const SCEVMulExpr *Expr) {
    SmallVector<const SCEV *, 2> Operands;
    for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
      const SCEV *Operand = visit(Expr->getOperand(i));
      Operands.push_back(Operand);
    }

    return SE.getMulExpr(Operands);
  }

  const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) {
    return SE.getUDivExpr(visit(Expr->getLHS()), visit(Expr->getRHS()));
  }

  // Return a new induction variable if the loop is within the original SCoP
  // or NULL otherwise.
  Value *getNewIV(const Loop *L) {
    Value *IV = L->getCanonicalInductionVariable();
    if (!IV)
      return NULL;

    ValueMapT::iterator NewIV = GlobalMap.find(IV);

    if (NewIV == GlobalMap.end())
      return NULL;

    return NewIV->second;
  }

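  // Illustration (assumed names): for {%Base, +, %Step}<Loop_1> with
  // getNewIV(Loop_1) = %NewIV, visitAddRecExpr produces the SCEV
  // %Base + %NewIV * %Step. If the loop has no mapped new IV, the
  // expression is returned unchanged.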
  const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
    Value *IV;

    IV = getNewIV(Expr->getLoop());

    // The IV is not within the GlobalMaps. So do not rewrite it and also do
    // not rewrite any descendants.
    if (!IV)
      return Expr;

    assert(Expr->getNumOperands() == 2 &&
           "An AddRecExpr with more than two operands cannot be rewritten.");

    const SCEV *Base, *Step, *IVExpr, *Product;

    Base = visit(Expr->getStart());
    Step = visit(Expr->getOperand(1));
    IVExpr = SE.getUnknown(IV);
    IVExpr = SE.getTruncateOrSignExtend(IVExpr, Step->getType());
    Product = SE.getMulExpr(Step, IVExpr);

    return SE.getAddExpr(Base, Product);
  }

  const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
    SmallVector<const SCEV *, 2> Operands;
    for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
      const SCEV *Operand = visit(Expr->getOperand(i));
      Operands.push_back(Operand);
    }

    return SE.getSMaxExpr(Operands);
  }

  const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) {
    SmallVector<const SCEV *, 2> Operands;
    for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
      const SCEV *Operand = visit(Expr->getOperand(i));
      Operands.push_back(Operand);
    }

    return SE.getUMaxExpr(Operands);
  }

  const SCEV *visitUnknown(const SCEVUnknown *Expr) {
    Value *V = Expr->getValue();

    if (GlobalMap.count(V))
      return SE.getUnknown(GlobalMap[V]);

    if (BBMap.count(V))
      return SE.getUnknown(BBMap[V]);

    return Expr;
  }

private:
  Scop &S;
  ScalarEvolution &SE;
  ValueMapT &GlobalMap;
  ValueMapT &BBMap;
};

// Helper class to generate memory locations.
namespace {
class IslGenerator {
public:
  IslGenerator(IRBuilder<> &Builder, std::vector<Value *> &IVS) :
    Builder(Builder), IVS(IVS) {}
  Value *generateIslInt(__isl_take isl_int Int);
  Value *generateIslAff(__isl_take isl_aff *Aff);
  Value *generateIslPwAff(__isl_take isl_pw_aff *PwAff);

private:
  typedef struct {
    Value *Result;
    class IslGenerator *Generator;
  } IslGenInfo;

  IRBuilder<> &Builder;
  std::vector<Value *> &IVS;
  static int mergeIslAffValues(__isl_take isl_set *Set, __isl_take isl_aff *Aff,
                               void *User);
};
}

Value *IslGenerator::generateIslInt(isl_int Int) {
  mpz_t IntMPZ;
  mpz_init(IntMPZ);
  isl_int_get_gmp(Int, IntMPZ);
  Value *IntValue = Builder.getInt(APInt_from_MPZ(IntMPZ));
  mpz_clear(IntMPZ);
  return IntValue;
}

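// Illustration (hypothetical input): for the affine function
// [i, j] -> 2 * i + 3 * j + 5 with IVS = {%i, %j}, generateIslAff emits
// roughly the following IR (value names depend on IRBuilder renaming):
//
//   %p_mul_coeff  = mul i64 2, %i
//   %p_sum_coeff  = add i64 5, %p_mul_coeff
//   %p_mul_coeff1 = mul i64 3, %j
//   %p_sum_coeff2 = add i64 %p_sum_coeff, %p_mul_coeff1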
Value *IslGenerator::generateIslAff(__isl_take isl_aff *Aff) {
  Value *Result;
  Value *ConstValue;
  isl_int ConstIsl;

  isl_int_init(ConstIsl);
  isl_aff_get_constant(Aff, &ConstIsl);
  ConstValue = generateIslInt(ConstIsl);
  Type *Ty = Builder.getInt64Ty();

  // FIXME: We should give the constant and coefficients the right type. Here
  // we force it into i64.
  Result = Builder.CreateSExtOrBitCast(ConstValue, Ty);

  unsigned int NbInputDims = isl_aff_dim(Aff, isl_dim_in);

  assert((IVS.size() == NbInputDims) &&
         "The number of induction variables must match the dimension of the "
         "affine space.");

  isl_int CoefficientIsl;
  isl_int_init(CoefficientIsl);

  for (unsigned int i = 0; i < NbInputDims; ++i) {
    Value *CoefficientValue;
    isl_aff_get_coefficient(Aff, isl_dim_in, i, &CoefficientIsl);

    if (isl_int_is_zero(CoefficientIsl))
      continue;

    CoefficientValue = generateIslInt(CoefficientIsl);
    CoefficientValue = Builder.CreateIntCast(CoefficientValue, Ty, true);
    Value *IV = Builder.CreateIntCast(IVS[i], Ty, true);
    Value *PAdd = Builder.CreateMul(CoefficientValue, IV, "p_mul_coeff");
    Result = Builder.CreateAdd(Result, PAdd, "p_sum_coeff");
  }

  isl_int_clear(CoefficientIsl);
  isl_int_clear(ConstIsl);
  isl_aff_free(Aff);

  return Result;
}

int IslGenerator::mergeIslAffValues(__isl_take isl_set *Set,
                                    __isl_take isl_aff *Aff, void *User) {
  IslGenInfo *GenInfo = (IslGenInfo *)User;

  assert((GenInfo->Result == NULL) &&
         "Result is already set. Currently only a single isl_aff is "
         "supported.");
  assert(isl_set_plain_is_universe(Set)
         && "Code generation failed because the set is not universe");

  GenInfo->Result = GenInfo->Generator->generateIslAff(Aff);

  isl_set_free(Set);
  return 0;
}

Value *IslGenerator::generateIslPwAff(__isl_take isl_pw_aff *PwAff) {
  IslGenInfo User;
  User.Result = NULL;
  User.Generator = this;
  isl_pw_aff_foreach_piece(PwAff, mergeIslAffValues, &User);
  assert(User.Result && "Code generation for isl_pw_aff failed");

  isl_pw_aff_free(PwAff);
  return User.Result;
}


BlockGenerator::BlockGenerator(IRBuilder<> &B, ScopStmt &Stmt, Pass *P):
  Builder(B), Statement(Stmt), P(P), SE(P->getAnalysis<ScalarEvolution>()) {}

bool BlockGenerator::isSCEVIgnore(const Instruction *Inst) {
  if (SCEVCodegen && SE.isSCEVable(Inst->getType()))
    if (const SCEV *Scev = SE.getSCEV(const_cast<Instruction*>(Inst)))
      if (!isa<SCEVCouldNotCompute>(Scev)) {
        if (const SCEVUnknown *Unknown = dyn_cast<SCEVUnknown>(Scev)) {
          if (Unknown->getValue() != Inst)
            return true;
        } else {
          return true;
        }
      }

  return false;
}

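// getNewValue looks up the new version of a value in this order: constants
// are returned unchanged, then GlobalMap and BBMap are consulted, and with
// -polly-codegen-scev a SCEV-based rewrite is attempted. Illustration
// (assumed maps): with GlobalMap = {%iv -> %polly.iv} and
// BBMap = {%mul -> %p_mul}, getNewValue(%mul, ...) returns %p_mul.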
Value *BlockGenerator::getNewValue(const Value *Old, ValueMapT &BBMap,
                                   ValueMapT &GlobalMap) {
  // We assume constants never change.
  // This avoids map lookups for many calls to this function.
  if (isa<Constant>(Old))
    return const_cast<Value *>(Old);

  if (GlobalMap.count(Old)) {
    Value *New = GlobalMap[Old];

    if (Old->getType()->getScalarSizeInBits() <
        New->getType()->getScalarSizeInBits())
      New = Builder.CreateTruncOrBitCast(New, Old->getType());

    return New;
  }

  if (BBMap.count(Old)) {
    return BBMap[Old];
  }

  if (SCEVCodegen && SE.isSCEVable(Old->getType()))
    if (const SCEV *Scev = SE.getSCEV(const_cast<Value *>(Old)))
      if (!isa<SCEVCouldNotCompute>(Scev)) {
        const SCEV *NewScev = SCEVRewriter::rewrite(
            Scev, *Statement.getParent(), SE, GlobalMap, BBMap);
        SCEVExpander Expander(SE, "polly");
        Value *Expanded = Expander.expandCodeFor(NewScev, Old->getType(),
                                                 Builder.GetInsertPoint());

        BBMap[Old] = Expanded;
        return Expanded;
      }

  // 'Old' is within the original SCoP, but was not rewritten.
  //
  // Such values appear if they only calculate information already available
  // in the polyhedral description (e.g. an induction variable increment).
  // They can be safely ignored.
  if (const Instruction *Inst = dyn_cast<Instruction>(Old))
    if (Statement.getParent()->getRegion().contains(Inst->getParent()))
      return NULL;

  // Everything else is probably a scop-constant value defined as global,
  // function parameter or an instruction not within the scop.
  return const_cast<Value *>(Old);
}

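// Illustration (hypothetical instruction): copying "%add = add i32 %a, %b"
// with getNewValue(%a) = %p_a and getNewValue(%b) = %p_b inserts
// "%p_add = add i32 %p_a, %p_b" into the copied block and records
// BBMap[%add] = %p_add.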
void BlockGenerator::copyInstScalar(const Instruction *Inst, ValueMapT &BBMap,
                                    ValueMapT &GlobalMap) {
  Instruction *NewInst = Inst->clone();

  // Replace old operands with the new ones.
  for (Instruction::const_op_iterator OI = Inst->op_begin(),
                                      OE = Inst->op_end();
       OI != OE; ++OI) {
    Value *OldOperand = *OI;
    Value *NewOperand = getNewValue(OldOperand, BBMap, GlobalMap);

    if (!NewOperand) {
      assert(!isa<StoreInst>(NewInst) &&
             "Store instructions are always needed!");
      delete NewInst;
      return;
    }

    NewInst->replaceUsesOfWith(OldOperand, NewOperand);
  }

  Builder.Insert(NewInst);
  BBMap[Inst] = NewInst;

  if (!NewInst->getType()->isVoidTy())
    NewInst->setName("p_" + Inst->getName());
}

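// Illustration (assumed access): for a single-dimensional access A[2 * i + 1]
// the returned index array is { i64 0, 2 * %p_i + 1 }: a leading zero index
// for the base pointer followed by the offset rebuilt from the new IVs.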
std::vector<Value *> BlockGenerator::getMemoryAccessIndex(
    __isl_keep isl_map *AccessRelation, Value *BaseAddress, ValueMapT &BBMap,
    ValueMapT &GlobalMap) {

  assert((isl_map_dim(AccessRelation, isl_dim_out) == 1) &&
         "Only single dimensional access functions supported");

  std::vector<Value *> IVS;
  for (unsigned i = 0; i < Statement.getNumIterators(); ++i) {
    const Value *OriginalIV = Statement.getInductionVariableForDimension(i);
    Value *NewIV = getNewValue(OriginalIV, BBMap, GlobalMap);
    IVS.push_back(NewIV);
  }

  isl_pw_aff *PwAff = isl_map_dim_max(isl_map_copy(AccessRelation), 0);
  IslGenerator IslGen(Builder, IVS);
  Value *OffsetValue = IslGen.generateIslPwAff(PwAff);

  Type *Ty = Builder.getInt64Ty();
  OffsetValue = Builder.CreateIntCast(OffsetValue, Ty, true);

  std::vector<Value *> IndexArray;
  Value *NullValue = Constant::getNullValue(Ty);
  IndexArray.push_back(NullValue);
  IndexArray.push_back(OffsetValue);
  return IndexArray;
}

Value *BlockGenerator::getNewAccessOperand(
    __isl_keep isl_map *NewAccessRelation, Value *BaseAddress, ValueMapT &BBMap,
    ValueMapT &GlobalMap) {
  std::vector<Value *> IndexArray =
      getMemoryAccessIndex(NewAccessRelation, BaseAddress, BBMap, GlobalMap);
  Value *NewOperand =
      Builder.CreateGEP(BaseAddress, IndexArray, "p_newarrayidx_");
  return NewOperand;
}

Value *BlockGenerator::generateLocationAccessed(
    const Instruction *Inst, const Value *Pointer, ValueMapT &BBMap,
    ValueMapT &GlobalMap) {
  MemoryAccess &Access = Statement.getAccessFor(Inst);
  isl_map *CurrentAccessRelation = Access.getAccessRelation();
  isl_map *NewAccessRelation = Access.getNewAccessRelation();

  assert(isl_map_has_equal_space(CurrentAccessRelation, NewAccessRelation) &&
         "Current and new access function use different spaces");

  Value *NewPointer;

  if (!NewAccessRelation) {
    NewPointer = getNewValue(Pointer, BBMap, GlobalMap);
  } else {
    Value *BaseAddress = const_cast<Value *>(Access.getBaseAddr());
    NewPointer =
        getNewAccessOperand(NewAccessRelation, BaseAddress, BBMap, GlobalMap);
  }

  isl_map_free(CurrentAccessRelation);
  isl_map_free(NewAccessRelation);
  return NewPointer;
}

Value *BlockGenerator::generateScalarLoad(
    const LoadInst *Load, ValueMapT &BBMap, ValueMapT &GlobalMap) {
  const Value *Pointer = Load->getPointerOperand();
  Value *NewPointer =
      generateLocationAccessed(Load, Pointer, BBMap, GlobalMap);
  Value *ScalarLoad =
      Builder.CreateLoad(NewPointer, Load->getName() + "_p_scalar_");
  return ScalarLoad;
}

Value *BlockGenerator::generateScalarStore(
    const StoreInst *Store, ValueMapT &BBMap, ValueMapT &GlobalMap) {
  const Value *Pointer = Store->getPointerOperand();
  Value *NewPointer =
      generateLocationAccessed(Store, Pointer, BBMap, GlobalMap);
  Value *ValueOperand = getNewValue(Store->getValueOperand(), BBMap, GlobalMap);

  return Builder.CreateStore(ValueOperand, NewPointer);
}

void BlockGenerator::copyInstruction(const Instruction *Inst, ValueMapT &BBMap,
                                     ValueMapT &GlobalMap) {
  // Terminator instructions define the control flow. They are explicitly
  // expressed in the clast and do not need to be copied.
  if (Inst->isTerminator())
    return;

  if (isSCEVIgnore(Inst))
    return;

  if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
    BBMap[Load] = generateScalarLoad(Load, BBMap, GlobalMap);
    return;
  }

  if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
    BBMap[Store] = generateScalarStore(Store, BBMap, GlobalMap);
    return;
  }

  copyInstScalar(Inst, BBMap, GlobalMap);
}

void BlockGenerator::copyBB(ValueMapT &GlobalMap) {
  BasicBlock *BB = Statement.getBasicBlock();
  BasicBlock *CopyBB =
      SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), P);
  CopyBB->setName("polly.stmt." + BB->getName());
  Builder.SetInsertPoint(CopyBB->begin());

  ValueMapT BBMap;

  for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE;
       ++II)
    copyInstruction(II, BBMap, GlobalMap);
}

VectorBlockGenerator::VectorBlockGenerator(
    IRBuilder<> &B, VectorValueMapT &GlobalMaps, ScopStmt &Stmt,
    __isl_keep isl_map *Schedule, Pass *P)
    : BlockGenerator(B, Stmt, P), GlobalMaps(GlobalMaps), Schedule(Schedule) {
  assert(GlobalMaps.size() > 1 && "Only one vector lane found");
  assert(Schedule && "No statement schedule provided");
}

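// Illustration (assumed width 4, element type double): if %val has no vector
// version yet, it is built lane by lane, roughly:
//
//   %v0 = insertelement <4 x double> undef, double %val.lane0, i32 0
//   ...
//   %v3 = insertelement <4 x double> %v2, double %val.lane3, i32 3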
Value *VectorBlockGenerator::getVectorValue(
    const Value *Old, ValueMapT &VectorMap, VectorValueMapT &ScalarMaps) {
  if (VectorMap.count(Old))
    return VectorMap[Old];

  int Width = getVectorWidth();

  Value *Vector = UndefValue::get(VectorType::get(Old->getType(), Width));

  for (int Lane = 0; Lane < Width; Lane++)
    Vector = Builder.CreateInsertElement(
        Vector, getNewValue(Old, ScalarMaps[Lane], GlobalMaps[Lane]),
        Builder.getInt32(Lane));

  VectorMap[Old] = Vector;

  return Vector;
}

Type *VectorBlockGenerator::getVectorPtrTy(const Value *Val, int Width) {
  PointerType *PointerTy = dyn_cast<PointerType>(Val->getType());
  assert(PointerTy && "PointerType expected");

  Type *ScalarType = PointerTy->getElementType();
  VectorType *VectorType = VectorType::get(ScalarType, Width);

  return PointerType::getUnqual(VectorType);
}

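// Illustration (assumed element type double, width 4): a stride-one access
// becomes a single wide load, roughly:
//
//   %vector_ptr     = bitcast double* %p_newptr to <4 x double>*
//   %val_p_vec_full = load <4 x double>* %vector_ptr, align 8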
Value *VectorBlockGenerator::generateStrideOneLoad(const LoadInst *Load,
                                                   ValueMapT &BBMap) {
  const Value *Pointer = Load->getPointerOperand();
  Type *VectorPtrType = getVectorPtrTy(Pointer, getVectorWidth());
  Value *NewPointer = getNewValue(Pointer, BBMap, GlobalMaps[0]);
  Value *VectorPtr =
      Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
  LoadInst *VecLoad =
      Builder.CreateLoad(VectorPtr, Load->getName() + "_p_vec_full");
  if (!Aligned)
    VecLoad->setAlignment(8);

  return VecLoad;
}

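// Illustration (assumed element type double, width 4): a stride-zero access
// loads one element and splats it across all lanes, roughly:
//
//   %ptr             = bitcast double* %p_newptr to <1 x double>*
//   %val_p_splat_one = load <1 x double>* %ptr, align 8
//   %val_p_splat     = shufflevector <1 x double> %val_p_splat_one,
//                      <1 x double> %val_p_splat_one, <4 x i32> zeroinitializer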
Value *VectorBlockGenerator::generateStrideZeroLoad(const LoadInst *Load,
                                                    ValueMapT &BBMap) {
  const Value *Pointer = Load->getPointerOperand();
  Type *VectorPtrType = getVectorPtrTy(Pointer, 1);
  Value *NewPointer = getNewValue(Pointer, BBMap, GlobalMaps[0]);
  Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
                                           Load->getName() + "_p_vec_p");
  LoadInst *ScalarLoad =
      Builder.CreateLoad(VectorPtr, Load->getName() + "_p_splat_one");

  if (!Aligned)
    ScalarLoad->setAlignment(8);

  Constant *SplatVector = Constant::getNullValue(
      VectorType::get(Builder.getInt32Ty(), getVectorWidth()));

  Value *VectorLoad = Builder.CreateShuffleVector(
      ScalarLoad, ScalarLoad, SplatVector, Load->getName() + "_p_splat");
  return VectorLoad;
}

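// Illustration (assumed element type double, width 4): with an unknown
// stride, each lane is loaded separately and inserted into the result,
// roughly:
//
//   %s0 = load double* %ptr.lane0
//   %v0 = insertelement <4 x double> undef, double %s0, i32 0
//   ... repeated for lanes 1 to 3 ...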
Value *VectorBlockGenerator::generateUnknownStrideLoad(
    const LoadInst *Load, VectorValueMapT &ScalarMaps) {
  int VectorWidth = getVectorWidth();
  const Value *Pointer = Load->getPointerOperand();
  VectorType *VectorType = VectorType::get(
      dyn_cast<PointerType>(Pointer->getType())->getElementType(), VectorWidth);

  Value *Vector = UndefValue::get(VectorType);

  for (int i = 0; i < VectorWidth; i++) {
    Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i]);
    Value *ScalarLoad =
        Builder.CreateLoad(NewPointer, Load->getName() + "_p_scalar_");
    Vector = Builder.CreateInsertElement(
        Vector, ScalarLoad, Builder.getInt32(i), Load->getName() + "_p_vec_");
  }

  return Vector;
}

void VectorBlockGenerator::generateLoad(
    const LoadInst *Load, ValueMapT &VectorMap, VectorValueMapT &ScalarMaps) {
  if (PollyVectorizerChoice >= VECTORIZER_FIRST_NEED_GROUPED_UNROLL ||
      !VectorType::isValidElementType(Load->getType())) {
    for (int i = 0; i < getVectorWidth(); i++)
      ScalarMaps[i][Load] =
          generateScalarLoad(Load, ScalarMaps[i], GlobalMaps[i]);
    return;
  }

  MemoryAccess &Access = Statement.getAccessFor(Load);

  Value *NewLoad;
  if (Access.isStrideZero(isl_map_copy(Schedule)))
    NewLoad = generateStrideZeroLoad(Load, ScalarMaps[0]);
  else if (Access.isStrideOne(isl_map_copy(Schedule)))
    NewLoad = generateStrideOneLoad(Load, ScalarMaps[0]);
  else
    NewLoad = generateUnknownStrideLoad(Load, ScalarMaps);

  VectorMap[Load] = NewLoad;
}

void VectorBlockGenerator::copyUnaryInst(const UnaryInstruction *Inst,
                                         ValueMapT &VectorMap,
                                         VectorValueMapT &ScalarMaps) {
  int VectorWidth = getVectorWidth();
  Value *NewOperand =
      getVectorValue(Inst->getOperand(0), VectorMap, ScalarMaps);

  assert(isa<CastInst>(Inst) &&
         "Cannot generate vector code for this instruction");

  const CastInst *Cast = cast<CastInst>(Inst);
  VectorType *DestType = VectorType::get(Inst->getType(), VectorWidth);
  VectorMap[Inst] = Builder.CreateCast(Cast->getOpcode(), NewOperand, DestType);
}

void VectorBlockGenerator::copyBinaryInst(const BinaryOperator *Inst,
                                          ValueMapT &VectorMap,
                                          VectorValueMapT &ScalarMaps) {
  Value *OpZero = Inst->getOperand(0);
  Value *OpOne = Inst->getOperand(1);

  Value *NewOpZero, *NewOpOne;
  NewOpZero = getVectorValue(OpZero, VectorMap, ScalarMaps);
  NewOpOne = getVectorValue(OpOne, VectorMap, ScalarMaps);

  Value *NewInst = Builder.CreateBinOp(Inst->getOpcode(), NewOpZero, NewOpOne,
                                       Inst->getName() + "p_vec");
  VectorMap[Inst] = NewInst;
}

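// Illustration (assumed width 4): a stride-one store becomes a single wide
// store through a bitcast pointer, analogous to generateStrideOneLoad; any
// other stride is scalarized, roughly:
//
//   %s0 = extractelement <4 x double> %vec, i32 0
//   store double %s0, double* %ptr.lane0
//   ... repeated for lanes 1 to 3 ...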
void VectorBlockGenerator::copyStore(
    const StoreInst *Store, ValueMapT &VectorMap, VectorValueMapT &ScalarMaps) {
  int VectorWidth = getVectorWidth();

  MemoryAccess &Access = Statement.getAccessFor(Store);

  const Value *Pointer = Store->getPointerOperand();
  Value *Vector =
      getVectorValue(Store->getValueOperand(), VectorMap, ScalarMaps);

  if (Access.isStrideOne(isl_map_copy(Schedule))) {
    Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth);
    Value *NewPointer = getNewValue(Pointer, ScalarMaps[0], GlobalMaps[0]);

    Value *VectorPtr =
        Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
    StoreInst *NewStore = Builder.CreateStore(Vector, VectorPtr);

    if (!Aligned)
      NewStore->setAlignment(8);
  } else {
    for (unsigned i = 0; i < ScalarMaps.size(); i++) {
      Value *Scalar = Builder.CreateExtractElement(Vector, Builder.getInt32(i));
      Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i]);
      Builder.CreateStore(Scalar, NewPointer);
    }
  }
}

bool VectorBlockGenerator::hasVectorOperands(const Instruction *Inst,
                                             ValueMapT &VectorMap) {
  for (Instruction::const_op_iterator OI = Inst->op_begin(),
                                      OE = Inst->op_end();
       OI != OE; ++OI)
    if (VectorMap.count(*OI))
      return true;
  return false;
}

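// Illustration (assumed width 4): if an operand %val of Inst has a vector
// version %vec in VectorMap, the missing scalar lanes are recovered with
//
//   ScalarMaps[i][%val] = extractelement <4 x double> %vec, i32 i
//
// so that a scalarized copy of Inst can use them.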
bool VectorBlockGenerator::extractScalarValues(const Instruction *Inst,
                                               ValueMapT &VectorMap,
                                               VectorValueMapT &ScalarMaps) {
  bool HasVectorOperand = false;
  int VectorWidth = getVectorWidth();

  for (Instruction::const_op_iterator OI = Inst->op_begin(),
                                      OE = Inst->op_end();
       OI != OE; ++OI) {
    ValueMapT::iterator VecOp = VectorMap.find(*OI);

    if (VecOp == VectorMap.end())
      continue;

    HasVectorOperand = true;
    Value *NewVector = VecOp->second;

    for (int i = 0; i < VectorWidth; ++i) {
      ValueMapT &SM = ScalarMaps[i];

      // If one scalar element was extracted, all the other scalar elements
      // have already been extracted by this code as well. So there is no need
      // to check for the existence of each of them.
      if (SM.count(*OI))
        break;

      SM[*OI] = Builder.CreateExtractElement(NewVector, Builder.getInt32(i));
    }
  }

  return HasVectorOperand;
}

void VectorBlockGenerator::copyInstScalarized(const Instruction *Inst,
                                              ValueMapT &VectorMap,
                                              VectorValueMapT &ScalarMaps) {
  bool HasVectorOperand;
  int VectorWidth = getVectorWidth();

  HasVectorOperand = extractScalarValues(Inst, VectorMap, ScalarMaps);

  for (int VectorLane = 0; VectorLane < getVectorWidth(); VectorLane++)
    copyInstScalar(Inst, ScalarMaps[VectorLane], GlobalMaps[VectorLane]);

  if (!VectorType::isValidElementType(Inst->getType()) || !HasVectorOperand)
    return;

  // Make the result available as a vector value.
  VectorType *VectorType = VectorType::get(Inst->getType(), VectorWidth);
  Value *Vector = UndefValue::get(VectorType);

  for (int i = 0; i < VectorWidth; i++)
    Vector = Builder.CreateInsertElement(Vector, ScalarMaps[i][Inst],
                                         Builder.getInt32(i));

  VectorMap[Inst] = Vector;
}

int VectorBlockGenerator::getVectorWidth() { return GlobalMaps.size(); }

void VectorBlockGenerator::copyInstruction(const Instruction *Inst,
                                           ValueMapT &VectorMap,
                                           VectorValueMapT &ScalarMaps) {
  // Terminator instructions define the control flow. They are explicitly
  // expressed in the clast and do not need to be copied.
  if (Inst->isTerminator())
    return;

  if (isSCEVIgnore(Inst))
    return;

  if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
    generateLoad(Load, VectorMap, ScalarMaps);
    return;
  }

  if (hasVectorOperands(Inst, VectorMap)) {
    if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
      copyStore(Store, VectorMap, ScalarMaps);
      return;
    }

    if (const UnaryInstruction *Unary = dyn_cast<UnaryInstruction>(Inst)) {
      copyUnaryInst(Unary, VectorMap, ScalarMaps);
      return;
    }

    if (const BinaryOperator *Binary = dyn_cast<BinaryOperator>(Inst)) {
      copyBinaryInst(Binary, VectorMap, ScalarMaps);
      return;
    }

    // Fallthrough: we generate scalar instructions if we do not know how to
    // generate vector code.
  }

  copyInstScalarized(Inst, VectorMap, ScalarMaps);
}

void VectorBlockGenerator::copyBB() {
  BasicBlock *BB = Statement.getBasicBlock();
  BasicBlock *CopyBB =
      SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), P);
  CopyBB->setName("polly.stmt." + BB->getName());
  Builder.SetInsertPoint(CopyBB->begin());

  // Create two maps that store the mapping from the original instructions of
  // the old basic block to their copies in the new basic block. Those maps
  // are basic block local.
  //
  // As vector code generation is supported there is one map for scalar values
  // and one for vector values.
  //
  // In case we just do scalar code generation, the vectorMap is not used and
  // the scalarMap has just one dimension, which contains the mapping.
  //
  // In case vector code generation is done, an instruction may either appear
  // once in the vector map (as it computes <vectorwidth> values at a time),
  // or, if the values are calculated using scalar operations, it appears once
  // in every dimension of the scalarMap.
  VectorValueMapT ScalarBlockMap(getVectorWidth());
  ValueMapT VectorBlockMap;

  for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE;
       ++II)
    copyInstruction(II, VectorBlockMap, ScalarBlockMap);
}