1 //===--- BlockGenerators.cpp - Generate code for statements -----*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the BlockGenerator and VectorBlockGenerator classes,
11 // which generate sequential code and vectorized code for a polyhedral
12 // statement, respectively.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "polly/ScopInfo.h"
17 #include "polly/CodeGen/CodeGeneration.h"
18 #include "polly/CodeGen/BlockGenerators.h"
19 #include "polly/Support/GICHelper.h"
20 #include "polly/Support/SCEVValidator.h"
21 #include "polly/Support/ScopHelper.h"
22 
23 #include "llvm/Analysis/LoopInfo.h"
24 #include "llvm/Analysis/ScalarEvolution.h"
25 #include "llvm/Analysis/ScalarEvolutionExpander.h"
26 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
27 #include "llvm/Support/CommandLine.h"
28 
29 #include "isl/aff.h"
30 #include "isl/set.h"
31 
32 using namespace llvm;
33 using namespace polly;
34 
35 static cl::opt<bool>
36 Aligned("enable-polly-aligned", cl::desc("Assumed aligned memory accesses."),
37         cl::Hidden, cl::value_desc("OpenMP code generation enabled if true"),
38         cl::init(false), cl::ZeroOrMore);
39 
// Command line option for SCEV based code generation. cl::location ties the
// flag to the polly::SCEVCodegen variable defined below, so other translation
// units (which see its declaration in a polly header) can query it directly.
static cl::opt<bool, true> SCEVCodegenF(
    "polly-codegen-scev", cl::desc("Use SCEV based code generation."),
    cl::Hidden, cl::location(SCEVCodegen), cl::init(false), cl::ZeroOrMore);

// Storage for the flag above.
bool polly::SCEVCodegen;
45 
46 bool polly::canSynthesize(const Instruction *I, const llvm::LoopInfo *LI,
47                           ScalarEvolution *SE, const Region *R) {
48   if (SCEVCodegen) {
49     if (!I || !SE->isSCEVable(I->getType()))
50       return false;
51 
52     if (const SCEV *Scev = SE->getSCEV(const_cast<Instruction *>(I)))
53       if (!isa<SCEVCouldNotCompute>(Scev))
54         if (!hasScalarDepsInsideRegion(Scev, R))
55           return true;
56 
57     return false;
58   }
59 
60   Loop *L = LI->getLoopFor(I->getParent());
61   return L && I == L->getCanonicalInductionVariable();
62 }
63 
// Helper class to generate memory location.
//
// Translates isl affine expressions into LLVM IR, substituting the new
// induction variable values (IVS) for the input dimensions.
namespace {
class IslGenerator {
public:
  IslGenerator(IRBuilder<> &Builder, std::vector<Value *> &IVS)
      : Builder(Builder), IVS(IVS) {}
  // Translate an isl integer into an LLVM constant integer.
  Value *generateIslInt(__isl_take isl_int Int);
  // Emit IR that evaluates the affine expression with the values in IVS.
  Value *generateIslAff(__isl_take isl_aff *Aff);
  // Emit IR for a piecewise affine function (a single universe piece only).
  Value *generateIslPwAff(__isl_take isl_pw_aff *PwAff);

private:
  // State threaded through the isl_pw_aff_foreach_piece callback.
  typedef struct {
    Value *Result;
    class IslGenerator *Generator;
  } IslGenInfo;

  IRBuilder<> &Builder;
  // New values of the induction variables, one per affine input dimension.
  std::vector<Value *> &IVS;
  // Callback invoked for every piece of an isl_pw_aff.
  static int mergeIslAffValues(__isl_take isl_set *Set, __isl_take isl_aff *Aff,
                               void *User);
};
}
86 
// Translate an isl integer into an LLVM constant integer.
//
// The value is round-tripped through a GMP integer and converted to an APInt
// via the APInt_from_MPZ helper; the temporary mpz_t is released afterwards.
Value *IslGenerator::generateIslInt(isl_int Int) {
  mpz_t IntMPZ;
  mpz_init(IntMPZ);
  isl_int_get_gmp(Int, IntMPZ);
  Value *IntValue = Builder.getInt(APInt_from_MPZ(IntMPZ));
  mpz_clear(IntMPZ);
  return IntValue;
}
95 
// Emit IR that evaluates the affine expression \p Aff with the new induction
// variable values stored in IVS.
//
// The result is built up as: constant + sum_i(coefficient_i * IVS[i]), with
// every operand cast to i64 (see FIXME below). The taken isl_aff and the
// temporary isl integers are released before returning.
Value *IslGenerator::generateIslAff(__isl_take isl_aff *Aff) {
  Value *Result;
  Value *ConstValue;
  isl_int ConstIsl;

  isl_int_init(ConstIsl);
  isl_aff_get_constant(Aff, &ConstIsl);
  ConstValue = generateIslInt(ConstIsl);
  Type *Ty = Builder.getInt64Ty();

  // FIXME: We should give the constant and coefficients the right type. Here
  // we force it into i64.
  Result = Builder.CreateSExtOrBitCast(ConstValue, Ty);

  unsigned int NbInputDims = isl_aff_dim(Aff, isl_dim_in);

  assert((IVS.size() == NbInputDims) &&
         "The Dimension of Induction Variables must match the dimension of the "
         "affine space.");

  isl_int CoefficientIsl;
  isl_int_init(CoefficientIsl);

  for (unsigned int i = 0; i < NbInputDims; ++i) {
    Value *CoefficientValue;
    isl_aff_get_coefficient(Aff, isl_dim_in, i, &CoefficientIsl);

    // A zero coefficient contributes nothing; skip the multiply and add.
    if (isl_int_is_zero(CoefficientIsl))
      continue;

    CoefficientValue = generateIslInt(CoefficientIsl);
    CoefficientValue = Builder.CreateIntCast(CoefficientValue, Ty, true);
    Value *IV = Builder.CreateIntCast(IVS[i], Ty, true);
    Value *PAdd = Builder.CreateMul(CoefficientValue, IV, "p_mul_coeff");
    Result = Builder.CreateAdd(Result, PAdd, "p_sum_coeff");
  }

  // Release the temporary isl integers and the (taken) affine expression.
  isl_int_clear(CoefficientIsl);
  isl_int_clear(ConstIsl);
  isl_aff_free(Aff);

  return Result;
}
139 
140 int IslGenerator::mergeIslAffValues(__isl_take isl_set *Set,
141                                     __isl_take isl_aff *Aff, void *User) {
142   IslGenInfo *GenInfo = (IslGenInfo *)User;
143 
144   assert((GenInfo->Result == NULL) &&
145          "Result is already set. Currently only single isl_aff is supported");
146   assert(isl_set_plain_is_universe(Set) &&
147          "Code generation failed because the set is not universe");
148 
149   GenInfo->Result = GenInfo->Generator->generateIslAff(Aff);
150 
151   isl_set_free(Set);
152   return 0;
153 }
154 
155 Value *IslGenerator::generateIslPwAff(__isl_take isl_pw_aff *PwAff) {
156   IslGenInfo User;
157   User.Result = NULL;
158   User.Generator = this;
159   isl_pw_aff_foreach_piece(PwAff, mergeIslAffValues, &User);
160   assert(User.Result && "Code generation for isl_pw_aff failed");
161 
162   isl_pw_aff_free(PwAff);
163   return User.Result;
164 }
165 
// Construct a generator that copies the basic block of statement \p Stmt,
// fetching the ScalarEvolution analysis from the pass \p P.
BlockGenerator::BlockGenerator(IRBuilder<> &B, ScopStmt &Stmt, Pass *P)
    : Builder(B), Statement(Stmt), P(P), SE(P->getAnalysis<ScalarEvolution>()) {
}
169 
170 Value *BlockGenerator::getNewValue(const Value *Old, ValueMapT &BBMap,
171                                    ValueMapT &GlobalMap, LoopToScevMapT &LTS,
172                                    Loop *L) {
173   // We assume constants never change.
174   // This avoids map lookups for many calls to this function.
175   if (isa<Constant>(Old))
176     return const_cast<Value *>(Old);
177 
178   if (GlobalMap.count(Old)) {
179     Value *New = GlobalMap[Old];
180 
181     if (Old->getType()->getScalarSizeInBits() <
182             New->getType()->getScalarSizeInBits())
183       New = Builder.CreateTruncOrBitCast(New, Old->getType());
184 
185     return New;
186   }
187 
188   if (BBMap.count(Old)) {
189     return BBMap[Old];
190   }
191 
192   if (SCEVCodegen && SE.isSCEVable(Old->getType()))
193     if (const SCEV *Scev = SE.getSCEVAtScope(const_cast<Value *>(Old), L)) {
194       if (!isa<SCEVCouldNotCompute>(Scev)) {
195         const SCEV *NewScev = apply(Scev, LTS, SE);
196         ValueToValueMap VTV;
197         VTV.insert(BBMap.begin(), BBMap.end());
198         VTV.insert(GlobalMap.begin(), GlobalMap.end());
199         NewScev = SCEVParameterRewriter::rewrite(NewScev, SE, VTV);
200         SCEVExpander Expander(SE, "polly");
201         Value *Expanded = Expander.expandCodeFor(NewScev, Old->getType(),
202                                                  Builder.GetInsertPoint());
203 
204         BBMap[Old] = Expanded;
205         return Expanded;
206       }
207     }
208 
209   if (const Instruction *Inst = dyn_cast<Instruction>(Old)) {
210     (void) Inst;
211     assert(!Statement.getParent()->getRegion().contains(Inst->getParent()) &&
212            "unexpected scalar dependence in region");
213   }
214 
215   // Everything else is probably a scop-constant value defined as global,
216   // function parameter or an instruction not within the scop.
217   return const_cast<Value *>(Old);
218 }
219 
// Create a scalar copy of \p Inst at the current insert point.
//
// The instruction is cloned, its operands are remapped with getNewValue(),
// the clone is inserted, and the mapping Inst -> clone is recorded in
// \p BBMap so later instructions of the block can refer to the copy.
void BlockGenerator::copyInstScalar(const Instruction *Inst, ValueMapT &BBMap,
                                    ValueMapT &GlobalMap, LoopToScevMapT &LTS) {
  Instruction *NewInst = Inst->clone();

  // Replace old operands with the new ones.
  for (Instruction::const_op_iterator OI = Inst->op_begin(),
                                      OE = Inst->op_end();
       OI != OE; ++OI) {
    Value *OldOperand = *OI;
    Value *NewOperand =
        getNewValue(OldOperand, BBMap, GlobalMap, LTS, getLoopForInst(Inst));

    if (!NewOperand) {
      assert(!isa<StoreInst>(NewInst) &&
             "Store instructions are always needed!");
      // The clone was never inserted into a basic block, so it must be
      // deleted directly to avoid leaking it.
      delete NewInst;
      return;
    }

    NewInst->replaceUsesOfWith(OldOperand, NewOperand);
  }

  Builder.Insert(NewInst);
  BBMap[Inst] = NewInst;

  if (!NewInst->getType()->isVoidTy())
    NewInst->setName("p_" + Inst->getName());
}
248 
// Compute the index expressions for an access to \p BaseAddress described by
// \p AccessRelation.
//
// The new induction variable values are collected and fed to an IslGenerator
// which turns the (single dimensional) access function into an explicit
// offset computation. The returned vector is {0, offset}, ready for a GEP.
std::vector<Value *> BlockGenerator::getMemoryAccessIndex(
    __isl_keep isl_map *AccessRelation, Value *BaseAddress, ValueMapT &BBMap,
    ValueMapT &GlobalMap, LoopToScevMapT &LTS, Loop *L) {

  assert((isl_map_dim(AccessRelation, isl_dim_out) == 1) &&
         "Only single dimensional access functions supported");

  // Gather the new values of all surrounding induction variables.
  std::vector<Value *> IVS;
  for (unsigned i = 0; i < Statement.getNumIterators(); ++i) {
    const Value *OriginalIV = Statement.getInductionVariableForDimension(i);
    Value *NewIV = getNewValue(OriginalIV, BBMap, GlobalMap, LTS, L);
    IVS.push_back(NewIV);
  }

  // dim_max of the only output dimension; with the single-dimensional access
  // functions enforced above this yields the access expression itself.
  isl_pw_aff *PwAff = isl_map_dim_max(isl_map_copy(AccessRelation), 0);
  IslGenerator IslGen(Builder, IVS);
  Value *OffsetValue = IslGen.generateIslPwAff(PwAff);

  Type *Ty = Builder.getInt64Ty();
  OffsetValue = Builder.CreateIntCast(OffsetValue, Ty, true);

  // A leading zero index steps through the pointer itself before indexing
  // into the pointed-to array.
  std::vector<Value *> IndexArray;
  Value *NullValue = Constant::getNullValue(Ty);
  IndexArray.push_back(NullValue);
  IndexArray.push_back(OffsetValue);
  return IndexArray;
}
276 
277 Value *BlockGenerator::getNewAccessOperand(
278     __isl_keep isl_map *NewAccessRelation, Value *BaseAddress, ValueMapT &BBMap,
279     ValueMapT &GlobalMap, LoopToScevMapT &LTS, Loop *L) {
280   std::vector<Value *> IndexArray = getMemoryAccessIndex(
281       NewAccessRelation, BaseAddress, BBMap, GlobalMap, LTS, L);
282   Value *NewOperand =
283       Builder.CreateGEP(BaseAddress, IndexArray, "p_newarrayidx_");
284   return NewOperand;
285 }
286 
// Get the (possibly remapped) pointer through which \p Inst accesses memory.
//
// If a new access relation was attached to this memory access, the address
// is recomputed from that relation; otherwise the original pointer is simply
// translated with getNewValue().
Value *BlockGenerator::generateLocationAccessed(
    const Instruction *Inst, const Value *Pointer, ValueMapT &BBMap,
    ValueMapT &GlobalMap, LoopToScevMapT &LTS) {
  MemoryAccess &Access = Statement.getAccessFor(Inst);
  isl_map *CurrentAccessRelation = Access.getAccessRelation();
  isl_map *NewAccessRelation = Access.getNewAccessRelation();

  // NOTE(review): NewAccessRelation may be NULL here (it is checked below).
  // isl_map_has_equal_space then appears to return a non-zero error value
  // rather than 0, so the assert does not fire for the NULL case — confirm
  // this is intended.
  assert(isl_map_has_equal_space(CurrentAccessRelation, NewAccessRelation) &&
         "Current and new access function use different spaces");

  Value *NewPointer;

  if (!NewAccessRelation) {
    NewPointer =
        getNewValue(Pointer, BBMap, GlobalMap, LTS, getLoopForInst(Inst));
  } else {
    Value *BaseAddress = const_cast<Value *>(Access.getBaseAddr());
    NewPointer = getNewAccessOperand(NewAccessRelation, BaseAddress, BBMap,
                                     GlobalMap, LTS, getLoopForInst(Inst));
  }

  // Both relations were returned as owned copies; release them.
  isl_map_free(CurrentAccessRelation);
  isl_map_free(NewAccessRelation);
  return NewPointer;
}
312 
313 Loop *BlockGenerator::getLoopForInst(const llvm::Instruction *Inst) {
314   return P->getAnalysis<LoopInfo>().getLoopFor(Inst->getParent());
315 }
316 
317 Value *
318 BlockGenerator::generateScalarLoad(const LoadInst *Load, ValueMapT &BBMap,
319                                    ValueMapT &GlobalMap, LoopToScevMapT &LTS) {
320   const Value *Pointer = Load->getPointerOperand();
321   const Instruction *Inst = dyn_cast<Instruction>(Load);
322   Value *NewPointer =
323       generateLocationAccessed(Inst, Pointer, BBMap, GlobalMap, LTS);
324   Value *ScalarLoad =
325       Builder.CreateLoad(NewPointer, Load->getName() + "_p_scalar_");
326   return ScalarLoad;
327 }
328 
329 Value *
330 BlockGenerator::generateScalarStore(const StoreInst *Store, ValueMapT &BBMap,
331                                     ValueMapT &GlobalMap, LoopToScevMapT &LTS) {
332   const Value *Pointer = Store->getPointerOperand();
333   Value *NewPointer =
334       generateLocationAccessed(Store, Pointer, BBMap, GlobalMap, LTS);
335   Value *ValueOperand = getNewValue(Store->getValueOperand(), BBMap, GlobalMap,
336                                     LTS, getLoopForInst(Store));
337 
338   return Builder.CreateStore(ValueOperand, NewPointer);
339 }
340 
341 void BlockGenerator::copyInstruction(const Instruction *Inst, ValueMapT &BBMap,
342                                      ValueMapT &GlobalMap,
343                                      LoopToScevMapT &LTS) {
344   // Terminator instructions control the control flow. They are explicitly
345   // expressed in the clast and do not need to be copied.
346   if (Inst->isTerminator())
347     return;
348 
349   if (canSynthesize(Inst, &P->getAnalysis<LoopInfo>(), &SE,
350                     &Statement.getParent()->getRegion()))
351     return;
352 
353   if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
354     BBMap[Load] = generateScalarLoad(Load, BBMap, GlobalMap, LTS);
355     return;
356   }
357 
358   if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
359     BBMap[Store] = generateScalarStore(Store, BBMap, GlobalMap, LTS);
360     return;
361   }
362 
363   copyInstScalar(Inst, BBMap, GlobalMap, LTS);
364 }
365 
366 void BlockGenerator::copyBB(ValueMapT &GlobalMap, LoopToScevMapT &LTS) {
367   BasicBlock *BB = Statement.getBasicBlock();
368   BasicBlock *CopyBB =
369       SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), P);
370   CopyBB->setName("polly.stmt." + BB->getName());
371   Builder.SetInsertPoint(CopyBB->begin());
372 
373   ValueMapT BBMap;
374 
375   for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE;
376        ++II)
377     copyInstruction(II, BBMap, GlobalMap, LTS);
378 }
379 
380 VectorBlockGenerator::VectorBlockGenerator(
381     IRBuilder<> &B, VectorValueMapT &GlobalMaps,
382     std::vector<LoopToScevMapT> &VLTS, ScopStmt &Stmt,
383     __isl_keep isl_map *Schedule, Pass *P)
384     : BlockGenerator(B, Stmt, P), GlobalMaps(GlobalMaps), VLTS(VLTS),
385       Schedule(Schedule) {
386   assert(GlobalMaps.size() > 1 && "Only one vector lane found");
387   assert(Schedule && "No statement domain provided");
388 }
389 
390 Value *
391 VectorBlockGenerator::getVectorValue(const Value *Old, ValueMapT &VectorMap,
392                                      VectorValueMapT &ScalarMaps, Loop *L) {
393   if (VectorMap.count(Old))
394     return VectorMap[Old];
395 
396   int Width = getVectorWidth();
397 
398   Value *Vector = UndefValue::get(VectorType::get(Old->getType(), Width));
399 
400   for (int Lane = 0; Lane < Width; Lane++)
401     Vector = Builder.CreateInsertElement(
402         Vector,
403         getNewValue(Old, ScalarMaps[Lane], GlobalMaps[Lane], VLTS[Lane], L),
404         Builder.getInt32(Lane));
405 
406   VectorMap[Old] = Vector;
407 
408   return Vector;
409 }
410 
411 Type *VectorBlockGenerator::getVectorPtrTy(const Value *Val, int Width) {
412   PointerType *PointerTy = dyn_cast<PointerType>(Val->getType());
413   assert(PointerTy && "PointerType expected");
414 
415   Type *ScalarType = PointerTy->getElementType();
416   VectorType *VectorType = VectorType::get(ScalarType, Width);
417 
418   return PointerType::getUnqual(VectorType);
419 }
420 
// Load a full vector from a stride-one (consecutive) memory access.
//
// The scalar pointer is bitcast to a vector pointer and one wide load is
// emitted. Unless -enable-polly-aligned is set, a conservative alignment of
// 8 bytes is forced onto the load.
Value *VectorBlockGenerator::generateStrideOneLoad(const LoadInst *Load,
                                                   ValueMapT &BBMap) {
  const Value *Pointer = Load->getPointerOperand();
  Type *VectorPtrType = getVectorPtrTy(Pointer, getVectorWidth());
  // The lane-0 maps give the address of the first vector element.
  Value *NewPointer =
      getNewValue(Pointer, BBMap, GlobalMaps[0], VLTS[0], getLoopForInst(Load));
  Value *VectorPtr =
      Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
  LoadInst *VecLoad =
      Builder.CreateLoad(VectorPtr, Load->getName() + "_p_vec_full");
  if (!Aligned)
    VecLoad->setAlignment(8);

  return VecLoad;
}
436 
// Load a vector from a stride-zero memory access.
//
// Every lane reads the same location, so a single scalar load is emitted and
// splatted across the vector with a shufflevector whose mask is all zeros.
Value *VectorBlockGenerator::generateStrideZeroLoad(const LoadInst *Load,
                                                    ValueMapT &BBMap) {
  const Value *Pointer = Load->getPointerOperand();
  Type *VectorPtrType = getVectorPtrTy(Pointer, 1);
  Value *NewPointer =
      getNewValue(Pointer, BBMap, GlobalMaps[0], VLTS[0], getLoopForInst(Load));
  Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
                                           Load->getName() + "_p_vec_p");
  LoadInst *ScalarLoad =
      Builder.CreateLoad(VectorPtr, Load->getName() + "_p_splat_one");

  if (!Aligned)
    ScalarLoad->setAlignment(8);

  // All-zero shuffle mask: every result element selects element 0.
  Constant *SplatVector = Constant::getNullValue(
      VectorType::get(Builder.getInt32Ty(), getVectorWidth()));

  Value *VectorLoad = Builder.CreateShuffleVector(
      ScalarLoad, ScalarLoad, SplatVector, Load->getName() + "_p_splat");
  return VectorLoad;
}
458 
459 Value *VectorBlockGenerator::generateUnknownStrideLoad(
460     const LoadInst *Load, VectorValueMapT &ScalarMaps) {
461   int VectorWidth = getVectorWidth();
462   const Value *Pointer = Load->getPointerOperand();
463   VectorType *VectorType = VectorType::get(
464       dyn_cast<PointerType>(Pointer->getType())->getElementType(), VectorWidth);
465 
466   Value *Vector = UndefValue::get(VectorType);
467 
468   for (int i = 0; i < VectorWidth; i++) {
469     Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i],
470                                     VLTS[i], getLoopForInst(Load));
471     Value *ScalarLoad =
472         Builder.CreateLoad(NewPointer, Load->getName() + "_p_scalar_");
473     Vector = Builder.CreateInsertElement(
474         Vector, ScalarLoad, Builder.getInt32(i), Load->getName() + "_p_vec_");
475   }
476 
477   return Vector;
478 }
479 
// Generate code for a load, choosing between scalar and vector strategies.
//
// Vectorizers that need grouped unrolling, and loads of types that are not
// valid vector element types, get one scalar load per lane. Otherwise the
// access stride (with respect to the schedule) selects a splat load
// (stride 0), one wide load (stride 1), or a lane-by-lane gather.
void VectorBlockGenerator::generateLoad(
    const LoadInst *Load, ValueMapT &VectorMap, VectorValueMapT &ScalarMaps) {
  if (PollyVectorizerChoice >= VECTORIZER_FIRST_NEED_GROUPED_UNROLL ||
      !VectorType::isValidElementType(Load->getType())) {
    for (int i = 0; i < getVectorWidth(); i++)
      ScalarMaps[i][Load] =
          generateScalarLoad(Load, ScalarMaps[i], GlobalMaps[i], VLTS[i]);
    return;
  }

  MemoryAccess &Access = Statement.getAccessFor(Load);

  Value *NewLoad;
  if (Access.isStrideZero(isl_map_copy(Schedule)))
    NewLoad = generateStrideZeroLoad(Load, ScalarMaps[0]);
  else if (Access.isStrideOne(isl_map_copy(Schedule)))
    NewLoad = generateStrideOneLoad(Load, ScalarMaps[0]);
  else
    NewLoad = generateUnknownStrideLoad(Load, ScalarMaps);

  VectorMap[Load] = NewLoad;
}
502 
503 void VectorBlockGenerator::copyUnaryInst(const UnaryInstruction *Inst,
504                                          ValueMapT &VectorMap,
505                                          VectorValueMapT &ScalarMaps) {
506   int VectorWidth = getVectorWidth();
507   Value *NewOperand = getVectorValue(Inst->getOperand(0), VectorMap, ScalarMaps,
508                                      getLoopForInst(Inst));
509 
510   assert(isa<CastInst>(Inst) && "Can not generate vector code for instruction");
511 
512   const CastInst *Cast = dyn_cast<CastInst>(Inst);
513   VectorType *DestType = VectorType::get(Inst->getType(), VectorWidth);
514   VectorMap[Inst] = Builder.CreateCast(Cast->getOpcode(), NewOperand, DestType);
515 }
516 
517 void VectorBlockGenerator::copyBinaryInst(const BinaryOperator *Inst,
518                                           ValueMapT &VectorMap,
519                                           VectorValueMapT &ScalarMaps) {
520   Loop *L = getLoopForInst(Inst);
521   Value *OpZero = Inst->getOperand(0);
522   Value *OpOne = Inst->getOperand(1);
523 
524   Value *NewOpZero, *NewOpOne;
525   NewOpZero = getVectorValue(OpZero, VectorMap, ScalarMaps, L);
526   NewOpOne = getVectorValue(OpOne, VectorMap, ScalarMaps, L);
527 
528   Value *NewInst = Builder.CreateBinOp(Inst->getOpcode(), NewOpZero, NewOpOne,
529                                        Inst->getName() + "p_vec");
530   VectorMap[Inst] = NewInst;
531 }
532 
533 void VectorBlockGenerator::copyStore(
534     const StoreInst *Store, ValueMapT &VectorMap, VectorValueMapT &ScalarMaps) {
535   int VectorWidth = getVectorWidth();
536 
537   MemoryAccess &Access = Statement.getAccessFor(Store);
538 
539   const Value *Pointer = Store->getPointerOperand();
540   Value *Vector = getVectorValue(Store->getValueOperand(), VectorMap,
541                                  ScalarMaps, getLoopForInst(Store));
542 
543   if (Access.isStrideOne(isl_map_copy(Schedule))) {
544     Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth);
545     Value *NewPointer = getNewValue(Pointer, ScalarMaps[0], GlobalMaps[0],
546                                     VLTS[0], getLoopForInst(Store));
547 
548     Value *VectorPtr =
549         Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
550     StoreInst *Store = Builder.CreateStore(Vector, VectorPtr);
551 
552     if (!Aligned)
553       Store->setAlignment(8);
554   } else {
555     for (unsigned i = 0; i < ScalarMaps.size(); i++) {
556       Value *Scalar = Builder.CreateExtractElement(Vector, Builder.getInt32(i));
557       Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i],
558                                       VLTS[i], getLoopForInst(Store));
559       Builder.CreateStore(Scalar, NewPointer);
560     }
561   }
562 }
563 
564 bool VectorBlockGenerator::hasVectorOperands(const Instruction *Inst,
565                                              ValueMapT &VectorMap) {
566   for (Instruction::const_op_iterator OI = Inst->op_begin(),
567                                       OE = Inst->op_end();
568        OI != OE; ++OI)
569     if (VectorMap.count(*OI))
570       return true;
571   return false;
572 }
573 
// Extract the scalar elements of all vector operands of \p Inst into the
// per-lane scalar maps, so the instruction can be copied lane by lane.
//
// \returns true if at least one operand had a vector value.
bool VectorBlockGenerator::extractScalarValues(const Instruction *Inst,
                                               ValueMapT &VectorMap,
                                               VectorValueMapT &ScalarMaps) {
  bool HasVectorOperand = false;
  int VectorWidth = getVectorWidth();

  for (Instruction::const_op_iterator OI = Inst->op_begin(),
                                      OE = Inst->op_end();
       OI != OE; ++OI) {
    ValueMapT::iterator VecOp = VectorMap.find(*OI);

    if (VecOp == VectorMap.end())
      continue;

    HasVectorOperand = true;
    Value *NewVector = VecOp->second;

    for (int i = 0; i < VectorWidth; ++i) {
      ValueMapT &SM = ScalarMaps[i];

      // If there is one scalar extracted, all scalar elements should have
      // already been extracted by the code here. So no need to check for the
      // existence of all of them.
      if (SM.count(*OI))
        break;

      SM[*OI] = Builder.CreateExtractElement(NewVector, Builder.getInt32(i));
    }
  }

  return HasVectorOperand;
}
606 
607 void VectorBlockGenerator::copyInstScalarized(const Instruction *Inst,
608                                               ValueMapT &VectorMap,
609                                               VectorValueMapT &ScalarMaps) {
610   bool HasVectorOperand;
611   int VectorWidth = getVectorWidth();
612 
613   HasVectorOperand = extractScalarValues(Inst, VectorMap, ScalarMaps);
614 
615   for (int VectorLane = 0; VectorLane < getVectorWidth(); VectorLane++)
616     copyInstScalar(Inst, ScalarMaps[VectorLane], GlobalMaps[VectorLane],
617                    VLTS[VectorLane]);
618 
619   if (!VectorType::isValidElementType(Inst->getType()) || !HasVectorOperand)
620     return;
621 
622   // Make the result available as vector value.
623   VectorType *VectorType = VectorType::get(Inst->getType(), VectorWidth);
624   Value *Vector = UndefValue::get(VectorType);
625 
626   for (int i = 0; i < VectorWidth; i++)
627     Vector = Builder.CreateInsertElement(Vector, ScalarMaps[i][Inst],
628                                          Builder.getInt32(i));
629 
630   VectorMap[Inst] = Vector;
631 }
632 
633 int VectorBlockGenerator::getVectorWidth() { return GlobalMaps.size(); }
634 
// Copy one instruction of the statement, dispatching to the matching vector
// code generation routine.
//
// Loads always go through generateLoad(). Stores, casts and binary operators
// with at least one vector operand get dedicated vector code; everything
// else is scalarized lane by lane.
void VectorBlockGenerator::copyInstruction(const Instruction *Inst,
                                           ValueMapT &VectorMap,
                                           VectorValueMapT &ScalarMaps) {
  // Terminator instructions control the control flow. They are explicitly
  // expressed in the clast and do not need to be copied.
  if (Inst->isTerminator())
    return;

  // Values that can be recomputed at the use site need no copy.
  if (canSynthesize(Inst, &P->getAnalysis<LoopInfo>(), &SE,
                    &Statement.getParent()->getRegion()))
    return;

  if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
    generateLoad(Load, VectorMap, ScalarMaps);
    return;
  }

  if (hasVectorOperands(Inst, VectorMap)) {
    if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
      copyStore(Store, VectorMap, ScalarMaps);
      return;
    }

    if (const UnaryInstruction *Unary = dyn_cast<UnaryInstruction>(Inst)) {
      copyUnaryInst(Unary, VectorMap, ScalarMaps);
      return;
    }

    if (const BinaryOperator *Binary = dyn_cast<BinaryOperator>(Inst)) {
      copyBinaryInst(Binary, VectorMap, ScalarMaps);
      return;
    }

    // Fallthrough: We generate scalar instructions, if we don't know how to
    // generate vector code.
  }

  copyInstScalarized(Inst, VectorMap, ScalarMaps);
}
674 
// Create a vectorized copy of the statement's basic block at the current
// insert point.
void VectorBlockGenerator::copyBB() {
  BasicBlock *BB = Statement.getBasicBlock();
  BasicBlock *CopyBB =
      SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), P);
  CopyBB->setName("polly.stmt." + BB->getName());
  Builder.SetInsertPoint(CopyBB->begin());

  // Create two maps that store the mapping from the original instructions of
  // the old basic block to their copies in the new basic block. Those maps
  // are basic block local.
  //
  // As vector code generation is supported there is one map for scalar values
  // and one for vector values.
  //
  // In case we just do scalar code generation, the vectorMap is not used and
  // the scalarMap has just one dimension, which contains the mapping.
  //
  // In case vector code generation is done, an instruction may either appear
  // in the vector map once (as it is calculating >vectorwidth< values at a
  // time). Or (if the values are calculated using scalar operations), it
  // appears once in every dimension of the scalarMap.
  VectorValueMapT ScalarBlockMap(getVectorWidth());
  ValueMapT VectorBlockMap;

  for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE;
       ++II)
    copyInstruction(II, VectorBlockMap, ScalarBlockMap);
}
703