1 //===--- BlockGenerators.cpp - Generate code for statements -----*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the BlockGenerator and VectorBlockGenerator classes,
11 // which generate sequential code and vectorized code for a polyhedral
12 // statement, respectively.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "polly/ScopInfo.h"
17 #include "isl/aff.h"
18 #include "isl/ast.h"
19 #include "isl/ast_build.h"
20 #include "isl/set.h"
21 #include "polly/CodeGen/BlockGenerators.h"
22 #include "polly/CodeGen/CodeGeneration.h"
23 #include "polly/CodeGen/IslExprBuilder.h"
24 #include "polly/Options.h"
25 #include "polly/Support/GICHelper.h"
26 #include "polly/Support/SCEVValidator.h"
27 #include "polly/Support/ScopHelper.h"
28 #include "llvm/Analysis/LoopInfo.h"
29 #include "llvm/Analysis/ScalarEvolution.h"
30 #include "llvm/Analysis/ScalarEvolutionExpander.h"
31 #include "llvm/IR/IntrinsicInst.h"
32 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
33 
34 using namespace llvm;
35 using namespace polly;
36 
37 static cl::opt<bool>
38     Aligned("enable-polly-aligned",
39             cl::desc("Assumed aligned memory accesses."), cl::Hidden,
40             cl::value_desc("OpenMP code generation enabled if true"),
41             cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
42 
// Flag that selects SCEV-based code generation. The option writes directly
// into polly::SCEVCodegen (via cl::location) so other translation units can
// query the setting through the extern declaration in the Polly headers.
static cl::opt<bool, true>
    SCEVCodegenF("polly-codegen-scev",
                 cl::desc("Use SCEV based code generation."), cl::Hidden,
                 cl::location(SCEVCodegen), cl::init(false), cl::ZeroOrMore,
                 cl::cat(PollyCategory));

// Storage for the flag above.
bool polly::SCEVCodegen;
50 
51 bool polly::canSynthesize(const Instruction *I, const llvm::LoopInfo *LI,
52                           ScalarEvolution *SE, const Region *R) {
53   if (SCEVCodegen) {
54     if (!I || !SE->isSCEVable(I->getType()))
55       return false;
56 
57     if (const SCEV *Scev = SE->getSCEV(const_cast<Instruction *>(I)))
58       if (!isa<SCEVCouldNotCompute>(Scev))
59         if (!hasScalarDepsInsideRegion(Scev, R))
60           return true;
61 
62     return false;
63   }
64 
65   Loop *L = LI->getLoopFor(I->getParent());
66   return L && I == L->getCanonicalInductionVariable() && R->contains(L);
67 }
68 
/// Create a generator that copies the single basic block of \p Stmt using the
/// IR builder \p B.
///
/// \p Build and \p ExprBuilder may be null; they are only needed when a
/// memory access carries a new access relation that must be turned into an
/// explicit address expression (see getNewAccessOperand()).
BlockGenerator::BlockGenerator(PollyIRBuilder &B, ScopStmt &Stmt, Pass *P,
                               LoopInfo &LI, ScalarEvolution &SE,
                               isl_ast_build *Build,
                               IslExprBuilder *ExprBuilder)
    : Builder(B), Statement(Stmt), P(P), LI(LI), SE(SE), Build(Build),
      ExprBuilder(ExprBuilder) {}
75 
76 Value *BlockGenerator::lookupAvailableValue(const Value *Old, ValueMapT &BBMap,
77                                             ValueMapT &GlobalMap) const {
78   // We assume constants never change.
79   // This avoids map lookups for many calls to this function.
80   if (isa<Constant>(Old))
81     return const_cast<Value *>(Old);
82 
83   if (Value *New = GlobalMap.lookup(Old)) {
84     if (Old->getType()->getScalarSizeInBits() <
85         New->getType()->getScalarSizeInBits())
86       New = Builder.CreateTruncOrBitCast(New, Old->getType());
87 
88     return New;
89   }
90 
91   // Or it is probably a scop-constant value defined as global, function
92   // parameter or an instruction not within the scop.
93   if (isa<GlobalValue>(Old) || isa<Argument>(Old))
94     return const_cast<Value *>(Old);
95 
96   if (const Instruction *Inst = dyn_cast<Instruction>(Old))
97     if (!Statement.getParent()->getRegion().contains(Inst->getParent()))
98       return const_cast<Value *>(Old);
99 
100   if (Value *New = BBMap.lookup(Old))
101     return New;
102 
103   return nullptr;
104 }
105 
// Return the value that replaces \p Old in the generated code.
//
// First try the maps via lookupAvailableValue(). If that fails and SCEV-based
// code generation is enabled, recompute the value from its SCEV expression at
// loop scope \p L: loop induction variables are substituted according to
// \p LTS and parameters already mapped in BBMap/GlobalMap are rewritten to
// their replacements before the expression is expanded to IR.
Value *BlockGenerator::getNewValue(const Value *Old, ValueMapT &BBMap,
                                   ValueMapT &GlobalMap, LoopToScevMapT &LTS,
                                   Loop *L) {
  if (Value *New = lookupAvailableValue(Old, BBMap, GlobalMap))
    return New;

  if (SCEVCodegen && SE.isSCEVable(Old->getType()))
    if (const SCEV *Scev = SE.getSCEVAtScope(const_cast<Value *>(Old), L)) {
      if (!isa<SCEVCouldNotCompute>(Scev)) {
        // Substitute the loop induction variables as described by LTS.
        const SCEV *NewScev = apply(Scev, LTS, SE);
        // Rewrite parameters that already have replacements in the maps.
        ValueToValueMap VTV;
        VTV.insert(BBMap.begin(), BBMap.end());
        VTV.insert(GlobalMap.begin(), GlobalMap.end());
        NewScev = SCEVParameterRewriter::rewrite(NewScev, SE, VTV);
        SCEVExpander Expander(SE, "polly");
        Value *Expanded = Expander.expandCodeFor(NewScev, Old->getType(),
                                                 Builder.GetInsertPoint());

        // Cache the expansion so later uses in this block reuse it.
        BBMap[Old] = Expanded;
        return Expanded;
      }
    }

  // Now the scalar dependence is neither available nor SCEVCodegenable, this
  // should never happen in the current code generator.
  llvm_unreachable("Unexpected scalar dependence in region!");
  return nullptr;
}
134 
135 void BlockGenerator::copyInstScalar(const Instruction *Inst, ValueMapT &BBMap,
136                                     ValueMapT &GlobalMap, LoopToScevMapT &LTS) {
137   // We do not generate debug intrinsics as we did not investigate how to
138   // copy them correctly. At the current state, they just crash the code
139   // generation as the meta-data operands are not correctly copied.
140   if (isa<DbgInfoIntrinsic>(Inst))
141     return;
142 
143   Instruction *NewInst = Inst->clone();
144 
145   // Replace old operands with the new ones.
146   for (Value *OldOperand : Inst->operands()) {
147     Value *NewOperand =
148         getNewValue(OldOperand, BBMap, GlobalMap, LTS, getLoopForInst(Inst));
149 
150     if (!NewOperand) {
151       assert(!isa<StoreInst>(NewInst) &&
152              "Store instructions are always needed!");
153       delete NewInst;
154       return;
155     }
156 
157     NewInst->replaceUsesOfWith(OldOperand, NewOperand);
158   }
159 
160   Builder.Insert(NewInst);
161   BBMap[Inst] = NewInst;
162 
163   if (!NewInst->getType()->isVoidTy())
164     NewInst->setName("p_" + Inst->getName());
165 }
166 
// Compute the address for a memory access that carries a new access relation.
//
// The new access relation maps the statement's iteration domain to array
// elements. To obtain an address valid at the current position in the
// generated AST, the relation is pulled back over the inverse of the current
// schedule and handed to the isl AST builder, whose access expression is then
// lowered to LLVM IR by the IslExprBuilder.
Value *BlockGenerator::getNewAccessOperand(const MemoryAccess &MA) {
  isl_pw_multi_aff *PWSchedule, *PWAccRel;
  isl_union_map *ScheduleU;
  isl_map *Schedule, *AccRel;
  isl_ast_expr *Expr;

  assert(ExprBuilder && Build &&
         "Cannot generate new value without IslExprBuilder!");

  AccRel = MA.getNewAccessRelation();
  assert(AccRel && "We generate new code only for new access relations!");

  // Restrict the build's schedule to this statement's domain so it collapses
  // to a single map from domain to scattering.
  ScheduleU = isl_ast_build_get_schedule(Build);
  ScheduleU = isl_union_map_intersect_domain(
      ScheduleU, isl_union_set_from_set(MA.getStatement()->getDomain()));
  Schedule = isl_map_from_union_map(ScheduleU);

  // Compose: scattering -> domain (reversed schedule) -> accessed element.
  PWSchedule = isl_pw_multi_aff_from_map(isl_map_reverse(Schedule));
  PWAccRel = isl_pw_multi_aff_from_map(AccRel);
  PWAccRel = isl_pw_multi_aff_pullback_pw_multi_aff(PWAccRel, PWSchedule);

  Expr = isl_ast_build_access_from_pw_multi_aff(Build, PWAccRel);

  return ExprBuilder->create(Expr);
}
192 
193 Value *BlockGenerator::generateLocationAccessed(const Instruction *Inst,
194                                                 const Value *Pointer,
195                                                 ValueMapT &BBMap,
196                                                 ValueMapT &GlobalMap,
197                                                 LoopToScevMapT &LTS) {
198   const MemoryAccess &MA = Statement.getAccessFor(Inst);
199   isl_map *NewAccRel = MA.getNewAccessRelation();
200 
201   Value *NewPointer;
202   if (NewAccRel)
203     NewPointer = getNewAccessOperand(MA);
204   else
205     NewPointer =
206         getNewValue(Pointer, BBMap, GlobalMap, LTS, getLoopForInst(Inst));
207 
208   isl_map_free(NewAccRel);
209   return NewPointer;
210 }
211 
212 Loop *BlockGenerator::getLoopForInst(const llvm::Instruction *Inst) {
213   return LI.getLoopFor(Inst->getParent());
214 }
215 
216 Value *BlockGenerator::generateScalarLoad(const LoadInst *Load,
217                                           ValueMapT &BBMap,
218                                           ValueMapT &GlobalMap,
219                                           LoopToScevMapT &LTS) {
220   const Value *Pointer = Load->getPointerOperand();
221   const Instruction *Inst = dyn_cast<Instruction>(Load);
222   Value *NewPointer =
223       generateLocationAccessed(Inst, Pointer, BBMap, GlobalMap, LTS);
224   Value *ScalarLoad =
225       Builder.CreateLoad(NewPointer, Load->getName() + "_p_scalar_");
226   return ScalarLoad;
227 }
228 
229 Value *BlockGenerator::generateScalarStore(const StoreInst *Store,
230                                            ValueMapT &BBMap,
231                                            ValueMapT &GlobalMap,
232                                            LoopToScevMapT &LTS) {
233   const Value *Pointer = Store->getPointerOperand();
234   Value *NewPointer =
235       generateLocationAccessed(Store, Pointer, BBMap, GlobalMap, LTS);
236   Value *ValueOperand = getNewValue(Store->getValueOperand(), BBMap, GlobalMap,
237                                     LTS, getLoopForInst(Store));
238 
239   return Builder.CreateStore(ValueOperand, NewPointer);
240 }
241 
242 void BlockGenerator::copyInstruction(const Instruction *Inst, ValueMapT &BBMap,
243                                      ValueMapT &GlobalMap,
244                                      LoopToScevMapT &LTS) {
245   // Terminator instructions control the control flow. They are explicitly
246   // expressed in the clast and do not need to be copied.
247   if (Inst->isTerminator())
248     return;
249 
250   if (canSynthesize(Inst, &P->getAnalysis<LoopInfo>(), &SE,
251                     &Statement.getParent()->getRegion()))
252     return;
253 
254   if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
255     Value *NewLoad = generateScalarLoad(Load, BBMap, GlobalMap, LTS);
256     // Compute NewLoad before its insertion in BBMap to make the insertion
257     // deterministic.
258     BBMap[Load] = NewLoad;
259     return;
260   }
261 
262   if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
263     Value *NewStore = generateScalarStore(Store, BBMap, GlobalMap, LTS);
264     // Compute NewStore before its insertion in BBMap to make the insertion
265     // deterministic.
266     BBMap[Store] = NewStore;
267     return;
268   }
269 
270   copyInstScalar(Inst, BBMap, GlobalMap, LTS);
271 }
272 
273 void BlockGenerator::copyBB(ValueMapT &GlobalMap, LoopToScevMapT &LTS) {
274   BasicBlock *BB = Statement.getBasicBlock();
275   BasicBlock *CopyBB =
276       SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), P);
277   CopyBB->setName("polly.stmt." + BB->getName());
278   Builder.SetInsertPoint(CopyBB->begin());
279 
280   ValueMapT BBMap;
281 
282   for (Instruction &Inst : *BB)
283     copyInstruction(&Inst, BBMap, GlobalMap, LTS);
284 }
285 
/// Create a generator that emits one vector lane per entry of \p GlobalMaps
/// and \p VLTS.
///
/// \p Schedule maps the statement's domain to its scattering and is used to
/// classify the stride of memory accesses; it is kept (__isl_keep), not
/// consumed.
VectorBlockGenerator::VectorBlockGenerator(
    PollyIRBuilder &B, VectorValueMapT &GlobalMaps,
    std::vector<LoopToScevMapT> &VLTS, ScopStmt &Stmt,
    __isl_keep isl_map *Schedule, Pass *P, LoopInfo &LI, ScalarEvolution &SE)
    : BlockGenerator(B, Stmt, P, LI, SE, nullptr, nullptr),
      GlobalMaps(GlobalMaps), VLTS(VLTS), Schedule(Schedule) {
  assert(GlobalMaps.size() > 1 && "Only one vector lane found");
  // NOTE(review): the message mentions a "domain" but the assertion checks
  // the schedule map -- presumably it should read "schedule"; confirm.
  assert(Schedule && "No statement domain provided");
}
295 
296 Value *VectorBlockGenerator::getVectorValue(const Value *Old,
297                                             ValueMapT &VectorMap,
298                                             VectorValueMapT &ScalarMaps,
299                                             Loop *L) {
300   if (Value *NewValue = VectorMap.lookup(Old))
301     return NewValue;
302 
303   int Width = getVectorWidth();
304 
305   Value *Vector = UndefValue::get(VectorType::get(Old->getType(), Width));
306 
307   for (int Lane = 0; Lane < Width; Lane++)
308     Vector = Builder.CreateInsertElement(
309         Vector,
310         getNewValue(Old, ScalarMaps[Lane], GlobalMaps[Lane], VLTS[Lane], L),
311         Builder.getInt32(Lane));
312 
313   VectorMap[Old] = Vector;
314 
315   return Vector;
316 }
317 
318 Type *VectorBlockGenerator::getVectorPtrTy(const Value *Val, int Width) {
319   PointerType *PointerTy = dyn_cast<PointerType>(Val->getType());
320   assert(PointerTy && "PointerType expected");
321 
322   Type *ScalarType = PointerTy->getElementType();
323   VectorType *VectorType = VectorType::get(ScalarType, Width);
324 
325   return PointerType::getUnqual(VectorType);
326 }
327 
328 Value *
329 VectorBlockGenerator::generateStrideOneLoad(const LoadInst *Load,
330                                             VectorValueMapT &ScalarMaps,
331                                             bool NegativeStride = false) {
332   unsigned VectorWidth = getVectorWidth();
333   const Value *Pointer = Load->getPointerOperand();
334   Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth);
335   unsigned Offset = NegativeStride ? VectorWidth - 1 : 0;
336 
337   Value *NewPointer = nullptr;
338   NewPointer = getNewValue(Pointer, ScalarMaps[Offset], GlobalMaps[Offset],
339                            VLTS[Offset], getLoopForInst(Load));
340   Value *VectorPtr =
341       Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
342   LoadInst *VecLoad =
343       Builder.CreateLoad(VectorPtr, Load->getName() + "_p_vec_full");
344   if (!Aligned)
345     VecLoad->setAlignment(8);
346 
347   if (NegativeStride) {
348     SmallVector<Constant *, 16> Indices;
349     for (int i = VectorWidth - 1; i >= 0; i--)
350       Indices.push_back(ConstantInt::get(Builder.getInt32Ty(), i));
351     Constant *SV = llvm::ConstantVector::get(Indices);
352     Value *RevVecLoad = Builder.CreateShuffleVector(
353         VecLoad, VecLoad, SV, Load->getName() + "_reverse");
354     return RevVecLoad;
355   }
356 
357   return VecLoad;
358 }
359 
360 Value *VectorBlockGenerator::generateStrideZeroLoad(const LoadInst *Load,
361                                                     ValueMapT &BBMap) {
362   const Value *Pointer = Load->getPointerOperand();
363   Type *VectorPtrType = getVectorPtrTy(Pointer, 1);
364   Value *NewPointer =
365       getNewValue(Pointer, BBMap, GlobalMaps[0], VLTS[0], getLoopForInst(Load));
366   Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
367                                            Load->getName() + "_p_vec_p");
368   LoadInst *ScalarLoad =
369       Builder.CreateLoad(VectorPtr, Load->getName() + "_p_splat_one");
370 
371   if (!Aligned)
372     ScalarLoad->setAlignment(8);
373 
374   Constant *SplatVector = Constant::getNullValue(
375       VectorType::get(Builder.getInt32Ty(), getVectorWidth()));
376 
377   Value *VectorLoad = Builder.CreateShuffleVector(
378       ScalarLoad, ScalarLoad, SplatVector, Load->getName() + "_p_splat");
379   return VectorLoad;
380 }
381 
382 Value *
383 VectorBlockGenerator::generateUnknownStrideLoad(const LoadInst *Load,
384                                                 VectorValueMapT &ScalarMaps) {
385   int VectorWidth = getVectorWidth();
386   const Value *Pointer = Load->getPointerOperand();
387   VectorType *VectorType = VectorType::get(
388       dyn_cast<PointerType>(Pointer->getType())->getElementType(), VectorWidth);
389 
390   Value *Vector = UndefValue::get(VectorType);
391 
392   for (int i = 0; i < VectorWidth; i++) {
393     Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i],
394                                     VLTS[i], getLoopForInst(Load));
395     Value *ScalarLoad =
396         Builder.CreateLoad(NewPointer, Load->getName() + "_p_scalar_");
397     Vector = Builder.CreateInsertElement(
398         Vector, ScalarLoad, Builder.getInt32(i), Load->getName() + "_p_vec_");
399   }
400 
401   return Vector;
402 }
403 
// Vectorize a load, choosing the cheapest code for the access's stride: a
// splat for stride zero, one wide load for stride +/-1, and a per-lane
// gather otherwise.
void VectorBlockGenerator::generateLoad(const LoadInst *Load,
                                        ValueMapT &VectorMap,
                                        VectorValueMapT &ScalarMaps) {
  // Fall back to one scalar load per lane if the chosen vectorizer strategy
  // requires grouped unrolling or the loaded type cannot be a vector element.
  if (PollyVectorizerChoice >= VECTORIZER_FIRST_NEED_GROUPED_UNROLL ||
      !VectorType::isValidElementType(Load->getType())) {
    for (int i = 0; i < getVectorWidth(); i++)
      ScalarMaps[i][Load] =
          generateScalarLoad(Load, ScalarMaps[i], GlobalMaps[i], VLTS[i]);
    return;
  }

  const MemoryAccess &Access = Statement.getAccessFor(Load);

  // Make sure we have scalar values available to access the pointer to
  // the data location.
  extractScalarValues(Load, VectorMap, ScalarMaps);

  // Each stride query appears to take ownership of its schedule argument,
  // hence the isl_map_copy on every call.
  Value *NewLoad;
  if (Access.isStrideZero(isl_map_copy(Schedule)))
    NewLoad = generateStrideZeroLoad(Load, ScalarMaps[0]);
  else if (Access.isStrideOne(isl_map_copy(Schedule)))
    NewLoad = generateStrideOneLoad(Load, ScalarMaps);
  else if (Access.isStrideX(isl_map_copy(Schedule), -1))
    NewLoad = generateStrideOneLoad(Load, ScalarMaps, true);
  else
    NewLoad = generateUnknownStrideLoad(Load, ScalarMaps);

  VectorMap[Load] = NewLoad;
}
433 
434 void VectorBlockGenerator::copyUnaryInst(const UnaryInstruction *Inst,
435                                          ValueMapT &VectorMap,
436                                          VectorValueMapT &ScalarMaps) {
437   int VectorWidth = getVectorWidth();
438   Value *NewOperand = getVectorValue(Inst->getOperand(0), VectorMap, ScalarMaps,
439                                      getLoopForInst(Inst));
440 
441   assert(isa<CastInst>(Inst) && "Can not generate vector code for instruction");
442 
443   const CastInst *Cast = dyn_cast<CastInst>(Inst);
444   VectorType *DestType = VectorType::get(Inst->getType(), VectorWidth);
445   VectorMap[Inst] = Builder.CreateCast(Cast->getOpcode(), NewOperand, DestType);
446 }
447 
448 void VectorBlockGenerator::copyBinaryInst(const BinaryOperator *Inst,
449                                           ValueMapT &VectorMap,
450                                           VectorValueMapT &ScalarMaps) {
451   Loop *L = getLoopForInst(Inst);
452   Value *OpZero = Inst->getOperand(0);
453   Value *OpOne = Inst->getOperand(1);
454 
455   Value *NewOpZero, *NewOpOne;
456   NewOpZero = getVectorValue(OpZero, VectorMap, ScalarMaps, L);
457   NewOpOne = getVectorValue(OpOne, VectorMap, ScalarMaps, L);
458 
459   Value *NewInst = Builder.CreateBinOp(Inst->getOpcode(), NewOpZero, NewOpOne,
460                                        Inst->getName() + "p_vec");
461   VectorMap[Inst] = NewInst;
462 }
463 
464 void VectorBlockGenerator::copyStore(const StoreInst *Store,
465                                      ValueMapT &VectorMap,
466                                      VectorValueMapT &ScalarMaps) {
467   int VectorWidth = getVectorWidth();
468 
469   const MemoryAccess &Access = Statement.getAccessFor(Store);
470 
471   const Value *Pointer = Store->getPointerOperand();
472   Value *Vector = getVectorValue(Store->getValueOperand(), VectorMap,
473                                  ScalarMaps, getLoopForInst(Store));
474 
475   // Make sure we have scalar values available to access the pointer to
476   // the data location.
477   extractScalarValues(Store, VectorMap, ScalarMaps);
478 
479   if (Access.isStrideOne(isl_map_copy(Schedule))) {
480     Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth);
481     Value *NewPointer = getNewValue(Pointer, ScalarMaps[0], GlobalMaps[0],
482                                     VLTS[0], getLoopForInst(Store));
483 
484     Value *VectorPtr =
485         Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
486     StoreInst *Store = Builder.CreateStore(Vector, VectorPtr);
487 
488     if (!Aligned)
489       Store->setAlignment(8);
490   } else {
491     for (unsigned i = 0; i < ScalarMaps.size(); i++) {
492       Value *Scalar = Builder.CreateExtractElement(Vector, Builder.getInt32(i));
493       Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i],
494                                       VLTS[i], getLoopForInst(Store));
495       Builder.CreateStore(Scalar, NewPointer);
496     }
497   }
498 }
499 
500 bool VectorBlockGenerator::hasVectorOperands(const Instruction *Inst,
501                                              ValueMapT &VectorMap) {
502   for (Value *Operand : Inst->operands())
503     if (VectorMap.count(Operand))
504       return true;
505   return false;
506 }
507 
// For every operand of \p Inst that has a vector value, extract the per-lane
// scalar elements into the corresponding entries of \p ScalarMaps.
//
// Returns true iff at least one operand had a vector value.
bool VectorBlockGenerator::extractScalarValues(const Instruction *Inst,
                                               ValueMapT &VectorMap,
                                               VectorValueMapT &ScalarMaps) {
  bool HasVectorOperand = false;
  int VectorWidth = getVectorWidth();

  for (Value *Operand : Inst->operands()) {
    ValueMapT::iterator VecOp = VectorMap.find(Operand);

    // Operands without a vector value need no extraction.
    if (VecOp == VectorMap.end())
      continue;

    HasVectorOperand = true;
    Value *NewVector = VecOp->second;

    for (int i = 0; i < VectorWidth; ++i) {
      ValueMapT &SM = ScalarMaps[i];

      // If there is one scalar extracted, all scalar elements should have
      // already been extracted by the code here. So no need to check for the
      // existence of all of them.
      if (SM.count(Operand))
        break;

      SM[Operand] =
          Builder.CreateExtractElement(NewVector, Builder.getInt32(i));
    }
  }

  return HasVectorOperand;
}
539 
540 void VectorBlockGenerator::copyInstScalarized(const Instruction *Inst,
541                                               ValueMapT &VectorMap,
542                                               VectorValueMapT &ScalarMaps) {
543   bool HasVectorOperand;
544   int VectorWidth = getVectorWidth();
545 
546   HasVectorOperand = extractScalarValues(Inst, VectorMap, ScalarMaps);
547 
548   for (int VectorLane = 0; VectorLane < getVectorWidth(); VectorLane++)
549     copyInstScalar(Inst, ScalarMaps[VectorLane], GlobalMaps[VectorLane],
550                    VLTS[VectorLane]);
551 
552   if (!VectorType::isValidElementType(Inst->getType()) || !HasVectorOperand)
553     return;
554 
555   // Make the result available as vector value.
556   VectorType *VectorType = VectorType::get(Inst->getType(), VectorWidth);
557   Value *Vector = UndefValue::get(VectorType);
558 
559   for (int i = 0; i < VectorWidth; i++)
560     Vector = Builder.CreateInsertElement(Vector, ScalarMaps[i][Inst],
561                                          Builder.getInt32(i));
562 
563   VectorMap[Inst] = Vector;
564 }
565 
566 int VectorBlockGenerator::getVectorWidth() { return GlobalMaps.size(); }
567 
// Copy one instruction of the statement, dispatching on how it can be
// vectorized: synthesizable values are skipped, loads get stride-aware
// handling, instructions with vector operands are vectorized directly when
// the opcode is supported, and everything else is scalarized per lane.
void VectorBlockGenerator::copyInstruction(const Instruction *Inst,
                                           ValueMapT &VectorMap,
                                           VectorValueMapT &ScalarMaps) {
  // Terminator instructions control the control flow. They are explicitly
  // expressed in the clast and do not need to be copied.
  if (Inst->isTerminator())
    return;

  // Values the code generator can recompute on demand need no copy.
  if (canSynthesize(Inst, &P->getAnalysis<LoopInfo>(), &SE,
                    &Statement.getParent()->getRegion()))
    return;

  if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
    generateLoad(Load, VectorMap, ScalarMaps);
    return;
  }

  // Only instructions with at least one vector operand are vectorized
  // directly; the supported opcodes are stores, casts and binary operators.
  if (hasVectorOperands(Inst, VectorMap)) {
    if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
      copyStore(Store, VectorMap, ScalarMaps);
      return;
    }

    if (const UnaryInstruction *Unary = dyn_cast<UnaryInstruction>(Inst)) {
      copyUnaryInst(Unary, VectorMap, ScalarMaps);
      return;
    }

    if (const BinaryOperator *Binary = dyn_cast<BinaryOperator>(Inst)) {
      copyBinaryInst(Binary, VectorMap, ScalarMaps);
      return;
    }

    // Fallthrough: We generate scalar instructions, if we don't know how to
    // generate vector code.
  }

  copyInstScalarized(Inst, VectorMap, ScalarMaps);
}
607 
608 void VectorBlockGenerator::copyBB() {
609   BasicBlock *BB = Statement.getBasicBlock();
610   BasicBlock *CopyBB =
611       SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), P);
612   CopyBB->setName("polly.stmt." + BB->getName());
613   Builder.SetInsertPoint(CopyBB->begin());
614 
615   // Create two maps that store the mapping from the original instructions of
616   // the old basic block to their copies in the new basic block. Those maps
617   // are basic block local.
618   //
619   // As vector code generation is supported there is one map for scalar values
620   // and one for vector values.
621   //
622   // In case we just do scalar code generation, the vectorMap is not used and
623   // the scalarMap has just one dimension, which contains the mapping.
624   //
625   // In case vector code generation is done, an instruction may either appear
626   // in the vector map once (as it is calculating >vectorwidth< values at a
627   // time. Or (if the values are calculated using scalar operations), it
628   // appears once in every dimension of the scalarMap.
629   VectorValueMapT ScalarBlockMap(getVectorWidth());
630   ValueMapT VectorBlockMap;
631 
632   for (Instruction &Inst : *BB)
633     copyInstruction(&Inst, VectorBlockMap, ScalarBlockMap);
634 }
635