1 //===--- BlockGenerators.cpp - Generate code for statements -----*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the BlockGenerator and VectorBlockGenerator classes,
11 // which generate sequential code and vectorized code for a polyhedral
12 // statement, respectively.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "polly/ScopInfo.h"
17 #include "isl/aff.h"
18 #include "isl/ast.h"
19 #include "isl/ast_build.h"
20 #include "isl/set.h"
21 #include "polly/CodeGen/BlockGenerators.h"
22 #include "polly/CodeGen/CodeGeneration.h"
23 #include "polly/CodeGen/IslExprBuilder.h"
24 #include "polly/Options.h"
25 #include "polly/Support/GICHelper.h"
26 #include "polly/Support/SCEVValidator.h"
27 #include "polly/Support/ScopHelper.h"
28 #include "llvm/Analysis/LoopInfo.h"
29 #include "llvm/Analysis/ScalarEvolution.h"
30 #include "llvm/Analysis/ScalarEvolutionExpander.h"
31 #include "llvm/IR/IntrinsicInst.h"
32 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
33 
34 using namespace llvm;
35 using namespace polly;
36 
37 static cl::opt<bool>
38     Aligned("enable-polly-aligned",
39             cl::desc("Assumed aligned memory accesses."), cl::Hidden,
40             cl::value_desc("OpenMP code generation enabled if true"),
41             cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
42 
// Command line flag backing polly::SCEVCodegen (note cl::location below).
// When enabled, values are recreated from their SCEV expressions during code
// generation instead of copying the original scalar instructions.
static cl::opt<bool, true>
    SCEVCodegenF("polly-codegen-scev",
                 cl::desc("Use SCEV based code generation."), cl::Hidden,
                 cl::location(SCEVCodegen), cl::init(false), cl::ZeroOrMore,
                 cl::cat(PollyCategory));

// Storage for the flag above; declared in a Polly header so that other
// translation units can query the chosen code generation mode.
bool polly::SCEVCodegen;
50 
51 bool polly::canSynthesize(const Instruction *I, const llvm::LoopInfo *LI,
52                           ScalarEvolution *SE, const Region *R) {
53   if (SCEVCodegen) {
54     if (!I || !SE->isSCEVable(I->getType()))
55       return false;
56 
57     if (const SCEV *Scev = SE->getSCEV(const_cast<Instruction *>(I)))
58       if (!isa<SCEVCouldNotCompute>(Scev))
59         if (!hasScalarDepsInsideRegion(Scev, R))
60           return true;
61 
62     return false;
63   }
64 
65   Loop *L = LI->getLoopFor(I->getParent());
66   return L && I == L->getCanonicalInductionVariable() && R->contains(L);
67 }
68 
// Construct a generator that copies the instructions of one statement into
// newly generated code.
//
// @param B           Builder positioned where new code is to be emitted.
// @param Stmt        The statement to generate code for.
// @param P           The pass in whose context code is generated.
// @param LI          LoopInfo for the original function.
// @param SE          ScalarEvolution for the original function.
// @param Build       isl_ast_build used to generate addresses for changed
//                    access relations; may be null if none are present.
// @param ExprBuilder Lowers isl_ast_exprs to LLVM IR; may be null.
BlockGenerator::BlockGenerator(PollyIRBuilder &B, ScopStmt &Stmt, Pass *P,
                               LoopInfo &LI, ScalarEvolution &SE,
                               isl_ast_build *Build,
                               IslExprBuilder *ExprBuilder)
    : Builder(B), Statement(Stmt), P(P), LI(LI), SE(SE), Build(Build),
      ExprBuilder(ExprBuilder) {}
75 
76 Value *BlockGenerator::lookupAvailableValue(const Value *Old, ValueMapT &BBMap,
77                                             ValueMapT &GlobalMap) const {
78   // We assume constants never change.
79   // This avoids map lookups for many calls to this function.
80   if (isa<Constant>(Old))
81     return const_cast<Value *>(Old);
82 
83   if (Value *New = GlobalMap.lookup(Old)) {
84     if (Old->getType()->getScalarSizeInBits() <
85         New->getType()->getScalarSizeInBits())
86       New = Builder.CreateTruncOrBitCast(New, Old->getType());
87 
88     return New;
89   }
90 
91   // Or it is probably a scop-constant value defined as global, function
92   // parameter or an instruction not within the scop.
93   if (isa<GlobalValue>(Old) || isa<Argument>(Old))
94     return const_cast<Value *>(Old);
95 
96   if (const Instruction *Inst = dyn_cast<Instruction>(Old))
97     if (!Statement.getParent()->getRegion().contains(Inst->getParent()))
98       return const_cast<Value *>(Old);
99 
100   if (Value *New = BBMap.lookup(Old))
101     return New;
102 
103   return nullptr;
104 }
105 
// Return the new version of \p Old usable in the generated code: either an
// already available replacement, or — in SCEV mode — a value freshly expanded
// from Old's (loop-adjusted) SCEV expression. The expansion is cached in
// \p BBMap.
//
// @param L The loop at whose scope the SCEV of \p Old is evaluated.
Value *BlockGenerator::getNewValue(const Value *Old, ValueMapT &BBMap,
                                   ValueMapT &GlobalMap, LoopToScevMapT &LTS,
                                   Loop *L) {
  if (Value *New = lookupAvailableValue(Old, BBMap, GlobalMap))
    return New;

  if (SCEVCodegen && SE.isSCEVable(Old->getType()))
    if (const SCEV *Scev = SE.getSCEVAtScope(const_cast<Value *>(Old), L)) {
      if (!isa<SCEVCouldNotCompute>(Scev)) {
        // Substitute the new loop induction variables (LTS), then rewrite
        // any remaining parameters through the combined value maps before
        // expanding the expression to IR at the current insert point.
        const SCEV *NewScev = apply(Scev, LTS, SE);
        ValueToValueMap VTV;
        VTV.insert(BBMap.begin(), BBMap.end());
        VTV.insert(GlobalMap.begin(), GlobalMap.end());
        NewScev = SCEVParameterRewriter::rewrite(NewScev, SE, VTV);
        SCEVExpander Expander(SE, "polly");
        Value *Expanded = Expander.expandCodeFor(NewScev, Old->getType(),
                                                 Builder.GetInsertPoint());

        // Cache the expansion so repeated uses share one value.
        BBMap[Old] = Expanded;
        return Expanded;
      }
    }

  // Now the scalar dependence is neither available nor SCEVCodegenable, this
  // should never happen in the current code generator.
  llvm_unreachable("Unexpected scalar dependence in region!");
  return nullptr;
}
134 
// Clone \p Inst, remap its operands to their new values, insert the clone at
// the current builder position and register it in \p BBMap so subsequent
// instructions of the block can refer to it.
void BlockGenerator::copyInstScalar(const Instruction *Inst, ValueMapT &BBMap,
                                    ValueMapT &GlobalMap, LoopToScevMapT &LTS) {
  // We do not generate debug intrinsics as we did not investigate how to
  // copy them correctly. At the current state, they just crash the code
  // generation as the meta-data operands are not correctly copied.
  if (isa<DbgInfoIntrinsic>(Inst))
    return;

  Instruction *NewInst = Inst->clone();

  // Replace old operands with the new ones.
  for (Value *OldOperand : Inst->operands()) {
    Value *NewOperand =
        getNewValue(OldOperand, BBMap, GlobalMap, LTS, getLoopForInst(Inst));

    if (!NewOperand) {
      // Without a replacement the clone would be invalid, so drop it.
      // Stores must never be dropped: their side effect is the whole point.
      assert(!isa<StoreInst>(NewInst) &&
             "Store instructions are always needed!");
      delete NewInst;
      return;
    }

    NewInst->replaceUsesOfWith(OldOperand, NewOperand);
  }

  Builder.Insert(NewInst);
  BBMap[Inst] = NewInst;

  if (!NewInst->getType()->isVoidTy())
    NewInst->setName("p_" + Inst->getName());
}
166 
// Compute the address accessed by \p MA according to its user-provided
// (JSCoP) access relation, evaluated at the current schedule point, and
// lower it to an LLVM IR pointer value via the IslExprBuilder.
Value *BlockGenerator::getNewAccessOperand(const MemoryAccess &MA) {
  isl_pw_multi_aff *PWSchedule, *PWAccRel;
  isl_union_map *ScheduleU;
  isl_map *Schedule, *AccRel;
  isl_ast_expr *Expr;

  assert(ExprBuilder && Build &&
         "Cannot generate new value without IslExprBuilder!");

  AccRel = MA.getNewAccessRelation();
  assert(AccRel && "We generate new code only for new access relations!");

  // Restrict the current schedule to the statement's domain so it becomes a
  // single map we can invert.
  ScheduleU = isl_ast_build_get_schedule(Build);
  ScheduleU = isl_union_map_intersect_domain(
      ScheduleU, isl_union_set_from_set(MA.getStatement()->getDomain()));
  Schedule = isl_map_from_union_map(ScheduleU);

  // Compose the access relation with the inverse schedule, yielding the
  // accessed element as a function of the schedule (time) dimensions.
  PWSchedule = isl_pw_multi_aff_from_map(isl_map_reverse(Schedule));
  PWAccRel = isl_pw_multi_aff_from_map(AccRel);
  PWAccRel = isl_pw_multi_aff_pullback_pw_multi_aff(PWAccRel, PWSchedule);

  // Build an AST expression for the access and take its address.
  Expr = isl_ast_build_access_from_pw_multi_aff(Build, PWAccRel);
  Expr = isl_ast_expr_address_of(Expr);

  return ExprBuilder->create(Expr);
}
193 
194 Value *BlockGenerator::generateLocationAccessed(const Instruction *Inst,
195                                                 const Value *Pointer,
196                                                 ValueMapT &BBMap,
197                                                 ValueMapT &GlobalMap,
198                                                 LoopToScevMapT &LTS) {
199   const MemoryAccess &MA = Statement.getAccessFor(Inst);
200   isl_map *NewAccRel = MA.getNewAccessRelation();
201 
202   Value *NewPointer;
203   if (NewAccRel)
204     NewPointer = getNewAccessOperand(MA);
205   else
206     NewPointer =
207         getNewValue(Pointer, BBMap, GlobalMap, LTS, getLoopForInst(Inst));
208 
209   isl_map_free(NewAccRel);
210   return NewPointer;
211 }
212 
213 Loop *BlockGenerator::getLoopForInst(const llvm::Instruction *Inst) {
214   return LI.getLoopFor(Inst->getParent());
215 }
216 
217 Value *BlockGenerator::generateScalarLoad(const LoadInst *Load,
218                                           ValueMapT &BBMap,
219                                           ValueMapT &GlobalMap,
220                                           LoopToScevMapT &LTS) {
221   const Value *Pointer = Load->getPointerOperand();
222   const Instruction *Inst = dyn_cast<Instruction>(Load);
223   Value *NewPointer =
224       generateLocationAccessed(Inst, Pointer, BBMap, GlobalMap, LTS);
225   Value *ScalarLoad = Builder.CreateAlignedLoad(
226       NewPointer, Load->getAlignment(), Load->getName() + "_p_scalar_");
227   return ScalarLoad;
228 }
229 
230 Value *BlockGenerator::generateScalarStore(const StoreInst *Store,
231                                            ValueMapT &BBMap,
232                                            ValueMapT &GlobalMap,
233                                            LoopToScevMapT &LTS) {
234   const Value *Pointer = Store->getPointerOperand();
235   Value *NewPointer =
236       generateLocationAccessed(Store, Pointer, BBMap, GlobalMap, LTS);
237   Value *ValueOperand = getNewValue(Store->getValueOperand(), BBMap, GlobalMap,
238                                     LTS, getLoopForInst(Store));
239 
240   Value *NewStore = Builder.CreateAlignedStore(ValueOperand, NewPointer,
241                                                Store->getAlignment());
242   return NewStore;
243 }
244 
245 void BlockGenerator::copyInstruction(const Instruction *Inst, ValueMapT &BBMap,
246                                      ValueMapT &GlobalMap,
247                                      LoopToScevMapT &LTS) {
248   // Terminator instructions control the control flow. They are explicitly
249   // expressed in the clast and do not need to be copied.
250   if (Inst->isTerminator())
251     return;
252 
253   if (canSynthesize(Inst, &P->getAnalysis<LoopInfo>(), &SE,
254                     &Statement.getParent()->getRegion()))
255     return;
256 
257   if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
258     Value *NewLoad = generateScalarLoad(Load, BBMap, GlobalMap, LTS);
259     // Compute NewLoad before its insertion in BBMap to make the insertion
260     // deterministic.
261     BBMap[Load] = NewLoad;
262     return;
263   }
264 
265   if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
266     Value *NewStore = generateScalarStore(Store, BBMap, GlobalMap, LTS);
267     // Compute NewStore before its insertion in BBMap to make the insertion
268     // deterministic.
269     BBMap[Store] = NewStore;
270     return;
271   }
272 
273   copyInstScalar(Inst, BBMap, GlobalMap, LTS);
274 }
275 
276 void BlockGenerator::copyBB(ValueMapT &GlobalMap, LoopToScevMapT &LTS) {
277   BasicBlock *BB = Statement.getBasicBlock();
278   BasicBlock *CopyBB =
279       SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), P);
280   CopyBB->setName("polly.stmt." + BB->getName());
281   Builder.SetInsertPoint(CopyBB->begin());
282 
283   ValueMapT BBMap;
284 
285   for (Instruction &Inst : *BB)
286     copyInstruction(&Inst, BBMap, GlobalMap, LTS);
287 }
288 
// Construct a generator that emits vector code for one statement.
//
// @param GlobalMaps One value map per vector lane; their number defines the
//                   vector width (must be > 1).
// @param VLTS       One loop-to-SCEV map per vector lane.
// @param Schedule   The statement's schedule, used for stride analysis
//                   (kept, not owned).
// Base-class Build/ExprBuilder are null: the vector path does not generate
// new access relations.
VectorBlockGenerator::VectorBlockGenerator(
    PollyIRBuilder &B, VectorValueMapT &GlobalMaps,
    std::vector<LoopToScevMapT> &VLTS, ScopStmt &Stmt,
    __isl_keep isl_map *Schedule, Pass *P, LoopInfo &LI, ScalarEvolution &SE)
    : BlockGenerator(B, Stmt, P, LI, SE, nullptr, nullptr),
      GlobalMaps(GlobalMaps), VLTS(VLTS), Schedule(Schedule) {
  assert(GlobalMaps.size() > 1 && "Only one vector lane found");
  assert(Schedule && "No statement domain provided");
}
298 
299 Value *VectorBlockGenerator::getVectorValue(const Value *Old,
300                                             ValueMapT &VectorMap,
301                                             VectorValueMapT &ScalarMaps,
302                                             Loop *L) {
303   if (Value *NewValue = VectorMap.lookup(Old))
304     return NewValue;
305 
306   int Width = getVectorWidth();
307 
308   Value *Vector = UndefValue::get(VectorType::get(Old->getType(), Width));
309 
310   for (int Lane = 0; Lane < Width; Lane++)
311     Vector = Builder.CreateInsertElement(
312         Vector,
313         getNewValue(Old, ScalarMaps[Lane], GlobalMaps[Lane], VLTS[Lane], L),
314         Builder.getInt32(Lane));
315 
316   VectorMap[Old] = Vector;
317 
318   return Vector;
319 }
320 
321 Type *VectorBlockGenerator::getVectorPtrTy(const Value *Val, int Width) {
322   PointerType *PointerTy = dyn_cast<PointerType>(Val->getType());
323   assert(PointerTy && "PointerType expected");
324 
325   Type *ScalarType = PointerTy->getElementType();
326   VectorType *VectorType = VectorType::get(ScalarType, Width);
327 
328   return PointerType::getUnqual(VectorType);
329 }
330 
331 Value *
332 VectorBlockGenerator::generateStrideOneLoad(const LoadInst *Load,
333                                             VectorValueMapT &ScalarMaps,
334                                             bool NegativeStride = false) {
335   unsigned VectorWidth = getVectorWidth();
336   const Value *Pointer = Load->getPointerOperand();
337   Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth);
338   unsigned Offset = NegativeStride ? VectorWidth - 1 : 0;
339 
340   Value *NewPointer = nullptr;
341   NewPointer = getNewValue(Pointer, ScalarMaps[Offset], GlobalMaps[Offset],
342                            VLTS[Offset], getLoopForInst(Load));
343   Value *VectorPtr =
344       Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
345   LoadInst *VecLoad =
346       Builder.CreateLoad(VectorPtr, Load->getName() + "_p_vec_full");
347   if (!Aligned)
348     VecLoad->setAlignment(8);
349 
350   if (NegativeStride) {
351     SmallVector<Constant *, 16> Indices;
352     for (int i = VectorWidth - 1; i >= 0; i--)
353       Indices.push_back(ConstantInt::get(Builder.getInt32Ty(), i));
354     Constant *SV = llvm::ConstantVector::get(Indices);
355     Value *RevVecLoad = Builder.CreateShuffleVector(
356         VecLoad, VecLoad, SV, Load->getName() + "_reverse");
357     return RevVecLoad;
358   }
359 
360   return VecLoad;
361 }
362 
363 Value *VectorBlockGenerator::generateStrideZeroLoad(const LoadInst *Load,
364                                                     ValueMapT &BBMap) {
365   const Value *Pointer = Load->getPointerOperand();
366   Type *VectorPtrType = getVectorPtrTy(Pointer, 1);
367   Value *NewPointer =
368       getNewValue(Pointer, BBMap, GlobalMaps[0], VLTS[0], getLoopForInst(Load));
369   Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
370                                            Load->getName() + "_p_vec_p");
371   LoadInst *ScalarLoad =
372       Builder.CreateLoad(VectorPtr, Load->getName() + "_p_splat_one");
373 
374   if (!Aligned)
375     ScalarLoad->setAlignment(8);
376 
377   Constant *SplatVector = Constant::getNullValue(
378       VectorType::get(Builder.getInt32Ty(), getVectorWidth()));
379 
380   Value *VectorLoad = Builder.CreateShuffleVector(
381       ScalarLoad, ScalarLoad, SplatVector, Load->getName() + "_p_splat");
382   return VectorLoad;
383 }
384 
385 Value *
386 VectorBlockGenerator::generateUnknownStrideLoad(const LoadInst *Load,
387                                                 VectorValueMapT &ScalarMaps) {
388   int VectorWidth = getVectorWidth();
389   const Value *Pointer = Load->getPointerOperand();
390   VectorType *VectorType = VectorType::get(
391       dyn_cast<PointerType>(Pointer->getType())->getElementType(), VectorWidth);
392 
393   Value *Vector = UndefValue::get(VectorType);
394 
395   for (int i = 0; i < VectorWidth; i++) {
396     Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i],
397                                     VLTS[i], getLoopForInst(Load));
398     Value *ScalarLoad =
399         Builder.CreateLoad(NewPointer, Load->getName() + "_p_scalar_");
400     Vector = Builder.CreateInsertElement(
401         Vector, ScalarLoad, Builder.getInt32(i), Load->getName() + "_p_vec_");
402   }
403 
404   return Vector;
405 }
406 
// Generate code for \p Load: a splat load (stride zero), one wide load
// (stride one / minus one), or per-lane scalar loads (unknown stride, or
// whenever vectorization of this load is not possible/desired).
void VectorBlockGenerator::generateLoad(const LoadInst *Load,
                                        ValueMapT &VectorMap,
                                        VectorValueMapT &ScalarMaps) {
  // Fall back to scalar loads if the vectorizer strategy requires grouped
  // unrolling or the loaded type cannot be a vector element.
  if (PollyVectorizerChoice >= VECTORIZER_FIRST_NEED_GROUPED_UNROLL ||
      !VectorType::isValidElementType(Load->getType())) {
    for (int i = 0; i < getVectorWidth(); i++)
      ScalarMaps[i][Load] =
          generateScalarLoad(Load, ScalarMaps[i], GlobalMaps[i], VLTS[i]);
    return;
  }

  const MemoryAccess &Access = Statement.getAccessFor(Load);

  // Make sure we have scalar values available to access the pointer to
  // the data location.
  extractScalarValues(Load, VectorMap, ScalarMaps);

  // The stride checks consume their schedule argument, so hand each one a
  // fresh copy.
  Value *NewLoad;
  if (Access.isStrideZero(isl_map_copy(Schedule)))
    NewLoad = generateStrideZeroLoad(Load, ScalarMaps[0]);
  else if (Access.isStrideOne(isl_map_copy(Schedule)))
    NewLoad = generateStrideOneLoad(Load, ScalarMaps);
  else if (Access.isStrideX(isl_map_copy(Schedule), -1))
    NewLoad = generateStrideOneLoad(Load, ScalarMaps, true);
  else
    NewLoad = generateUnknownStrideLoad(Load, ScalarMaps);

  VectorMap[Load] = NewLoad;
}
436 
437 void VectorBlockGenerator::copyUnaryInst(const UnaryInstruction *Inst,
438                                          ValueMapT &VectorMap,
439                                          VectorValueMapT &ScalarMaps) {
440   int VectorWidth = getVectorWidth();
441   Value *NewOperand = getVectorValue(Inst->getOperand(0), VectorMap, ScalarMaps,
442                                      getLoopForInst(Inst));
443 
444   assert(isa<CastInst>(Inst) && "Can not generate vector code for instruction");
445 
446   const CastInst *Cast = dyn_cast<CastInst>(Inst);
447   VectorType *DestType = VectorType::get(Inst->getType(), VectorWidth);
448   VectorMap[Inst] = Builder.CreateCast(Cast->getOpcode(), NewOperand, DestType);
449 }
450 
451 void VectorBlockGenerator::copyBinaryInst(const BinaryOperator *Inst,
452                                           ValueMapT &VectorMap,
453                                           VectorValueMapT &ScalarMaps) {
454   Loop *L = getLoopForInst(Inst);
455   Value *OpZero = Inst->getOperand(0);
456   Value *OpOne = Inst->getOperand(1);
457 
458   Value *NewOpZero, *NewOpOne;
459   NewOpZero = getVectorValue(OpZero, VectorMap, ScalarMaps, L);
460   NewOpOne = getVectorValue(OpOne, VectorMap, ScalarMaps, L);
461 
462   Value *NewInst = Builder.CreateBinOp(Inst->getOpcode(), NewOpZero, NewOpOne,
463                                        Inst->getName() + "p_vec");
464   VectorMap[Inst] = NewInst;
465 }
466 
467 void VectorBlockGenerator::copyStore(const StoreInst *Store,
468                                      ValueMapT &VectorMap,
469                                      VectorValueMapT &ScalarMaps) {
470   int VectorWidth = getVectorWidth();
471 
472   const MemoryAccess &Access = Statement.getAccessFor(Store);
473 
474   const Value *Pointer = Store->getPointerOperand();
475   Value *Vector = getVectorValue(Store->getValueOperand(), VectorMap,
476                                  ScalarMaps, getLoopForInst(Store));
477 
478   // Make sure we have scalar values available to access the pointer to
479   // the data location.
480   extractScalarValues(Store, VectorMap, ScalarMaps);
481 
482   if (Access.isStrideOne(isl_map_copy(Schedule))) {
483     Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth);
484     Value *NewPointer = getNewValue(Pointer, ScalarMaps[0], GlobalMaps[0],
485                                     VLTS[0], getLoopForInst(Store));
486 
487     Value *VectorPtr =
488         Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
489     StoreInst *Store = Builder.CreateStore(Vector, VectorPtr);
490 
491     if (!Aligned)
492       Store->setAlignment(8);
493   } else {
494     for (unsigned i = 0; i < ScalarMaps.size(); i++) {
495       Value *Scalar = Builder.CreateExtractElement(Vector, Builder.getInt32(i));
496       Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i],
497                                       VLTS[i], getLoopForInst(Store));
498       Builder.CreateStore(Scalar, NewPointer);
499     }
500   }
501 }
502 
503 bool VectorBlockGenerator::hasVectorOperands(const Instruction *Inst,
504                                              ValueMapT &VectorMap) {
505   for (Value *Operand : Inst->operands())
506     if (VectorMap.count(Operand))
507       return true;
508   return false;
509 }
510 
// For every operand of \p Inst that is only available as a vector, extract
// its per-lane scalar values into the corresponding \p ScalarMaps entries so
// that scalarized code can refer to them.
//
// @returns True iff at least one operand had a vector value.
bool VectorBlockGenerator::extractScalarValues(const Instruction *Inst,
                                               ValueMapT &VectorMap,
                                               VectorValueMapT &ScalarMaps) {
  bool HasVectorOperand = false;
  int VectorWidth = getVectorWidth();

  for (Value *Operand : Inst->operands()) {
    ValueMapT::iterator VecOp = VectorMap.find(Operand);

    if (VecOp == VectorMap.end())
      continue;

    HasVectorOperand = true;
    Value *NewVector = VecOp->second;

    for (int i = 0; i < VectorWidth; ++i) {
      ValueMapT &SM = ScalarMaps[i];

      // If one scalar lane was extracted, all lanes were extracted by this
      // same code, so there is no need to check for the existence of the
      // remaining ones — stop at the first hit.
      if (SM.count(Operand))
        break;

      SM[Operand] =
          Builder.CreateExtractElement(NewVector, Builder.getInt32(i));
    }
  }

  return HasVectorOperand;
}
542 
543 void VectorBlockGenerator::copyInstScalarized(const Instruction *Inst,
544                                               ValueMapT &VectorMap,
545                                               VectorValueMapT &ScalarMaps) {
546   bool HasVectorOperand;
547   int VectorWidth = getVectorWidth();
548 
549   HasVectorOperand = extractScalarValues(Inst, VectorMap, ScalarMaps);
550 
551   for (int VectorLane = 0; VectorLane < getVectorWidth(); VectorLane++)
552     copyInstScalar(Inst, ScalarMaps[VectorLane], GlobalMaps[VectorLane],
553                    VLTS[VectorLane]);
554 
555   if (!VectorType::isValidElementType(Inst->getType()) || !HasVectorOperand)
556     return;
557 
558   // Make the result available as vector value.
559   VectorType *VectorType = VectorType::get(Inst->getType(), VectorWidth);
560   Value *Vector = UndefValue::get(VectorType);
561 
562   for (int i = 0; i < VectorWidth; i++)
563     Vector = Builder.CreateInsertElement(Vector, ScalarMaps[i][Inst],
564                                          Builder.getInt32(i));
565 
566   VectorMap[Inst] = Vector;
567 }
568 
// The vector width equals the number of lanes, i.e. the number of per-lane
// global value maps this generator was constructed with.
int VectorBlockGenerator::getVectorWidth() { return GlobalMaps.size(); }
570 
571 void VectorBlockGenerator::copyInstruction(const Instruction *Inst,
572                                            ValueMapT &VectorMap,
573                                            VectorValueMapT &ScalarMaps) {
574   // Terminator instructions control the control flow. They are explicitly
575   // expressed in the clast and do not need to be copied.
576   if (Inst->isTerminator())
577     return;
578 
579   if (canSynthesize(Inst, &P->getAnalysis<LoopInfo>(), &SE,
580                     &Statement.getParent()->getRegion()))
581     return;
582 
583   if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
584     generateLoad(Load, VectorMap, ScalarMaps);
585     return;
586   }
587 
588   if (hasVectorOperands(Inst, VectorMap)) {
589     if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
590       copyStore(Store, VectorMap, ScalarMaps);
591       return;
592     }
593 
594     if (const UnaryInstruction *Unary = dyn_cast<UnaryInstruction>(Inst)) {
595       copyUnaryInst(Unary, VectorMap, ScalarMaps);
596       return;
597     }
598 
599     if (const BinaryOperator *Binary = dyn_cast<BinaryOperator>(Inst)) {
600       copyBinaryInst(Binary, VectorMap, ScalarMaps);
601       return;
602     }
603 
604     // Falltrough: We generate scalar instructions, if we don't know how to
605     // generate vector code.
606   }
607 
608   copyInstScalarized(Inst, VectorMap, ScalarMaps);
609 }
610 
// Copy the statement's basic block into a freshly split block at the current
// insert point, generating vector code where possible.
void VectorBlockGenerator::copyBB() {
  BasicBlock *BB = Statement.getBasicBlock();
  BasicBlock *CopyBB =
      SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), P);
  CopyBB->setName("polly.stmt." + BB->getName());
  Builder.SetInsertPoint(CopyBB->begin());

  // Create two maps that store the mapping from the original instructions of
  // the old basic block to their copies in the new basic block. Those maps
  // are basic block local.
  //
  // As vector code generation is supported, there is one map for scalar
  // values and one for vector values.
  //
  // In case we just do scalar code generation, the vector map is not used
  // and the scalar map has just one dimension, which contains the mapping.
  //
  // In case vector code generation is done, an instruction may either appear
  // once in the vector map (if it computes >vectorwidth< values at a time)
  // or, if its values are calculated using scalar operations, once in every
  // dimension of the scalar map.
  VectorValueMapT ScalarBlockMap(getVectorWidth());
  ValueMapT VectorBlockMap;

  for (Instruction &Inst : *BB)
    copyInstruction(&Inst, VectorBlockMap, ScalarBlockMap);
}
638