1 //===--- BlockGenerators.cpp - Generate code for statements -----*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the BlockGenerator and VectorBlockGenerator classes,
11 // which generate sequential code and vectorized code for a polyhedral
12 // statement, respectively.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "polly/ScopInfo.h"
17 #include "isl/aff.h"
18 #include "isl/ast.h"
19 #include "isl/ast_build.h"
20 #include "isl/set.h"
21 #include "polly/CodeGen/BlockGenerators.h"
22 #include "polly/CodeGen/CodeGeneration.h"
23 #include "polly/CodeGen/IslExprBuilder.h"
24 #include "polly/Options.h"
25 #include "polly/Support/GICHelper.h"
26 #include "polly/Support/SCEVValidator.h"
27 #include "polly/Support/ScopHelper.h"
28 #include "llvm/Analysis/LoopInfo.h"
29 #include "llvm/Analysis/ScalarEvolution.h"
30 #include "llvm/Analysis/ScalarEvolutionExpander.h"
31 #include "llvm/IR/IntrinsicInst.h"
32 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
33 
34 using namespace llvm;
35 using namespace polly;
36 
// Command-line flag: when set, generated vector loads/stores keep their
// natural alignment; otherwise they are conservatively emitted with an
// alignment of 8 (see generateStrideOneLoad and friends below).
static cl::opt<bool> Aligned("enable-polly-aligned",
                             cl::desc("Assumed aligned memory accesses."),
                             cl::Hidden, cl::init(false), cl::ZeroOrMore,
                             cl::cat(PollyCategory));

// Command-line flag backing polly::SCEVCodegen (tied via cl::location):
// enables SCEV-based value synthesis in canSynthesize/getNewValue.
static cl::opt<bool, true>
    SCEVCodegenF("polly-codegen-scev",
                 cl::desc("Use SCEV based code generation."), cl::Hidden,
                 cl::location(SCEVCodegen), cl::init(false), cl::ZeroOrMore,
                 cl::cat(PollyCategory));

// Storage for the flag declared in the polly namespace; written only by the
// command-line option above.
bool polly::SCEVCodegen;
49 
50 bool polly::canSynthesize(const Instruction *I, const llvm::LoopInfo *LI,
51                           ScalarEvolution *SE, const Region *R) {
52   if (SCEVCodegen) {
53     if (!I || !SE->isSCEVable(I->getType()))
54       return false;
55 
56     if (const SCEV *Scev = SE->getSCEV(const_cast<Instruction *>(I)))
57       if (!isa<SCEVCouldNotCompute>(Scev))
58         if (!hasScalarDepsInsideRegion(Scev, R))
59           return true;
60 
61     return false;
62   }
63 
64   Loop *L = LI->getLoopFor(I->getParent());
65   return L && I == L->getCanonicalInductionVariable() && R->contains(L);
66 }
67 
// Construct a generator that copies the instructions of Stmt's basic block
// at the builder's current insert point. Build/ExprBuilder are only needed
// when new access relations must be turned into address expressions (see
// getNewAccessOperand, which asserts their presence).
BlockGenerator::BlockGenerator(PollyIRBuilder &B, ScopStmt &Stmt, Pass *P,
                               LoopInfo &LI, ScalarEvolution &SE,
                               isl_ast_build *Build,
                               IslExprBuilder *ExprBuilder)
    : Builder(B), Statement(Stmt), P(P), LI(LI), SE(SE), Build(Build),
      ExprBuilder(ExprBuilder) {}
74 
75 Value *BlockGenerator::lookupAvailableValue(const Value *Old, ValueMapT &BBMap,
76                                             ValueMapT &GlobalMap) const {
77   // We assume constants never change.
78   // This avoids map lookups for many calls to this function.
79   if (isa<Constant>(Old))
80     return const_cast<Value *>(Old);
81 
82   if (Value *New = GlobalMap.lookup(Old)) {
83     if (Old->getType()->getScalarSizeInBits() <
84         New->getType()->getScalarSizeInBits())
85       New = Builder.CreateTruncOrBitCast(New, Old->getType());
86 
87     return New;
88   }
89 
90   // Or it is probably a scop-constant value defined as global, function
91   // parameter or an instruction not within the scop.
92   if (isa<GlobalValue>(Old) || isa<Argument>(Old))
93     return const_cast<Value *>(Old);
94 
95   if (const Instruction *Inst = dyn_cast<Instruction>(Old))
96     if (!Statement.getParent()->getRegion().contains(Inst->getParent()))
97       return const_cast<Value *>(Old);
98 
99   if (Value *New = BBMap.lookup(Old))
100     return New;
101 
102   return nullptr;
103 }
104 
// Return the new version of Old for the generated code. First consult the
// value maps (and scop-constants) via lookupAvailableValue; failing that,
// with SCEV codegen enabled, re-synthesize the value by expanding its SCEV
// expression at the current insert point.
Value *BlockGenerator::getNewValue(const Value *Old, ValueMapT &BBMap,
                                   ValueMapT &GlobalMap, LoopToScevMapT &LTS,
                                   Loop *L) {
  if (Value *New = lookupAvailableValue(Old, BBMap, GlobalMap))
    return New;

  if (SCEVCodegen && SE.isSCEVable(Old->getType()))
    if (const SCEV *Scev = SE.getSCEVAtScope(const_cast<Value *>(Old), L)) {
      if (!isa<SCEVCouldNotCompute>(Scev)) {
        // Rewrite the expression in terms of the new loop induction
        // variables (LTS), then substitute parameters that already have
        // copies in either value map before expanding to IR.
        const SCEV *NewScev = apply(Scev, LTS, SE);
        ValueToValueMap VTV;
        VTV.insert(BBMap.begin(), BBMap.end());
        VTV.insert(GlobalMap.begin(), GlobalMap.end());
        NewScev = SCEVParameterRewriter::rewrite(NewScev, SE, VTV);
        SCEVExpander Expander(SE, "polly");
        Value *Expanded = Expander.expandCodeFor(NewScev, Old->getType(),
                                                 Builder.GetInsertPoint());

        // Cache the expansion so later uses in this block reuse it.
        BBMap[Old] = Expanded;
        return Expanded;
      }
    }

  // Now the scalar dependence is neither available nor SCEVCodegenable, this
  // should never happen in the current code generator.
  llvm_unreachable("Unexpected scalar dependence in region!");
  return nullptr;
}
133 
134 void BlockGenerator::copyInstScalar(const Instruction *Inst, ValueMapT &BBMap,
135                                     ValueMapT &GlobalMap, LoopToScevMapT &LTS) {
136   // We do not generate debug intrinsics as we did not investigate how to
137   // copy them correctly. At the current state, they just crash the code
138   // generation as the meta-data operands are not correctly copied.
139   if (isa<DbgInfoIntrinsic>(Inst))
140     return;
141 
142   Instruction *NewInst = Inst->clone();
143 
144   // Replace old operands with the new ones.
145   for (Value *OldOperand : Inst->operands()) {
146     Value *NewOperand =
147         getNewValue(OldOperand, BBMap, GlobalMap, LTS, getLoopForInst(Inst));
148 
149     if (!NewOperand) {
150       assert(!isa<StoreInst>(NewInst) &&
151              "Store instructions are always needed!");
152       delete NewInst;
153       return;
154     }
155 
156     NewInst->replaceUsesOfWith(OldOperand, NewOperand);
157   }
158 
159   Builder.Insert(NewInst);
160   BBMap[Inst] = NewInst;
161 
162   if (!NewInst->getType()->isVoidTy())
163     NewInst->setName("p_" + Inst->getName());
164 }
165 
166 Value *BlockGenerator::getNewAccessOperand(const MemoryAccess &MA) {
167   isl_pw_multi_aff *PWAccRel;
168   isl_union_map *Schedule;
169   isl_ast_expr *Expr;
170 
171   assert(ExprBuilder && Build &&
172          "Cannot generate new value without IslExprBuilder!");
173 
174   Schedule = isl_ast_build_get_schedule(Build);
175   PWAccRel = MA.applyScheduleToAccessRelation(Schedule);
176 
177   Expr = isl_ast_build_access_from_pw_multi_aff(Build, PWAccRel);
178   Expr = isl_ast_expr_address_of(Expr);
179 
180   return ExprBuilder->create(Expr);
181 }
182 
183 Value *BlockGenerator::generateLocationAccessed(const Instruction *Inst,
184                                                 const Value *Pointer,
185                                                 ValueMapT &BBMap,
186                                                 ValueMapT &GlobalMap,
187                                                 LoopToScevMapT &LTS) {
188   const MemoryAccess &MA = Statement.getAccessFor(Inst);
189 
190   Value *NewPointer;
191   if (MA.hasNewAccessRelation())
192     NewPointer = getNewAccessOperand(MA);
193   else
194     NewPointer =
195         getNewValue(Pointer, BBMap, GlobalMap, LTS, getLoopForInst(Inst));
196 
197   return NewPointer;
198 }
199 
// Return the innermost loop containing Inst in the original IR, or null.
Loop *BlockGenerator::getLoopForInst(const llvm::Instruction *Inst) {
  return LI.getLoopFor(Inst->getParent());
}
203 
204 Value *BlockGenerator::generateScalarLoad(const LoadInst *Load,
205                                           ValueMapT &BBMap,
206                                           ValueMapT &GlobalMap,
207                                           LoopToScevMapT &LTS) {
208   const Value *Pointer = Load->getPointerOperand();
209   Value *NewPointer =
210       generateLocationAccessed(Load, Pointer, BBMap, GlobalMap, LTS);
211   Value *ScalarLoad = Builder.CreateAlignedLoad(
212       NewPointer, Load->getAlignment(), Load->getName() + "_p_scalar_");
213   return ScalarLoad;
214 }
215 
216 Value *BlockGenerator::generateScalarStore(const StoreInst *Store,
217                                            ValueMapT &BBMap,
218                                            ValueMapT &GlobalMap,
219                                            LoopToScevMapT &LTS) {
220   const Value *Pointer = Store->getPointerOperand();
221   Value *NewPointer =
222       generateLocationAccessed(Store, Pointer, BBMap, GlobalMap, LTS);
223   Value *ValueOperand = getNewValue(Store->getValueOperand(), BBMap, GlobalMap,
224                                     LTS, getLoopForInst(Store));
225 
226   Value *NewStore = Builder.CreateAlignedStore(ValueOperand, NewPointer,
227                                                Store->getAlignment());
228   return NewStore;
229 }
230 
231 void BlockGenerator::copyInstruction(const Instruction *Inst, ValueMapT &BBMap,
232                                      ValueMapT &GlobalMap,
233                                      LoopToScevMapT &LTS) {
234   // Terminator instructions control the control flow. They are explicitly
235   // expressed in the clast and do not need to be copied.
236   if (Inst->isTerminator())
237     return;
238 
239   if (canSynthesize(Inst, &P->getAnalysis<LoopInfo>(), &SE,
240                     &Statement.getParent()->getRegion()))
241     return;
242 
243   if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
244     Value *NewLoad = generateScalarLoad(Load, BBMap, GlobalMap, LTS);
245     // Compute NewLoad before its insertion in BBMap to make the insertion
246     // deterministic.
247     BBMap[Load] = NewLoad;
248     return;
249   }
250 
251   if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
252     Value *NewStore = generateScalarStore(Store, BBMap, GlobalMap, LTS);
253     // Compute NewStore before its insertion in BBMap to make the insertion
254     // deterministic.
255     BBMap[Store] = NewStore;
256     return;
257   }
258 
259   copyInstScalar(Inst, BBMap, GlobalMap, LTS);
260 }
261 
262 void BlockGenerator::copyBB(ValueMapT &GlobalMap, LoopToScevMapT &LTS) {
263   BasicBlock *BB = Statement.getBasicBlock();
264   BasicBlock *CopyBB =
265       SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), P);
266   CopyBB->setName("polly.stmt." + BB->getName());
267   Builder.SetInsertPoint(CopyBB->begin());
268 
269   ValueMapT BBMap;
270 
271   for (Instruction &Inst : *BB)
272     copyInstruction(&Inst, BBMap, GlobalMap, LTS);
273 }
274 
275 VectorBlockGenerator::VectorBlockGenerator(
276     PollyIRBuilder &B, VectorValueMapT &GlobalMaps,
277     std::vector<LoopToScevMapT> &VLTS, ScopStmt &Stmt,
278     __isl_keep isl_map *Schedule, Pass *P, LoopInfo &LI, ScalarEvolution &SE,
279     __isl_keep isl_ast_build *Build, IslExprBuilder *ExprBuilder)
280     : BlockGenerator(B, Stmt, P, LI, SE, Build, ExprBuilder),
281       GlobalMaps(GlobalMaps), VLTS(VLTS), Schedule(Schedule) {
282   assert(GlobalMaps.size() > 1 && "Only one vector lane found");
283   assert(Schedule && "No statement domain provided");
284 }
285 
286 Value *VectorBlockGenerator::getVectorValue(const Value *Old,
287                                             ValueMapT &VectorMap,
288                                             VectorValueMapT &ScalarMaps,
289                                             Loop *L) {
290   if (Value *NewValue = VectorMap.lookup(Old))
291     return NewValue;
292 
293   int Width = getVectorWidth();
294 
295   Value *Vector = UndefValue::get(VectorType::get(Old->getType(), Width));
296 
297   for (int Lane = 0; Lane < Width; Lane++)
298     Vector = Builder.CreateInsertElement(
299         Vector,
300         getNewValue(Old, ScalarMaps[Lane], GlobalMaps[Lane], VLTS[Lane], L),
301         Builder.getInt32(Lane));
302 
303   VectorMap[Old] = Vector;
304 
305   return Vector;
306 }
307 
308 Type *VectorBlockGenerator::getVectorPtrTy(const Value *Val, int Width) {
309   PointerType *PointerTy = dyn_cast<PointerType>(Val->getType());
310   assert(PointerTy && "PointerType expected");
311 
312   Type *ScalarType = PointerTy->getElementType();
313   VectorType *VectorType = VectorType::get(ScalarType, Width);
314 
315   return PointerType::getUnqual(VectorType);
316 }
317 
318 Value *
319 VectorBlockGenerator::generateStrideOneLoad(const LoadInst *Load,
320                                             VectorValueMapT &ScalarMaps,
321                                             bool NegativeStride = false) {
322   unsigned VectorWidth = getVectorWidth();
323   const Value *Pointer = Load->getPointerOperand();
324   Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth);
325   unsigned Offset = NegativeStride ? VectorWidth - 1 : 0;
326 
327   Value *NewPointer = nullptr;
328   NewPointer = generateLocationAccessed(Load, Pointer, ScalarMaps[Offset],
329                                         GlobalMaps[Offset], VLTS[Offset]);
330   Value *VectorPtr =
331       Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
332   LoadInst *VecLoad =
333       Builder.CreateLoad(VectorPtr, Load->getName() + "_p_vec_full");
334   if (!Aligned)
335     VecLoad->setAlignment(8);
336 
337   if (NegativeStride) {
338     SmallVector<Constant *, 16> Indices;
339     for (int i = VectorWidth - 1; i >= 0; i--)
340       Indices.push_back(ConstantInt::get(Builder.getInt32Ty(), i));
341     Constant *SV = llvm::ConstantVector::get(Indices);
342     Value *RevVecLoad = Builder.CreateShuffleVector(
343         VecLoad, VecLoad, SV, Load->getName() + "_reverse");
344     return RevVecLoad;
345   }
346 
347   return VecLoad;
348 }
349 
350 Value *VectorBlockGenerator::generateStrideZeroLoad(const LoadInst *Load,
351                                                     ValueMapT &BBMap) {
352   const Value *Pointer = Load->getPointerOperand();
353   Type *VectorPtrType = getVectorPtrTy(Pointer, 1);
354   Value *NewPointer =
355       generateLocationAccessed(Load, Pointer, BBMap, GlobalMaps[0], VLTS[0]);
356   Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
357                                            Load->getName() + "_p_vec_p");
358   LoadInst *ScalarLoad =
359       Builder.CreateLoad(VectorPtr, Load->getName() + "_p_splat_one");
360 
361   if (!Aligned)
362     ScalarLoad->setAlignment(8);
363 
364   Constant *SplatVector = Constant::getNullValue(
365       VectorType::get(Builder.getInt32Ty(), getVectorWidth()));
366 
367   Value *VectorLoad = Builder.CreateShuffleVector(
368       ScalarLoad, ScalarLoad, SplatVector, Load->getName() + "_p_splat");
369   return VectorLoad;
370 }
371 
372 Value *
373 VectorBlockGenerator::generateUnknownStrideLoad(const LoadInst *Load,
374                                                 VectorValueMapT &ScalarMaps) {
375   int VectorWidth = getVectorWidth();
376   const Value *Pointer = Load->getPointerOperand();
377   VectorType *VectorType = VectorType::get(
378       dyn_cast<PointerType>(Pointer->getType())->getElementType(), VectorWidth);
379 
380   Value *Vector = UndefValue::get(VectorType);
381 
382   for (int i = 0; i < VectorWidth; i++) {
383     Value *NewPointer = generateLocationAccessed(Load, Pointer, ScalarMaps[i],
384                                                  GlobalMaps[i], VLTS[i]);
385     Value *ScalarLoad =
386         Builder.CreateLoad(NewPointer, Load->getName() + "_p_scalar_");
387     Vector = Builder.CreateInsertElement(
388         Vector, ScalarLoad, Builder.getInt32(i), Load->getName() + "_p_vec_");
389   }
390 
391   return Vector;
392 }
393 
// Generate a (possibly vectorized) version of Load. Depending on the access
// stride under the current schedule this emits a splat load (stride zero), a
// single wide load (stride one, or stride minus one followed by a reverse
// shuffle), or one scalar load per lane (unknown stride).
void VectorBlockGenerator::generateLoad(const LoadInst *Load,
                                        ValueMapT &VectorMap,
                                        VectorValueMapT &ScalarMaps) {
  // Fall back to per-lane scalar loads when the chosen vectorizer requires
  // grouped unrolling or the loaded type cannot be a vector element.
  if (PollyVectorizerChoice >= VECTORIZER_FIRST_NEED_GROUPED_UNROLL ||
      !VectorType::isValidElementType(Load->getType())) {
    for (int i = 0; i < getVectorWidth(); i++)
      ScalarMaps[i][Load] =
          generateScalarLoad(Load, ScalarMaps[i], GlobalMaps[i], VLTS[i]);
    return;
  }

  const MemoryAccess &Access = Statement.getAccessFor(Load);

  // Make sure we have scalar values available to access the pointer to
  // the data location.
  extractScalarValues(Load, VectorMap, ScalarMaps);

  // Each stride query gets its own copy of the schedule map (the isl_map
  // passed in is consumed by the callee, hence the isl_map_copy calls).
  Value *NewLoad;
  if (Access.isStrideZero(isl_map_copy(Schedule)))
    NewLoad = generateStrideZeroLoad(Load, ScalarMaps[0]);
  else if (Access.isStrideOne(isl_map_copy(Schedule)))
    NewLoad = generateStrideOneLoad(Load, ScalarMaps);
  else if (Access.isStrideX(isl_map_copy(Schedule), -1))
    NewLoad = generateStrideOneLoad(Load, ScalarMaps, true);
  else
    NewLoad = generateUnknownStrideLoad(Load, ScalarMaps);

  VectorMap[Load] = NewLoad;
}
423 
424 void VectorBlockGenerator::copyUnaryInst(const UnaryInstruction *Inst,
425                                          ValueMapT &VectorMap,
426                                          VectorValueMapT &ScalarMaps) {
427   int VectorWidth = getVectorWidth();
428   Value *NewOperand = getVectorValue(Inst->getOperand(0), VectorMap, ScalarMaps,
429                                      getLoopForInst(Inst));
430 
431   assert(isa<CastInst>(Inst) && "Can not generate vector code for instruction");
432 
433   const CastInst *Cast = dyn_cast<CastInst>(Inst);
434   VectorType *DestType = VectorType::get(Inst->getType(), VectorWidth);
435   VectorMap[Inst] = Builder.CreateCast(Cast->getOpcode(), NewOperand, DestType);
436 }
437 
438 void VectorBlockGenerator::copyBinaryInst(const BinaryOperator *Inst,
439                                           ValueMapT &VectorMap,
440                                           VectorValueMapT &ScalarMaps) {
441   Loop *L = getLoopForInst(Inst);
442   Value *OpZero = Inst->getOperand(0);
443   Value *OpOne = Inst->getOperand(1);
444 
445   Value *NewOpZero, *NewOpOne;
446   NewOpZero = getVectorValue(OpZero, VectorMap, ScalarMaps, L);
447   NewOpOne = getVectorValue(OpOne, VectorMap, ScalarMaps, L);
448 
449   Value *NewInst = Builder.CreateBinOp(Inst->getOpcode(), NewOpZero, NewOpOne,
450                                        Inst->getName() + "p_vec");
451   VectorMap[Inst] = NewInst;
452 }
453 
454 void VectorBlockGenerator::copyStore(const StoreInst *Store,
455                                      ValueMapT &VectorMap,
456                                      VectorValueMapT &ScalarMaps) {
457   const MemoryAccess &Access = Statement.getAccessFor(Store);
458 
459   const Value *Pointer = Store->getPointerOperand();
460   Value *Vector = getVectorValue(Store->getValueOperand(), VectorMap,
461                                  ScalarMaps, getLoopForInst(Store));
462 
463   // Make sure we have scalar values available to access the pointer to
464   // the data location.
465   extractScalarValues(Store, VectorMap, ScalarMaps);
466 
467   if (Access.isStrideOne(isl_map_copy(Schedule))) {
468     Type *VectorPtrType = getVectorPtrTy(Pointer, getVectorWidth());
469     Value *NewPointer = generateLocationAccessed(Store, Pointer, ScalarMaps[0],
470                                                  GlobalMaps[0], VLTS[0]);
471 
472     Value *VectorPtr =
473         Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
474     StoreInst *Store = Builder.CreateStore(Vector, VectorPtr);
475 
476     if (!Aligned)
477       Store->setAlignment(8);
478   } else {
479     for (unsigned i = 0; i < ScalarMaps.size(); i++) {
480       Value *Scalar = Builder.CreateExtractElement(Vector, Builder.getInt32(i));
481       Value *NewPointer = generateLocationAccessed(
482           Store, Pointer, ScalarMaps[i], GlobalMaps[i], VLTS[i]);
483       Builder.CreateStore(Scalar, NewPointer);
484     }
485   }
486 }
487 
488 bool VectorBlockGenerator::hasVectorOperands(const Instruction *Inst,
489                                              ValueMapT &VectorMap) {
490   for (Value *Operand : Inst->operands())
491     if (VectorMap.count(Operand))
492       return true;
493   return false;
494 }
495 
// For every operand of Inst that has a vector version, make the per-lane
// scalar values available in ScalarMaps by extracting the vector elements.
// Returns true iff at least one operand had a vector version.
bool VectorBlockGenerator::extractScalarValues(const Instruction *Inst,
                                               ValueMapT &VectorMap,
                                               VectorValueMapT &ScalarMaps) {
  bool HasVectorOperand = false;
  int VectorWidth = getVectorWidth();

  for (Value *Operand : Inst->operands()) {
    ValueMapT::iterator VecOp = VectorMap.find(Operand);

    if (VecOp == VectorMap.end())
      continue;

    HasVectorOperand = true;
    Value *NewVector = VecOp->second;

    for (int i = 0; i < VectorWidth; ++i) {
      ValueMapT &SM = ScalarMaps[i];

      // If there is one scalar extracted, all scalar elements should have
      // already been extracted by the code here. So no need to check for the
      // existence of all of them.
      if (SM.count(Operand))
        break;

      SM[Operand] =
          Builder.CreateExtractElement(NewVector, Builder.getInt32(i));
    }
  }

  return HasVectorOperand;
}
527 
528 void VectorBlockGenerator::copyInstScalarized(const Instruction *Inst,
529                                               ValueMapT &VectorMap,
530                                               VectorValueMapT &ScalarMaps) {
531   bool HasVectorOperand;
532   int VectorWidth = getVectorWidth();
533 
534   HasVectorOperand = extractScalarValues(Inst, VectorMap, ScalarMaps);
535 
536   for (int VectorLane = 0; VectorLane < getVectorWidth(); VectorLane++)
537     BlockGenerator::copyInstruction(Inst, ScalarMaps[VectorLane],
538                                     GlobalMaps[VectorLane], VLTS[VectorLane]);
539 
540   if (!VectorType::isValidElementType(Inst->getType()) || !HasVectorOperand)
541     return;
542 
543   // Make the result available as vector value.
544   VectorType *VectorType = VectorType::get(Inst->getType(), VectorWidth);
545   Value *Vector = UndefValue::get(VectorType);
546 
547   for (int i = 0; i < VectorWidth; i++)
548     Vector = Builder.CreateInsertElement(Vector, ScalarMaps[i][Inst],
549                                          Builder.getInt32(i));
550 
551   VectorMap[Inst] = Vector;
552 }
553 
// The vector width equals the number of per-lane global value maps.
int VectorBlockGenerator::getVectorWidth() { return GlobalMaps.size(); }
555 
// Dispatch the copy of a single instruction: skip terminators and
// synthesizable values, vectorize loads/stores/casts/binary operators when
// possible, and scalarize everything else lane by lane.
void VectorBlockGenerator::copyInstruction(const Instruction *Inst,
                                           ValueMapT &VectorMap,
                                           VectorValueMapT &ScalarMaps) {
  // Terminator instructions control the control flow. They are explicitly
  // expressed in the clast and do not need to be copied.
  if (Inst->isTerminator())
    return;

  // Values that can be re-synthesized (see canSynthesize) need no copy.
  if (canSynthesize(Inst, &P->getAnalysis<LoopInfo>(), &SE,
                    &Statement.getParent()->getRegion()))
    return;

  if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
    generateLoad(Load, VectorMap, ScalarMaps);
    return;
  }

  // Only instructions with at least one vector operand are worth emitting
  // as vector operations.
  if (hasVectorOperands(Inst, VectorMap)) {
    if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
      copyStore(Store, VectorMap, ScalarMaps);
      return;
    }

    if (const UnaryInstruction *Unary = dyn_cast<UnaryInstruction>(Inst)) {
      copyUnaryInst(Unary, VectorMap, ScalarMaps);
      return;
    }

    if (const BinaryOperator *Binary = dyn_cast<BinaryOperator>(Inst)) {
      copyBinaryInst(Binary, VectorMap, ScalarMaps);
      return;
    }

    // Fallthrough: We generate scalar instructions, if we don't know how to
    // generate vector code.
  }

  copyInstScalarized(Inst, VectorMap, ScalarMaps);
}
595 
596 void VectorBlockGenerator::copyBB() {
597   BasicBlock *BB = Statement.getBasicBlock();
598   BasicBlock *CopyBB =
599       SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), P);
600   CopyBB->setName("polly.stmt." + BB->getName());
601   Builder.SetInsertPoint(CopyBB->begin());
602 
603   // Create two maps that store the mapping from the original instructions of
604   // the old basic block to their copies in the new basic block. Those maps
605   // are basic block local.
606   //
607   // As vector code generation is supported there is one map for scalar values
608   // and one for vector values.
609   //
610   // In case we just do scalar code generation, the vectorMap is not used and
611   // the scalarMap has just one dimension, which contains the mapping.
612   //
613   // In case vector code generation is done, an instruction may either appear
614   // in the vector map once (as it is calculating >vectorwidth< values at a
615   // time. Or (if the values are calculated using scalar operations), it
616   // appears once in every dimension of the scalarMap.
617   VectorValueMapT ScalarBlockMap(getVectorWidth());
618   ValueMapT VectorBlockMap;
619 
620   for (Instruction &Inst : *BB)
621     copyInstruction(&Inst, VectorBlockMap, ScalarBlockMap);
622 }
623