1 //===--- BlockGenerators.cpp - Generate code for statements -----*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the BlockGenerator and VectorBlockGenerator classes,
11 // which generate sequential code and vectorized code for a polyhedral
12 // statement, respectively.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "polly/ScopInfo.h"
17 #include "isl/aff.h"
18 #include "isl/ast.h"
19 #include "isl/ast_build.h"
20 #include "isl/set.h"
21 #include "polly/CodeGen/BlockGenerators.h"
22 #include "polly/CodeGen/CodeGeneration.h"
23 #include "polly/CodeGen/IslExprBuilder.h"
24 #include "polly/Options.h"
25 #include "polly/Support/GICHelper.h"
26 #include "polly/Support/SCEVValidator.h"
27 #include "polly/Support/ScopHelper.h"
28 #include "llvm/Analysis/LoopInfo.h"
29 #include "llvm/Analysis/ScalarEvolution.h"
30 #include "llvm/Analysis/ScalarEvolutionExpander.h"
31 #include "llvm/IR/IntrinsicInst.h"
32 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
33 
34 using namespace llvm;
35 using namespace polly;
36 
// Command-line switch: when set, generated vector loads/stores are assumed to
// be naturally aligned; when unset, an alignment of 8 is forced on them (see
// the setAlignment(8) calls in the stride-one/zero load and store paths).
static cl::opt<bool> Aligned("enable-polly-aligned",
                             cl::desc("Assumed aligned memory accesses."),
                             cl::Hidden, cl::init(false), cl::ZeroOrMore,
                             cl::cat(PollyCategory));
41 
42 bool polly::canSynthesize(const Instruction *I, const llvm::LoopInfo *LI,
43                           ScalarEvolution *SE, const Region *R) {
44   if (!I || !SE->isSCEVable(I->getType()))
45     return false;
46 
47   if (const SCEV *Scev = SE->getSCEV(const_cast<Instruction *>(I)))
48     if (!isa<SCEVCouldNotCompute>(Scev))
49       if (!hasScalarDepsInsideRegion(Scev, R))
50         return true;
51 
52   return false;
53 }
54 
55 bool polly::isIgnoredIntrinsic(const Value *V) {
56   if (auto *IT = dyn_cast<IntrinsicInst>(V)) {
57     switch (IT->getIntrinsicID()) {
58     // Lifetime markers are supported/ignored.
59     case llvm::Intrinsic::lifetime_start:
60     case llvm::Intrinsic::lifetime_end:
61     // Invariant markers are supported/ignored.
62     case llvm::Intrinsic::invariant_start:
63     case llvm::Intrinsic::invariant_end:
64     // Some misc annotations are supported/ignored.
65     case llvm::Intrinsic::var_annotation:
66     case llvm::Intrinsic::ptr_annotation:
67     case llvm::Intrinsic::annotation:
68     case llvm::Intrinsic::donothing:
69     case llvm::Intrinsic::assume:
70     case llvm::Intrinsic::expect:
71       return true;
72     default:
73       break;
74     }
75   }
76   return false;
77 }
78 
// Construct a generator for sequential copies of the statement Stmt.
// Build and ExprBuilder are only required when new access relations have to
// be materialized (asserted in getNewAccessOperand()); all members are
// stored by reference/pointer without taking ownership.
BlockGenerator::BlockGenerator(PollyIRBuilder &B, ScopStmt &Stmt, Pass *P,
                               LoopInfo &LI, ScalarEvolution &SE,
                               isl_ast_build *Build,
                               IslExprBuilder *ExprBuilder)
    : Builder(B), Statement(Stmt), P(P), LI(LI), SE(SE), Build(Build),
      ExprBuilder(ExprBuilder) {}
85 
// Get the new-code value corresponding to 'Old' for the current statement
// copy.
//
// Lookup order:
//   1. Constants are reused unchanged.
//   2. GlobalMap: scop-wide replacements; a replacement with more scalar
//      bits than Old is truncated back to Old's type.
//   3. BBMap: values already copied within this basic block.
//   4. SCEV expansion: if Old is SCEV-able at the scope of loop L, the
//      expression is rewritten through LTS and both value maps and expanded
//      at the current insert point (result cached in BBMap).
//   5. Globals, function arguments, and instructions defined outside the
//      scop's region are scop-constant and reused unchanged.
//
// @param L Loop at whose scope the SCEV of Old is evaluated.
// @returns The replacement value; unreachable if none of the cases applies.
Value *BlockGenerator::getNewValue(const Value *Old, ValueMapT &BBMap,
                                   ValueMapT &GlobalMap, LoopToScevMapT &LTS,
                                   Loop *L) const {
  // We assume constants never change.
  // This avoids map lookups for many calls to this function.
  if (isa<Constant>(Old))
    return const_cast<Value *>(Old);

  if (Value *New = GlobalMap.lookup(Old)) {
    // The global replacement may be wider than the original value; truncate
    // it back so the copied user sees the type it expects.
    if (Old->getType()->getScalarSizeInBits() <
        New->getType()->getScalarSizeInBits())
      New = Builder.CreateTruncOrBitCast(New, Old->getType());

    return New;
  }

  if (Value *New = BBMap.lookup(Old))
    return New;

  if (SE.isSCEVable(Old->getType()))
    if (const SCEV *Scev = SE.getSCEVAtScope(const_cast<Value *>(Old), L)) {
      if (!isa<SCEVCouldNotCompute>(Scev)) {
        // Substitute the new loop induction values (LTS) and any remapped
        // parameters (BBMap/GlobalMap) into the expression before expanding
        // code for it at the insert point.
        const SCEV *NewScev = apply(Scev, LTS, SE);
        ValueToValueMap VTV;
        VTV.insert(BBMap.begin(), BBMap.end());
        VTV.insert(GlobalMap.begin(), GlobalMap.end());
        NewScev = SCEVParameterRewriter::rewrite(NewScev, SE, VTV);
        SCEVExpander Expander(SE, "polly");
        Value *Expanded = Expander.expandCodeFor(NewScev, Old->getType(),
                                                 Builder.GetInsertPoint());

        // Cache the expansion so later uses in this block reuse it.
        BBMap[Old] = Expanded;
        return Expanded;
      }
    }

  // A scop-constant value defined by a global or a function parameter.
  if (isa<GlobalValue>(Old) || isa<Argument>(Old))
    return const_cast<Value *>(Old);

  // A scop-constant value defined by an instruction executed outside the scop.
  if (const Instruction *Inst = dyn_cast<Instruction>(Old))
    if (!Statement.getParent()->getRegion().contains(Inst->getParent()))
      return const_cast<Value *>(Old);

  // The scalar dependence is neither available nor SCEVCodegenable.
  llvm_unreachable("Unexpected scalar dependence in region!");
  return nullptr;
}
135 
// Clone Inst at the current insert point, replacing each operand with its
// new-code counterpart from getNewValue().
//
// Debug intrinsics are dropped entirely. If an operand cannot be remapped,
// the half-initialized clone is deleted and nothing is emitted; this must
// never happen for stores (asserted below).
void BlockGenerator::copyInstScalar(const Instruction *Inst, ValueMapT &BBMap,
                                    ValueMapT &GlobalMap, LoopToScevMapT &LTS) {
  // We do not generate debug intrinsics as we did not investigate how to
  // copy them correctly. At the current state, they just crash the code
  // generation as the meta-data operands are not correctly copied.
  if (isa<DbgInfoIntrinsic>(Inst))
    return;

  Instruction *NewInst = Inst->clone();

  // Replace old operands with the new ones.
  for (Value *OldOperand : Inst->operands()) {
    Value *NewOperand =
        getNewValue(OldOperand, BBMap, GlobalMap, LTS, getLoopForInst(Inst));

    if (!NewOperand) {
      assert(!isa<StoreInst>(NewInst) &&
             "Store instructions are always needed!");
      // The clone was never inserted, so it must be freed manually.
      delete NewInst;
      return;
    }

    NewInst->replaceUsesOfWith(OldOperand, NewOperand);
  }

  Builder.Insert(NewInst);
  BBMap[Inst] = NewInst;

  // Name the copy after the original for readable IR.
  if (!NewInst->getType()->isVoidTy())
    NewInst->setName("p_" + Inst->getName());
}
167 
168 Value *BlockGenerator::getNewAccessOperand(const MemoryAccess &MA) {
169   isl_pw_multi_aff *PWAccRel;
170   isl_union_map *Schedule;
171   isl_ast_expr *Expr;
172 
173   assert(ExprBuilder && Build &&
174          "Cannot generate new value without IslExprBuilder!");
175 
176   Schedule = isl_ast_build_get_schedule(Build);
177   PWAccRel = MA.applyScheduleToAccessRelation(Schedule);
178 
179   Expr = isl_ast_build_access_from_pw_multi_aff(Build, PWAccRel);
180   Expr = isl_ast_expr_address_of(Expr);
181 
182   return ExprBuilder->create(Expr);
183 }
184 
185 Value *BlockGenerator::generateLocationAccessed(const Instruction *Inst,
186                                                 const Value *Pointer,
187                                                 ValueMapT &BBMap,
188                                                 ValueMapT &GlobalMap,
189                                                 LoopToScevMapT &LTS) {
190   const MemoryAccess &MA = Statement.getAccessFor(Inst);
191 
192   Value *NewPointer;
193   if (MA.hasNewAccessRelation())
194     NewPointer = getNewAccessOperand(MA);
195   else
196     NewPointer =
197         getNewValue(Pointer, BBMap, GlobalMap, LTS, getLoopForInst(Inst));
198 
199   return NewPointer;
200 }
201 
202 Loop *BlockGenerator::getLoopForInst(const llvm::Instruction *Inst) {
203   return LI.getLoopFor(Inst->getParent());
204 }
205 
206 Value *BlockGenerator::generateScalarLoad(const LoadInst *Load,
207                                           ValueMapT &BBMap,
208                                           ValueMapT &GlobalMap,
209                                           LoopToScevMapT &LTS) {
210   const Value *Pointer = Load->getPointerOperand();
211   Value *NewPointer =
212       generateLocationAccessed(Load, Pointer, BBMap, GlobalMap, LTS);
213   Value *ScalarLoad = Builder.CreateAlignedLoad(
214       NewPointer, Load->getAlignment(), Load->getName() + "_p_scalar_");
215   return ScalarLoad;
216 }
217 
218 Value *BlockGenerator::generateScalarStore(const StoreInst *Store,
219                                            ValueMapT &BBMap,
220                                            ValueMapT &GlobalMap,
221                                            LoopToScevMapT &LTS) {
222   const Value *Pointer = Store->getPointerOperand();
223   Value *NewPointer =
224       generateLocationAccessed(Store, Pointer, BBMap, GlobalMap, LTS);
225   Value *ValueOperand = getNewValue(Store->getValueOperand(), BBMap, GlobalMap,
226                                     LTS, getLoopForInst(Store));
227 
228   Value *NewStore = Builder.CreateAlignedStore(ValueOperand, NewPointer,
229                                                Store->getAlignment());
230   return NewStore;
231 }
232 
233 void BlockGenerator::copyInstruction(const Instruction *Inst, ValueMapT &BBMap,
234                                      ValueMapT &GlobalMap,
235                                      LoopToScevMapT &LTS) {
236   // Terminator instructions control the control flow. They are explicitly
237   // expressed in the clast and do not need to be copied.
238   if (Inst->isTerminator())
239     return;
240 
241   if (canSynthesize(Inst, &P->getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
242                     &SE, &Statement.getParent()->getRegion()))
243     return;
244 
245   if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
246     Value *NewLoad = generateScalarLoad(Load, BBMap, GlobalMap, LTS);
247     // Compute NewLoad before its insertion in BBMap to make the insertion
248     // deterministic.
249     BBMap[Load] = NewLoad;
250     return;
251   }
252 
253   if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
254     Value *NewStore = generateScalarStore(Store, BBMap, GlobalMap, LTS);
255     // Compute NewStore before its insertion in BBMap to make the insertion
256     // deterministic.
257     BBMap[Store] = NewStore;
258     return;
259   }
260 
261   // Skip some special intrinsics for which we do not adjust the semantics to
262   // the new schedule. All others are handled like every other instruction.
263   if (auto *IT = dyn_cast<IntrinsicInst>(Inst)) {
264     switch (IT->getIntrinsicID()) {
265     // Lifetime markers are ignored.
266     case llvm::Intrinsic::lifetime_start:
267     case llvm::Intrinsic::lifetime_end:
268     // Invariant markers are ignored.
269     case llvm::Intrinsic::invariant_start:
270     case llvm::Intrinsic::invariant_end:
271     // Some misc annotations are ignored.
272     case llvm::Intrinsic::var_annotation:
273     case llvm::Intrinsic::ptr_annotation:
274     case llvm::Intrinsic::annotation:
275     case llvm::Intrinsic::donothing:
276     case llvm::Intrinsic::assume:
277     case llvm::Intrinsic::expect:
278       return;
279     default:
280       // Other intrinsics are copied.
281       break;
282     }
283   }
284 
285   copyInstScalar(Inst, BBMap, GlobalMap, LTS);
286 }
287 
288 void BlockGenerator::copyBB(ValueMapT &GlobalMap, LoopToScevMapT &LTS) {
289   auto *DTWP = P->getAnalysisIfAvailable<DominatorTreeWrapperPass>();
290   auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
291   auto *LIWP = P->getAnalysisIfAvailable<LoopInfoWrapperPass>();
292   auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
293 
294   BasicBlock *BB = Statement.getBasicBlock();
295   BasicBlock *CopyBB =
296       SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), DT, LI);
297   CopyBB->setName("polly.stmt." + BB->getName());
298   Builder.SetInsertPoint(CopyBB->begin());
299 
300   ValueMapT BBMap;
301 
302   for (Instruction &Inst : *BB)
303     copyInstruction(&Inst, BBMap, GlobalMap, LTS);
304 }
305 
// Construct a block generator that emits one statement copy per vector lane.
// GlobalMaps and VLTS hold one value map resp. loop-to-scev map per lane;
// the number of maps defines the vector width (see getVectorWidth()).
// Schedule is the statement's scattering used for the stride analysis of
// memory accesses; it is kept, not owned.
VectorBlockGenerator::VectorBlockGenerator(
    PollyIRBuilder &B, VectorValueMapT &GlobalMaps,
    std::vector<LoopToScevMapT> &VLTS, ScopStmt &Stmt,
    __isl_keep isl_map *Schedule, Pass *P, LoopInfo &LI, ScalarEvolution &SE,
    __isl_keep isl_ast_build *Build, IslExprBuilder *ExprBuilder)
    : BlockGenerator(B, Stmt, P, LI, SE, Build, ExprBuilder),
      GlobalMaps(GlobalMaps), VLTS(VLTS), Schedule(Schedule) {
  assert(GlobalMaps.size() > 1 && "Only one vector lane found");
  assert(Schedule && "No statement domain provided");
}
316 
317 Value *VectorBlockGenerator::getVectorValue(const Value *Old,
318                                             ValueMapT &VectorMap,
319                                             VectorValueMapT &ScalarMaps,
320                                             Loop *L) {
321   if (Value *NewValue = VectorMap.lookup(Old))
322     return NewValue;
323 
324   int Width = getVectorWidth();
325 
326   Value *Vector = UndefValue::get(VectorType::get(Old->getType(), Width));
327 
328   for (int Lane = 0; Lane < Width; Lane++)
329     Vector = Builder.CreateInsertElement(
330         Vector,
331         getNewValue(Old, ScalarMaps[Lane], GlobalMaps[Lane], VLTS[Lane], L),
332         Builder.getInt32(Lane));
333 
334   VectorMap[Old] = Vector;
335 
336   return Vector;
337 }
338 
339 Type *VectorBlockGenerator::getVectorPtrTy(const Value *Val, int Width) {
340   PointerType *PointerTy = dyn_cast<PointerType>(Val->getType());
341   assert(PointerTy && "PointerType expected");
342 
343   Type *ScalarType = PointerTy->getElementType();
344   VectorType *VectorType = VectorType::get(ScalarType, Width);
345 
346   return PointerType::getUnqual(VectorType);
347 }
348 
349 Value *
350 VectorBlockGenerator::generateStrideOneLoad(const LoadInst *Load,
351                                             VectorValueMapT &ScalarMaps,
352                                             bool NegativeStride = false) {
353   unsigned VectorWidth = getVectorWidth();
354   const Value *Pointer = Load->getPointerOperand();
355   Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth);
356   unsigned Offset = NegativeStride ? VectorWidth - 1 : 0;
357 
358   Value *NewPointer = nullptr;
359   NewPointer = generateLocationAccessed(Load, Pointer, ScalarMaps[Offset],
360                                         GlobalMaps[Offset], VLTS[Offset]);
361   Value *VectorPtr =
362       Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
363   LoadInst *VecLoad =
364       Builder.CreateLoad(VectorPtr, Load->getName() + "_p_vec_full");
365   if (!Aligned)
366     VecLoad->setAlignment(8);
367 
368   if (NegativeStride) {
369     SmallVector<Constant *, 16> Indices;
370     for (int i = VectorWidth - 1; i >= 0; i--)
371       Indices.push_back(ConstantInt::get(Builder.getInt32Ty(), i));
372     Constant *SV = llvm::ConstantVector::get(Indices);
373     Value *RevVecLoad = Builder.CreateShuffleVector(
374         VecLoad, VecLoad, SV, Load->getName() + "_reverse");
375     return RevVecLoad;
376   }
377 
378   return VecLoad;
379 }
380 
381 Value *VectorBlockGenerator::generateStrideZeroLoad(const LoadInst *Load,
382                                                     ValueMapT &BBMap) {
383   const Value *Pointer = Load->getPointerOperand();
384   Type *VectorPtrType = getVectorPtrTy(Pointer, 1);
385   Value *NewPointer =
386       generateLocationAccessed(Load, Pointer, BBMap, GlobalMaps[0], VLTS[0]);
387   Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
388                                            Load->getName() + "_p_vec_p");
389   LoadInst *ScalarLoad =
390       Builder.CreateLoad(VectorPtr, Load->getName() + "_p_splat_one");
391 
392   if (!Aligned)
393     ScalarLoad->setAlignment(8);
394 
395   Constant *SplatVector = Constant::getNullValue(
396       VectorType::get(Builder.getInt32Ty(), getVectorWidth()));
397 
398   Value *VectorLoad = Builder.CreateShuffleVector(
399       ScalarLoad, ScalarLoad, SplatVector, Load->getName() + "_p_splat");
400   return VectorLoad;
401 }
402 
403 Value *
404 VectorBlockGenerator::generateUnknownStrideLoad(const LoadInst *Load,
405                                                 VectorValueMapT &ScalarMaps) {
406   int VectorWidth = getVectorWidth();
407   const Value *Pointer = Load->getPointerOperand();
408   VectorType *VectorType = VectorType::get(
409       dyn_cast<PointerType>(Pointer->getType())->getElementType(), VectorWidth);
410 
411   Value *Vector = UndefValue::get(VectorType);
412 
413   for (int i = 0; i < VectorWidth; i++) {
414     Value *NewPointer = generateLocationAccessed(Load, Pointer, ScalarMaps[i],
415                                                  GlobalMaps[i], VLTS[i]);
416     Value *ScalarLoad =
417         Builder.CreateLoad(NewPointer, Load->getName() + "_p_scalar_");
418     Vector = Builder.CreateInsertElement(
419         Vector, ScalarLoad, Builder.getInt32(i), Load->getName() + "_p_vec_");
420   }
421 
422   return Vector;
423 }
424 
// Create the new code for a load, selecting a lowering based on the
// access's stride under the current vector schedule:
//   stride  0 -> load once and splat (generateStrideZeroLoad)
//   stride  1 -> one wide vector load (generateStrideOneLoad)
//   stride -1 -> wide load plus reversing shuffle
//   other     -> per-lane scalar loads gathered into a vector
// If the chosen vectorizer requires grouped unrolling, or the loaded type
// is not a valid vector element type, one scalar load per lane is emitted
// into the scalar maps instead and no vector value is produced.
void VectorBlockGenerator::generateLoad(const LoadInst *Load,
                                        ValueMapT &VectorMap,
                                        VectorValueMapT &ScalarMaps) {
  if (PollyVectorizerChoice >= VECTORIZER_FIRST_NEED_GROUPED_UNROLL ||
      !VectorType::isValidElementType(Load->getType())) {
    for (int i = 0; i < getVectorWidth(); i++)
      ScalarMaps[i][Load] =
          generateScalarLoad(Load, ScalarMaps[i], GlobalMaps[i], VLTS[i]);
    return;
  }

  const MemoryAccess &Access = Statement.getAccessFor(Load);

  // Make sure we have scalar values available to access the pointer to
  // the data location.
  extractScalarValues(Load, VectorMap, ScalarMaps);

  // Each stride query receives its own copy of the schedule map.
  Value *NewLoad;
  if (Access.isStrideZero(isl_map_copy(Schedule)))
    NewLoad = generateStrideZeroLoad(Load, ScalarMaps[0]);
  else if (Access.isStrideOne(isl_map_copy(Schedule)))
    NewLoad = generateStrideOneLoad(Load, ScalarMaps);
  else if (Access.isStrideX(isl_map_copy(Schedule), -1))
    NewLoad = generateStrideOneLoad(Load, ScalarMaps, true);
  else
    NewLoad = generateUnknownStrideLoad(Load, ScalarMaps);

  VectorMap[Load] = NewLoad;
}
454 
455 void VectorBlockGenerator::copyUnaryInst(const UnaryInstruction *Inst,
456                                          ValueMapT &VectorMap,
457                                          VectorValueMapT &ScalarMaps) {
458   int VectorWidth = getVectorWidth();
459   Value *NewOperand = getVectorValue(Inst->getOperand(0), VectorMap, ScalarMaps,
460                                      getLoopForInst(Inst));
461 
462   assert(isa<CastInst>(Inst) && "Can not generate vector code for instruction");
463 
464   const CastInst *Cast = dyn_cast<CastInst>(Inst);
465   VectorType *DestType = VectorType::get(Inst->getType(), VectorWidth);
466   VectorMap[Inst] = Builder.CreateCast(Cast->getOpcode(), NewOperand, DestType);
467 }
468 
469 void VectorBlockGenerator::copyBinaryInst(const BinaryOperator *Inst,
470                                           ValueMapT &VectorMap,
471                                           VectorValueMapT &ScalarMaps) {
472   Loop *L = getLoopForInst(Inst);
473   Value *OpZero = Inst->getOperand(0);
474   Value *OpOne = Inst->getOperand(1);
475 
476   Value *NewOpZero, *NewOpOne;
477   NewOpZero = getVectorValue(OpZero, VectorMap, ScalarMaps, L);
478   NewOpOne = getVectorValue(OpOne, VectorMap, ScalarMaps, L);
479 
480   Value *NewInst = Builder.CreateBinOp(Inst->getOpcode(), NewOpZero, NewOpOne,
481                                        Inst->getName() + "p_vec");
482   VectorMap[Inst] = NewInst;
483 }
484 
485 void VectorBlockGenerator::copyStore(const StoreInst *Store,
486                                      ValueMapT &VectorMap,
487                                      VectorValueMapT &ScalarMaps) {
488   const MemoryAccess &Access = Statement.getAccessFor(Store);
489 
490   const Value *Pointer = Store->getPointerOperand();
491   Value *Vector = getVectorValue(Store->getValueOperand(), VectorMap,
492                                  ScalarMaps, getLoopForInst(Store));
493 
494   // Make sure we have scalar values available to access the pointer to
495   // the data location.
496   extractScalarValues(Store, VectorMap, ScalarMaps);
497 
498   if (Access.isStrideOne(isl_map_copy(Schedule))) {
499     Type *VectorPtrType = getVectorPtrTy(Pointer, getVectorWidth());
500     Value *NewPointer = generateLocationAccessed(Store, Pointer, ScalarMaps[0],
501                                                  GlobalMaps[0], VLTS[0]);
502 
503     Value *VectorPtr =
504         Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
505     StoreInst *Store = Builder.CreateStore(Vector, VectorPtr);
506 
507     if (!Aligned)
508       Store->setAlignment(8);
509   } else {
510     for (unsigned i = 0; i < ScalarMaps.size(); i++) {
511       Value *Scalar = Builder.CreateExtractElement(Vector, Builder.getInt32(i));
512       Value *NewPointer = generateLocationAccessed(
513           Store, Pointer, ScalarMaps[i], GlobalMaps[i], VLTS[i]);
514       Builder.CreateStore(Scalar, NewPointer);
515     }
516   }
517 }
518 
519 bool VectorBlockGenerator::hasVectorOperands(const Instruction *Inst,
520                                              ValueMapT &VectorMap) {
521   for (Value *Operand : Inst->operands())
522     if (VectorMap.count(Operand))
523       return true;
524   return false;
525 }
526 
// For every operand of Inst that has a vector version in VectorMap, extract
// each lane's element into the corresponding per-lane scalar map (unless it
// was extracted earlier).
//
// @returns True iff at least one operand had a vector counterpart.
bool VectorBlockGenerator::extractScalarValues(const Instruction *Inst,
                                               ValueMapT &VectorMap,
                                               VectorValueMapT &ScalarMaps) {
  bool HasVectorOperand = false;
  int VectorWidth = getVectorWidth();

  for (Value *Operand : Inst->operands()) {
    ValueMapT::iterator VecOp = VectorMap.find(Operand);

    if (VecOp == VectorMap.end())
      continue;

    HasVectorOperand = true;
    Value *NewVector = VecOp->second;

    for (int i = 0; i < VectorWidth; ++i) {
      ValueMapT &SM = ScalarMaps[i];

      // If there is one scalar extracted, all scalar elements should have
      // already been extracted by the code here. So no need to check for the
      // existence of all of them.
      if (SM.count(Operand))
        break;

      SM[Operand] =
          Builder.CreateExtractElement(NewVector, Builder.getInt32(i));
    }
  }

  return HasVectorOperand;
}
558 
559 void VectorBlockGenerator::copyInstScalarized(const Instruction *Inst,
560                                               ValueMapT &VectorMap,
561                                               VectorValueMapT &ScalarMaps) {
562   bool HasVectorOperand;
563   int VectorWidth = getVectorWidth();
564 
565   HasVectorOperand = extractScalarValues(Inst, VectorMap, ScalarMaps);
566 
567   for (int VectorLane = 0; VectorLane < getVectorWidth(); VectorLane++)
568     BlockGenerator::copyInstruction(Inst, ScalarMaps[VectorLane],
569                                     GlobalMaps[VectorLane], VLTS[VectorLane]);
570 
571   if (!VectorType::isValidElementType(Inst->getType()) || !HasVectorOperand)
572     return;
573 
574   // Make the result available as vector value.
575   VectorType *VectorType = VectorType::get(Inst->getType(), VectorWidth);
576   Value *Vector = UndefValue::get(VectorType);
577 
578   for (int i = 0; i < VectorWidth; i++)
579     Vector = Builder.CreateInsertElement(Vector, ScalarMaps[i][Inst],
580                                          Builder.getInt32(i));
581 
582   VectorMap[Inst] = Vector;
583 }
584 
585 int VectorBlockGenerator::getVectorWidth() { return GlobalMaps.size(); }
586 
587 void VectorBlockGenerator::copyInstruction(const Instruction *Inst,
588                                            ValueMapT &VectorMap,
589                                            VectorValueMapT &ScalarMaps) {
590   // Terminator instructions control the control flow. They are explicitly
591   // expressed in the clast and do not need to be copied.
592   if (Inst->isTerminator())
593     return;
594 
595   if (canSynthesize(Inst, &P->getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
596                     &SE, &Statement.getParent()->getRegion()))
597     return;
598 
599   if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
600     generateLoad(Load, VectorMap, ScalarMaps);
601     return;
602   }
603 
604   if (hasVectorOperands(Inst, VectorMap)) {
605     if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
606       copyStore(Store, VectorMap, ScalarMaps);
607       return;
608     }
609 
610     if (const UnaryInstruction *Unary = dyn_cast<UnaryInstruction>(Inst)) {
611       copyUnaryInst(Unary, VectorMap, ScalarMaps);
612       return;
613     }
614 
615     if (const BinaryOperator *Binary = dyn_cast<BinaryOperator>(Inst)) {
616       copyBinaryInst(Binary, VectorMap, ScalarMaps);
617       return;
618     }
619 
620     // Falltrough: We generate scalar instructions, if we don't know how to
621     // generate vector code.
622   }
623 
624   copyInstScalarized(Inst, VectorMap, ScalarMaps);
625 }
626 
627 void VectorBlockGenerator::copyBB() {
628   auto *DTWP = P->getAnalysisIfAvailable<DominatorTreeWrapperPass>();
629   auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
630   auto *LIWP = P->getAnalysisIfAvailable<LoopInfoWrapperPass>();
631   auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
632 
633   BasicBlock *BB = Statement.getBasicBlock();
634   BasicBlock *CopyBB =
635       SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), DT, LI);
636   CopyBB->setName("polly.stmt." + BB->getName());
637   Builder.SetInsertPoint(CopyBB->begin());
638 
639   // Create two maps that store the mapping from the original instructions of
640   // the old basic block to their copies in the new basic block. Those maps
641   // are basic block local.
642   //
643   // As vector code generation is supported there is one map for scalar values
644   // and one for vector values.
645   //
646   // In case we just do scalar code generation, the vectorMap is not used and
647   // the scalarMap has just one dimension, which contains the mapping.
648   //
649   // In case vector code generation is done, an instruction may either appear
650   // in the vector map once (as it is calculating >vectorwidth< values at a
651   // time. Or (if the values are calculated using scalar operations), it
652   // appears once in every dimension of the scalarMap.
653   VectorValueMapT ScalarBlockMap(getVectorWidth());
654   ValueMapT VectorBlockMap;
655 
656   for (Instruction &Inst : *BB)
657     copyInstruction(&Inst, VectorBlockMap, ScalarBlockMap);
658 }
659