1 //===--- BlockGenerators.cpp - Generate code for statements -----*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the BlockGenerator and VectorBlockGenerator classes,
11 // which generate sequential code and vectorized code for a polyhedral
12 // statement, respectively.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "polly/ScopInfo.h"
17 #include "isl/aff.h"
18 #include "isl/ast.h"
19 #include "isl/ast_build.h"
20 #include "isl/set.h"
21 #include "polly/CodeGen/BlockGenerators.h"
22 #include "polly/CodeGen/CodeGeneration.h"
23 #include "polly/CodeGen/IslExprBuilder.h"
24 #include "polly/Options.h"
25 #include "polly/Support/GICHelper.h"
26 #include "polly/Support/SCEVValidator.h"
27 #include "polly/Support/ScopHelper.h"
28 #include "llvm/Analysis/LoopInfo.h"
29 #include "llvm/Analysis/ScalarEvolution.h"
30 #include "llvm/Analysis/ScalarEvolutionExpander.h"
31 #include "llvm/IR/IntrinsicInst.h"
32 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
33 
34 using namespace llvm;
35 using namespace polly;
36 
// Command-line flag: when set, generated vector loads/stores keep their
// natural alignment; otherwise the vector code below conservatively forces
// an alignment of 8 on the emitted memory instructions.
static cl::opt<bool> Aligned("enable-polly-aligned",
                             cl::desc("Assumed aligned memory accesses."),
                             cl::Hidden, cl::init(false), cl::ZeroOrMore,
                             cl::cat(PollyCategory));
41 
42 bool polly::canSynthesize(const Instruction *I, const llvm::LoopInfo *LI,
43                           ScalarEvolution *SE, const Region *R) {
44   if (!I || !SE->isSCEVable(I->getType()))
45     return false;
46 
47   if (const SCEV *Scev = SE->getSCEV(const_cast<Instruction *>(I)))
48     if (!isa<SCEVCouldNotCompute>(Scev))
49       if (!hasScalarDepsInsideRegion(Scev, R))
50         return true;
51 
52   return false;
53 }
54 
55 bool polly::isIgnoredIntrinsic(const Value *V) {
56   if (auto *IT = dyn_cast<IntrinsicInst>(V)) {
57     switch (IT->getIntrinsicID()) {
58     // Lifetime markers are supported/ignored.
59     case llvm::Intrinsic::lifetime_start:
60     case llvm::Intrinsic::lifetime_end:
61     // Invariant markers are supported/ignored.
62     case llvm::Intrinsic::invariant_start:
63     case llvm::Intrinsic::invariant_end:
64     // Some misc annotations are supported/ignored.
65     case llvm::Intrinsic::var_annotation:
66     case llvm::Intrinsic::ptr_annotation:
67     case llvm::Intrinsic::annotation:
68     case llvm::Intrinsic::donothing:
69     case llvm::Intrinsic::assume:
70     case llvm::Intrinsic::expect:
71       return true;
72     default:
73       break;
74     }
75   }
76   return false;
77 }
78 
// Construct a generator that emits sequential code for a single statement.
// All references (builder, pass, analyses, expression builder) are stored;
// no code is generated at construction time.
BlockGenerator::BlockGenerator(PollyIRBuilder &B, Pass *P, LoopInfo &LI,
                               ScalarEvolution &SE, IslExprBuilder *ExprBuilder)
    : Builder(B), P(P), LI(LI), SE(SE), ExprBuilder(ExprBuilder) {}
82 
// Look up (or synthesize) the value corresponding to 'Old' in the copied
// code. The lookup order matters: constants, then the global map, then the
// block-local map, then SCEV-based regeneration, then scop-invariant values.
Value *BlockGenerator::getNewValue(ScopStmt &Stmt, const Value *Old,
                                   ValueMapT &BBMap, ValueMapT &GlobalMap,
                                   LoopToScevMapT &LTS, Loop *L) const {
  // We assume constants never change.
  // This avoids map lookups for many calls to this function.
  if (isa<Constant>(Old))
    return const_cast<Value *>(Old);

  if (Value *New = GlobalMap.lookup(Old)) {
    // The global map may hold a wider value (e.g. a widened induction
    // variable); truncate it back to the expected type if necessary.
    if (Old->getType()->getScalarSizeInBits() <
        New->getType()->getScalarSizeInBits())
      New = Builder.CreateTruncOrBitCast(New, Old->getType());

    return New;
  }

  if (Value *New = BBMap.lookup(Old))
    return New;

  // Try to regenerate the value from its scalar evolution expression,
  // rewritten for the new loop induction variables (LTS) and with any
  // already-copied values substituted in.
  if (SE.isSCEVable(Old->getType()))
    if (const SCEV *Scev = SE.getSCEVAtScope(const_cast<Value *>(Old), L)) {
      if (!isa<SCEVCouldNotCompute>(Scev)) {
        const SCEV *NewScev = apply(Scev, LTS, SE);
        ValueToValueMap VTV;
        VTV.insert(BBMap.begin(), BBMap.end());
        VTV.insert(GlobalMap.begin(), GlobalMap.end());
        NewScev = SCEVParameterRewriter::rewrite(NewScev, SE, VTV);
        SCEVExpander Expander(SE, "polly");
        Value *Expanded = Expander.expandCodeFor(NewScev, Old->getType(),
                                                 Builder.GetInsertPoint());

        // Cache the expansion so repeated uses in this block share it.
        BBMap[Old] = Expanded;
        return Expanded;
      }
    }

  // A scop-constant value defined by a global or a function parameter.
  if (isa<GlobalValue>(Old) || isa<Argument>(Old))
    return const_cast<Value *>(Old);

  // A scop-constant value defined by an instruction executed outside the scop.
  if (const Instruction *Inst = dyn_cast<Instruction>(Old))
    if (!Stmt.getParent()->getRegion().contains(Inst->getParent()))
      return const_cast<Value *>(Old);

  // The scalar dependence is neither available nor SCEVCodegenable.
  llvm_unreachable("Unexpected scalar dependence in region!");
  return nullptr;
}
132 
// Clone 'Inst' into the current insert point, remapping each operand through
// getNewValue. The clone is only inserted (and recorded in BBMap) if every
// operand could be remapped.
void BlockGenerator::copyInstScalar(ScopStmt &Stmt, const Instruction *Inst,
                                    ValueMapT &BBMap, ValueMapT &GlobalMap,
                                    LoopToScevMapT &LTS) {
  // We do not generate debug intrinsics as we did not investigate how to
  // copy them correctly. At the current state, they just crash the code
  // generation as the meta-data operands are not correctly copied.
  if (isa<DbgInfoIntrinsic>(Inst))
    return;

  Instruction *NewInst = Inst->clone();

  // Replace old operands with the new ones.
  for (Value *OldOperand : Inst->operands()) {
    Value *NewOperand = getNewValue(Stmt, OldOperand, BBMap, GlobalMap, LTS,
                                    getLoopForInst(Inst));

    if (!NewOperand) {
      assert(!isa<StoreInst>(NewInst) &&
             "Store instructions are always needed!");
      // The clone was never inserted, so it must be freed manually.
      delete NewInst;
      return;
    }

    NewInst->replaceUsesOfWith(OldOperand, NewOperand);
  }

  Builder.Insert(NewInst);
  BBMap[Inst] = NewInst;

  // Only named (non-void) values can carry a name; prefix marks the copy.
  if (!NewInst->getType()->isVoidTy())
    NewInst->setName("p_" + Inst->getName());
}
165 
166 Value *BlockGenerator::getNewAccessOperand(ScopStmt &Stmt,
167                                            const MemoryAccess &MA) {
168   isl_pw_multi_aff *PWAccRel;
169   isl_union_map *Schedule;
170   isl_ast_expr *Expr;
171   isl_ast_build *Build = Stmt.getAstBuild();
172 
173   assert(ExprBuilder && Build &&
174          "Cannot generate new value without IslExprBuilder!");
175 
176   Schedule = isl_ast_build_get_schedule(Build);
177   PWAccRel = MA.applyScheduleToAccessRelation(Schedule);
178 
179   Expr = isl_ast_build_access_from_pw_multi_aff(Build, PWAccRel);
180   Expr = isl_ast_expr_address_of(Expr);
181 
182   return ExprBuilder->create(Expr);
183 }
184 
185 Value *BlockGenerator::generateLocationAccessed(
186     ScopStmt &Stmt, const Instruction *Inst, const Value *Pointer,
187     ValueMapT &BBMap, ValueMapT &GlobalMap, LoopToScevMapT &LTS) {
188   const MemoryAccess &MA = Stmt.getAccessFor(Inst);
189 
190   Value *NewPointer;
191   if (MA.hasNewAccessRelation())
192     NewPointer = getNewAccessOperand(Stmt, MA);
193   else
194     NewPointer =
195         getNewValue(Stmt, Pointer, BBMap, GlobalMap, LTS, getLoopForInst(Inst));
196 
197   return NewPointer;
198 }
199 
// Return the innermost loop containing 'Inst' (null if not in a loop).
Loop *BlockGenerator::getLoopForInst(const llvm::Instruction *Inst) {
  return LI.getLoopFor(Inst->getParent());
}
203 
204 Value *BlockGenerator::generateScalarLoad(ScopStmt &Stmt, const LoadInst *Load,
205                                           ValueMapT &BBMap,
206                                           ValueMapT &GlobalMap,
207                                           LoopToScevMapT &LTS) {
208   const Value *Pointer = Load->getPointerOperand();
209   Value *NewPointer =
210       generateLocationAccessed(Stmt, Load, Pointer, BBMap, GlobalMap, LTS);
211   Value *ScalarLoad = Builder.CreateAlignedLoad(
212       NewPointer, Load->getAlignment(), Load->getName() + "_p_scalar_");
213   return ScalarLoad;
214 }
215 
216 Value *BlockGenerator::generateScalarStore(ScopStmt &Stmt,
217                                            const StoreInst *Store,
218                                            ValueMapT &BBMap,
219                                            ValueMapT &GlobalMap,
220                                            LoopToScevMapT &LTS) {
221   const Value *Pointer = Store->getPointerOperand();
222   Value *NewPointer =
223       generateLocationAccessed(Stmt, Store, Pointer, BBMap, GlobalMap, LTS);
224   Value *ValueOperand = getNewValue(Stmt, Store->getValueOperand(), BBMap,
225                                     GlobalMap, LTS, getLoopForInst(Store));
226 
227   Value *NewStore = Builder.CreateAlignedStore(ValueOperand, NewPointer,
228                                                Store->getAlignment());
229   return NewStore;
230 }
231 
232 void BlockGenerator::copyInstruction(ScopStmt &Stmt, const Instruction *Inst,
233                                      ValueMapT &BBMap, ValueMapT &GlobalMap,
234                                      LoopToScevMapT &LTS) {
235   // Terminator instructions control the control flow. They are explicitly
236   // expressed in the clast and do not need to be copied.
237   if (Inst->isTerminator())
238     return;
239 
240   if (canSynthesize(Inst, &LI, &SE, &Stmt.getParent()->getRegion()))
241     return;
242 
243   if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
244     Value *NewLoad = generateScalarLoad(Stmt, Load, BBMap, GlobalMap, LTS);
245     // Compute NewLoad before its insertion in BBMap to make the insertion
246     // deterministic.
247     BBMap[Load] = NewLoad;
248     return;
249   }
250 
251   if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
252     Value *NewStore = generateScalarStore(Stmt, Store, BBMap, GlobalMap, LTS);
253     // Compute NewStore before its insertion in BBMap to make the insertion
254     // deterministic.
255     BBMap[Store] = NewStore;
256     return;
257   }
258 
259   // Skip some special intrinsics for which we do not adjust the semantics to
260   // the new schedule. All others are handled like every other instruction.
261   if (auto *IT = dyn_cast<IntrinsicInst>(Inst)) {
262     switch (IT->getIntrinsicID()) {
263     // Lifetime markers are ignored.
264     case llvm::Intrinsic::lifetime_start:
265     case llvm::Intrinsic::lifetime_end:
266     // Invariant markers are ignored.
267     case llvm::Intrinsic::invariant_start:
268     case llvm::Intrinsic::invariant_end:
269     // Some misc annotations are ignored.
270     case llvm::Intrinsic::var_annotation:
271     case llvm::Intrinsic::ptr_annotation:
272     case llvm::Intrinsic::annotation:
273     case llvm::Intrinsic::donothing:
274     case llvm::Intrinsic::assume:
275     case llvm::Intrinsic::expect:
276       return;
277     default:
278       // Other intrinsics are copied.
279       break;
280     }
281   }
282 
283   copyInstScalar(Stmt, Inst, BBMap, GlobalMap, LTS);
284 }
285 
286 void BlockGenerator::copyBB(ScopStmt &Stmt, ValueMapT &GlobalMap,
287                             LoopToScevMapT &LTS) {
288   auto *DTWP = P->getAnalysisIfAvailable<DominatorTreeWrapperPass>();
289   auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
290 
291   BasicBlock *BB = Stmt.getBasicBlock();
292   BasicBlock *CopyBB =
293       SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), DT, &LI);
294   CopyBB->setName("polly.stmt." + BB->getName());
295   Builder.SetInsertPoint(CopyBB->begin());
296 
297   ValueMapT BBMap;
298 
299   for (Instruction &Inst : *BB)
300     copyInstruction(Stmt, &Inst, BBMap, GlobalMap, LTS);
301 }
302 
// Construct a vector code generator on top of an existing BlockGenerator.
// GlobalMaps/VLTS hold one value map / loop-to-SCEV map per vector lane;
// their size defines the vector width. Schedule describes the statement's
// scheduling and is used for stride analysis of memory accesses.
VectorBlockGenerator::VectorBlockGenerator(BlockGenerator &BlockGen,
                                           VectorValueMapT &GlobalMaps,
                                           std::vector<LoopToScevMapT> &VLTS,
                                           isl_map *Schedule)
    : BlockGenerator(BlockGen), GlobalMaps(GlobalMaps), VLTS(VLTS),
      Schedule(Schedule) {
  assert(GlobalMaps.size() > 1 && "Only one vector lane found");
  assert(Schedule && "No statement domain provided");
}
312 
313 Value *VectorBlockGenerator::getVectorValue(ScopStmt &Stmt, const Value *Old,
314                                             ValueMapT &VectorMap,
315                                             VectorValueMapT &ScalarMaps,
316                                             Loop *L) {
317   if (Value *NewValue = VectorMap.lookup(Old))
318     return NewValue;
319 
320   int Width = getVectorWidth();
321 
322   Value *Vector = UndefValue::get(VectorType::get(Old->getType(), Width));
323 
324   for (int Lane = 0; Lane < Width; Lane++)
325     Vector = Builder.CreateInsertElement(
326         Vector, getNewValue(Stmt, Old, ScalarMaps[Lane], GlobalMaps[Lane],
327                             VLTS[Lane], L),
328         Builder.getInt32(Lane));
329 
330   VectorMap[Old] = Vector;
331 
332   return Vector;
333 }
334 
335 Type *VectorBlockGenerator::getVectorPtrTy(const Value *Val, int Width) {
336   PointerType *PointerTy = dyn_cast<PointerType>(Val->getType());
337   assert(PointerTy && "PointerType expected");
338 
339   Type *ScalarType = PointerTy->getElementType();
340   VectorType *VectorType = VectorType::get(ScalarType, Width);
341 
342   return PointerType::getUnqual(VectorType);
343 }
344 
// Generate a single wide load for a consecutive (stride +1 or -1) access.
// For a negative stride the address of the last lane is used and the loaded
// vector is reversed with a shuffle afterwards.
Value *VectorBlockGenerator::generateStrideOneLoad(
    ScopStmt &Stmt, const LoadInst *Load, VectorValueMapT &ScalarMaps,
    bool NegativeStride = false) {
  unsigned VectorWidth = getVectorWidth();
  const Value *Pointer = Load->getPointerOperand();
  Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth);
  // For a negative stride, lane VectorWidth-1 holds the lowest address.
  unsigned Offset = NegativeStride ? VectorWidth - 1 : 0;

  Value *NewPointer = nullptr;
  NewPointer = generateLocationAccessed(Stmt, Load, Pointer, ScalarMaps[Offset],
                                        GlobalMaps[Offset], VLTS[Offset]);
  Value *VectorPtr =
      Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
  LoadInst *VecLoad =
      Builder.CreateLoad(VectorPtr, Load->getName() + "_p_vec_full");
  // Without the aligned-access flag, conservatively assume 8-byte alignment.
  if (!Aligned)
    VecLoad->setAlignment(8);

  if (NegativeStride) {
    // Reverse the lanes with a shuffle mask [W-1, W-2, ..., 0].
    SmallVector<Constant *, 16> Indices;
    for (int i = VectorWidth - 1; i >= 0; i--)
      Indices.push_back(ConstantInt::get(Builder.getInt32Ty(), i));
    Constant *SV = llvm::ConstantVector::get(Indices);
    Value *RevVecLoad = Builder.CreateShuffleVector(
        VecLoad, VecLoad, SV, Load->getName() + "_reverse");
    return RevVecLoad;
  }

  return VecLoad;
}
375 
376 Value *VectorBlockGenerator::generateStrideZeroLoad(ScopStmt &Stmt,
377                                                     const LoadInst *Load,
378                                                     ValueMapT &BBMap) {
379   const Value *Pointer = Load->getPointerOperand();
380   Type *VectorPtrType = getVectorPtrTy(Pointer, 1);
381   Value *NewPointer = generateLocationAccessed(Stmt, Load, Pointer, BBMap,
382                                                GlobalMaps[0], VLTS[0]);
383   Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
384                                            Load->getName() + "_p_vec_p");
385   LoadInst *ScalarLoad =
386       Builder.CreateLoad(VectorPtr, Load->getName() + "_p_splat_one");
387 
388   if (!Aligned)
389     ScalarLoad->setAlignment(8);
390 
391   Constant *SplatVector = Constant::getNullValue(
392       VectorType::get(Builder.getInt32Ty(), getVectorWidth()));
393 
394   Value *VectorLoad = Builder.CreateShuffleVector(
395       ScalarLoad, ScalarLoad, SplatVector, Load->getName() + "_p_splat");
396   return VectorLoad;
397 }
398 
399 Value *VectorBlockGenerator::generateUnknownStrideLoad(
400     ScopStmt &Stmt, const LoadInst *Load, VectorValueMapT &ScalarMaps) {
401   int VectorWidth = getVectorWidth();
402   const Value *Pointer = Load->getPointerOperand();
403   VectorType *VectorType = VectorType::get(
404       dyn_cast<PointerType>(Pointer->getType())->getElementType(), VectorWidth);
405 
406   Value *Vector = UndefValue::get(VectorType);
407 
408   for (int i = 0; i < VectorWidth; i++) {
409     Value *NewPointer = generateLocationAccessed(
410         Stmt, Load, Pointer, ScalarMaps[i], GlobalMaps[i], VLTS[i]);
411     Value *ScalarLoad =
412         Builder.CreateLoad(NewPointer, Load->getName() + "_p_scalar_");
413     Vector = Builder.CreateInsertElement(
414         Vector, ScalarLoad, Builder.getInt32(i), Load->getName() + "_p_vec_");
415   }
416 
417   return Vector;
418 }
419 
// Generate vector (or, as a fallback, per-lane scalar) code for a load,
// choosing the cheapest form the access's stride allows.
void VectorBlockGenerator::generateLoad(ScopStmt &Stmt, const LoadInst *Load,
                                        ValueMapT &VectorMap,
                                        VectorValueMapT &ScalarMaps) {
  // Fall back to one scalar load per lane if the chosen vectorizer wants
  // grouped unrolling or the element type cannot live in a vector.
  if (PollyVectorizerChoice >= VECTORIZER_FIRST_NEED_GROUPED_UNROLL ||
      !VectorType::isValidElementType(Load->getType())) {
    for (int i = 0; i < getVectorWidth(); i++)
      ScalarMaps[i][Load] =
          generateScalarLoad(Stmt, Load, ScalarMaps[i], GlobalMaps[i], VLTS[i]);
    return;
  }

  const MemoryAccess &Access = Stmt.getAccessFor(Load);

  // Make sure we have scalar values available to access the pointer to
  // the data location.
  extractScalarValues(Load, VectorMap, ScalarMaps);

  // Dispatch on the access stride; each isStride* call consumes a copy of
  // the schedule map (isl value semantics).
  Value *NewLoad;
  if (Access.isStrideZero(isl_map_copy(Schedule)))
    NewLoad = generateStrideZeroLoad(Stmt, Load, ScalarMaps[0]);
  else if (Access.isStrideOne(isl_map_copy(Schedule)))
    NewLoad = generateStrideOneLoad(Stmt, Load, ScalarMaps);
  else if (Access.isStrideX(isl_map_copy(Schedule), -1))
    NewLoad = generateStrideOneLoad(Stmt, Load, ScalarMaps, true);
  else
    NewLoad = generateUnknownStrideLoad(Stmt, Load, ScalarMaps);

  VectorMap[Load] = NewLoad;
}
449 
450 void VectorBlockGenerator::copyUnaryInst(ScopStmt &Stmt,
451                                          const UnaryInstruction *Inst,
452                                          ValueMapT &VectorMap,
453                                          VectorValueMapT &ScalarMaps) {
454   int VectorWidth = getVectorWidth();
455   Value *NewOperand = getVectorValue(Stmt, Inst->getOperand(0), VectorMap,
456                                      ScalarMaps, getLoopForInst(Inst));
457 
458   assert(isa<CastInst>(Inst) && "Can not generate vector code for instruction");
459 
460   const CastInst *Cast = dyn_cast<CastInst>(Inst);
461   VectorType *DestType = VectorType::get(Inst->getType(), VectorWidth);
462   VectorMap[Inst] = Builder.CreateCast(Cast->getOpcode(), NewOperand, DestType);
463 }
464 
465 void VectorBlockGenerator::copyBinaryInst(ScopStmt &Stmt,
466                                           const BinaryOperator *Inst,
467                                           ValueMapT &VectorMap,
468                                           VectorValueMapT &ScalarMaps) {
469   Loop *L = getLoopForInst(Inst);
470   Value *OpZero = Inst->getOperand(0);
471   Value *OpOne = Inst->getOperand(1);
472 
473   Value *NewOpZero, *NewOpOne;
474   NewOpZero = getVectorValue(Stmt, OpZero, VectorMap, ScalarMaps, L);
475   NewOpOne = getVectorValue(Stmt, OpOne, VectorMap, ScalarMaps, L);
476 
477   Value *NewInst = Builder.CreateBinOp(Inst->getOpcode(), NewOpZero, NewOpOne,
478                                        Inst->getName() + "p_vec");
479   VectorMap[Inst] = NewInst;
480 }
481 
482 void VectorBlockGenerator::copyStore(ScopStmt &Stmt, const StoreInst *Store,
483                                      ValueMapT &VectorMap,
484                                      VectorValueMapT &ScalarMaps) {
485   const MemoryAccess &Access = Stmt.getAccessFor(Store);
486 
487   const Value *Pointer = Store->getPointerOperand();
488   Value *Vector = getVectorValue(Stmt, Store->getValueOperand(), VectorMap,
489                                  ScalarMaps, getLoopForInst(Store));
490 
491   // Make sure we have scalar values available to access the pointer to
492   // the data location.
493   extractScalarValues(Store, VectorMap, ScalarMaps);
494 
495   if (Access.isStrideOne(isl_map_copy(Schedule))) {
496     Type *VectorPtrType = getVectorPtrTy(Pointer, getVectorWidth());
497     Value *NewPointer = generateLocationAccessed(
498         Stmt, Store, Pointer, ScalarMaps[0], GlobalMaps[0], VLTS[0]);
499 
500     Value *VectorPtr =
501         Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
502     StoreInst *Store = Builder.CreateStore(Vector, VectorPtr);
503 
504     if (!Aligned)
505       Store->setAlignment(8);
506   } else {
507     for (unsigned i = 0; i < ScalarMaps.size(); i++) {
508       Value *Scalar = Builder.CreateExtractElement(Vector, Builder.getInt32(i));
509       Value *NewPointer = generateLocationAccessed(
510           Stmt, Store, Pointer, ScalarMaps[i], GlobalMaps[i], VLTS[i]);
511       Builder.CreateStore(Scalar, NewPointer);
512     }
513   }
514 }
515 
516 bool VectorBlockGenerator::hasVectorOperands(const Instruction *Inst,
517                                              ValueMapT &VectorMap) {
518   for (Value *Operand : Inst->operands())
519     if (VectorMap.count(Operand))
520       return true;
521   return false;
522 }
523 
// For every operand of 'Inst' that only exists as a vector, extract one
// scalar per lane into the lane-local scalar maps. Returns true iff at
// least one operand had a vector version.
bool VectorBlockGenerator::extractScalarValues(const Instruction *Inst,
                                               ValueMapT &VectorMap,
                                               VectorValueMapT &ScalarMaps) {
  bool HasVectorOperand = false;
  int VectorWidth = getVectorWidth();

  for (Value *Operand : Inst->operands()) {
    ValueMapT::iterator VecOp = VectorMap.find(Operand);

    if (VecOp == VectorMap.end())
      continue;

    HasVectorOperand = true;
    Value *NewVector = VecOp->second;

    for (int i = 0; i < VectorWidth; ++i) {
      ValueMapT &SM = ScalarMaps[i];

      // If there is one scalar extracted, all scalar elements should have
      // already been extracted by the code here. So no need to check for the
      // existence of all of them.
      if (SM.count(Operand))
        break;

      SM[Operand] =
          Builder.CreateExtractElement(NewVector, Builder.getInt32(i));
    }
  }

  return HasVectorOperand;
}
555 
556 void VectorBlockGenerator::copyInstScalarized(ScopStmt &Stmt,
557                                               const Instruction *Inst,
558                                               ValueMapT &VectorMap,
559                                               VectorValueMapT &ScalarMaps) {
560   bool HasVectorOperand;
561   int VectorWidth = getVectorWidth();
562 
563   HasVectorOperand = extractScalarValues(Inst, VectorMap, ScalarMaps);
564 
565   for (int VectorLane = 0; VectorLane < getVectorWidth(); VectorLane++)
566     BlockGenerator::copyInstruction(Stmt, Inst, ScalarMaps[VectorLane],
567                                     GlobalMaps[VectorLane], VLTS[VectorLane]);
568 
569   if (!VectorType::isValidElementType(Inst->getType()) || !HasVectorOperand)
570     return;
571 
572   // Make the result available as vector value.
573   VectorType *VectorType = VectorType::get(Inst->getType(), VectorWidth);
574   Value *Vector = UndefValue::get(VectorType);
575 
576   for (int i = 0; i < VectorWidth; i++)
577     Vector = Builder.CreateInsertElement(Vector, ScalarMaps[i][Inst],
578                                          Builder.getInt32(i));
579 
580   VectorMap[Inst] = Vector;
581 }
582 
583 int VectorBlockGenerator::getVectorWidth() { return GlobalMaps.size(); }
584 
// Vector counterpart of BlockGenerator::copyInstruction: dispatch each
// instruction to the appropriate vector emitter, falling back to per-lane
// scalarization when no vector form is known.
void VectorBlockGenerator::copyInstruction(ScopStmt &Stmt,
                                           const Instruction *Inst,
                                           ValueMapT &VectorMap,
                                           VectorValueMapT &ScalarMaps) {
  // Terminator instructions control the control flow. They are explicitly
  // expressed in the clast and do not need to be copied.
  if (Inst->isTerminator())
    return;

  // Values computable from a SCEV expression are regenerated on demand and
  // need no explicit copy.
  if (canSynthesize(Inst, &LI, &SE, &Stmt.getParent()->getRegion()))
    return;

  if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
    generateLoad(Stmt, Load, VectorMap, ScalarMaps);
    return;
  }

  // Only instructions with at least one vector operand are worth emitting
  // in vector form; everything else is scalarized below.
  if (hasVectorOperands(Inst, VectorMap)) {
    if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
      copyStore(Stmt, Store, VectorMap, ScalarMaps);
      return;
    }

    if (const UnaryInstruction *Unary = dyn_cast<UnaryInstruction>(Inst)) {
      copyUnaryInst(Stmt, Unary, VectorMap, ScalarMaps);
      return;
    }

    if (const BinaryOperator *Binary = dyn_cast<BinaryOperator>(Inst)) {
      copyBinaryInst(Stmt, Binary, VectorMap, ScalarMaps);
      return;
    }

    // Fallthrough: We generate scalar instructions, if we don't know how to
    // generate vector code.
  }

  copyInstScalarized(Stmt, Inst, VectorMap, ScalarMaps);
}
624 
// Emit the vectorized copy of the statement's basic block into a freshly
// split block at the current insert point.
void VectorBlockGenerator::copyBB(ScopStmt &Stmt) {
  // Keep the dominator tree up to date while splitting, if it is available.
  auto *DTWP = P->getAnalysisIfAvailable<DominatorTreeWrapperPass>();
  auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;

  BasicBlock *BB = Stmt.getBasicBlock();
  BasicBlock *CopyBB =
      SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), DT, &LI);
  CopyBB->setName("polly.stmt." + BB->getName());
  Builder.SetInsertPoint(CopyBB->begin());

  // Create two maps that store the mapping from the original instructions of
  // the old basic block to their copies in the new basic block. Those maps
  // are basic block local.
  //
  // As vector code generation is supported there is one map for scalar values
  // and one for vector values.
  //
  // In case we just do scalar code generation, the vectorMap is not used and
  // the scalarMap has just one dimension, which contains the mapping.
  //
  // In case vector code generation is done, an instruction may either appear
  // in the vector map once (as it is calculating >vectorwidth< values at a
  // time), or (if the values are calculated using scalar operations) it
  // appears once in every dimension of the scalarMap.
  VectorValueMapT ScalarBlockMap(getVectorWidth());
  ValueMapT VectorBlockMap;

  for (Instruction &Inst : *BB)
    copyInstruction(Stmt, &Inst, VectorBlockMap, ScalarBlockMap);
}
655