//===--- BlockGenerators.cpp - Generate code for statements -----*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the BlockGenerator and VectorBlockGenerator classes,
// which generate sequential code and vectorized code for a polyhedral
// statement, respectively.
//
//===----------------------------------------------------------------------===//
153b11a16aSHongbin Zheng 
163b11a16aSHongbin Zheng #include "polly/ScopInfo.h"
1768794217SHongbin Zheng #include "polly/CodeGen/CodeGeneration.h"
188a846610SHongbin Zheng #include "polly/CodeGen/BlockGenerators.h"
193b11a16aSHongbin Zheng #include "polly/Support/GICHelper.h"
203b11a16aSHongbin Zheng 
21e71c6ab5STobias Grosser #include "llvm/Analysis/LoopInfo.h"
22e71c6ab5STobias Grosser #include "llvm/Analysis/ScalarEvolution.h"
23e71c6ab5STobias Grosser #include "llvm/Analysis/ScalarEvolutionExpander.h"
243b11a16aSHongbin Zheng #include "llvm/Transforms/Utils/BasicBlockUtils.h"
253b11a16aSHongbin Zheng #include "llvm/Support/CommandLine.h"
263b11a16aSHongbin Zheng 
273b11a16aSHongbin Zheng #include "isl/aff.h"
283b11a16aSHongbin Zheng #include "isl/set.h"
293b11a16aSHongbin Zheng 
303b11a16aSHongbin Zheng using namespace llvm;
313b11a16aSHongbin Zheng using namespace polly;
323b11a16aSHongbin Zheng 
333b11a16aSHongbin Zheng static cl::opt<bool>
343b11a16aSHongbin Zheng Aligned("enable-polly-aligned",
353b11a16aSHongbin Zheng        cl::desc("Assumed aligned memory accesses."), cl::Hidden,
363b11a16aSHongbin Zheng        cl::value_desc("OpenMP code generation enabled if true"),
373b11a16aSHongbin Zheng        cl::init(false), cl::ZeroOrMore);
383b11a16aSHongbin Zheng 
393b11a16aSHongbin Zheng static cl::opt<bool>
40e71c6ab5STobias Grosser SCEVCodegen("polly-codegen-scev",
41e71c6ab5STobias Grosser             cl::desc("Use SCEV based code generation."), cl::Hidden,
42e71c6ab5STobias Grosser             cl::init(false), cl::ZeroOrMore);
43e71c6ab5STobias Grosser 
44e71c6ab5STobias Grosser /// The SCEVRewriter takes a scalar evolution expression and updates the
45e71c6ab5STobias Grosser /// following components:
46e71c6ab5STobias Grosser ///
47e71c6ab5STobias Grosser /// - SCEVUnknown
48e71c6ab5STobias Grosser ///
49e71c6ab5STobias Grosser ///   Values referenced in SCEVUnknown subexpressions are looked up in
50e71c6ab5STobias Grosser ///   two Value to Value maps (GlobalMap and BBMap). If they are found they are
51e71c6ab5STobias Grosser ///   replaced by a reference to the value they map to.
52e71c6ab5STobias Grosser ///
53e71c6ab5STobias Grosser /// - SCEVAddRecExpr
54e71c6ab5STobias Grosser ///
55e71c6ab5STobias Grosser ///   Based on a Loop -> Value map {Loop_1: %Value}, an expression
56e71c6ab5STobias Grosser ///   {%Base, +, %Step}<Loop_1> is rewritten to %Base + %Value * %Step.
57e71c6ab5STobias Grosser ///   AddRecExpr's with more than two operands can not be translated.
58e71c6ab5STobias Grosser ///
59e71c6ab5STobias Grosser ///   FIXME: The comment above is not yet reality. At the moment we derive
60e71c6ab5STobias Grosser ///   %Value by looking up the canonical IV of the loop and by defining
61e71c6ab5STobias Grosser ///   %Value = GlobalMap[%IV]. This needs to be changed to remove the need for
62e71c6ab5STobias Grosser ///   canonical induction variables.
63e71c6ab5STobias Grosser ///
64e71c6ab5STobias Grosser ///
65e71c6ab5STobias Grosser /// How can this be used?
66e71c6ab5STobias Grosser /// ====================
67e71c6ab5STobias Grosser ///
68e71c6ab5STobias Grosser /// SCEVRewrite based code generation works on virtually independent blocks.
69e71c6ab5STobias Grosser /// This means we do not run the independent blocks pass to rewrite scalar
70e71c6ab5STobias Grosser /// instructions, but just ignore instructions that we can analyze with scalar
71e71c6ab5STobias Grosser /// evolution. Virtually independent blocks are blocks that only reference the
72e71c6ab5STobias Grosser /// following values:
73e71c6ab5STobias Grosser ///
74e71c6ab5STobias Grosser /// o Values calculated within a basic block
75e71c6ab5STobias Grosser /// o Values representable by SCEV
76e71c6ab5STobias Grosser ///
77e71c6ab5STobias Grosser /// During code generation we can ignore all instructions:
78e71c6ab5STobias Grosser ///
79e71c6ab5STobias Grosser /// - Ignore all instructions except:
80e71c6ab5STobias Grosser ///   - Load instructions
81e71c6ab5STobias Grosser ///   - Instructions that reference operands already calculated within the
82e71c6ab5STobias Grosser ///     basic block.
83e71c6ab5STobias Grosser ///   - Store instructions
84e71c6ab5STobias Grosser struct SCEVRewriter : public SCEVVisitor<SCEVRewriter, const SCEV*> {
85e71c6ab5STobias Grosser public:
86e71c6ab5STobias Grosser   static const SCEV *rewrite(const SCEV *scev, Scop &S, ScalarEvolution &SE,
87e71c6ab5STobias Grosser                              ValueMapT &GlobalMap,  ValueMapT &BBMap) {
88e71c6ab5STobias Grosser     SCEVRewriter Rewriter(S, SE, GlobalMap, BBMap);
89e71c6ab5STobias Grosser     return Rewriter.visit(scev);
90e71c6ab5STobias Grosser   }
91e71c6ab5STobias Grosser 
92e71c6ab5STobias Grosser   SCEVRewriter(Scop &S, ScalarEvolution &SE, ValueMapT &GlobalMap,
93e71c6ab5STobias Grosser                ValueMapT &BBMap) : S(S), SE(SE), GlobalMap(GlobalMap),
94e71c6ab5STobias Grosser                BBMap(BBMap) {}
95e71c6ab5STobias Grosser 
96e71c6ab5STobias Grosser   const SCEV *visit(const SCEV *Expr) {
97e71c6ab5STobias Grosser     // FIXME: The parameter handling is incorrect.
98e71c6ab5STobias Grosser     //
99e71c6ab5STobias Grosser     // Polly does only detect parameters in Access function and loop iteration
100e71c6ab5STobias Grosser     // counters, but it does not get parameters that are just used by
101e71c6ab5STobias Grosser     // instructions within the basic block.
102e71c6ab5STobias Grosser     //
103e71c6ab5STobias Grosser     // There are two options to solve this:
104e71c6ab5STobias Grosser     //  o Iterate over all instructions of the SCoP and find the actual
105e71c6ab5STobias Grosser     //    parameters.
106e71c6ab5STobias Grosser     //  o Just check within the SCEVRewriter if Values lay outside of the SCoP
107e71c6ab5STobias Grosser     //    and detect parameters on the fly.
108e71c6ab5STobias Grosser     //
109e71c6ab5STobias Grosser     // This is especially important for OpenMP and GPGPU code generation, as
110e71c6ab5STobias Grosser     // they require us to detect and possibly rewrite the corresponding
111e71c6ab5STobias Grosser     // parameters.
112e71c6ab5STobias Grosser     if (isl_id *Id = S.getIdForParam(Expr)) {
113e71c6ab5STobias Grosser       isl_id_free(Id);
114e71c6ab5STobias Grosser       return Expr;
115e71c6ab5STobias Grosser     }
116e71c6ab5STobias Grosser 
117e71c6ab5STobias Grosser 
118e71c6ab5STobias Grosser     return SCEVVisitor<SCEVRewriter, const SCEV*>::visit(Expr);
119e71c6ab5STobias Grosser   }
120e71c6ab5STobias Grosser 
121e71c6ab5STobias Grosser   const SCEV *visitConstant(const SCEVConstant *Constant) {
122e71c6ab5STobias Grosser     return Constant;
123e71c6ab5STobias Grosser   }
124e71c6ab5STobias Grosser 
125e71c6ab5STobias Grosser   const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) {
126e71c6ab5STobias Grosser     const SCEV *Operand = visit(Expr->getOperand());
127e71c6ab5STobias Grosser     return SE.getTruncateExpr(Operand, Expr->getType());
128e71c6ab5STobias Grosser   }
129e71c6ab5STobias Grosser 
130e71c6ab5STobias Grosser   const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
131e71c6ab5STobias Grosser     const SCEV *Operand = visit(Expr->getOperand());
132e71c6ab5STobias Grosser     return SE.getZeroExtendExpr(Operand, Expr->getType());
133e71c6ab5STobias Grosser   }
134e71c6ab5STobias Grosser 
135e71c6ab5STobias Grosser   const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
136e71c6ab5STobias Grosser     const SCEV *Operand = visit(Expr->getOperand());
137e71c6ab5STobias Grosser     return SE.getSignExtendExpr(Operand, Expr->getType());
138e71c6ab5STobias Grosser   }
139e71c6ab5STobias Grosser 
140e71c6ab5STobias Grosser   const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
141e71c6ab5STobias Grosser     SmallVector<const SCEV *, 2> Operands;
142e71c6ab5STobias Grosser     for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
143e71c6ab5STobias Grosser       const SCEV *Operand = visit(Expr->getOperand(i));
144e71c6ab5STobias Grosser       Operands.push_back(Operand);
145e71c6ab5STobias Grosser     }
146e71c6ab5STobias Grosser 
147e71c6ab5STobias Grosser     return SE.getAddExpr(Operands);
148e71c6ab5STobias Grosser   }
149e71c6ab5STobias Grosser 
150e71c6ab5STobias Grosser   const SCEV *visitMulExpr(const SCEVMulExpr *Expr) {
151e71c6ab5STobias Grosser     SmallVector<const SCEV *, 2> Operands;
152e71c6ab5STobias Grosser     for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
153e71c6ab5STobias Grosser       const SCEV *Operand = visit(Expr->getOperand(i));
154e71c6ab5STobias Grosser       Operands.push_back(Operand);
155e71c6ab5STobias Grosser     }
156e71c6ab5STobias Grosser 
157e71c6ab5STobias Grosser     return SE.getMulExpr(Operands);
158e71c6ab5STobias Grosser   }
159e71c6ab5STobias Grosser 
160e71c6ab5STobias Grosser   const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) {
161e71c6ab5STobias Grosser     return SE.getUDivExpr(visit(Expr->getLHS()), visit(Expr->getRHS()));
162e71c6ab5STobias Grosser   }
163e71c6ab5STobias Grosser 
164e71c6ab5STobias Grosser   // Return a new induction variable if the loop is within the original SCoP
165e71c6ab5STobias Grosser   // or NULL otherwise.
166e71c6ab5STobias Grosser   Value *getNewIV(const Loop *L) {
167e71c6ab5STobias Grosser     Value *IV = L->getCanonicalInductionVariable();
168e71c6ab5STobias Grosser     if (!IV)
169e71c6ab5STobias Grosser       return NULL;
170e71c6ab5STobias Grosser 
171e71c6ab5STobias Grosser     ValueMapT::iterator NewIV = GlobalMap.find(IV);
172e71c6ab5STobias Grosser 
173e71c6ab5STobias Grosser     if (NewIV == GlobalMap.end())
174e71c6ab5STobias Grosser       return NULL;
175e71c6ab5STobias Grosser 
176e71c6ab5STobias Grosser     return NewIV->second;
177e71c6ab5STobias Grosser   }
178e71c6ab5STobias Grosser 
179e71c6ab5STobias Grosser   const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
180e71c6ab5STobias Grosser     Value *IV;
181e71c6ab5STobias Grosser 
182e71c6ab5STobias Grosser     IV = getNewIV(Expr->getLoop());
183e71c6ab5STobias Grosser 
184e71c6ab5STobias Grosser     // The IV is not within the GlobalMaps. So do not rewrite it and also do
185e71c6ab5STobias Grosser     // not rewrite any descendants.
186e71c6ab5STobias Grosser     if (!IV)
187e71c6ab5STobias Grosser       return Expr;
188e71c6ab5STobias Grosser 
189e71c6ab5STobias Grosser     assert(Expr->getNumOperands() == 2
190e71c6ab5STobias Grosser           && "An AddRecExpr with more than two operands can not be rewritten.");
191e71c6ab5STobias Grosser 
192e71c6ab5STobias Grosser     const SCEV *Base, *Step, *IVExpr, *Product;
193e71c6ab5STobias Grosser 
194e71c6ab5STobias Grosser     Base = visit(Expr->getStart());
195e71c6ab5STobias Grosser     Step = visit(Expr->getOperand(1));
196e71c6ab5STobias Grosser     IVExpr = SE.getUnknown(IV);
197e71c6ab5STobias Grosser     IVExpr = SE.getTruncateOrSignExtend(IVExpr, Step->getType());
198e71c6ab5STobias Grosser     Product = SE.getMulExpr(Step, IVExpr);
199e71c6ab5STobias Grosser 
200e71c6ab5STobias Grosser     return SE.getAddExpr(Base, Product);
201e71c6ab5STobias Grosser   }
202e71c6ab5STobias Grosser 
203e71c6ab5STobias Grosser   const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
204e71c6ab5STobias Grosser     SmallVector<const SCEV *, 2> Operands;
205e71c6ab5STobias Grosser     for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
206e71c6ab5STobias Grosser       const SCEV *Operand = visit(Expr->getOperand(i));
207e71c6ab5STobias Grosser       Operands.push_back(Operand);
208e71c6ab5STobias Grosser     }
209e71c6ab5STobias Grosser 
210e71c6ab5STobias Grosser     return SE.getSMaxExpr(Operands);
211e71c6ab5STobias Grosser   }
212e71c6ab5STobias Grosser 
213e71c6ab5STobias Grosser   const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) {
214e71c6ab5STobias Grosser     SmallVector<const SCEV *, 2> Operands;
215e71c6ab5STobias Grosser     for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
216e71c6ab5STobias Grosser       const SCEV *Operand = visit(Expr->getOperand(i));
217e71c6ab5STobias Grosser       Operands.push_back(Operand);
218e71c6ab5STobias Grosser     }
219e71c6ab5STobias Grosser 
220e71c6ab5STobias Grosser     return SE.getUMaxExpr(Operands);
221e71c6ab5STobias Grosser   }
222e71c6ab5STobias Grosser 
223e71c6ab5STobias Grosser   const SCEV *visitUnknown(const SCEVUnknown *Expr) {
224e71c6ab5STobias Grosser     Value *V = Expr->getValue();
225e71c6ab5STobias Grosser 
226e71c6ab5STobias Grosser     if (GlobalMap.count(V))
227e71c6ab5STobias Grosser       return SE.getUnknown(GlobalMap[V]);
228e71c6ab5STobias Grosser 
229e71c6ab5STobias Grosser     if (BBMap.count(V))
230e71c6ab5STobias Grosser       return SE.getUnknown(BBMap[V]);
231e71c6ab5STobias Grosser 
232e71c6ab5STobias Grosser     return Expr;
233e71c6ab5STobias Grosser   }
234e71c6ab5STobias Grosser 
235e71c6ab5STobias Grosser private:
236e71c6ab5STobias Grosser   Scop &S;
237e71c6ab5STobias Grosser   ScalarEvolution &SE;
238e71c6ab5STobias Grosser   ValueMapT &GlobalMap;
239e71c6ab5STobias Grosser   ValueMapT &BBMap;
240e71c6ab5STobias Grosser };
241e71c6ab5STobias Grosser 
2423b11a16aSHongbin Zheng // Helper class to generate memory location.
2433b11a16aSHongbin Zheng namespace {
2443b11a16aSHongbin Zheng class IslGenerator {
2453b11a16aSHongbin Zheng public:
2463b11a16aSHongbin Zheng   IslGenerator(IRBuilder<> &Builder, std::vector<Value *> &IVS) :
2473b11a16aSHongbin Zheng     Builder(Builder), IVS(IVS) {}
2483b11a16aSHongbin Zheng   Value *generateIslInt(__isl_take isl_int Int);
2493b11a16aSHongbin Zheng   Value *generateIslAff(__isl_take isl_aff *Aff);
2503b11a16aSHongbin Zheng   Value *generateIslPwAff(__isl_take isl_pw_aff *PwAff);
2513b11a16aSHongbin Zheng 
2523b11a16aSHongbin Zheng private:
2533b11a16aSHongbin Zheng   typedef struct {
2543b11a16aSHongbin Zheng     Value *Result;
2553b11a16aSHongbin Zheng     class IslGenerator *Generator;
2563b11a16aSHongbin Zheng   } IslGenInfo;
2573b11a16aSHongbin Zheng 
2583b11a16aSHongbin Zheng   IRBuilder<> &Builder;
2593b11a16aSHongbin Zheng   std::vector<Value *> &IVS;
2603b11a16aSHongbin Zheng   static int mergeIslAffValues(__isl_take isl_set *Set,
2613b11a16aSHongbin Zheng                                __isl_take isl_aff *Aff, void *User);
2623b11a16aSHongbin Zheng };
2633b11a16aSHongbin Zheng }
2643b11a16aSHongbin Zheng 
2653b11a16aSHongbin Zheng 
2663b11a16aSHongbin Zheng Value *IslGenerator::generateIslInt(isl_int Int) {
2673b11a16aSHongbin Zheng   mpz_t IntMPZ;
2683b11a16aSHongbin Zheng   mpz_init(IntMPZ);
2693b11a16aSHongbin Zheng   isl_int_get_gmp(Int, IntMPZ);
2703b11a16aSHongbin Zheng   Value *IntValue = Builder.getInt(APInt_from_MPZ(IntMPZ));
2713b11a16aSHongbin Zheng   mpz_clear(IntMPZ);
2723b11a16aSHongbin Zheng   return IntValue;
2733b11a16aSHongbin Zheng }
2743b11a16aSHongbin Zheng 
2753b11a16aSHongbin Zheng Value *IslGenerator::generateIslAff(__isl_take isl_aff *Aff) {
2763b11a16aSHongbin Zheng   Value *Result;
2773b11a16aSHongbin Zheng   Value *ConstValue;
2783b11a16aSHongbin Zheng   isl_int ConstIsl;
2793b11a16aSHongbin Zheng 
2803b11a16aSHongbin Zheng   isl_int_init(ConstIsl);
2813b11a16aSHongbin Zheng   isl_aff_get_constant(Aff, &ConstIsl);
2823b11a16aSHongbin Zheng   ConstValue = generateIslInt(ConstIsl);
2833b11a16aSHongbin Zheng   Type *Ty = Builder.getInt64Ty();
2843b11a16aSHongbin Zheng 
2853b11a16aSHongbin Zheng   // FIXME: We should give the constant and coefficients the right type. Here
2863b11a16aSHongbin Zheng   // we force it into i64.
2873b11a16aSHongbin Zheng   Result = Builder.CreateSExtOrBitCast(ConstValue, Ty);
2883b11a16aSHongbin Zheng 
2893b11a16aSHongbin Zheng   unsigned int NbInputDims = isl_aff_dim(Aff, isl_dim_in);
2903b11a16aSHongbin Zheng 
2913b11a16aSHongbin Zheng   assert((IVS.size() == NbInputDims) && "The Dimension of Induction Variables"
2923b11a16aSHongbin Zheng          "must match the dimension of the affine space.");
2933b11a16aSHongbin Zheng 
2943b11a16aSHongbin Zheng   isl_int CoefficientIsl;
2953b11a16aSHongbin Zheng   isl_int_init(CoefficientIsl);
2963b11a16aSHongbin Zheng 
2973b11a16aSHongbin Zheng   for (unsigned int i = 0; i < NbInputDims; ++i) {
2983b11a16aSHongbin Zheng     Value *CoefficientValue;
2993b11a16aSHongbin Zheng     isl_aff_get_coefficient(Aff, isl_dim_in, i, &CoefficientIsl);
3003b11a16aSHongbin Zheng 
3013b11a16aSHongbin Zheng     if (isl_int_is_zero(CoefficientIsl))
3023b11a16aSHongbin Zheng       continue;
3033b11a16aSHongbin Zheng 
3043b11a16aSHongbin Zheng     CoefficientValue = generateIslInt(CoefficientIsl);
3053b11a16aSHongbin Zheng     CoefficientValue = Builder.CreateIntCast(CoefficientValue, Ty, true);
3063b11a16aSHongbin Zheng     Value *IV = Builder.CreateIntCast(IVS[i], Ty, true);
3073b11a16aSHongbin Zheng     Value *PAdd = Builder.CreateMul(CoefficientValue, IV, "p_mul_coeff");
3083b11a16aSHongbin Zheng     Result = Builder.CreateAdd(Result, PAdd, "p_sum_coeff");
3093b11a16aSHongbin Zheng   }
3103b11a16aSHongbin Zheng 
3113b11a16aSHongbin Zheng   isl_int_clear(CoefficientIsl);
3123b11a16aSHongbin Zheng   isl_int_clear(ConstIsl);
3133b11a16aSHongbin Zheng   isl_aff_free(Aff);
3143b11a16aSHongbin Zheng 
3153b11a16aSHongbin Zheng   return Result;
3163b11a16aSHongbin Zheng }
3173b11a16aSHongbin Zheng 
3183b11a16aSHongbin Zheng int IslGenerator::mergeIslAffValues(__isl_take isl_set *Set,
3193b11a16aSHongbin Zheng                                     __isl_take isl_aff *Aff, void *User) {
3203b11a16aSHongbin Zheng   IslGenInfo *GenInfo = (IslGenInfo *)User;
3213b11a16aSHongbin Zheng 
3223b11a16aSHongbin Zheng   assert((GenInfo->Result == NULL) && "Result is already set."
3233b11a16aSHongbin Zheng          "Currently only single isl_aff is supported");
3243b11a16aSHongbin Zheng   assert(isl_set_plain_is_universe(Set)
3253b11a16aSHongbin Zheng          && "Code generation failed because the set is not universe");
3263b11a16aSHongbin Zheng 
3273b11a16aSHongbin Zheng   GenInfo->Result = GenInfo->Generator->generateIslAff(Aff);
3283b11a16aSHongbin Zheng 
3293b11a16aSHongbin Zheng   isl_set_free(Set);
3303b11a16aSHongbin Zheng   return 0;
3313b11a16aSHongbin Zheng }
3323b11a16aSHongbin Zheng 
3333b11a16aSHongbin Zheng Value *IslGenerator::generateIslPwAff(__isl_take isl_pw_aff *PwAff) {
3343b11a16aSHongbin Zheng   IslGenInfo User;
3353b11a16aSHongbin Zheng   User.Result = NULL;
3363b11a16aSHongbin Zheng   User.Generator = this;
3373b11a16aSHongbin Zheng   isl_pw_aff_foreach_piece(PwAff, mergeIslAffValues, &User);
3383b11a16aSHongbin Zheng   assert(User.Result && "Code generation for isl_pw_aff failed");
3393b11a16aSHongbin Zheng 
3403b11a16aSHongbin Zheng   isl_pw_aff_free(PwAff);
3413b11a16aSHongbin Zheng   return User.Result;
3423b11a16aSHongbin Zheng }
3433b11a16aSHongbin Zheng 
3443b11a16aSHongbin Zheng 
3453b11a16aSHongbin Zheng BlockGenerator::BlockGenerator(IRBuilder<> &B, ScopStmt &Stmt, Pass *P):
346e71c6ab5STobias Grosser   Builder(B), Statement(Stmt), P(P), SE(P->getAnalysis<ScalarEvolution>()) {}
347e71c6ab5STobias Grosser 
348e71c6ab5STobias Grosser bool BlockGenerator::isSCEVIgnore(const Instruction *Inst) {
349e71c6ab5STobias Grosser   if (SCEVCodegen && SE.isSCEVable(Inst->getType()))
350e71c6ab5STobias Grosser     if (const SCEV *Scev = SE.getSCEV(const_cast<Instruction*>(Inst)))
351e71c6ab5STobias Grosser       if (!isa<SCEVCouldNotCompute>(Scev)) {
352e71c6ab5STobias Grosser         if (const SCEVUnknown *Unknown = dyn_cast<SCEVUnknown>(Scev)) {
353e71c6ab5STobias Grosser           if (Unknown->getValue() != Inst)
354e71c6ab5STobias Grosser             return true;
355e71c6ab5STobias Grosser         } else {
356e71c6ab5STobias Grosser           return true;
357e71c6ab5STobias Grosser         }
358e71c6ab5STobias Grosser       }
359e71c6ab5STobias Grosser 
360e71c6ab5STobias Grosser   return false;
361e71c6ab5STobias Grosser }
3623b11a16aSHongbin Zheng 
3633b11a16aSHongbin Zheng Value *BlockGenerator::getNewValue(const Value *Old, ValueMapT &BBMap,
3643b11a16aSHongbin Zheng                                    ValueMapT &GlobalMap) {
3653b11a16aSHongbin Zheng   // We assume constants never change.
3663b11a16aSHongbin Zheng   // This avoids map lookups for many calls to this function.
3673b11a16aSHongbin Zheng   if (isa<Constant>(Old))
3683b11a16aSHongbin Zheng     return const_cast<Value*>(Old);
3693b11a16aSHongbin Zheng 
3703b11a16aSHongbin Zheng   if (GlobalMap.count(Old)) {
3713b11a16aSHongbin Zheng     Value *New = GlobalMap[Old];
3723b11a16aSHongbin Zheng 
3733b11a16aSHongbin Zheng     if (Old->getType()->getScalarSizeInBits()
3743b11a16aSHongbin Zheng         < New->getType()->getScalarSizeInBits())
3753b11a16aSHongbin Zheng       New = Builder.CreateTruncOrBitCast(New, Old->getType());
3763b11a16aSHongbin Zheng 
3773b11a16aSHongbin Zheng     return New;
3783b11a16aSHongbin Zheng   }
3793b11a16aSHongbin Zheng 
3803b11a16aSHongbin Zheng   if (BBMap.count(Old)) {
3813b11a16aSHongbin Zheng     return BBMap[Old];
3823b11a16aSHongbin Zheng   }
3833b11a16aSHongbin Zheng 
384e71c6ab5STobias Grosser   if (SCEVCodegen && SE.isSCEVable(Old->getType()))
385e71c6ab5STobias Grosser     if (const SCEV *Scev = SE.getSCEV(const_cast<Value*>(Old)))
386e71c6ab5STobias Grosser       if (!isa<SCEVCouldNotCompute>(Scev)) {
387e71c6ab5STobias Grosser         const SCEV *NewScev = SCEVRewriter::rewrite(Scev,
388e71c6ab5STobias Grosser                                                     *Statement.getParent(), SE,
389e71c6ab5STobias Grosser                                                     GlobalMap, BBMap);
390e71c6ab5STobias Grosser         SCEVExpander Expander(SE, "polly");
391e71c6ab5STobias Grosser         Value *Expanded = Expander.expandCodeFor(NewScev, Old->getType(),
392e71c6ab5STobias Grosser                                                  Builder.GetInsertPoint());
393e71c6ab5STobias Grosser 
394e71c6ab5STobias Grosser         BBMap[Old] = Expanded;
395e71c6ab5STobias Grosser         return Expanded;
396e71c6ab5STobias Grosser       }
397e71c6ab5STobias Grosser 
3983b11a16aSHongbin Zheng   // 'Old' is within the original SCoP, but was not rewritten.
3993b11a16aSHongbin Zheng   //
4003b11a16aSHongbin Zheng   // Such values appear, if they only calculate information already available in
4013b11a16aSHongbin Zheng   // the polyhedral description (e.g.  an induction variable increment). They
4023b11a16aSHongbin Zheng   // can be safely ignored.
4033b11a16aSHongbin Zheng   if (const Instruction *Inst = dyn_cast<Instruction>(Old))
4043b11a16aSHongbin Zheng     if (Statement.getParent()->getRegion().contains(Inst->getParent()))
4053b11a16aSHongbin Zheng       return NULL;
4063b11a16aSHongbin Zheng 
4073b11a16aSHongbin Zheng   // Everything else is probably a scop-constant value defined as global,
4083b11a16aSHongbin Zheng   // function parameter or an instruction not within the scop.
4093b11a16aSHongbin Zheng   return const_cast<Value*>(Old);
4103b11a16aSHongbin Zheng }
4113b11a16aSHongbin Zheng 
4123b11a16aSHongbin Zheng void BlockGenerator::copyInstScalar(const Instruction *Inst, ValueMapT &BBMap,
4133b11a16aSHongbin Zheng                                     ValueMapT &GlobalMap) {
4143b11a16aSHongbin Zheng   Instruction *NewInst = Inst->clone();
4153b11a16aSHongbin Zheng 
4163b11a16aSHongbin Zheng   // Replace old operands with the new ones.
4173b11a16aSHongbin Zheng   for (Instruction::const_op_iterator OI = Inst->op_begin(),
4183b11a16aSHongbin Zheng        OE = Inst->op_end(); OI != OE; ++OI) {
4193b11a16aSHongbin Zheng     Value *OldOperand = *OI;
4203b11a16aSHongbin Zheng     Value *NewOperand = getNewValue(OldOperand, BBMap, GlobalMap);
4213b11a16aSHongbin Zheng 
4223b11a16aSHongbin Zheng     if (!NewOperand) {
4233b11a16aSHongbin Zheng       assert(!isa<StoreInst>(NewInst)
4243b11a16aSHongbin Zheng              && "Store instructions are always needed!");
4253b11a16aSHongbin Zheng       delete NewInst;
4263b11a16aSHongbin Zheng       return;
4273b11a16aSHongbin Zheng     }
4283b11a16aSHongbin Zheng 
4293b11a16aSHongbin Zheng     NewInst->replaceUsesOfWith(OldOperand, NewOperand);
4303b11a16aSHongbin Zheng   }
4313b11a16aSHongbin Zheng 
4323b11a16aSHongbin Zheng   Builder.Insert(NewInst);
4333b11a16aSHongbin Zheng   BBMap[Inst] = NewInst;
4343b11a16aSHongbin Zheng 
4353b11a16aSHongbin Zheng   if (!NewInst->getType()->isVoidTy())
4363b11a16aSHongbin Zheng     NewInst->setName("p_" + Inst->getName());
4373b11a16aSHongbin Zheng }
4383b11a16aSHongbin Zheng 
4393b11a16aSHongbin Zheng std::vector<Value*> BlockGenerator::getMemoryAccessIndex(
4403b11a16aSHongbin Zheng   __isl_keep isl_map *AccessRelation, Value *BaseAddress,
4413b11a16aSHongbin Zheng   ValueMapT &BBMap, ValueMapT &GlobalMap) {
4423b11a16aSHongbin Zheng 
4433b11a16aSHongbin Zheng   assert((isl_map_dim(AccessRelation, isl_dim_out) == 1)
4443b11a16aSHongbin Zheng          && "Only single dimensional access functions supported");
4453b11a16aSHongbin Zheng 
4463b11a16aSHongbin Zheng   std::vector<Value *> IVS;
4473b11a16aSHongbin Zheng   for (unsigned i = 0; i < Statement.getNumIterators(); ++i) {
4483b11a16aSHongbin Zheng     const Value *OriginalIV = Statement.getInductionVariableForDimension(i);
4493b11a16aSHongbin Zheng     Value *NewIV = getNewValue(OriginalIV, BBMap, GlobalMap);
4503b11a16aSHongbin Zheng     IVS.push_back(NewIV);
4513b11a16aSHongbin Zheng   }
4523b11a16aSHongbin Zheng 
4533b11a16aSHongbin Zheng   isl_pw_aff *PwAff = isl_map_dim_max(isl_map_copy(AccessRelation), 0);
4543b11a16aSHongbin Zheng   IslGenerator IslGen(Builder, IVS);
4553b11a16aSHongbin Zheng   Value *OffsetValue = IslGen.generateIslPwAff(PwAff);
4563b11a16aSHongbin Zheng 
4573b11a16aSHongbin Zheng   Type *Ty = Builder.getInt64Ty();
4583b11a16aSHongbin Zheng   OffsetValue = Builder.CreateIntCast(OffsetValue, Ty, true);
4593b11a16aSHongbin Zheng 
4603b11a16aSHongbin Zheng   std::vector<Value*> IndexArray;
4613b11a16aSHongbin Zheng   Value *NullValue = Constant::getNullValue(Ty);
4623b11a16aSHongbin Zheng   IndexArray.push_back(NullValue);
4633b11a16aSHongbin Zheng   IndexArray.push_back(OffsetValue);
4643b11a16aSHongbin Zheng   return IndexArray;
4653b11a16aSHongbin Zheng }
4663b11a16aSHongbin Zheng 
4673b11a16aSHongbin Zheng Value *BlockGenerator::getNewAccessOperand(
4683b11a16aSHongbin Zheng   __isl_keep isl_map *NewAccessRelation, Value *BaseAddress,
4693b11a16aSHongbin Zheng   ValueMapT &BBMap, ValueMapT &GlobalMap) {
4703b11a16aSHongbin Zheng   std::vector<Value*> IndexArray = getMemoryAccessIndex(NewAccessRelation,
4713b11a16aSHongbin Zheng                                                         BaseAddress,
4723b11a16aSHongbin Zheng                                                         BBMap, GlobalMap);
4733b11a16aSHongbin Zheng   Value *NewOperand = Builder.CreateGEP(BaseAddress, IndexArray,
4743b11a16aSHongbin Zheng                                         "p_newarrayidx_");
4753b11a16aSHongbin Zheng   return NewOperand;
4763b11a16aSHongbin Zheng }
4773b11a16aSHongbin Zheng 
4783b11a16aSHongbin Zheng Value *BlockGenerator::generateLocationAccessed(const Instruction *Inst,
4793b11a16aSHongbin Zheng                                                 const Value *Pointer,
4803b11a16aSHongbin Zheng                                                 ValueMapT &BBMap,
4813b11a16aSHongbin Zheng                                                 ValueMapT &GlobalMap) {
4823b11a16aSHongbin Zheng   MemoryAccess &Access = Statement.getAccessFor(Inst);
4833b11a16aSHongbin Zheng   isl_map *CurrentAccessRelation = Access.getAccessRelation();
4843b11a16aSHongbin Zheng   isl_map *NewAccessRelation = Access.getNewAccessRelation();
4853b11a16aSHongbin Zheng 
4863b11a16aSHongbin Zheng   assert(isl_map_has_equal_space(CurrentAccessRelation, NewAccessRelation)
4873b11a16aSHongbin Zheng          && "Current and new access function use different spaces");
4883b11a16aSHongbin Zheng 
4893b11a16aSHongbin Zheng   Value *NewPointer;
4903b11a16aSHongbin Zheng 
4913b11a16aSHongbin Zheng   if (!NewAccessRelation) {
4923b11a16aSHongbin Zheng     NewPointer = getNewValue(Pointer, BBMap, GlobalMap);
4933b11a16aSHongbin Zheng   } else {
4943b11a16aSHongbin Zheng     Value *BaseAddress = const_cast<Value*>(Access.getBaseAddr());
4953b11a16aSHongbin Zheng     NewPointer = getNewAccessOperand(NewAccessRelation, BaseAddress,
4963b11a16aSHongbin Zheng                                      BBMap, GlobalMap);
4973b11a16aSHongbin Zheng   }
4983b11a16aSHongbin Zheng 
4993b11a16aSHongbin Zheng   isl_map_free(CurrentAccessRelation);
5003b11a16aSHongbin Zheng   isl_map_free(NewAccessRelation);
5013b11a16aSHongbin Zheng   return NewPointer;
5023b11a16aSHongbin Zheng }
5033b11a16aSHongbin Zheng 
5043b11a16aSHongbin Zheng Value *BlockGenerator::generateScalarLoad(const LoadInst *Load,
5053b11a16aSHongbin Zheng                                           ValueMapT &BBMap,
5063b11a16aSHongbin Zheng                                           ValueMapT &GlobalMap) {
5073b11a16aSHongbin Zheng   const Value *Pointer = Load->getPointerOperand();
5083b11a16aSHongbin Zheng   const Instruction *Inst = dyn_cast<Instruction>(Load);
5093b11a16aSHongbin Zheng   Value *NewPointer = generateLocationAccessed(Inst, Pointer, BBMap, GlobalMap);
5103b11a16aSHongbin Zheng   Value *ScalarLoad = Builder.CreateLoad(NewPointer,
5113b11a16aSHongbin Zheng                                          Load->getName() + "_p_scalar_");
5123b11a16aSHongbin Zheng   return ScalarLoad;
5133b11a16aSHongbin Zheng }
5143b11a16aSHongbin Zheng 
5153b11a16aSHongbin Zheng Value *BlockGenerator::generateScalarStore(const StoreInst *Store,
5163b11a16aSHongbin Zheng                                            ValueMapT &BBMap,
5173b11a16aSHongbin Zheng                                            ValueMapT &GlobalMap) {
5183b11a16aSHongbin Zheng   const Value *Pointer = Store->getPointerOperand();
5193b11a16aSHongbin Zheng   Value *NewPointer = generateLocationAccessed(Store, Pointer, BBMap,
5203b11a16aSHongbin Zheng                                                GlobalMap);
5213b11a16aSHongbin Zheng   Value *ValueOperand = getNewValue(Store->getValueOperand(), BBMap, GlobalMap);
5223b11a16aSHongbin Zheng 
5233b11a16aSHongbin Zheng   return Builder.CreateStore(ValueOperand, NewPointer);
5243b11a16aSHongbin Zheng }
5253b11a16aSHongbin Zheng 
5263b11a16aSHongbin Zheng void BlockGenerator::copyInstruction(const Instruction *Inst,
5273b11a16aSHongbin Zheng                                      ValueMapT &BBMap, ValueMapT &GlobalMap) {
5283b11a16aSHongbin Zheng   // Terminator instructions control the control flow. They are explicitly
5293b11a16aSHongbin Zheng   // expressed in the clast and do not need to be copied.
5303b11a16aSHongbin Zheng   if (Inst->isTerminator())
5313b11a16aSHongbin Zheng     return;
5323b11a16aSHongbin Zheng 
533e71c6ab5STobias Grosser   if (isSCEVIgnore(Inst))
534e71c6ab5STobias Grosser     return;
535e71c6ab5STobias Grosser 
5363b11a16aSHongbin Zheng   if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
5373b11a16aSHongbin Zheng     BBMap[Load] = generateScalarLoad(Load, BBMap, GlobalMap);
5383b11a16aSHongbin Zheng     return;
5393b11a16aSHongbin Zheng   }
5403b11a16aSHongbin Zheng 
5413b11a16aSHongbin Zheng   if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
5423b11a16aSHongbin Zheng     BBMap[Store] = generateScalarStore(Store, BBMap, GlobalMap);
5433b11a16aSHongbin Zheng     return;
5443b11a16aSHongbin Zheng   }
5453b11a16aSHongbin Zheng 
5463b11a16aSHongbin Zheng   copyInstScalar(Inst, BBMap, GlobalMap);
5473b11a16aSHongbin Zheng }
5483b11a16aSHongbin Zheng 
5493b11a16aSHongbin Zheng 
5503b11a16aSHongbin Zheng void BlockGenerator::copyBB(ValueMapT &GlobalMap) {
5513b11a16aSHongbin Zheng   BasicBlock *BB = Statement.getBasicBlock();
5523b11a16aSHongbin Zheng   BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(),
5533b11a16aSHongbin Zheng                                   Builder.GetInsertPoint(), P);
5543b11a16aSHongbin Zheng   CopyBB->setName("polly.stmt." + BB->getName());
5553b11a16aSHongbin Zheng   Builder.SetInsertPoint(CopyBB->begin());
5563b11a16aSHongbin Zheng 
5573b11a16aSHongbin Zheng   ValueMapT BBMap;
5583b11a16aSHongbin Zheng 
5593b11a16aSHongbin Zheng   for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE;
5603b11a16aSHongbin Zheng        ++II)
5613b11a16aSHongbin Zheng       copyInstruction(II, BBMap, GlobalMap);
5623b11a16aSHongbin Zheng }
5633b11a16aSHongbin Zheng 
5643b11a16aSHongbin Zheng VectorBlockGenerator::VectorBlockGenerator(IRBuilder<> &B,
565*a00a0291SSebastian Pop                                            VectorValueMapT &GlobalMaps,
566*a00a0291SSebastian Pop                                            ScopStmt &Stmt,
567*a00a0291SSebastian Pop                                            __isl_keep isl_map *Schedule,
568*a00a0291SSebastian Pop                                            Pass *P)
569*a00a0291SSebastian Pop   : BlockGenerator(B, Stmt, P), GlobalMaps(GlobalMaps), Schedule(Schedule) {
5703b11a16aSHongbin Zheng   assert(GlobalMaps.size() > 1 && "Only one vector lane found");
571*a00a0291SSebastian Pop   assert(Schedule && "No statement domain provided");
5723b11a16aSHongbin Zheng }
5733b11a16aSHongbin Zheng 
5743b11a16aSHongbin Zheng Value *VectorBlockGenerator::getVectorValue(const Value *Old,
5753b11a16aSHongbin Zheng                                             ValueMapT &VectorMap,
5763b11a16aSHongbin Zheng                                             VectorValueMapT &ScalarMaps) {
5773b11a16aSHongbin Zheng   if (VectorMap.count(Old))
5783b11a16aSHongbin Zheng     return VectorMap[Old];
5793b11a16aSHongbin Zheng 
5803b11a16aSHongbin Zheng   int Width = getVectorWidth();
5813b11a16aSHongbin Zheng 
5823b11a16aSHongbin Zheng   Value *Vector = UndefValue::get(VectorType::get(Old->getType(), Width));
5833b11a16aSHongbin Zheng 
5843b11a16aSHongbin Zheng   for (int Lane = 0; Lane < Width; Lane++)
5853b11a16aSHongbin Zheng     Vector = Builder.CreateInsertElement(Vector,
5863b11a16aSHongbin Zheng                                          getNewValue(Old,
5873b11a16aSHongbin Zheng                                                      ScalarMaps[Lane],
5883b11a16aSHongbin Zheng                                                      GlobalMaps[Lane]),
5893b11a16aSHongbin Zheng                                          Builder.getInt32(Lane));
5903b11a16aSHongbin Zheng 
5913b11a16aSHongbin Zheng   VectorMap[Old] = Vector;
5923b11a16aSHongbin Zheng 
5933b11a16aSHongbin Zheng   return Vector;
5943b11a16aSHongbin Zheng }
5953b11a16aSHongbin Zheng 
5963b11a16aSHongbin Zheng Type *VectorBlockGenerator::getVectorPtrTy(const Value *Val, int Width) {
5973b11a16aSHongbin Zheng   PointerType *PointerTy = dyn_cast<PointerType>(Val->getType());
5983b11a16aSHongbin Zheng   assert(PointerTy && "PointerType expected");
5993b11a16aSHongbin Zheng 
6003b11a16aSHongbin Zheng   Type *ScalarType = PointerTy->getElementType();
6013b11a16aSHongbin Zheng   VectorType *VectorType = VectorType::get(ScalarType, Width);
6023b11a16aSHongbin Zheng 
6033b11a16aSHongbin Zheng   return PointerType::getUnqual(VectorType);
6043b11a16aSHongbin Zheng }
6053b11a16aSHongbin Zheng 
6063b11a16aSHongbin Zheng Value *VectorBlockGenerator::generateStrideOneLoad(const LoadInst *Load,
6073b11a16aSHongbin Zheng                                                    ValueMapT &BBMap) {
6083b11a16aSHongbin Zheng   const Value *Pointer = Load->getPointerOperand();
6093b11a16aSHongbin Zheng   Type *VectorPtrType = getVectorPtrTy(Pointer, getVectorWidth());
6103b11a16aSHongbin Zheng   Value *NewPointer = getNewValue(Pointer, BBMap, GlobalMaps[0]);
6113b11a16aSHongbin Zheng   Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
6123b11a16aSHongbin Zheng                                            "vector_ptr");
6133b11a16aSHongbin Zheng   LoadInst *VecLoad = Builder.CreateLoad(VectorPtr,
6143b11a16aSHongbin Zheng                                          Load->getName() + "_p_vec_full");
6153b11a16aSHongbin Zheng   if (!Aligned)
6163b11a16aSHongbin Zheng     VecLoad->setAlignment(8);
6173b11a16aSHongbin Zheng 
6183b11a16aSHongbin Zheng   return VecLoad;
6193b11a16aSHongbin Zheng }
6203b11a16aSHongbin Zheng 
6213b11a16aSHongbin Zheng Value *VectorBlockGenerator::generateStrideZeroLoad(const LoadInst *Load,
6223b11a16aSHongbin Zheng                                                     ValueMapT &BBMap) {
6233b11a16aSHongbin Zheng   const Value *Pointer = Load->getPointerOperand();
6243b11a16aSHongbin Zheng   Type *VectorPtrType = getVectorPtrTy(Pointer, 1);
6253b11a16aSHongbin Zheng   Value *NewPointer = getNewValue(Pointer, BBMap, GlobalMaps[0]);
6263b11a16aSHongbin Zheng   Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
6273b11a16aSHongbin Zheng                                            Load->getName() + "_p_vec_p");
6283b11a16aSHongbin Zheng   LoadInst *ScalarLoad= Builder.CreateLoad(VectorPtr,
6293b11a16aSHongbin Zheng                                            Load->getName() + "_p_splat_one");
6303b11a16aSHongbin Zheng 
6313b11a16aSHongbin Zheng   if (!Aligned)
6323b11a16aSHongbin Zheng     ScalarLoad->setAlignment(8);
6333b11a16aSHongbin Zheng 
6343b11a16aSHongbin Zheng   Constant *SplatVector =
6353b11a16aSHongbin Zheng     Constant::getNullValue(VectorType::get(Builder.getInt32Ty(),
6363b11a16aSHongbin Zheng                                            getVectorWidth()));
6373b11a16aSHongbin Zheng 
6383b11a16aSHongbin Zheng   Value *VectorLoad = Builder.CreateShuffleVector(ScalarLoad, ScalarLoad,
6393b11a16aSHongbin Zheng                                                   SplatVector,
6403b11a16aSHongbin Zheng                                                   Load->getName()
6413b11a16aSHongbin Zheng                                                   + "_p_splat");
6423b11a16aSHongbin Zheng   return VectorLoad;
6433b11a16aSHongbin Zheng }
6443b11a16aSHongbin Zheng 
6453b11a16aSHongbin Zheng Value *VectorBlockGenerator::generateUnknownStrideLoad(const LoadInst *Load,
6463b11a16aSHongbin Zheng   VectorValueMapT &ScalarMaps) {
6473b11a16aSHongbin Zheng   int VectorWidth = getVectorWidth();
6483b11a16aSHongbin Zheng   const Value *Pointer = Load->getPointerOperand();
6493b11a16aSHongbin Zheng   VectorType *VectorType = VectorType::get(
6503b11a16aSHongbin Zheng     dyn_cast<PointerType>(Pointer->getType())->getElementType(), VectorWidth);
6513b11a16aSHongbin Zheng 
6523b11a16aSHongbin Zheng   Value *Vector = UndefValue::get(VectorType);
6533b11a16aSHongbin Zheng 
6543b11a16aSHongbin Zheng   for (int i = 0; i < VectorWidth; i++) {
6553b11a16aSHongbin Zheng     Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i]);
6563b11a16aSHongbin Zheng     Value *ScalarLoad = Builder.CreateLoad(NewPointer,
6573b11a16aSHongbin Zheng                                            Load->getName() + "_p_scalar_");
6583b11a16aSHongbin Zheng     Vector = Builder.CreateInsertElement(Vector, ScalarLoad,
6593b11a16aSHongbin Zheng                                          Builder.getInt32(i),
6603b11a16aSHongbin Zheng                                          Load->getName() + "_p_vec_");
6613b11a16aSHongbin Zheng   }
6623b11a16aSHongbin Zheng 
6633b11a16aSHongbin Zheng   return Vector;
6643b11a16aSHongbin Zheng }
6653b11a16aSHongbin Zheng 
6663b11a16aSHongbin Zheng void VectorBlockGenerator::generateLoad(const LoadInst *Load,
6673b11a16aSHongbin Zheng                                         ValueMapT &VectorMap,
6683b11a16aSHongbin Zheng                                         VectorValueMapT &ScalarMaps) {
66968794217SHongbin Zheng   if (PollyVectorizerChoice >= VECTORIZER_FIRST_NEED_GROUPED_UNROLL ||
67068794217SHongbin Zheng       !VectorType::isValidElementType(Load->getType())) {
6713b11a16aSHongbin Zheng     for (int i = 0; i < getVectorWidth(); i++)
6723b11a16aSHongbin Zheng       ScalarMaps[i][Load] = generateScalarLoad(Load, ScalarMaps[i],
6733b11a16aSHongbin Zheng                                                GlobalMaps[i]);
6743b11a16aSHongbin Zheng     return;
6753b11a16aSHongbin Zheng   }
6763b11a16aSHongbin Zheng 
6773b11a16aSHongbin Zheng   MemoryAccess &Access = Statement.getAccessFor(Load);
6783b11a16aSHongbin Zheng 
6793b11a16aSHongbin Zheng   Value *NewLoad;
680*a00a0291SSebastian Pop   if (Access.isStrideZero(isl_map_copy(Schedule)))
6813b11a16aSHongbin Zheng     NewLoad = generateStrideZeroLoad(Load, ScalarMaps[0]);
682*a00a0291SSebastian Pop   else if (Access.isStrideOne(isl_map_copy(Schedule)))
6833b11a16aSHongbin Zheng     NewLoad = generateStrideOneLoad(Load, ScalarMaps[0]);
6843b11a16aSHongbin Zheng   else
6853b11a16aSHongbin Zheng     NewLoad = generateUnknownStrideLoad(Load, ScalarMaps);
6863b11a16aSHongbin Zheng 
6873b11a16aSHongbin Zheng   VectorMap[Load] = NewLoad;
6883b11a16aSHongbin Zheng }
6893b11a16aSHongbin Zheng 
6903b11a16aSHongbin Zheng void VectorBlockGenerator::copyUnaryInst(const UnaryInstruction *Inst,
6913b11a16aSHongbin Zheng                                          ValueMapT &VectorMap,
6923b11a16aSHongbin Zheng                                          VectorValueMapT &ScalarMaps) {
6933b11a16aSHongbin Zheng   int VectorWidth = getVectorWidth();
6943b11a16aSHongbin Zheng   Value *NewOperand = getVectorValue(Inst->getOperand(0), VectorMap,
6953b11a16aSHongbin Zheng                                      ScalarMaps);
6963b11a16aSHongbin Zheng 
6973b11a16aSHongbin Zheng   assert(isa<CastInst>(Inst) && "Can not generate vector code for instruction");
6983b11a16aSHongbin Zheng 
6993b11a16aSHongbin Zheng   const CastInst *Cast = dyn_cast<CastInst>(Inst);
7003b11a16aSHongbin Zheng   VectorType *DestType = VectorType::get(Inst->getType(), VectorWidth);
7013b11a16aSHongbin Zheng   VectorMap[Inst] = Builder.CreateCast(Cast->getOpcode(), NewOperand, DestType);
7023b11a16aSHongbin Zheng }
7033b11a16aSHongbin Zheng 
7043b11a16aSHongbin Zheng void VectorBlockGenerator::copyBinaryInst(const BinaryOperator *Inst,
7053b11a16aSHongbin Zheng                                           ValueMapT &VectorMap,
7063b11a16aSHongbin Zheng                                           VectorValueMapT &ScalarMaps) {
7073b11a16aSHongbin Zheng   Value *OpZero = Inst->getOperand(0);
7083b11a16aSHongbin Zheng   Value *OpOne = Inst->getOperand(1);
7093b11a16aSHongbin Zheng 
7103b11a16aSHongbin Zheng   Value *NewOpZero, *NewOpOne;
7113b11a16aSHongbin Zheng   NewOpZero = getVectorValue(OpZero, VectorMap, ScalarMaps);
7123b11a16aSHongbin Zheng   NewOpOne = getVectorValue(OpOne, VectorMap, ScalarMaps);
7133b11a16aSHongbin Zheng 
7143b11a16aSHongbin Zheng   Value *NewInst = Builder.CreateBinOp(Inst->getOpcode(), NewOpZero,
7153b11a16aSHongbin Zheng                                        NewOpOne,
7163b11a16aSHongbin Zheng                                        Inst->getName() + "p_vec");
7173b11a16aSHongbin Zheng   VectorMap[Inst] = NewInst;
7183b11a16aSHongbin Zheng }
7193b11a16aSHongbin Zheng 
7203b11a16aSHongbin Zheng void VectorBlockGenerator::copyStore(const StoreInst *Store,
7213b11a16aSHongbin Zheng                                      ValueMapT &VectorMap,
7223b11a16aSHongbin Zheng                                      VectorValueMapT &ScalarMaps) {
7233b11a16aSHongbin Zheng   int VectorWidth = getVectorWidth();
7243b11a16aSHongbin Zheng 
7253b11a16aSHongbin Zheng   MemoryAccess &Access = Statement.getAccessFor(Store);
7263b11a16aSHongbin Zheng 
7273b11a16aSHongbin Zheng   const Value *Pointer = Store->getPointerOperand();
7283b11a16aSHongbin Zheng   Value *Vector = getVectorValue(Store->getValueOperand(), VectorMap,
7293b11a16aSHongbin Zheng                                    ScalarMaps);
7303b11a16aSHongbin Zheng 
731*a00a0291SSebastian Pop   if (Access.isStrideOne(isl_map_copy(Schedule))) {
7323b11a16aSHongbin Zheng     Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth);
7333b11a16aSHongbin Zheng     Value *NewPointer = getNewValue(Pointer, ScalarMaps[0], GlobalMaps[0]);
7343b11a16aSHongbin Zheng 
7353b11a16aSHongbin Zheng     Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
7363b11a16aSHongbin Zheng                                              "vector_ptr");
7373b11a16aSHongbin Zheng     StoreInst *Store = Builder.CreateStore(Vector, VectorPtr);
7383b11a16aSHongbin Zheng 
7393b11a16aSHongbin Zheng     if (!Aligned)
7403b11a16aSHongbin Zheng       Store->setAlignment(8);
7413b11a16aSHongbin Zheng   } else {
7423b11a16aSHongbin Zheng     for (unsigned i = 0; i < ScalarMaps.size(); i++) {
7433b11a16aSHongbin Zheng       Value *Scalar = Builder.CreateExtractElement(Vector,
7443b11a16aSHongbin Zheng                                                    Builder.getInt32(i));
7453b11a16aSHongbin Zheng       Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i]);
7463b11a16aSHongbin Zheng       Builder.CreateStore(Scalar, NewPointer);
7473b11a16aSHongbin Zheng     }
7483b11a16aSHongbin Zheng   }
7493b11a16aSHongbin Zheng }
7503b11a16aSHongbin Zheng 
7513b11a16aSHongbin Zheng bool VectorBlockGenerator::hasVectorOperands(const Instruction *Inst,
7523b11a16aSHongbin Zheng                                              ValueMapT &VectorMap) {
7533b11a16aSHongbin Zheng   for (Instruction::const_op_iterator OI = Inst->op_begin(),
7543b11a16aSHongbin Zheng        OE = Inst->op_end(); OI != OE; ++OI)
7553b11a16aSHongbin Zheng     if (VectorMap.count(*OI))
7563b11a16aSHongbin Zheng       return true;
7573b11a16aSHongbin Zheng   return false;
7583b11a16aSHongbin Zheng }
7593b11a16aSHongbin Zheng 
7603b11a16aSHongbin Zheng bool VectorBlockGenerator::extractScalarValues(const Instruction *Inst,
7613b11a16aSHongbin Zheng                                                ValueMapT &VectorMap,
7623b11a16aSHongbin Zheng                                                VectorValueMapT &ScalarMaps) {
7633b11a16aSHongbin Zheng   bool HasVectorOperand = false;
7643b11a16aSHongbin Zheng   int VectorWidth = getVectorWidth();
7653b11a16aSHongbin Zheng 
7663b11a16aSHongbin Zheng   for (Instruction::const_op_iterator OI = Inst->op_begin(),
7673b11a16aSHongbin Zheng        OE = Inst->op_end(); OI != OE; ++OI) {
7683b11a16aSHongbin Zheng     ValueMapT::iterator VecOp = VectorMap.find(*OI);
7693b11a16aSHongbin Zheng 
7703b11a16aSHongbin Zheng     if (VecOp == VectorMap.end())
7713b11a16aSHongbin Zheng       continue;
7723b11a16aSHongbin Zheng 
7733b11a16aSHongbin Zheng     HasVectorOperand = true;
7743b11a16aSHongbin Zheng     Value *NewVector = VecOp->second;
7753b11a16aSHongbin Zheng 
7763b11a16aSHongbin Zheng     for (int i = 0; i < VectorWidth; ++i) {
7773b11a16aSHongbin Zheng       ValueMapT &SM = ScalarMaps[i];
7783b11a16aSHongbin Zheng 
7793b11a16aSHongbin Zheng       // If there is one scalar extracted, all scalar elements should have
7803b11a16aSHongbin Zheng       // already been extracted by the code here. So no need to check for the
7813b11a16aSHongbin Zheng       // existance of all of them.
7823b11a16aSHongbin Zheng       if (SM.count(*OI))
7833b11a16aSHongbin Zheng         break;
7843b11a16aSHongbin Zheng 
7853b11a16aSHongbin Zheng       SM[*OI] = Builder.CreateExtractElement(NewVector, Builder.getInt32(i));
7863b11a16aSHongbin Zheng     }
7873b11a16aSHongbin Zheng   }
7883b11a16aSHongbin Zheng 
7893b11a16aSHongbin Zheng   return HasVectorOperand;
7903b11a16aSHongbin Zheng }
7913b11a16aSHongbin Zheng 
7923b11a16aSHongbin Zheng void VectorBlockGenerator::copyInstScalarized(const Instruction *Inst,
7933b11a16aSHongbin Zheng                                               ValueMapT &VectorMap,
7943b11a16aSHongbin Zheng                                               VectorValueMapT &ScalarMaps) {
7953b11a16aSHongbin Zheng   bool HasVectorOperand;
7963b11a16aSHongbin Zheng   int VectorWidth = getVectorWidth();
7973b11a16aSHongbin Zheng 
7983b11a16aSHongbin Zheng   HasVectorOperand = extractScalarValues(Inst, VectorMap, ScalarMaps);
7993b11a16aSHongbin Zheng 
8003b11a16aSHongbin Zheng   for (int VectorLane = 0; VectorLane < getVectorWidth(); VectorLane++)
8013b11a16aSHongbin Zheng     copyInstScalar(Inst, ScalarMaps[VectorLane], GlobalMaps[VectorLane]);
8023b11a16aSHongbin Zheng 
8033b11a16aSHongbin Zheng   if (!VectorType::isValidElementType(Inst->getType()) || !HasVectorOperand)
8043b11a16aSHongbin Zheng     return;
8053b11a16aSHongbin Zheng 
8063b11a16aSHongbin Zheng   // Make the result available as vector value.
8073b11a16aSHongbin Zheng   VectorType *VectorType = VectorType::get(Inst->getType(), VectorWidth);
8083b11a16aSHongbin Zheng   Value *Vector = UndefValue::get(VectorType);
8093b11a16aSHongbin Zheng 
8103b11a16aSHongbin Zheng   for (int i = 0; i < VectorWidth; i++)
8113b11a16aSHongbin Zheng     Vector = Builder.CreateInsertElement(Vector, ScalarMaps[i][Inst],
8123b11a16aSHongbin Zheng                                          Builder.getInt32(i));
8133b11a16aSHongbin Zheng 
8143b11a16aSHongbin Zheng   VectorMap[Inst] = Vector;
8153b11a16aSHongbin Zheng }
8163b11a16aSHongbin Zheng 
8173b11a16aSHongbin Zheng int VectorBlockGenerator::getVectorWidth() {
8183b11a16aSHongbin Zheng   return GlobalMaps.size();
8193b11a16aSHongbin Zheng }
8203b11a16aSHongbin Zheng 
8213b11a16aSHongbin Zheng void VectorBlockGenerator::copyInstruction(const Instruction *Inst,
8223b11a16aSHongbin Zheng                                            ValueMapT &VectorMap,
8233b11a16aSHongbin Zheng                                            VectorValueMapT &ScalarMaps) {
8243b11a16aSHongbin Zheng   // Terminator instructions control the control flow. They are explicitly
8253b11a16aSHongbin Zheng   // expressed in the clast and do not need to be copied.
8263b11a16aSHongbin Zheng   if (Inst->isTerminator())
8273b11a16aSHongbin Zheng     return;
8283b11a16aSHongbin Zheng 
829e71c6ab5STobias Grosser   if (isSCEVIgnore(Inst))
830e71c6ab5STobias Grosser     return;
831e71c6ab5STobias Grosser 
8323b11a16aSHongbin Zheng   if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
8333b11a16aSHongbin Zheng     generateLoad(Load, VectorMap, ScalarMaps);
8343b11a16aSHongbin Zheng     return;
8353b11a16aSHongbin Zheng   }
8363b11a16aSHongbin Zheng 
8373b11a16aSHongbin Zheng   if (hasVectorOperands(Inst, VectorMap)) {
8383b11a16aSHongbin Zheng     if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
8393b11a16aSHongbin Zheng       copyStore(Store, VectorMap, ScalarMaps);
8403b11a16aSHongbin Zheng       return;
8413b11a16aSHongbin Zheng     }
8423b11a16aSHongbin Zheng 
8433b11a16aSHongbin Zheng     if (const UnaryInstruction *Unary = dyn_cast<UnaryInstruction>(Inst)) {
8443b11a16aSHongbin Zheng       copyUnaryInst(Unary, VectorMap, ScalarMaps);
8453b11a16aSHongbin Zheng       return;
8463b11a16aSHongbin Zheng     }
8473b11a16aSHongbin Zheng 
8483b11a16aSHongbin Zheng     if (const BinaryOperator *Binary = dyn_cast<BinaryOperator>(Inst)) {
8493b11a16aSHongbin Zheng       copyBinaryInst(Binary, VectorMap, ScalarMaps);
8503b11a16aSHongbin Zheng       return;
8513b11a16aSHongbin Zheng     }
8523b11a16aSHongbin Zheng 
8533b11a16aSHongbin Zheng     // Falltrough: We generate scalar instructions, if we don't know how to
8543b11a16aSHongbin Zheng     // generate vector code.
8553b11a16aSHongbin Zheng   }
8563b11a16aSHongbin Zheng 
8573b11a16aSHongbin Zheng   copyInstScalarized(Inst, VectorMap, ScalarMaps);
8583b11a16aSHongbin Zheng }
8593b11a16aSHongbin Zheng 
8603b11a16aSHongbin Zheng void VectorBlockGenerator::copyBB() {
8613b11a16aSHongbin Zheng   BasicBlock *BB = Statement.getBasicBlock();
8623b11a16aSHongbin Zheng   BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(),
8633b11a16aSHongbin Zheng                                   Builder.GetInsertPoint(), P);
8643b11a16aSHongbin Zheng   CopyBB->setName("polly.stmt." + BB->getName());
8653b11a16aSHongbin Zheng   Builder.SetInsertPoint(CopyBB->begin());
8663b11a16aSHongbin Zheng 
8673b11a16aSHongbin Zheng   // Create two maps that store the mapping from the original instructions of
8683b11a16aSHongbin Zheng   // the old basic block to their copies in the new basic block. Those maps
8693b11a16aSHongbin Zheng   // are basic block local.
8703b11a16aSHongbin Zheng   //
8713b11a16aSHongbin Zheng   // As vector code generation is supported there is one map for scalar values
8723b11a16aSHongbin Zheng   // and one for vector values.
8733b11a16aSHongbin Zheng   //
8743b11a16aSHongbin Zheng   // In case we just do scalar code generation, the vectorMap is not used and
8753b11a16aSHongbin Zheng   // the scalarMap has just one dimension, which contains the mapping.
8763b11a16aSHongbin Zheng   //
8773b11a16aSHongbin Zheng   // In case vector code generation is done, an instruction may either appear
8783b11a16aSHongbin Zheng   // in the vector map once (as it is calculating >vectorwidth< values at a
8793b11a16aSHongbin Zheng   // time. Or (if the values are calculated using scalar operations), it
8803b11a16aSHongbin Zheng   // appears once in every dimension of the scalarMap.
8813b11a16aSHongbin Zheng   VectorValueMapT ScalarBlockMap(getVectorWidth());
8823b11a16aSHongbin Zheng   ValueMapT VectorBlockMap;
8833b11a16aSHongbin Zheng 
8843b11a16aSHongbin Zheng   for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end();
8853b11a16aSHongbin Zheng        II != IE; ++II)
8863b11a16aSHongbin Zheng       copyInstruction(II, VectorBlockMap, ScalarBlockMap);
8873b11a16aSHongbin Zheng }
888