13b11a16aSHongbin Zheng //===--- BlockGenerators.cpp - Generate code for statements -----*- C++ -*-===//
23b11a16aSHongbin Zheng //
33b11a16aSHongbin Zheng //                     The LLVM Compiler Infrastructure
43b11a16aSHongbin Zheng //
53b11a16aSHongbin Zheng // This file is distributed under the University of Illinois Open Source
63b11a16aSHongbin Zheng // License. See LICENSE.TXT for details.
73b11a16aSHongbin Zheng //
83b11a16aSHongbin Zheng //===----------------------------------------------------------------------===//
93b11a16aSHongbin Zheng //
103b11a16aSHongbin Zheng // This file implements the BlockGenerator and VectorBlockGenerator classes,
113b11a16aSHongbin Zheng // which generate sequential code and vectorized code for a polyhedral
123b11a16aSHongbin Zheng // statement, respectively.
133b11a16aSHongbin Zheng //
143b11a16aSHongbin Zheng //===----------------------------------------------------------------------===//
153b11a16aSHongbin Zheng 
163b11a16aSHongbin Zheng #include "polly/ScopInfo.h"
1768794217SHongbin Zheng #include "polly/CodeGen/CodeGeneration.h"
188a846610SHongbin Zheng #include "polly/CodeGen/BlockGenerators.h"
193b11a16aSHongbin Zheng #include "polly/Support/GICHelper.h"
203b11a16aSHongbin Zheng 
21e71c6ab5STobias Grosser #include "llvm/Analysis/LoopInfo.h"
22e71c6ab5STobias Grosser #include "llvm/Analysis/ScalarEvolution.h"
23e71c6ab5STobias Grosser #include "llvm/Analysis/ScalarEvolutionExpander.h"
243b11a16aSHongbin Zheng #include "llvm/Transforms/Utils/BasicBlockUtils.h"
253b11a16aSHongbin Zheng #include "llvm/Support/CommandLine.h"
263b11a16aSHongbin Zheng 
273b11a16aSHongbin Zheng #include "isl/aff.h"
283b11a16aSHongbin Zheng #include "isl/set.h"
293b11a16aSHongbin Zheng 
303b11a16aSHongbin Zheng using namespace llvm;
313b11a16aSHongbin Zheng using namespace polly;
323b11a16aSHongbin Zheng 
333b11a16aSHongbin Zheng static cl::opt<bool>
343b11a16aSHongbin Zheng Aligned("enable-polly-aligned",
353b11a16aSHongbin Zheng        cl::desc("Assumed aligned memory accesses."), cl::Hidden,
363b11a16aSHongbin Zheng        cl::value_desc("OpenMP code generation enabled if true"),
373b11a16aSHongbin Zheng        cl::init(false), cl::ZeroOrMore);
383b11a16aSHongbin Zheng 
393b11a16aSHongbin Zheng static cl::opt<bool>
40e71c6ab5STobias Grosser SCEVCodegen("polly-codegen-scev",
41e71c6ab5STobias Grosser             cl::desc("Use SCEV based code generation."), cl::Hidden,
42e71c6ab5STobias Grosser             cl::init(false), cl::ZeroOrMore);
43e71c6ab5STobias Grosser 
44e71c6ab5STobias Grosser /// The SCEVRewriter takes a scalar evolution expression and updates the
45e71c6ab5STobias Grosser /// following components:
46e71c6ab5STobias Grosser ///
47e71c6ab5STobias Grosser /// - SCEVUnknown
48e71c6ab5STobias Grosser ///
49e71c6ab5STobias Grosser ///   Values referenced in SCEVUnknown subexpressions are looked up in
50e71c6ab5STobias Grosser ///   two Value to Value maps (GlobalMap and BBMap). If they are found they are
51e71c6ab5STobias Grosser ///   replaced by a reference to the value they map to.
52e71c6ab5STobias Grosser ///
53e71c6ab5STobias Grosser /// - SCEVAddRecExpr
54e71c6ab5STobias Grosser ///
55e71c6ab5STobias Grosser ///   Based on a Loop -> Value map {Loop_1: %Value}, an expression
56e71c6ab5STobias Grosser ///   {%Base, +, %Step}<Loop_1> is rewritten to %Base + %Value * %Step.
57e71c6ab5STobias Grosser ///   AddRecExpr's with more than two operands can not be translated.
58e71c6ab5STobias Grosser ///
59e71c6ab5STobias Grosser ///   FIXME: The comment above is not yet reality. At the moment we derive
60e71c6ab5STobias Grosser ///   %Value by looking up the canonical IV of the loop and by defining
61e71c6ab5STobias Grosser ///   %Value = GlobalMap[%IV]. This needs to be changed to remove the need for
62e71c6ab5STobias Grosser ///   canonical induction variables.
63e71c6ab5STobias Grosser ///
64e71c6ab5STobias Grosser ///
65e71c6ab5STobias Grosser /// How can this be used?
66e71c6ab5STobias Grosser /// ====================
67e71c6ab5STobias Grosser ///
68e71c6ab5STobias Grosser /// SCEVRewrite based code generation works on virtually independent blocks.
69e71c6ab5STobias Grosser /// This means we do not run the independent blocks pass to rewrite scalar
70e71c6ab5STobias Grosser /// instructions, but just ignore instructions that we can analyze with scalar
71e71c6ab5STobias Grosser /// evolution. Virtually independent blocks are blocks that only reference the
72e71c6ab5STobias Grosser /// following values:
73e71c6ab5STobias Grosser ///
74e71c6ab5STobias Grosser /// o Values calculated within a basic block
75e71c6ab5STobias Grosser /// o Values representable by SCEV
76e71c6ab5STobias Grosser ///
77e71c6ab5STobias Grosser /// During code generation we can ignore all instructions:
78e71c6ab5STobias Grosser ///
79e71c6ab5STobias Grosser /// - Ignore all instructions except:
80e71c6ab5STobias Grosser ///   - Load instructions
81e71c6ab5STobias Grosser ///   - Instructions that reference operands already calculated within the
82e71c6ab5STobias Grosser ///     basic block.
83e71c6ab5STobias Grosser ///   - Store instructions
84e71c6ab5STobias Grosser struct SCEVRewriter : public SCEVVisitor<SCEVRewriter, const SCEV*> {
85e71c6ab5STobias Grosser public:
86e71c6ab5STobias Grosser   static const SCEV *rewrite(const SCEV *scev, Scop &S, ScalarEvolution &SE,
87e71c6ab5STobias Grosser                              ValueMapT &GlobalMap, ValueMapT &BBMap) {
88e71c6ab5STobias Grosser     SCEVRewriter Rewriter(S, SE, GlobalMap, BBMap);
89e71c6ab5STobias Grosser     return Rewriter.visit(scev);
90e71c6ab5STobias Grosser   }
91e71c6ab5STobias Grosser 
92e71c6ab5STobias Grosser   SCEVRewriter(Scop &S, ScalarEvolution &SE, ValueMapT &GlobalMap,
93e71c6ab5STobias Grosser                ValueMapT &BBMap) : S(S), SE(SE), GlobalMap(GlobalMap),
94e71c6ab5STobias Grosser                BBMap(BBMap) {}
95e71c6ab5STobias Grosser 
96e71c6ab5STobias Grosser   const SCEV *visit(const SCEV *Expr) {
97e71c6ab5STobias Grosser     // FIXME: The parameter handling is incorrect.
98e71c6ab5STobias Grosser     //
99e71c6ab5STobias Grosser     // Polly does only detect parameters in Access function and loop iteration
100e71c6ab5STobias Grosser     // counters, but it does not get parameters that are just used by
101e71c6ab5STobias Grosser     // instructions within the basic block.
102e71c6ab5STobias Grosser     //
103e71c6ab5STobias Grosser     // There are two options to solve this:
104e71c6ab5STobias Grosser     //  o Iterate over all instructions of the SCoP and find the actual
105e71c6ab5STobias Grosser     //    parameters.
106e71c6ab5STobias Grosser     //  o Just check within the SCEVRewriter if Values lay outside of the SCoP
107e71c6ab5STobias Grosser     //    and detect parameters on the fly.
108e71c6ab5STobias Grosser     //
109e71c6ab5STobias Grosser     // This is especially important for OpenMP and GPGPU code generation, as
110e71c6ab5STobias Grosser     // they require us to detect and possibly rewrite the corresponding
111e71c6ab5STobias Grosser     // parameters.
112e71c6ab5STobias Grosser     if (isl_id *Id = S.getIdForParam(Expr)) {
113e71c6ab5STobias Grosser       isl_id_free(Id);
114e71c6ab5STobias Grosser       return Expr;
115e71c6ab5STobias Grosser     }
116e71c6ab5STobias Grosser 
117e71c6ab5STobias Grosser 
118e71c6ab5STobias Grosser     return SCEVVisitor<SCEVRewriter, const SCEV*>::visit(Expr);
119e71c6ab5STobias Grosser   }
120e71c6ab5STobias Grosser 
121e71c6ab5STobias Grosser   const SCEV *visitConstant(const SCEVConstant *Constant) {
122e71c6ab5STobias Grosser     return Constant;
123e71c6ab5STobias Grosser   }
124e71c6ab5STobias Grosser 
125e71c6ab5STobias Grosser   const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) {
126e71c6ab5STobias Grosser     const SCEV *Operand = visit(Expr->getOperand());
127e71c6ab5STobias Grosser     return SE.getTruncateExpr(Operand, Expr->getType());
128e71c6ab5STobias Grosser   }
129e71c6ab5STobias Grosser 
130e71c6ab5STobias Grosser   const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
131e71c6ab5STobias Grosser     const SCEV *Operand = visit(Expr->getOperand());
132e71c6ab5STobias Grosser     return SE.getZeroExtendExpr(Operand, Expr->getType());
133e71c6ab5STobias Grosser   }
134e71c6ab5STobias Grosser 
135e71c6ab5STobias Grosser   const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
136e71c6ab5STobias Grosser     const SCEV *Operand = visit(Expr->getOperand());
137e71c6ab5STobias Grosser     return SE.getSignExtendExpr(Operand, Expr->getType());
138e71c6ab5STobias Grosser   }
139e71c6ab5STobias Grosser 
140e71c6ab5STobias Grosser   const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
141e71c6ab5STobias Grosser     SmallVector<const SCEV *, 2> Operands;
142e71c6ab5STobias Grosser     for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
143e71c6ab5STobias Grosser       const SCEV *Operand = visit(Expr->getOperand(i));
144e71c6ab5STobias Grosser       Operands.push_back(Operand);
145e71c6ab5STobias Grosser     }
146e71c6ab5STobias Grosser 
147e71c6ab5STobias Grosser     return SE.getAddExpr(Operands);
148e71c6ab5STobias Grosser   }
149e71c6ab5STobias Grosser 
150e71c6ab5STobias Grosser   const SCEV *visitMulExpr(const SCEVMulExpr *Expr) {
151e71c6ab5STobias Grosser     SmallVector<const SCEV *, 2> Operands;
152e71c6ab5STobias Grosser     for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
153e71c6ab5STobias Grosser       const SCEV *Operand = visit(Expr->getOperand(i));
154e71c6ab5STobias Grosser       Operands.push_back(Operand);
155e71c6ab5STobias Grosser     }
156e71c6ab5STobias Grosser 
157e71c6ab5STobias Grosser     return SE.getMulExpr(Operands);
158e71c6ab5STobias Grosser   }
159e71c6ab5STobias Grosser 
160e71c6ab5STobias Grosser   const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) {
161e71c6ab5STobias Grosser     return SE.getUDivExpr(visit(Expr->getLHS()), visit(Expr->getRHS()));
162e71c6ab5STobias Grosser   }
163e71c6ab5STobias Grosser 
164e71c6ab5STobias Grosser   // Return a new induction variable if the loop is within the original SCoP
165e71c6ab5STobias Grosser   // or NULL otherwise.
166e71c6ab5STobias Grosser   Value *getNewIV(const Loop *L) {
167e71c6ab5STobias Grosser     Value *IV = L->getCanonicalInductionVariable();
168e71c6ab5STobias Grosser     if (!IV)
169e71c6ab5STobias Grosser       return NULL;
170e71c6ab5STobias Grosser 
171e71c6ab5STobias Grosser     ValueMapT::iterator NewIV = GlobalMap.find(IV);
172e71c6ab5STobias Grosser 
173e71c6ab5STobias Grosser     if (NewIV == GlobalMap.end())
174e71c6ab5STobias Grosser       return NULL;
175e71c6ab5STobias Grosser 
176e71c6ab5STobias Grosser     return NewIV->second;
177e71c6ab5STobias Grosser   }
178e71c6ab5STobias Grosser 
179e71c6ab5STobias Grosser   const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
180e71c6ab5STobias Grosser     Value *IV;
181e71c6ab5STobias Grosser 
182e71c6ab5STobias Grosser     IV = getNewIV(Expr->getLoop());
183e71c6ab5STobias Grosser 
184e71c6ab5STobias Grosser     // The IV is not within the GlobalMaps. So do not rewrite it and also do
185e71c6ab5STobias Grosser     // not rewrite any descendants.
186e71c6ab5STobias Grosser     if (!IV)
187e71c6ab5STobias Grosser       return Expr;
188e71c6ab5STobias Grosser 
189*ae2d83ecSTobias Grosser     assert(Expr->getNumOperands() == 2 &&
190*ae2d83ecSTobias Grosser            "An AddRecExpr with more than two operands can not be rewritten.");
191e71c6ab5STobias Grosser 
192e71c6ab5STobias Grosser     const SCEV *Base, *Step, *IVExpr, *Product;
193e71c6ab5STobias Grosser 
194e71c6ab5STobias Grosser     Base = visit(Expr->getStart());
195e71c6ab5STobias Grosser     Step = visit(Expr->getOperand(1));
196e71c6ab5STobias Grosser     IVExpr = SE.getUnknown(IV);
197e71c6ab5STobias Grosser     IVExpr = SE.getTruncateOrSignExtend(IVExpr, Step->getType());
198e71c6ab5STobias Grosser     Product = SE.getMulExpr(Step, IVExpr);
199e71c6ab5STobias Grosser 
200e71c6ab5STobias Grosser     return SE.getAddExpr(Base, Product);
201e71c6ab5STobias Grosser   }
202e71c6ab5STobias Grosser 
203e71c6ab5STobias Grosser   const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
204e71c6ab5STobias Grosser     SmallVector<const SCEV *, 2> Operands;
205e71c6ab5STobias Grosser     for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
206e71c6ab5STobias Grosser       const SCEV *Operand = visit(Expr->getOperand(i));
207e71c6ab5STobias Grosser       Operands.push_back(Operand);
208e71c6ab5STobias Grosser     }
209e71c6ab5STobias Grosser 
210e71c6ab5STobias Grosser     return SE.getSMaxExpr(Operands);
211e71c6ab5STobias Grosser   }
212e71c6ab5STobias Grosser 
213e71c6ab5STobias Grosser   const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) {
214e71c6ab5STobias Grosser     SmallVector<const SCEV *, 2> Operands;
215e71c6ab5STobias Grosser     for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
216e71c6ab5STobias Grosser       const SCEV *Operand = visit(Expr->getOperand(i));
217e71c6ab5STobias Grosser       Operands.push_back(Operand);
218e71c6ab5STobias Grosser     }
219e71c6ab5STobias Grosser 
220e71c6ab5STobias Grosser     return SE.getUMaxExpr(Operands);
221e71c6ab5STobias Grosser   }
222e71c6ab5STobias Grosser 
223e71c6ab5STobias Grosser   const SCEV *visitUnknown(const SCEVUnknown *Expr) {
224e71c6ab5STobias Grosser     Value *V = Expr->getValue();
225e71c6ab5STobias Grosser 
226e71c6ab5STobias Grosser     if (GlobalMap.count(V))
227e71c6ab5STobias Grosser       return SE.getUnknown(GlobalMap[V]);
228e71c6ab5STobias Grosser 
229e71c6ab5STobias Grosser     if (BBMap.count(V))
230e71c6ab5STobias Grosser       return SE.getUnknown(BBMap[V]);
231e71c6ab5STobias Grosser 
232e71c6ab5STobias Grosser     return Expr;
233e71c6ab5STobias Grosser   }
234e71c6ab5STobias Grosser 
235e71c6ab5STobias Grosser private:
236e71c6ab5STobias Grosser   Scop &S;
237e71c6ab5STobias Grosser   ScalarEvolution &SE;
238e71c6ab5STobias Grosser   ValueMapT &GlobalMap;
239e71c6ab5STobias Grosser   ValueMapT &BBMap;
240e71c6ab5STobias Grosser };
241e71c6ab5STobias Grosser 
2423b11a16aSHongbin Zheng // Helper class to generate memory location.
2433b11a16aSHongbin Zheng namespace {
2443b11a16aSHongbin Zheng class IslGenerator {
2453b11a16aSHongbin Zheng public:
2463b11a16aSHongbin Zheng   IslGenerator(IRBuilder<> &Builder, std::vector<Value *> &IVS) :
2473b11a16aSHongbin Zheng     Builder(Builder), IVS(IVS) {}
2483b11a16aSHongbin Zheng   Value *generateIslInt(__isl_take isl_int Int);
2493b11a16aSHongbin Zheng   Value *generateIslAff(__isl_take isl_aff *Aff);
2503b11a16aSHongbin Zheng   Value *generateIslPwAff(__isl_take isl_pw_aff *PwAff);
2513b11a16aSHongbin Zheng 
2523b11a16aSHongbin Zheng private:
2533b11a16aSHongbin Zheng   typedef struct {
2543b11a16aSHongbin Zheng     Value *Result;
2553b11a16aSHongbin Zheng     class IslGenerator *Generator;
2563b11a16aSHongbin Zheng   } IslGenInfo;
2573b11a16aSHongbin Zheng 
2583b11a16aSHongbin Zheng   IRBuilder<> &Builder;
2593b11a16aSHongbin Zheng   std::vector<Value *> &IVS;
2601bb59b0dSTobias Grosser   static int mergeIslAffValues(__isl_take isl_set *Set, __isl_take isl_aff *Aff,
2611bb59b0dSTobias Grosser                                void *User);
2623b11a16aSHongbin Zheng };
2633b11a16aSHongbin Zheng }
2643b11a16aSHongbin Zheng 
2653b11a16aSHongbin Zheng Value *IslGenerator::generateIslInt(isl_int Int) {
2663b11a16aSHongbin Zheng   mpz_t IntMPZ;
2673b11a16aSHongbin Zheng   mpz_init(IntMPZ);
2683b11a16aSHongbin Zheng   isl_int_get_gmp(Int, IntMPZ);
2693b11a16aSHongbin Zheng   Value *IntValue = Builder.getInt(APInt_from_MPZ(IntMPZ));
2703b11a16aSHongbin Zheng   mpz_clear(IntMPZ);
2713b11a16aSHongbin Zheng   return IntValue;
2723b11a16aSHongbin Zheng }
2733b11a16aSHongbin Zheng 
2743b11a16aSHongbin Zheng Value *IslGenerator::generateIslAff(__isl_take isl_aff *Aff) {
2753b11a16aSHongbin Zheng   Value *Result;
2763b11a16aSHongbin Zheng   Value *ConstValue;
2773b11a16aSHongbin Zheng   isl_int ConstIsl;
2783b11a16aSHongbin Zheng 
2793b11a16aSHongbin Zheng   isl_int_init(ConstIsl);
2803b11a16aSHongbin Zheng   isl_aff_get_constant(Aff, &ConstIsl);
2813b11a16aSHongbin Zheng   ConstValue = generateIslInt(ConstIsl);
2823b11a16aSHongbin Zheng   Type *Ty = Builder.getInt64Ty();
2833b11a16aSHongbin Zheng 
2843b11a16aSHongbin Zheng   // FIXME: We should give the constant and coefficients the right type. Here
2853b11a16aSHongbin Zheng   // we force it into i64.
2863b11a16aSHongbin Zheng   Result = Builder.CreateSExtOrBitCast(ConstValue, Ty);
2873b11a16aSHongbin Zheng 
2883b11a16aSHongbin Zheng   unsigned int NbInputDims = isl_aff_dim(Aff, isl_dim_in);
2893b11a16aSHongbin Zheng 
2903b11a16aSHongbin Zheng   assert((IVS.size() == NbInputDims) && "The Dimension of Induction Variables"
2913b11a16aSHongbin Zheng          "must match the dimension of the affine space.");
2923b11a16aSHongbin Zheng 
2933b11a16aSHongbin Zheng   isl_int CoefficientIsl;
2943b11a16aSHongbin Zheng   isl_int_init(CoefficientIsl);
2953b11a16aSHongbin Zheng 
2963b11a16aSHongbin Zheng   for (unsigned int i = 0; i < NbInputDims; ++i) {
2973b11a16aSHongbin Zheng     Value *CoefficientValue;
2983b11a16aSHongbin Zheng     isl_aff_get_coefficient(Aff, isl_dim_in, i, &CoefficientIsl);
2993b11a16aSHongbin Zheng 
3003b11a16aSHongbin Zheng     if (isl_int_is_zero(CoefficientIsl))
3013b11a16aSHongbin Zheng       continue;
3023b11a16aSHongbin Zheng 
3033b11a16aSHongbin Zheng     CoefficientValue = generateIslInt(CoefficientIsl);
3043b11a16aSHongbin Zheng     CoefficientValue = Builder.CreateIntCast(CoefficientValue, Ty, true);
3053b11a16aSHongbin Zheng     Value *IV = Builder.CreateIntCast(IVS[i], Ty, true);
3063b11a16aSHongbin Zheng     Value *PAdd = Builder.CreateMul(CoefficientValue, IV, "p_mul_coeff");
3073b11a16aSHongbin Zheng     Result = Builder.CreateAdd(Result, PAdd, "p_sum_coeff");
3083b11a16aSHongbin Zheng   }
3093b11a16aSHongbin Zheng 
3103b11a16aSHongbin Zheng   isl_int_clear(CoefficientIsl);
3113b11a16aSHongbin Zheng   isl_int_clear(ConstIsl);
3123b11a16aSHongbin Zheng   isl_aff_free(Aff);
3133b11a16aSHongbin Zheng 
3143b11a16aSHongbin Zheng   return Result;
3153b11a16aSHongbin Zheng }
3163b11a16aSHongbin Zheng 
3173b11a16aSHongbin Zheng int IslGenerator::mergeIslAffValues(__isl_take isl_set *Set,
3183b11a16aSHongbin Zheng                                     __isl_take isl_aff *Aff, void *User) {
3193b11a16aSHongbin Zheng   IslGenInfo *GenInfo = (IslGenInfo *)User;
3203b11a16aSHongbin Zheng 
3213b11a16aSHongbin Zheng   assert((GenInfo->Result == NULL) && "Result is already set."
3223b11a16aSHongbin Zheng          "Currently only single isl_aff is supported");
3233b11a16aSHongbin Zheng   assert(isl_set_plain_is_universe(Set)
3243b11a16aSHongbin Zheng          && "Code generation failed because the set is not universe");
3253b11a16aSHongbin Zheng 
3263b11a16aSHongbin Zheng   GenInfo->Result = GenInfo->Generator->generateIslAff(Aff);
3273b11a16aSHongbin Zheng 
3283b11a16aSHongbin Zheng   isl_set_free(Set);
3293b11a16aSHongbin Zheng   return 0;
3303b11a16aSHongbin Zheng }
3313b11a16aSHongbin Zheng 
3323b11a16aSHongbin Zheng Value *IslGenerator::generateIslPwAff(__isl_take isl_pw_aff *PwAff) {
3333b11a16aSHongbin Zheng   IslGenInfo User;
3343b11a16aSHongbin Zheng   User.Result = NULL;
3353b11a16aSHongbin Zheng   User.Generator = this;
3363b11a16aSHongbin Zheng   isl_pw_aff_foreach_piece(PwAff, mergeIslAffValues, &User);
3373b11a16aSHongbin Zheng   assert(User.Result && "Code generation for isl_pw_aff failed");
3383b11a16aSHongbin Zheng 
3393b11a16aSHongbin Zheng   isl_pw_aff_free(PwAff);
3403b11a16aSHongbin Zheng   return User.Result;
3413b11a16aSHongbin Zheng }
3423b11a16aSHongbin Zheng 
3433b11a16aSHongbin Zheng 
3443b11a16aSHongbin Zheng BlockGenerator::BlockGenerator(IRBuilder<> &B, ScopStmt &Stmt, Pass *P):
345e71c6ab5STobias Grosser   Builder(B), Statement(Stmt), P(P), SE(P->getAnalysis<ScalarEvolution>()) {}
346e71c6ab5STobias Grosser 
347e71c6ab5STobias Grosser bool BlockGenerator::isSCEVIgnore(const Instruction *Inst) {
348e71c6ab5STobias Grosser   if (SCEVCodegen && SE.isSCEVable(Inst->getType()))
349e71c6ab5STobias Grosser     if (const SCEV *Scev = SE.getSCEV(const_cast<Instruction*>(Inst)))
350e71c6ab5STobias Grosser       if (!isa<SCEVCouldNotCompute>(Scev)) {
351e71c6ab5STobias Grosser         if (const SCEVUnknown *Unknown = dyn_cast<SCEVUnknown>(Scev)) {
352e71c6ab5STobias Grosser           if (Unknown->getValue() != Inst)
353e71c6ab5STobias Grosser             return true;
354e71c6ab5STobias Grosser         } else {
355e71c6ab5STobias Grosser           return true;
356e71c6ab5STobias Grosser         }
357e71c6ab5STobias Grosser       }
358e71c6ab5STobias Grosser 
359e71c6ab5STobias Grosser   return false;
360e71c6ab5STobias Grosser }
3613b11a16aSHongbin Zheng 
3623b11a16aSHongbin Zheng Value *BlockGenerator::getNewValue(const Value *Old, ValueMapT &BBMap,
3633b11a16aSHongbin Zheng                                    ValueMapT &GlobalMap) {
3643b11a16aSHongbin Zheng   // We assume constants never change.
3653b11a16aSHongbin Zheng   // This avoids map lookups for many calls to this function.
3663b11a16aSHongbin Zheng   if (isa<Constant>(Old))
3673b11a16aSHongbin Zheng     return const_cast<Value*>(Old);
3683b11a16aSHongbin Zheng 
3693b11a16aSHongbin Zheng   if (GlobalMap.count(Old)) {
3703b11a16aSHongbin Zheng     Value *New = GlobalMap[Old];
3713b11a16aSHongbin Zheng 
3723b11a16aSHongbin Zheng     if (Old->getType()->getScalarSizeInBits()
3733b11a16aSHongbin Zheng         < New->getType()->getScalarSizeInBits())
3743b11a16aSHongbin Zheng       New = Builder.CreateTruncOrBitCast(New, Old->getType());
3753b11a16aSHongbin Zheng 
3763b11a16aSHongbin Zheng     return New;
3773b11a16aSHongbin Zheng   }
3783b11a16aSHongbin Zheng 
3793b11a16aSHongbin Zheng   if (BBMap.count(Old)) {
3803b11a16aSHongbin Zheng     return BBMap[Old];
3813b11a16aSHongbin Zheng   }
3823b11a16aSHongbin Zheng 
383e71c6ab5STobias Grosser   if (SCEVCodegen && SE.isSCEVable(Old->getType()))
384e71c6ab5STobias Grosser     if (const SCEV *Scev = SE.getSCEV(const_cast<Value*>(Old)))
385e71c6ab5STobias Grosser       if (!isa<SCEVCouldNotCompute>(Scev)) {
386e71c6ab5STobias Grosser         const SCEV *NewScev = SCEVRewriter::rewrite(Scev,
387e71c6ab5STobias Grosser                                                     *Statement.getParent(), SE,
388e71c6ab5STobias Grosser                                                     GlobalMap, BBMap);
389e71c6ab5STobias Grosser         SCEVExpander Expander(SE, "polly");
390e71c6ab5STobias Grosser         Value *Expanded = Expander.expandCodeFor(NewScev, Old->getType(),
391e71c6ab5STobias Grosser                                                  Builder.GetInsertPoint());
392e71c6ab5STobias Grosser 
393e71c6ab5STobias Grosser         BBMap[Old] = Expanded;
394e71c6ab5STobias Grosser         return Expanded;
395e71c6ab5STobias Grosser       }
396e71c6ab5STobias Grosser 
3973b11a16aSHongbin Zheng   // 'Old' is within the original SCoP, but was not rewritten.
3983b11a16aSHongbin Zheng   //
3993b11a16aSHongbin Zheng   // Such values appear, if they only calculate information already available in
4003b11a16aSHongbin Zheng   // the polyhedral description (e.g.  an induction variable increment). They
4013b11a16aSHongbin Zheng   // can be safely ignored.
4023b11a16aSHongbin Zheng   if (const Instruction *Inst = dyn_cast<Instruction>(Old))
4033b11a16aSHongbin Zheng     if (Statement.getParent()->getRegion().contains(Inst->getParent()))
4043b11a16aSHongbin Zheng       return NULL;
4053b11a16aSHongbin Zheng 
4063b11a16aSHongbin Zheng   // Everything else is probably a scop-constant value defined as global,
4073b11a16aSHongbin Zheng   // function parameter or an instruction not within the scop.
4083b11a16aSHongbin Zheng   return const_cast<Value*>(Old);
4093b11a16aSHongbin Zheng }
4103b11a16aSHongbin Zheng 
4113b11a16aSHongbin Zheng void BlockGenerator::copyInstScalar(const Instruction *Inst, ValueMapT &BBMap,
4123b11a16aSHongbin Zheng                                     ValueMapT &GlobalMap) {
4133b11a16aSHongbin Zheng   Instruction *NewInst = Inst->clone();
4143b11a16aSHongbin Zheng 
4153b11a16aSHongbin Zheng   // Replace old operands with the new ones.
4163b11a16aSHongbin Zheng   for (Instruction::const_op_iterator OI = Inst->op_begin(),
4173b11a16aSHongbin Zheng        OE = Inst->op_end(); OI != OE; ++OI) {
4183b11a16aSHongbin Zheng     Value *OldOperand = *OI;
4193b11a16aSHongbin Zheng     Value *NewOperand = getNewValue(OldOperand, BBMap, GlobalMap);
4203b11a16aSHongbin Zheng 
4213b11a16aSHongbin Zheng     if (!NewOperand) {
4223b11a16aSHongbin Zheng       assert(!isa<StoreInst>(NewInst)
4233b11a16aSHongbin Zheng              && "Store instructions are always needed!");
4243b11a16aSHongbin Zheng       delete NewInst;
4253b11a16aSHongbin Zheng       return;
4263b11a16aSHongbin Zheng     }
4273b11a16aSHongbin Zheng 
4283b11a16aSHongbin Zheng     NewInst->replaceUsesOfWith(OldOperand, NewOperand);
4293b11a16aSHongbin Zheng   }
4303b11a16aSHongbin Zheng 
4313b11a16aSHongbin Zheng   Builder.Insert(NewInst);
4323b11a16aSHongbin Zheng   BBMap[Inst] = NewInst;
4333b11a16aSHongbin Zheng 
4343b11a16aSHongbin Zheng   if (!NewInst->getType()->isVoidTy())
4353b11a16aSHongbin Zheng     NewInst->setName("p_" + Inst->getName());
4363b11a16aSHongbin Zheng }
4373b11a16aSHongbin Zheng 
4383b11a16aSHongbin Zheng std::vector<Value*> BlockGenerator::getMemoryAccessIndex(
4393b11a16aSHongbin Zheng   __isl_keep isl_map *AccessRelation, Value *BaseAddress,
4403b11a16aSHongbin Zheng   ValueMapT &BBMap, ValueMapT &GlobalMap) {
4413b11a16aSHongbin Zheng 
442*ae2d83ecSTobias Grosser   assert((isl_map_dim(AccessRelation, isl_dim_out) == 1) &&
443*ae2d83ecSTobias Grosser          "Only single dimensional access functions supported");
4443b11a16aSHongbin Zheng 
4453b11a16aSHongbin Zheng   std::vector<Value *> IVS;
4463b11a16aSHongbin Zheng   for (unsigned i = 0; i < Statement.getNumIterators(); ++i) {
4473b11a16aSHongbin Zheng     const Value *OriginalIV = Statement.getInductionVariableForDimension(i);
4483b11a16aSHongbin Zheng     Value *NewIV = getNewValue(OriginalIV, BBMap, GlobalMap);
4493b11a16aSHongbin Zheng     IVS.push_back(NewIV);
4503b11a16aSHongbin Zheng   }
4513b11a16aSHongbin Zheng 
4523b11a16aSHongbin Zheng   isl_pw_aff *PwAff = isl_map_dim_max(isl_map_copy(AccessRelation), 0);
4533b11a16aSHongbin Zheng   IslGenerator IslGen(Builder, IVS);
4543b11a16aSHongbin Zheng   Value *OffsetValue = IslGen.generateIslPwAff(PwAff);
4553b11a16aSHongbin Zheng 
4563b11a16aSHongbin Zheng   Type *Ty = Builder.getInt64Ty();
4573b11a16aSHongbin Zheng   OffsetValue = Builder.CreateIntCast(OffsetValue, Ty, true);
4583b11a16aSHongbin Zheng 
4593b11a16aSHongbin Zheng   std::vector<Value*> IndexArray;
4603b11a16aSHongbin Zheng   Value *NullValue = Constant::getNullValue(Ty);
4613b11a16aSHongbin Zheng   IndexArray.push_back(NullValue);
4623b11a16aSHongbin Zheng   IndexArray.push_back(OffsetValue);
4633b11a16aSHongbin Zheng   return IndexArray;
4643b11a16aSHongbin Zheng }
4653b11a16aSHongbin Zheng 
4663b11a16aSHongbin Zheng Value *BlockGenerator::getNewAccessOperand(
4673b11a16aSHongbin Zheng   __isl_keep isl_map *NewAccessRelation, Value *BaseAddress,
4683b11a16aSHongbin Zheng   ValueMapT &BBMap, ValueMapT &GlobalMap) {
4693b11a16aSHongbin Zheng   std::vector<Value*> IndexArray = getMemoryAccessIndex(NewAccessRelation,
4703b11a16aSHongbin Zheng                                                         BaseAddress,
4713b11a16aSHongbin Zheng                                                         BBMap, GlobalMap);
4723b11a16aSHongbin Zheng   Value *NewOperand = Builder.CreateGEP(BaseAddress, IndexArray,
4733b11a16aSHongbin Zheng                                         "p_newarrayidx_");
4743b11a16aSHongbin Zheng   return NewOperand;
4753b11a16aSHongbin Zheng }
4763b11a16aSHongbin Zheng 
4773b11a16aSHongbin Zheng Value *BlockGenerator::generateLocationAccessed(const Instruction *Inst,
4783b11a16aSHongbin Zheng                                                 const Value *Pointer,
4793b11a16aSHongbin Zheng                                                 ValueMapT &BBMap,
4803b11a16aSHongbin Zheng                                                 ValueMapT &GlobalMap) {
4813b11a16aSHongbin Zheng   MemoryAccess &Access = Statement.getAccessFor(Inst);
4823b11a16aSHongbin Zheng   isl_map *CurrentAccessRelation = Access.getAccessRelation();
4833b11a16aSHongbin Zheng   isl_map *NewAccessRelation = Access.getNewAccessRelation();
4843b11a16aSHongbin Zheng 
485*ae2d83ecSTobias Grosser   assert(isl_map_has_equal_space(CurrentAccessRelation, NewAccessRelation) &&
486*ae2d83ecSTobias Grosser          "Current and new access function use different spaces");
4873b11a16aSHongbin Zheng 
4883b11a16aSHongbin Zheng   Value *NewPointer;
4893b11a16aSHongbin Zheng 
4903b11a16aSHongbin Zheng   if (!NewAccessRelation) {
4913b11a16aSHongbin Zheng     NewPointer = getNewValue(Pointer, BBMap, GlobalMap);
4923b11a16aSHongbin Zheng   } else {
4933b11a16aSHongbin Zheng     Value *BaseAddress = const_cast<Value*>(Access.getBaseAddr());
4943b11a16aSHongbin Zheng     NewPointer = getNewAccessOperand(NewAccessRelation, BaseAddress,
4953b11a16aSHongbin Zheng                                      BBMap, GlobalMap);
4963b11a16aSHongbin Zheng   }
4973b11a16aSHongbin Zheng 
4983b11a16aSHongbin Zheng   isl_map_free(CurrentAccessRelation);
4993b11a16aSHongbin Zheng   isl_map_free(NewAccessRelation);
5003b11a16aSHongbin Zheng   return NewPointer;
5013b11a16aSHongbin Zheng }
5023b11a16aSHongbin Zheng 
5033b11a16aSHongbin Zheng Value *BlockGenerator::generateScalarLoad(const LoadInst *Load,
5043b11a16aSHongbin Zheng                                           ValueMapT &BBMap,
5053b11a16aSHongbin Zheng                                           ValueMapT &GlobalMap) {
5063b11a16aSHongbin Zheng   const Value *Pointer = Load->getPointerOperand();
5073b11a16aSHongbin Zheng   const Instruction *Inst = dyn_cast<Instruction>(Load);
5083b11a16aSHongbin Zheng   Value *NewPointer = generateLocationAccessed(Inst, Pointer, BBMap, GlobalMap);
5093b11a16aSHongbin Zheng   Value *ScalarLoad = Builder.CreateLoad(NewPointer,
5103b11a16aSHongbin Zheng                                          Load->getName() + "_p_scalar_");
5113b11a16aSHongbin Zheng   return ScalarLoad;
5123b11a16aSHongbin Zheng }
5133b11a16aSHongbin Zheng 
5143b11a16aSHongbin Zheng Value *BlockGenerator::generateScalarStore(const StoreInst *Store,
5153b11a16aSHongbin Zheng                                            ValueMapT &BBMap,
5163b11a16aSHongbin Zheng                                            ValueMapT &GlobalMap) {
5173b11a16aSHongbin Zheng   const Value *Pointer = Store->getPointerOperand();
5183b11a16aSHongbin Zheng   Value *NewPointer = generateLocationAccessed(Store, Pointer, BBMap,
5193b11a16aSHongbin Zheng                                                GlobalMap);
5203b11a16aSHongbin Zheng   Value *ValueOperand = getNewValue(Store->getValueOperand(), BBMap, GlobalMap);
5213b11a16aSHongbin Zheng 
5223b11a16aSHongbin Zheng   return Builder.CreateStore(ValueOperand, NewPointer);
5233b11a16aSHongbin Zheng }
5243b11a16aSHongbin Zheng 
5251bb59b0dSTobias Grosser void BlockGenerator::copyInstruction(const Instruction *Inst, ValueMapT &BBMap,
5261bb59b0dSTobias Grosser                                      ValueMapT &GlobalMap) {
5273b11a16aSHongbin Zheng   // Terminator instructions control the control flow. They are explicitly
5283b11a16aSHongbin Zheng   // expressed in the clast and do not need to be copied.
5293b11a16aSHongbin Zheng   if (Inst->isTerminator())
5303b11a16aSHongbin Zheng     return;
5313b11a16aSHongbin Zheng 
532e71c6ab5STobias Grosser   if (isSCEVIgnore(Inst))
533e71c6ab5STobias Grosser     return;
534e71c6ab5STobias Grosser 
5353b11a16aSHongbin Zheng   if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
5363b11a16aSHongbin Zheng     BBMap[Load] = generateScalarLoad(Load, BBMap, GlobalMap);
5373b11a16aSHongbin Zheng     return;
5383b11a16aSHongbin Zheng   }
5393b11a16aSHongbin Zheng 
5403b11a16aSHongbin Zheng   if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
5413b11a16aSHongbin Zheng     BBMap[Store] = generateScalarStore(Store, BBMap, GlobalMap);
5423b11a16aSHongbin Zheng     return;
5433b11a16aSHongbin Zheng   }
5443b11a16aSHongbin Zheng 
5453b11a16aSHongbin Zheng   copyInstScalar(Inst, BBMap, GlobalMap);
5463b11a16aSHongbin Zheng }
5473b11a16aSHongbin Zheng 
5483b11a16aSHongbin Zheng void BlockGenerator::copyBB(ValueMapT &GlobalMap) {
5493b11a16aSHongbin Zheng   BasicBlock *BB = Statement.getBasicBlock();
5503b11a16aSHongbin Zheng   BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(),
5513b11a16aSHongbin Zheng                                   Builder.GetInsertPoint(), P);
5523b11a16aSHongbin Zheng   CopyBB->setName("polly.stmt." + BB->getName());
5533b11a16aSHongbin Zheng   Builder.SetInsertPoint(CopyBB->begin());
5543b11a16aSHongbin Zheng 
5553b11a16aSHongbin Zheng   ValueMapT BBMap;
5563b11a16aSHongbin Zheng 
5573b11a16aSHongbin Zheng   for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE;
5583b11a16aSHongbin Zheng        ++II)
5593b11a16aSHongbin Zheng       copyInstruction(II, BBMap, GlobalMap);
5603b11a16aSHongbin Zheng }
5613b11a16aSHongbin Zheng 
5623b11a16aSHongbin Zheng VectorBlockGenerator::VectorBlockGenerator(IRBuilder<> &B,
563a00a0291SSebastian Pop                                            VectorValueMapT &GlobalMaps,
564a00a0291SSebastian Pop                                            ScopStmt &Stmt,
565a00a0291SSebastian Pop                                            __isl_keep isl_map *Schedule,
566a00a0291SSebastian Pop                                            Pass *P)
567a00a0291SSebastian Pop   : BlockGenerator(B, Stmt, P), GlobalMaps(GlobalMaps), Schedule(Schedule) {
5683b11a16aSHongbin Zheng   assert(GlobalMaps.size() > 1 && "Only one vector lane found");
569a00a0291SSebastian Pop   assert(Schedule && "No statement domain provided");
5703b11a16aSHongbin Zheng }
5713b11a16aSHongbin Zheng 
5723b11a16aSHongbin Zheng Value *VectorBlockGenerator::getVectorValue(const Value *Old,
5733b11a16aSHongbin Zheng                                             ValueMapT &VectorMap,
5743b11a16aSHongbin Zheng                                             VectorValueMapT &ScalarMaps) {
5753b11a16aSHongbin Zheng   if (VectorMap.count(Old))
5763b11a16aSHongbin Zheng     return VectorMap[Old];
5773b11a16aSHongbin Zheng 
5783b11a16aSHongbin Zheng   int Width = getVectorWidth();
5793b11a16aSHongbin Zheng 
5803b11a16aSHongbin Zheng   Value *Vector = UndefValue::get(VectorType::get(Old->getType(), Width));
5813b11a16aSHongbin Zheng 
5823b11a16aSHongbin Zheng   for (int Lane = 0; Lane < Width; Lane++)
5833b11a16aSHongbin Zheng     Vector = Builder.CreateInsertElement(Vector,
5843b11a16aSHongbin Zheng                                          getNewValue(Old,
5853b11a16aSHongbin Zheng                                                      ScalarMaps[Lane],
5863b11a16aSHongbin Zheng                                                      GlobalMaps[Lane]),
5873b11a16aSHongbin Zheng                                          Builder.getInt32(Lane));
5883b11a16aSHongbin Zheng 
5893b11a16aSHongbin Zheng   VectorMap[Old] = Vector;
5903b11a16aSHongbin Zheng 
5913b11a16aSHongbin Zheng   return Vector;
5923b11a16aSHongbin Zheng }
5933b11a16aSHongbin Zheng 
5943b11a16aSHongbin Zheng Type *VectorBlockGenerator::getVectorPtrTy(const Value *Val, int Width) {
5953b11a16aSHongbin Zheng   PointerType *PointerTy = dyn_cast<PointerType>(Val->getType());
5963b11a16aSHongbin Zheng   assert(PointerTy && "PointerType expected");
5973b11a16aSHongbin Zheng 
5983b11a16aSHongbin Zheng   Type *ScalarType = PointerTy->getElementType();
5993b11a16aSHongbin Zheng   VectorType *VectorType = VectorType::get(ScalarType, Width);
6003b11a16aSHongbin Zheng 
6013b11a16aSHongbin Zheng   return PointerType::getUnqual(VectorType);
6023b11a16aSHongbin Zheng }
6033b11a16aSHongbin Zheng 
6043b11a16aSHongbin Zheng Value *VectorBlockGenerator::generateStrideOneLoad(const LoadInst *Load,
6053b11a16aSHongbin Zheng                                                    ValueMapT &BBMap) {
6063b11a16aSHongbin Zheng   const Value *Pointer = Load->getPointerOperand();
6073b11a16aSHongbin Zheng   Type *VectorPtrType = getVectorPtrTy(Pointer, getVectorWidth());
6083b11a16aSHongbin Zheng   Value *NewPointer = getNewValue(Pointer, BBMap, GlobalMaps[0]);
6093b11a16aSHongbin Zheng   Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
6103b11a16aSHongbin Zheng                                            "vector_ptr");
6113b11a16aSHongbin Zheng   LoadInst *VecLoad = Builder.CreateLoad(VectorPtr,
6123b11a16aSHongbin Zheng                                          Load->getName() + "_p_vec_full");
6133b11a16aSHongbin Zheng   if (!Aligned)
6143b11a16aSHongbin Zheng     VecLoad->setAlignment(8);
6153b11a16aSHongbin Zheng 
6163b11a16aSHongbin Zheng   return VecLoad;
6173b11a16aSHongbin Zheng }
6183b11a16aSHongbin Zheng 
6193b11a16aSHongbin Zheng Value *VectorBlockGenerator::generateStrideZeroLoad(const LoadInst *Load,
6203b11a16aSHongbin Zheng                                                     ValueMapT &BBMap) {
6213b11a16aSHongbin Zheng   const Value *Pointer = Load->getPointerOperand();
6223b11a16aSHongbin Zheng   Type *VectorPtrType = getVectorPtrTy(Pointer, 1);
6233b11a16aSHongbin Zheng   Value *NewPointer = getNewValue(Pointer, BBMap, GlobalMaps[0]);
6243b11a16aSHongbin Zheng   Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
6253b11a16aSHongbin Zheng                                            Load->getName() + "_p_vec_p");
6263b11a16aSHongbin Zheng   LoadInst *ScalarLoad= Builder.CreateLoad(VectorPtr,
6273b11a16aSHongbin Zheng                                            Load->getName() + "_p_splat_one");
6283b11a16aSHongbin Zheng 
6293b11a16aSHongbin Zheng   if (!Aligned)
6303b11a16aSHongbin Zheng     ScalarLoad->setAlignment(8);
6313b11a16aSHongbin Zheng 
6323b11a16aSHongbin Zheng   Constant *SplatVector =
6333b11a16aSHongbin Zheng     Constant::getNullValue(VectorType::get(Builder.getInt32Ty(),
6343b11a16aSHongbin Zheng                                            getVectorWidth()));
6353b11a16aSHongbin Zheng 
6363b11a16aSHongbin Zheng   Value *VectorLoad = Builder.CreateShuffleVector(ScalarLoad, ScalarLoad,
6373b11a16aSHongbin Zheng                                                   SplatVector,
6383b11a16aSHongbin Zheng                                                   Load->getName()
6393b11a16aSHongbin Zheng                                                   + "_p_splat");
6403b11a16aSHongbin Zheng   return VectorLoad;
6413b11a16aSHongbin Zheng }
6423b11a16aSHongbin Zheng 
6433b11a16aSHongbin Zheng Value *VectorBlockGenerator::generateUnknownStrideLoad(const LoadInst *Load,
6443b11a16aSHongbin Zheng   VectorValueMapT &ScalarMaps) {
6453b11a16aSHongbin Zheng   int VectorWidth = getVectorWidth();
6463b11a16aSHongbin Zheng   const Value *Pointer = Load->getPointerOperand();
6473b11a16aSHongbin Zheng   VectorType *VectorType = VectorType::get(
6483b11a16aSHongbin Zheng     dyn_cast<PointerType>(Pointer->getType())->getElementType(), VectorWidth);
6493b11a16aSHongbin Zheng 
6503b11a16aSHongbin Zheng   Value *Vector = UndefValue::get(VectorType);
6513b11a16aSHongbin Zheng 
6523b11a16aSHongbin Zheng   for (int i = 0; i < VectorWidth; i++) {
6533b11a16aSHongbin Zheng     Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i]);
6543b11a16aSHongbin Zheng     Value *ScalarLoad = Builder.CreateLoad(NewPointer,
6553b11a16aSHongbin Zheng                                            Load->getName() + "_p_scalar_");
6563b11a16aSHongbin Zheng     Vector = Builder.CreateInsertElement(Vector, ScalarLoad,
6573b11a16aSHongbin Zheng                                          Builder.getInt32(i),
6583b11a16aSHongbin Zheng                                          Load->getName() + "_p_vec_");
6593b11a16aSHongbin Zheng   }
6603b11a16aSHongbin Zheng 
6613b11a16aSHongbin Zheng   return Vector;
6623b11a16aSHongbin Zheng }
6633b11a16aSHongbin Zheng 
6643b11a16aSHongbin Zheng void VectorBlockGenerator::generateLoad(const LoadInst *Load,
6653b11a16aSHongbin Zheng                                         ValueMapT &VectorMap,
6663b11a16aSHongbin Zheng                                         VectorValueMapT &ScalarMaps) {
66768794217SHongbin Zheng   if (PollyVectorizerChoice >= VECTORIZER_FIRST_NEED_GROUPED_UNROLL ||
66868794217SHongbin Zheng       !VectorType::isValidElementType(Load->getType())) {
6693b11a16aSHongbin Zheng     for (int i = 0; i < getVectorWidth(); i++)
6703b11a16aSHongbin Zheng       ScalarMaps[i][Load] = generateScalarLoad(Load, ScalarMaps[i],
6713b11a16aSHongbin Zheng                                                GlobalMaps[i]);
6723b11a16aSHongbin Zheng     return;
6733b11a16aSHongbin Zheng   }
6743b11a16aSHongbin Zheng 
6753b11a16aSHongbin Zheng   MemoryAccess &Access = Statement.getAccessFor(Load);
6763b11a16aSHongbin Zheng 
6773b11a16aSHongbin Zheng   Value *NewLoad;
678a00a0291SSebastian Pop   if (Access.isStrideZero(isl_map_copy(Schedule)))
6793b11a16aSHongbin Zheng     NewLoad = generateStrideZeroLoad(Load, ScalarMaps[0]);
680a00a0291SSebastian Pop   else if (Access.isStrideOne(isl_map_copy(Schedule)))
6813b11a16aSHongbin Zheng     NewLoad = generateStrideOneLoad(Load, ScalarMaps[0]);
6823b11a16aSHongbin Zheng   else
6833b11a16aSHongbin Zheng     NewLoad = generateUnknownStrideLoad(Load, ScalarMaps);
6843b11a16aSHongbin Zheng 
6853b11a16aSHongbin Zheng   VectorMap[Load] = NewLoad;
6863b11a16aSHongbin Zheng }
6873b11a16aSHongbin Zheng 
6883b11a16aSHongbin Zheng void VectorBlockGenerator::copyUnaryInst(const UnaryInstruction *Inst,
6893b11a16aSHongbin Zheng                                          ValueMapT &VectorMap,
6903b11a16aSHongbin Zheng                                          VectorValueMapT &ScalarMaps) {
6913b11a16aSHongbin Zheng   int VectorWidth = getVectorWidth();
6923b11a16aSHongbin Zheng   Value *NewOperand = getVectorValue(Inst->getOperand(0), VectorMap,
6933b11a16aSHongbin Zheng                                      ScalarMaps);
6943b11a16aSHongbin Zheng 
6953b11a16aSHongbin Zheng   assert(isa<CastInst>(Inst) && "Can not generate vector code for instruction");
6963b11a16aSHongbin Zheng 
6973b11a16aSHongbin Zheng   const CastInst *Cast = dyn_cast<CastInst>(Inst);
6983b11a16aSHongbin Zheng   VectorType *DestType = VectorType::get(Inst->getType(), VectorWidth);
6993b11a16aSHongbin Zheng   VectorMap[Inst] = Builder.CreateCast(Cast->getOpcode(), NewOperand, DestType);
7003b11a16aSHongbin Zheng }
7013b11a16aSHongbin Zheng 
7023b11a16aSHongbin Zheng void VectorBlockGenerator::copyBinaryInst(const BinaryOperator *Inst,
7033b11a16aSHongbin Zheng                                           ValueMapT &VectorMap,
7043b11a16aSHongbin Zheng                                           VectorValueMapT &ScalarMaps) {
7053b11a16aSHongbin Zheng   Value *OpZero = Inst->getOperand(0);
7063b11a16aSHongbin Zheng   Value *OpOne = Inst->getOperand(1);
7073b11a16aSHongbin Zheng 
7083b11a16aSHongbin Zheng   Value *NewOpZero, *NewOpOne;
7093b11a16aSHongbin Zheng   NewOpZero = getVectorValue(OpZero, VectorMap, ScalarMaps);
7103b11a16aSHongbin Zheng   NewOpOne = getVectorValue(OpOne, VectorMap, ScalarMaps);
7113b11a16aSHongbin Zheng 
7121bb59b0dSTobias Grosser   Value *NewInst = Builder.CreateBinOp(Inst->getOpcode(), NewOpZero, NewOpOne,
7133b11a16aSHongbin Zheng                                        Inst->getName() + "p_vec");
7143b11a16aSHongbin Zheng   VectorMap[Inst] = NewInst;
7153b11a16aSHongbin Zheng }
7163b11a16aSHongbin Zheng 
7173b11a16aSHongbin Zheng void VectorBlockGenerator::copyStore(const StoreInst *Store,
7183b11a16aSHongbin Zheng                                      ValueMapT &VectorMap,
7193b11a16aSHongbin Zheng                                      VectorValueMapT &ScalarMaps) {
7203b11a16aSHongbin Zheng   int VectorWidth = getVectorWidth();
7213b11a16aSHongbin Zheng 
7223b11a16aSHongbin Zheng   MemoryAccess &Access = Statement.getAccessFor(Store);
7233b11a16aSHongbin Zheng 
7243b11a16aSHongbin Zheng   const Value *Pointer = Store->getPointerOperand();
7253b11a16aSHongbin Zheng   Value *Vector = getVectorValue(Store->getValueOperand(), VectorMap,
7263b11a16aSHongbin Zheng                                    ScalarMaps);
7273b11a16aSHongbin Zheng 
728a00a0291SSebastian Pop   if (Access.isStrideOne(isl_map_copy(Schedule))) {
7293b11a16aSHongbin Zheng     Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth);
7303b11a16aSHongbin Zheng     Value *NewPointer = getNewValue(Pointer, ScalarMaps[0], GlobalMaps[0]);
7313b11a16aSHongbin Zheng 
7323b11a16aSHongbin Zheng     Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
7333b11a16aSHongbin Zheng                                              "vector_ptr");
7343b11a16aSHongbin Zheng     StoreInst *Store = Builder.CreateStore(Vector, VectorPtr);
7353b11a16aSHongbin Zheng 
7363b11a16aSHongbin Zheng     if (!Aligned)
7373b11a16aSHongbin Zheng       Store->setAlignment(8);
7383b11a16aSHongbin Zheng   } else {
7393b11a16aSHongbin Zheng     for (unsigned i = 0; i < ScalarMaps.size(); i++) {
7401bb59b0dSTobias Grosser       Value *Scalar = Builder.CreateExtractElement(Vector, Builder.getInt32(i));
7413b11a16aSHongbin Zheng       Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i]);
7423b11a16aSHongbin Zheng       Builder.CreateStore(Scalar, NewPointer);
7433b11a16aSHongbin Zheng     }
7443b11a16aSHongbin Zheng   }
7453b11a16aSHongbin Zheng }
7463b11a16aSHongbin Zheng 
7473b11a16aSHongbin Zheng bool VectorBlockGenerator::hasVectorOperands(const Instruction *Inst,
7483b11a16aSHongbin Zheng                                              ValueMapT &VectorMap) {
7493b11a16aSHongbin Zheng   for (Instruction::const_op_iterator OI = Inst->op_begin(),
7503b11a16aSHongbin Zheng        OE = Inst->op_end(); OI != OE; ++OI)
7513b11a16aSHongbin Zheng     if (VectorMap.count(*OI))
7523b11a16aSHongbin Zheng       return true;
7533b11a16aSHongbin Zheng   return false;
7543b11a16aSHongbin Zheng }
7553b11a16aSHongbin Zheng 
7563b11a16aSHongbin Zheng bool VectorBlockGenerator::extractScalarValues(const Instruction *Inst,
7573b11a16aSHongbin Zheng                                                ValueMapT &VectorMap,
7583b11a16aSHongbin Zheng                                                VectorValueMapT &ScalarMaps) {
7593b11a16aSHongbin Zheng   bool HasVectorOperand = false;
7603b11a16aSHongbin Zheng   int VectorWidth = getVectorWidth();
7613b11a16aSHongbin Zheng 
7623b11a16aSHongbin Zheng   for (Instruction::const_op_iterator OI = Inst->op_begin(),
7633b11a16aSHongbin Zheng        OE = Inst->op_end(); OI != OE; ++OI) {
7643b11a16aSHongbin Zheng     ValueMapT::iterator VecOp = VectorMap.find(*OI);
7653b11a16aSHongbin Zheng 
7663b11a16aSHongbin Zheng     if (VecOp == VectorMap.end())
7673b11a16aSHongbin Zheng       continue;
7683b11a16aSHongbin Zheng 
7693b11a16aSHongbin Zheng     HasVectorOperand = true;
7703b11a16aSHongbin Zheng     Value *NewVector = VecOp->second;
7713b11a16aSHongbin Zheng 
7723b11a16aSHongbin Zheng     for (int i = 0; i < VectorWidth; ++i) {
7733b11a16aSHongbin Zheng       ValueMapT &SM = ScalarMaps[i];
7743b11a16aSHongbin Zheng 
7753b11a16aSHongbin Zheng       // If there is one scalar extracted, all scalar elements should have
7763b11a16aSHongbin Zheng       // already been extracted by the code here. So no need to check for the
7773b11a16aSHongbin Zheng       // existance of all of them.
7783b11a16aSHongbin Zheng       if (SM.count(*OI))
7793b11a16aSHongbin Zheng         break;
7803b11a16aSHongbin Zheng 
7813b11a16aSHongbin Zheng       SM[*OI] = Builder.CreateExtractElement(NewVector, Builder.getInt32(i));
7823b11a16aSHongbin Zheng     }
7833b11a16aSHongbin Zheng   }
7843b11a16aSHongbin Zheng 
7853b11a16aSHongbin Zheng   return HasVectorOperand;
7863b11a16aSHongbin Zheng }
7873b11a16aSHongbin Zheng 
7883b11a16aSHongbin Zheng void VectorBlockGenerator::copyInstScalarized(const Instruction *Inst,
7893b11a16aSHongbin Zheng                                               ValueMapT &VectorMap,
7903b11a16aSHongbin Zheng                                               VectorValueMapT &ScalarMaps) {
7913b11a16aSHongbin Zheng   bool HasVectorOperand;
7923b11a16aSHongbin Zheng   int VectorWidth = getVectorWidth();
7933b11a16aSHongbin Zheng 
7943b11a16aSHongbin Zheng   HasVectorOperand = extractScalarValues(Inst, VectorMap, ScalarMaps);
7953b11a16aSHongbin Zheng 
7963b11a16aSHongbin Zheng   for (int VectorLane = 0; VectorLane < getVectorWidth(); VectorLane++)
7973b11a16aSHongbin Zheng     copyInstScalar(Inst, ScalarMaps[VectorLane], GlobalMaps[VectorLane]);
7983b11a16aSHongbin Zheng 
7993b11a16aSHongbin Zheng   if (!VectorType::isValidElementType(Inst->getType()) || !HasVectorOperand)
8003b11a16aSHongbin Zheng     return;
8013b11a16aSHongbin Zheng 
8023b11a16aSHongbin Zheng   // Make the result available as vector value.
8033b11a16aSHongbin Zheng   VectorType *VectorType = VectorType::get(Inst->getType(), VectorWidth);
8043b11a16aSHongbin Zheng   Value *Vector = UndefValue::get(VectorType);
8053b11a16aSHongbin Zheng 
8063b11a16aSHongbin Zheng   for (int i = 0; i < VectorWidth; i++)
8073b11a16aSHongbin Zheng     Vector = Builder.CreateInsertElement(Vector, ScalarMaps[i][Inst],
8083b11a16aSHongbin Zheng                                          Builder.getInt32(i));
8093b11a16aSHongbin Zheng 
8103b11a16aSHongbin Zheng   VectorMap[Inst] = Vector;
8113b11a16aSHongbin Zheng }
8123b11a16aSHongbin Zheng 
8133b11a16aSHongbin Zheng int VectorBlockGenerator::getVectorWidth() {
8143b11a16aSHongbin Zheng   return GlobalMaps.size();
8153b11a16aSHongbin Zheng }
8163b11a16aSHongbin Zheng 
8173b11a16aSHongbin Zheng void VectorBlockGenerator::copyInstruction(const Instruction *Inst,
8183b11a16aSHongbin Zheng                                            ValueMapT &VectorMap,
8193b11a16aSHongbin Zheng                                            VectorValueMapT &ScalarMaps) {
8203b11a16aSHongbin Zheng   // Terminator instructions control the control flow. They are explicitly
8213b11a16aSHongbin Zheng   // expressed in the clast and do not need to be copied.
8223b11a16aSHongbin Zheng   if (Inst->isTerminator())
8233b11a16aSHongbin Zheng     return;
8243b11a16aSHongbin Zheng 
825e71c6ab5STobias Grosser   if (isSCEVIgnore(Inst))
826e71c6ab5STobias Grosser     return;
827e71c6ab5STobias Grosser 
8283b11a16aSHongbin Zheng   if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
8293b11a16aSHongbin Zheng     generateLoad(Load, VectorMap, ScalarMaps);
8303b11a16aSHongbin Zheng     return;
8313b11a16aSHongbin Zheng   }
8323b11a16aSHongbin Zheng 
8333b11a16aSHongbin Zheng   if (hasVectorOperands(Inst, VectorMap)) {
8343b11a16aSHongbin Zheng     if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
8353b11a16aSHongbin Zheng       copyStore(Store, VectorMap, ScalarMaps);
8363b11a16aSHongbin Zheng       return;
8373b11a16aSHongbin Zheng     }
8383b11a16aSHongbin Zheng 
8393b11a16aSHongbin Zheng     if (const UnaryInstruction *Unary = dyn_cast<UnaryInstruction>(Inst)) {
8403b11a16aSHongbin Zheng       copyUnaryInst(Unary, VectorMap, ScalarMaps);
8413b11a16aSHongbin Zheng       return;
8423b11a16aSHongbin Zheng     }
8433b11a16aSHongbin Zheng 
8443b11a16aSHongbin Zheng     if (const BinaryOperator *Binary = dyn_cast<BinaryOperator>(Inst)) {
8453b11a16aSHongbin Zheng       copyBinaryInst(Binary, VectorMap, ScalarMaps);
8463b11a16aSHongbin Zheng       return;
8473b11a16aSHongbin Zheng     }
8483b11a16aSHongbin Zheng 
8493b11a16aSHongbin Zheng     // Falltrough: We generate scalar instructions, if we don't know how to
8503b11a16aSHongbin Zheng     // generate vector code.
8513b11a16aSHongbin Zheng   }
8523b11a16aSHongbin Zheng 
8533b11a16aSHongbin Zheng   copyInstScalarized(Inst, VectorMap, ScalarMaps);
8543b11a16aSHongbin Zheng }
8553b11a16aSHongbin Zheng 
8563b11a16aSHongbin Zheng void VectorBlockGenerator::copyBB() {
8573b11a16aSHongbin Zheng   BasicBlock *BB = Statement.getBasicBlock();
8583b11a16aSHongbin Zheng   BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(),
8593b11a16aSHongbin Zheng                                   Builder.GetInsertPoint(), P);
8603b11a16aSHongbin Zheng   CopyBB->setName("polly.stmt." + BB->getName());
8613b11a16aSHongbin Zheng   Builder.SetInsertPoint(CopyBB->begin());
8623b11a16aSHongbin Zheng 
8633b11a16aSHongbin Zheng   // Create two maps that store the mapping from the original instructions of
8643b11a16aSHongbin Zheng   // the old basic block to their copies in the new basic block. Those maps
8653b11a16aSHongbin Zheng   // are basic block local.
8663b11a16aSHongbin Zheng   //
8673b11a16aSHongbin Zheng   // As vector code generation is supported there is one map for scalar values
8683b11a16aSHongbin Zheng   // and one for vector values.
8693b11a16aSHongbin Zheng   //
8703b11a16aSHongbin Zheng   // In case we just do scalar code generation, the vectorMap is not used and
8713b11a16aSHongbin Zheng   // the scalarMap has just one dimension, which contains the mapping.
8723b11a16aSHongbin Zheng   //
8733b11a16aSHongbin Zheng   // In case vector code generation is done, an instruction may either appear
8743b11a16aSHongbin Zheng   // in the vector map once (as it is calculating >vectorwidth< values at a
8753b11a16aSHongbin Zheng   // time. Or (if the values are calculated using scalar operations), it
8763b11a16aSHongbin Zheng   // appears once in every dimension of the scalarMap.
8773b11a16aSHongbin Zheng   VectorValueMapT ScalarBlockMap(getVectorWidth());
8783b11a16aSHongbin Zheng   ValueMapT VectorBlockMap;
8793b11a16aSHongbin Zheng 
8803b11a16aSHongbin Zheng   for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end();
8813b11a16aSHongbin Zheng        II != IE; ++II)
8823b11a16aSHongbin Zheng       copyInstruction(II, VectorBlockMap, ScalarBlockMap);
8833b11a16aSHongbin Zheng }
884