13b11a16aSHongbin Zheng //===--- BlockGenerators.cpp - Generate code for statements -----*- C++ -*-===//
23b11a16aSHongbin Zheng //
33b11a16aSHongbin Zheng //                     The LLVM Compiler Infrastructure
43b11a16aSHongbin Zheng //
53b11a16aSHongbin Zheng // This file is distributed under the University of Illinois Open Source
63b11a16aSHongbin Zheng // License. See LICENSE.TXT for details.
73b11a16aSHongbin Zheng //
83b11a16aSHongbin Zheng //===----------------------------------------------------------------------===//
93b11a16aSHongbin Zheng //
103b11a16aSHongbin Zheng // This file implements the BlockGenerator and VectorBlockGenerator classes,
113b11a16aSHongbin Zheng // which generate sequential code and vectorized code for a polyhedral
123b11a16aSHongbin Zheng // statement, respectively.
133b11a16aSHongbin Zheng //
143b11a16aSHongbin Zheng //===----------------------------------------------------------------------===//
153b11a16aSHongbin Zheng 
163b11a16aSHongbin Zheng #include "polly/ScopInfo.h"
1768794217SHongbin Zheng #include "polly/CodeGen/CodeGeneration.h"
188a846610SHongbin Zheng #include "polly/CodeGen/BlockGenerators.h"
193b11a16aSHongbin Zheng #include "polly/Support/GICHelper.h"
203b11a16aSHongbin Zheng 
21e71c6ab5STobias Grosser #include "llvm/Analysis/LoopInfo.h"
22e71c6ab5STobias Grosser #include "llvm/Analysis/ScalarEvolution.h"
23e71c6ab5STobias Grosser #include "llvm/Analysis/ScalarEvolutionExpander.h"
243b11a16aSHongbin Zheng #include "llvm/Transforms/Utils/BasicBlockUtils.h"
253b11a16aSHongbin Zheng #include "llvm/Support/CommandLine.h"
263b11a16aSHongbin Zheng 
273b11a16aSHongbin Zheng #include "isl/aff.h"
283b11a16aSHongbin Zheng #include "isl/set.h"
293b11a16aSHongbin Zheng 
303b11a16aSHongbin Zheng using namespace llvm;
313b11a16aSHongbin Zheng using namespace polly;
323b11a16aSHongbin Zheng 
333b11a16aSHongbin Zheng static cl::opt<bool>
34c14582f2STobias Grosser Aligned("enable-polly-aligned", cl::desc("Assumed aligned memory accesses."),
35c14582f2STobias Grosser         cl::Hidden, cl::value_desc("OpenMP code generation enabled if true"),
363b11a16aSHongbin Zheng         cl::init(false), cl::ZeroOrMore);
373b11a16aSHongbin Zheng 
383b11a16aSHongbin Zheng static cl::opt<bool>
39c14582f2STobias Grosser SCEVCodegen("polly-codegen-scev", cl::desc("Use SCEV based code generation."),
40c14582f2STobias Grosser             cl::Hidden, cl::init(false), cl::ZeroOrMore);
41e71c6ab5STobias Grosser 
42e71c6ab5STobias Grosser /// The SCEVRewriter takes a scalar evolution expression and updates the
43e71c6ab5STobias Grosser /// following components:
44e71c6ab5STobias Grosser ///
45e71c6ab5STobias Grosser /// - SCEVUnknown
46e71c6ab5STobias Grosser ///
47e71c6ab5STobias Grosser ///   Values referenced in SCEVUnknown subexpressions are looked up in
48e71c6ab5STobias Grosser ///   two Value to Value maps (GlobalMap and BBMap). If they are found they are
49e71c6ab5STobias Grosser ///   replaced by a reference to the value they map to.
50e71c6ab5STobias Grosser ///
51e71c6ab5STobias Grosser /// - SCEVAddRecExpr
52e71c6ab5STobias Grosser ///
53e71c6ab5STobias Grosser ///   Based on a Loop -> Value map {Loop_1: %Value}, an expression
54e71c6ab5STobias Grosser ///   {%Base, +, %Step}<Loop_1> is rewritten to %Base + %Value * %Step.
55e71c6ab5STobias Grosser ///   AddRecExpr's with more than two operands can not be translated.
56e71c6ab5STobias Grosser ///
57e71c6ab5STobias Grosser ///   FIXME: The comment above is not yet reality. At the moment we derive
58e71c6ab5STobias Grosser ///   %Value by looking up the canonical IV of the loop and by defining
59e71c6ab5STobias Grosser ///   %Value = GlobalMap[%IV]. This needs to be changed to remove the need for
60e71c6ab5STobias Grosser ///   canonical induction variables.
61e71c6ab5STobias Grosser ///
62e71c6ab5STobias Grosser ///
63e71c6ab5STobias Grosser /// How can this be used?
64e71c6ab5STobias Grosser /// ====================
65e71c6ab5STobias Grosser ///
66e71c6ab5STobias Grosser /// SCEVRewrite based code generation works on virtually independent blocks.
67e71c6ab5STobias Grosser /// This means we do not run the independent blocks pass to rewrite scalar
68e71c6ab5STobias Grosser /// instructions, but just ignore instructions that we can analyze with scalar
69e71c6ab5STobias Grosser /// evolution. Virtually independent blocks are blocks that only reference the
70e71c6ab5STobias Grosser /// following values:
71e71c6ab5STobias Grosser ///
72e71c6ab5STobias Grosser /// o Values calculated within a basic block
73e71c6ab5STobias Grosser /// o Values representable by SCEV
74e71c6ab5STobias Grosser ///
75e71c6ab5STobias Grosser /// During code generation we can ignore all instructions:
76e71c6ab5STobias Grosser ///
77e71c6ab5STobias Grosser /// - Ignore all instructions except:
78e71c6ab5STobias Grosser ///   - Load instructions
79e71c6ab5STobias Grosser ///   - Instructions that reference operands already calculated within the
80e71c6ab5STobias Grosser ///     basic block.
81e71c6ab5STobias Grosser ///   - Store instructions
82e71c6ab5STobias Grosser struct SCEVRewriter : public SCEVVisitor<SCEVRewriter, const SCEV *> {
83e71c6ab5STobias Grosser public:
84e71c6ab5STobias Grosser   static const SCEV *rewrite(const SCEV *scev, Scop &S, ScalarEvolution &SE,
85e71c6ab5STobias Grosser                              ValueMapT &GlobalMap, ValueMapT &BBMap) {
86e71c6ab5STobias Grosser     SCEVRewriter Rewriter(S, SE, GlobalMap, BBMap);
87e71c6ab5STobias Grosser     return Rewriter.visit(scev);
88e71c6ab5STobias Grosser   }
89e71c6ab5STobias Grosser 
90e71c6ab5STobias Grosser   SCEVRewriter(Scop &S, ScalarEvolution &SE, ValueMapT &GlobalMap,
91e71c6ab5STobias Grosser                ValueMapT &BBMap) : S(S), SE(SE), GlobalMap(GlobalMap),
92e71c6ab5STobias Grosser                BBMap(BBMap) {}
93e71c6ab5STobias Grosser 
94e71c6ab5STobias Grosser   const SCEV *visit(const SCEV *Expr) {
95e71c6ab5STobias Grosser     // FIXME: The parameter handling is incorrect.
96e71c6ab5STobias Grosser     //
97e71c6ab5STobias Grosser     // Polly does only detect parameters in Access function and loop iteration
98e71c6ab5STobias Grosser     // counters, but it does not get parameters that are just used by
99e71c6ab5STobias Grosser     // instructions within the basic block.
100e71c6ab5STobias Grosser     //
101e71c6ab5STobias Grosser     // There are two options to solve this:
102e71c6ab5STobias Grosser     //  o Iterate over all instructions of the SCoP and find the actual
103e71c6ab5STobias Grosser     //    parameters.
104e71c6ab5STobias Grosser     //  o Just check within the SCEVRewriter if Values lay outside of the SCoP
105e71c6ab5STobias Grosser     //    and detect parameters on the fly.
106e71c6ab5STobias Grosser     //
107e71c6ab5STobias Grosser     // This is especially important for OpenMP and GPGPU code generation, as
108e71c6ab5STobias Grosser     // they require us to detect and possibly rewrite the corresponding
109e71c6ab5STobias Grosser     // parameters.
110e71c6ab5STobias Grosser     if (isl_id *Id = S.getIdForParam(Expr)) {
111e71c6ab5STobias Grosser       isl_id_free(Id);
112e71c6ab5STobias Grosser       return Expr;
113e71c6ab5STobias Grosser     }
114e71c6ab5STobias Grosser 
115e71c6ab5STobias Grosser     return SCEVVisitor<SCEVRewriter, const SCEV *>::visit(Expr);
116e71c6ab5STobias Grosser   }
117e71c6ab5STobias Grosser 
118c14582f2STobias Grosser   const SCEV *visitConstant(const SCEVConstant *Constant) { return Constant; }
119e71c6ab5STobias Grosser 
120e71c6ab5STobias Grosser   const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) {
121e71c6ab5STobias Grosser     const SCEV *Operand = visit(Expr->getOperand());
122e71c6ab5STobias Grosser     return SE.getTruncateExpr(Operand, Expr->getType());
123e71c6ab5STobias Grosser   }
124e71c6ab5STobias Grosser 
125e71c6ab5STobias Grosser   const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
126e71c6ab5STobias Grosser     const SCEV *Operand = visit(Expr->getOperand());
127e71c6ab5STobias Grosser     return SE.getZeroExtendExpr(Operand, Expr->getType());
128e71c6ab5STobias Grosser   }
129e71c6ab5STobias Grosser 
130e71c6ab5STobias Grosser   const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
131e71c6ab5STobias Grosser     const SCEV *Operand = visit(Expr->getOperand());
132e71c6ab5STobias Grosser     return SE.getSignExtendExpr(Operand, Expr->getType());
133e71c6ab5STobias Grosser   }
134e71c6ab5STobias Grosser 
135e71c6ab5STobias Grosser   const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
136e71c6ab5STobias Grosser     SmallVector<const SCEV *, 2> Operands;
137e71c6ab5STobias Grosser     for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
138e71c6ab5STobias Grosser       const SCEV *Operand = visit(Expr->getOperand(i));
139e71c6ab5STobias Grosser       Operands.push_back(Operand);
140e71c6ab5STobias Grosser     }
141e71c6ab5STobias Grosser 
142e71c6ab5STobias Grosser     return SE.getAddExpr(Operands);
143e71c6ab5STobias Grosser   }
144e71c6ab5STobias Grosser 
145e71c6ab5STobias Grosser   const SCEV *visitMulExpr(const SCEVMulExpr *Expr) {
146e71c6ab5STobias Grosser     SmallVector<const SCEV *, 2> Operands;
147e71c6ab5STobias Grosser     for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
148e71c6ab5STobias Grosser       const SCEV *Operand = visit(Expr->getOperand(i));
149e71c6ab5STobias Grosser       Operands.push_back(Operand);
150e71c6ab5STobias Grosser     }
151e71c6ab5STobias Grosser 
152e71c6ab5STobias Grosser     return SE.getMulExpr(Operands);
153e71c6ab5STobias Grosser   }
154e71c6ab5STobias Grosser 
155e71c6ab5STobias Grosser   const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) {
156e71c6ab5STobias Grosser     return SE.getUDivExpr(visit(Expr->getLHS()), visit(Expr->getRHS()));
157e71c6ab5STobias Grosser   }
158e71c6ab5STobias Grosser 
159e71c6ab5STobias Grosser   // Return a new induction variable if the loop is within the original SCoP
160e71c6ab5STobias Grosser   // or NULL otherwise.
161e71c6ab5STobias Grosser   Value *getNewIV(const Loop *L) {
162e71c6ab5STobias Grosser     Value *IV = L->getCanonicalInductionVariable();
163e71c6ab5STobias Grosser     if (!IV)
164e71c6ab5STobias Grosser       return NULL;
165e71c6ab5STobias Grosser 
166e71c6ab5STobias Grosser     ValueMapT::iterator NewIV = GlobalMap.find(IV);
167e71c6ab5STobias Grosser 
168e71c6ab5STobias Grosser     if (NewIV == GlobalMap.end())
169e71c6ab5STobias Grosser       return NULL;
170e71c6ab5STobias Grosser 
171e71c6ab5STobias Grosser     return NewIV->second;
172e71c6ab5STobias Grosser   }
173e71c6ab5STobias Grosser 
174e71c6ab5STobias Grosser   const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
175e71c6ab5STobias Grosser     Value *IV;
176e71c6ab5STobias Grosser 
177e71c6ab5STobias Grosser     IV = getNewIV(Expr->getLoop());
178e71c6ab5STobias Grosser 
179e71c6ab5STobias Grosser     // The IV is not within the GlobalMaps. So do not rewrite it and also do
180e71c6ab5STobias Grosser     // not rewrite any descendants.
181e71c6ab5STobias Grosser     if (!IV)
182e71c6ab5STobias Grosser       return Expr;
183e71c6ab5STobias Grosser 
184ae2d83ecSTobias Grosser     assert(Expr->getNumOperands() == 2 &&
185ae2d83ecSTobias Grosser            "An AddRecExpr with more than two operands can not be rewritten.");
186e71c6ab5STobias Grosser 
187e71c6ab5STobias Grosser     const SCEV *Base, *Step, *IVExpr, *Product;
188e71c6ab5STobias Grosser 
189e71c6ab5STobias Grosser     Base = visit(Expr->getStart());
190e71c6ab5STobias Grosser     Step = visit(Expr->getOperand(1));
191e71c6ab5STobias Grosser     IVExpr = SE.getUnknown(IV);
192e71c6ab5STobias Grosser     IVExpr = SE.getTruncateOrSignExtend(IVExpr, Step->getType());
193e71c6ab5STobias Grosser     Product = SE.getMulExpr(Step, IVExpr);
194e71c6ab5STobias Grosser 
195e71c6ab5STobias Grosser     return SE.getAddExpr(Base, Product);
196e71c6ab5STobias Grosser   }
197e71c6ab5STobias Grosser 
198e71c6ab5STobias Grosser   const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
199e71c6ab5STobias Grosser     SmallVector<const SCEV *, 2> Operands;
200e71c6ab5STobias Grosser     for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
201e71c6ab5STobias Grosser       const SCEV *Operand = visit(Expr->getOperand(i));
202e71c6ab5STobias Grosser       Operands.push_back(Operand);
203e71c6ab5STobias Grosser     }
204e71c6ab5STobias Grosser 
205e71c6ab5STobias Grosser     return SE.getSMaxExpr(Operands);
206e71c6ab5STobias Grosser   }
207e71c6ab5STobias Grosser 
208e71c6ab5STobias Grosser   const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) {
209e71c6ab5STobias Grosser     SmallVector<const SCEV *, 2> Operands;
210e71c6ab5STobias Grosser     for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
211e71c6ab5STobias Grosser       const SCEV *Operand = visit(Expr->getOperand(i));
212e71c6ab5STobias Grosser       Operands.push_back(Operand);
213e71c6ab5STobias Grosser     }
214e71c6ab5STobias Grosser 
215e71c6ab5STobias Grosser     return SE.getUMaxExpr(Operands);
216e71c6ab5STobias Grosser   }
217e71c6ab5STobias Grosser 
218e71c6ab5STobias Grosser   const SCEV *visitUnknown(const SCEVUnknown *Expr) {
219e71c6ab5STobias Grosser     Value *V = Expr->getValue();
220e71c6ab5STobias Grosser 
221e71c6ab5STobias Grosser     if (GlobalMap.count(V))
222e71c6ab5STobias Grosser       return SE.getUnknown(GlobalMap[V]);
223e71c6ab5STobias Grosser 
224e71c6ab5STobias Grosser     if (BBMap.count(V))
225e71c6ab5STobias Grosser       return SE.getUnknown(BBMap[V]);
226e71c6ab5STobias Grosser 
227e71c6ab5STobias Grosser     return Expr;
228e71c6ab5STobias Grosser   }
229e71c6ab5STobias Grosser 
230e71c6ab5STobias Grosser private:
231e71c6ab5STobias Grosser   Scop &S;
232e71c6ab5STobias Grosser   ScalarEvolution &SE;
233e71c6ab5STobias Grosser   ValueMapT &GlobalMap;
234e71c6ab5STobias Grosser   ValueMapT &BBMap;
235e71c6ab5STobias Grosser };
236e71c6ab5STobias Grosser 
2373b11a16aSHongbin Zheng // Helper class to generate memory location.
2383b11a16aSHongbin Zheng namespace {
2393b11a16aSHongbin Zheng class IslGenerator {
2403b11a16aSHongbin Zheng public:
2413b11a16aSHongbin Zheng   IslGenerator(IRBuilder<> &Builder, std::vector<Value *> &IVS) :
2423b11a16aSHongbin Zheng     Builder(Builder), IVS(IVS) {}
2433b11a16aSHongbin Zheng   Value *generateIslInt(__isl_take isl_int Int);
2443b11a16aSHongbin Zheng   Value *generateIslAff(__isl_take isl_aff *Aff);
2453b11a16aSHongbin Zheng   Value *generateIslPwAff(__isl_take isl_pw_aff *PwAff);
2463b11a16aSHongbin Zheng 
2473b11a16aSHongbin Zheng private:
2483b11a16aSHongbin Zheng   typedef struct {
2493b11a16aSHongbin Zheng     Value *Result;
2503b11a16aSHongbin Zheng     class IslGenerator *Generator;
2513b11a16aSHongbin Zheng   } IslGenInfo;
2523b11a16aSHongbin Zheng 
2533b11a16aSHongbin Zheng   IRBuilder<> &Builder;
2543b11a16aSHongbin Zheng   std::vector<Value *> &IVS;
2551bb59b0dSTobias Grosser   static int mergeIslAffValues(__isl_take isl_set *Set, __isl_take isl_aff *Aff,
2561bb59b0dSTobias Grosser                                void *User);
2573b11a16aSHongbin Zheng };
2583b11a16aSHongbin Zheng }
2593b11a16aSHongbin Zheng 
2603b11a16aSHongbin Zheng Value *IslGenerator::generateIslInt(isl_int Int) {
2613b11a16aSHongbin Zheng   mpz_t IntMPZ;
2623b11a16aSHongbin Zheng   mpz_init(IntMPZ);
2633b11a16aSHongbin Zheng   isl_int_get_gmp(Int, IntMPZ);
2643b11a16aSHongbin Zheng   Value *IntValue = Builder.getInt(APInt_from_MPZ(IntMPZ));
2653b11a16aSHongbin Zheng   mpz_clear(IntMPZ);
2663b11a16aSHongbin Zheng   return IntValue;
2673b11a16aSHongbin Zheng }
2683b11a16aSHongbin Zheng 
2693b11a16aSHongbin Zheng Value *IslGenerator::generateIslAff(__isl_take isl_aff *Aff) {
2703b11a16aSHongbin Zheng   Value *Result;
2713b11a16aSHongbin Zheng   Value *ConstValue;
2723b11a16aSHongbin Zheng   isl_int ConstIsl;
2733b11a16aSHongbin Zheng 
2743b11a16aSHongbin Zheng   isl_int_init(ConstIsl);
2753b11a16aSHongbin Zheng   isl_aff_get_constant(Aff, &ConstIsl);
2763b11a16aSHongbin Zheng   ConstValue = generateIslInt(ConstIsl);
2773b11a16aSHongbin Zheng   Type *Ty = Builder.getInt64Ty();
2783b11a16aSHongbin Zheng 
2793b11a16aSHongbin Zheng   // FIXME: We should give the constant and coefficients the right type. Here
2803b11a16aSHongbin Zheng   // we force it into i64.
2813b11a16aSHongbin Zheng   Result = Builder.CreateSExtOrBitCast(ConstValue, Ty);
2823b11a16aSHongbin Zheng 
2833b11a16aSHongbin Zheng   unsigned int NbInputDims = isl_aff_dim(Aff, isl_dim_in);
2843b11a16aSHongbin Zheng 
2853b11a16aSHongbin Zheng   assert((IVS.size() == NbInputDims) && "The Dimension of Induction Variables"
2863b11a16aSHongbin Zheng          "must match the dimension of the affine space.");
2873b11a16aSHongbin Zheng 
2883b11a16aSHongbin Zheng   isl_int CoefficientIsl;
2893b11a16aSHongbin Zheng   isl_int_init(CoefficientIsl);
2903b11a16aSHongbin Zheng 
2913b11a16aSHongbin Zheng   for (unsigned int i = 0; i < NbInputDims; ++i) {
2923b11a16aSHongbin Zheng     Value *CoefficientValue;
2933b11a16aSHongbin Zheng     isl_aff_get_coefficient(Aff, isl_dim_in, i, &CoefficientIsl);
2943b11a16aSHongbin Zheng 
2953b11a16aSHongbin Zheng     if (isl_int_is_zero(CoefficientIsl))
2963b11a16aSHongbin Zheng       continue;
2973b11a16aSHongbin Zheng 
2983b11a16aSHongbin Zheng     CoefficientValue = generateIslInt(CoefficientIsl);
2993b11a16aSHongbin Zheng     CoefficientValue = Builder.CreateIntCast(CoefficientValue, Ty, true);
3003b11a16aSHongbin Zheng     Value *IV = Builder.CreateIntCast(IVS[i], Ty, true);
3013b11a16aSHongbin Zheng     Value *PAdd = Builder.CreateMul(CoefficientValue, IV, "p_mul_coeff");
3023b11a16aSHongbin Zheng     Result = Builder.CreateAdd(Result, PAdd, "p_sum_coeff");
3033b11a16aSHongbin Zheng   }
3043b11a16aSHongbin Zheng 
3053b11a16aSHongbin Zheng   isl_int_clear(CoefficientIsl);
3063b11a16aSHongbin Zheng   isl_int_clear(ConstIsl);
3073b11a16aSHongbin Zheng   isl_aff_free(Aff);
3083b11a16aSHongbin Zheng 
3093b11a16aSHongbin Zheng   return Result;
3103b11a16aSHongbin Zheng }
3113b11a16aSHongbin Zheng 
3123b11a16aSHongbin Zheng int IslGenerator::mergeIslAffValues(__isl_take isl_set *Set,
3133b11a16aSHongbin Zheng                                     __isl_take isl_aff *Aff, void *User) {
3143b11a16aSHongbin Zheng   IslGenInfo *GenInfo = (IslGenInfo *)User;
3153b11a16aSHongbin Zheng 
3163b11a16aSHongbin Zheng   assert((GenInfo->Result == NULL) && "Result is already set."
3173b11a16aSHongbin Zheng          "Currently only single isl_aff is supported");
3183b11a16aSHongbin Zheng   assert(isl_set_plain_is_universe(Set)
3193b11a16aSHongbin Zheng          && "Code generation failed because the set is not universe");
3203b11a16aSHongbin Zheng 
3213b11a16aSHongbin Zheng   GenInfo->Result = GenInfo->Generator->generateIslAff(Aff);
3223b11a16aSHongbin Zheng 
3233b11a16aSHongbin Zheng   isl_set_free(Set);
3243b11a16aSHongbin Zheng   return 0;
3253b11a16aSHongbin Zheng }
3263b11a16aSHongbin Zheng 
3273b11a16aSHongbin Zheng Value *IslGenerator::generateIslPwAff(__isl_take isl_pw_aff *PwAff) {
3283b11a16aSHongbin Zheng   IslGenInfo User;
3293b11a16aSHongbin Zheng   User.Result = NULL;
3303b11a16aSHongbin Zheng   User.Generator = this;
3313b11a16aSHongbin Zheng   isl_pw_aff_foreach_piece(PwAff, mergeIslAffValues, &User);
3323b11a16aSHongbin Zheng   assert(User.Result && "Code generation for isl_pw_aff failed");
3333b11a16aSHongbin Zheng 
3343b11a16aSHongbin Zheng   isl_pw_aff_free(PwAff);
3353b11a16aSHongbin Zheng   return User.Result;
3363b11a16aSHongbin Zheng }
3373b11a16aSHongbin Zheng 
3383b11a16aSHongbin Zheng 
3393b11a16aSHongbin Zheng BlockGenerator::BlockGenerator(IRBuilder<> &B, ScopStmt &Stmt, Pass *P):
340e71c6ab5STobias Grosser   Builder(B), Statement(Stmt), P(P), SE(P->getAnalysis<ScalarEvolution>()) {}
341e71c6ab5STobias Grosser 
342e71c6ab5STobias Grosser bool BlockGenerator::isSCEVIgnore(const Instruction *Inst) {
343e71c6ab5STobias Grosser   if (SCEVCodegen && SE.isSCEVable(Inst->getType()))
344e71c6ab5STobias Grosser     if (const SCEV *Scev = SE.getSCEV(const_cast<Instruction*>(Inst)))
345e71c6ab5STobias Grosser       if (!isa<SCEVCouldNotCompute>(Scev)) {
346e71c6ab5STobias Grosser         if (const SCEVUnknown *Unknown = dyn_cast<SCEVUnknown>(Scev)) {
347e71c6ab5STobias Grosser           if (Unknown->getValue() != Inst)
348e71c6ab5STobias Grosser             return true;
349e71c6ab5STobias Grosser         } else {
350e71c6ab5STobias Grosser           return true;
351e71c6ab5STobias Grosser         }
352e71c6ab5STobias Grosser       }
353e71c6ab5STobias Grosser 
354e71c6ab5STobias Grosser   return false;
355e71c6ab5STobias Grosser }
3563b11a16aSHongbin Zheng 
3573b11a16aSHongbin Zheng Value *BlockGenerator::getNewValue(const Value *Old, ValueMapT &BBMap,
358*9d10fffaSSebastian Pop                                    ValueMapT &GlobalMap, LoopToScevMapT &LTS) {
3593b11a16aSHongbin Zheng   // We assume constants never change.
3603b11a16aSHongbin Zheng   // This avoids map lookups for many calls to this function.
3613b11a16aSHongbin Zheng   if (isa<Constant>(Old))
3623b11a16aSHongbin Zheng     return const_cast<Value *>(Old);
3633b11a16aSHongbin Zheng 
3643b11a16aSHongbin Zheng   if (GlobalMap.count(Old)) {
3653b11a16aSHongbin Zheng     Value *New = GlobalMap[Old];
3663b11a16aSHongbin Zheng 
367c14582f2STobias Grosser     if (Old->getType()->getScalarSizeInBits() <
368c14582f2STobias Grosser         New->getType()->getScalarSizeInBits())
3693b11a16aSHongbin Zheng       New = Builder.CreateTruncOrBitCast(New, Old->getType());
3703b11a16aSHongbin Zheng 
3713b11a16aSHongbin Zheng     return New;
3723b11a16aSHongbin Zheng   }
3733b11a16aSHongbin Zheng 
3743b11a16aSHongbin Zheng   if (BBMap.count(Old)) {
3753b11a16aSHongbin Zheng     return BBMap[Old];
3763b11a16aSHongbin Zheng   }
3773b11a16aSHongbin Zheng 
378e71c6ab5STobias Grosser   if (SCEVCodegen && SE.isSCEVable(Old->getType()))
379e71c6ab5STobias Grosser     if (const SCEV *Scev = SE.getSCEV(const_cast<Value *>(Old)))
380e71c6ab5STobias Grosser       if (!isa<SCEVCouldNotCompute>(Scev)) {
381c14582f2STobias Grosser         const SCEV *NewScev = SCEVRewriter::rewrite(
382c14582f2STobias Grosser             Scev, *Statement.getParent(), SE, GlobalMap, BBMap);
383e71c6ab5STobias Grosser         SCEVExpander Expander(SE, "polly");
384e71c6ab5STobias Grosser         Value *Expanded = Expander.expandCodeFor(NewScev, Old->getType(),
385e71c6ab5STobias Grosser                                                  Builder.GetInsertPoint());
386e71c6ab5STobias Grosser 
387e71c6ab5STobias Grosser         BBMap[Old] = Expanded;
388e71c6ab5STobias Grosser         return Expanded;
389e71c6ab5STobias Grosser       }
390e71c6ab5STobias Grosser 
3913b11a16aSHongbin Zheng   // 'Old' is within the original SCoP, but was not rewritten.
3923b11a16aSHongbin Zheng   //
3933b11a16aSHongbin Zheng   // Such values appear, if they only calculate information already available in
3943b11a16aSHongbin Zheng   // the polyhedral description (e.g.  an induction variable increment). They
3953b11a16aSHongbin Zheng   // can be safely ignored.
3963b11a16aSHongbin Zheng   if (const Instruction *Inst = dyn_cast<Instruction>(Old))
3973b11a16aSHongbin Zheng     if (Statement.getParent()->getRegion().contains(Inst->getParent()))
3983b11a16aSHongbin Zheng       return NULL;
3993b11a16aSHongbin Zheng 
4003b11a16aSHongbin Zheng   // Everything else is probably a scop-constant value defined as global,
4013b11a16aSHongbin Zheng   // function parameter or an instruction not within the scop.
4023b11a16aSHongbin Zheng   return const_cast<Value *>(Old);
4033b11a16aSHongbin Zheng }
4043b11a16aSHongbin Zheng 
4053b11a16aSHongbin Zheng void BlockGenerator::copyInstScalar(const Instruction *Inst, ValueMapT &BBMap,
406*9d10fffaSSebastian Pop                                     ValueMapT &GlobalMap, LoopToScevMapT &LTS) {
4073b11a16aSHongbin Zheng   Instruction *NewInst = Inst->clone();
4083b11a16aSHongbin Zheng 
4093b11a16aSHongbin Zheng   // Replace old operands with the new ones.
4103b11a16aSHongbin Zheng   for (Instruction::const_op_iterator OI = Inst->op_begin(),
411c14582f2STobias Grosser                                       OE = Inst->op_end();
412c14582f2STobias Grosser        OI != OE; ++OI) {
4133b11a16aSHongbin Zheng     Value *OldOperand = *OI;
414*9d10fffaSSebastian Pop     Value *NewOperand = getNewValue(OldOperand, BBMap, GlobalMap, LTS);
4153b11a16aSHongbin Zheng 
4163b11a16aSHongbin Zheng     if (!NewOperand) {
417c14582f2STobias Grosser       assert(!isa<StoreInst>(NewInst) &&
418c14582f2STobias Grosser              "Store instructions are always needed!");
4193b11a16aSHongbin Zheng       delete NewInst;
4203b11a16aSHongbin Zheng       return;
4213b11a16aSHongbin Zheng     }
4223b11a16aSHongbin Zheng 
4233b11a16aSHongbin Zheng     NewInst->replaceUsesOfWith(OldOperand, NewOperand);
4243b11a16aSHongbin Zheng   }
4253b11a16aSHongbin Zheng 
4263b11a16aSHongbin Zheng   Builder.Insert(NewInst);
4273b11a16aSHongbin Zheng   BBMap[Inst] = NewInst;
4283b11a16aSHongbin Zheng 
4293b11a16aSHongbin Zheng   if (!NewInst->getType()->isVoidTy())
4303b11a16aSHongbin Zheng     NewInst->setName("p_" + Inst->getName());
4313b11a16aSHongbin Zheng }
4323b11a16aSHongbin Zheng 
4333b11a16aSHongbin Zheng std::vector<Value *> BlockGenerator::getMemoryAccessIndex(
434c14582f2STobias Grosser     __isl_keep isl_map *AccessRelation, Value *BaseAddress, ValueMapT &BBMap,
435*9d10fffaSSebastian Pop     ValueMapT &GlobalMap, LoopToScevMapT &LTS) {
4363b11a16aSHongbin Zheng 
437ae2d83ecSTobias Grosser   assert((isl_map_dim(AccessRelation, isl_dim_out) == 1) &&
438ae2d83ecSTobias Grosser          "Only single dimensional access functions supported");
4393b11a16aSHongbin Zheng 
4403b11a16aSHongbin Zheng   std::vector<Value *> IVS;
4413b11a16aSHongbin Zheng   for (unsigned i = 0; i < Statement.getNumIterators(); ++i) {
4423b11a16aSHongbin Zheng     const Value *OriginalIV = Statement.getInductionVariableForDimension(i);
443*9d10fffaSSebastian Pop     Value *NewIV = getNewValue(OriginalIV, BBMap, GlobalMap, LTS);
4443b11a16aSHongbin Zheng     IVS.push_back(NewIV);
4453b11a16aSHongbin Zheng   }
4463b11a16aSHongbin Zheng 
4473b11a16aSHongbin Zheng   isl_pw_aff *PwAff = isl_map_dim_max(isl_map_copy(AccessRelation), 0);
4483b11a16aSHongbin Zheng   IslGenerator IslGen(Builder, IVS);
4493b11a16aSHongbin Zheng   Value *OffsetValue = IslGen.generateIslPwAff(PwAff);
4503b11a16aSHongbin Zheng 
4513b11a16aSHongbin Zheng   Type *Ty = Builder.getInt64Ty();
4523b11a16aSHongbin Zheng   OffsetValue = Builder.CreateIntCast(OffsetValue, Ty, true);
4533b11a16aSHongbin Zheng 
4543b11a16aSHongbin Zheng   std::vector<Value *> IndexArray;
4553b11a16aSHongbin Zheng   Value *NullValue = Constant::getNullValue(Ty);
4563b11a16aSHongbin Zheng   IndexArray.push_back(NullValue);
4573b11a16aSHongbin Zheng   IndexArray.push_back(OffsetValue);
4583b11a16aSHongbin Zheng   return IndexArray;
4593b11a16aSHongbin Zheng }
4603b11a16aSHongbin Zheng 
4613b11a16aSHongbin Zheng Value *BlockGenerator::getNewAccessOperand(
462c14582f2STobias Grosser     __isl_keep isl_map *NewAccessRelation, Value *BaseAddress, ValueMapT &BBMap,
463*9d10fffaSSebastian Pop     ValueMapT &GlobalMap, LoopToScevMapT &LTS) {
464c14582f2STobias Grosser   std::vector<Value *> IndexArray =
465*9d10fffaSSebastian Pop       getMemoryAccessIndex(NewAccessRelation, BaseAddress, BBMap, GlobalMap,
466*9d10fffaSSebastian Pop                            LTS);
467c14582f2STobias Grosser   Value *NewOperand =
468c14582f2STobias Grosser       Builder.CreateGEP(BaseAddress, IndexArray, "p_newarrayidx_");
4693b11a16aSHongbin Zheng   return NewOperand;
4703b11a16aSHongbin Zheng }
4713b11a16aSHongbin Zheng 
472c14582f2STobias Grosser Value *BlockGenerator::generateLocationAccessed(
473c14582f2STobias Grosser     const Instruction *Inst, const Value *Pointer, ValueMapT &BBMap,
474*9d10fffaSSebastian Pop     ValueMapT &GlobalMap, LoopToScevMapT &LTS) {
4753b11a16aSHongbin Zheng   MemoryAccess &Access = Statement.getAccessFor(Inst);
4763b11a16aSHongbin Zheng   isl_map *CurrentAccessRelation = Access.getAccessRelation();
4773b11a16aSHongbin Zheng   isl_map *NewAccessRelation = Access.getNewAccessRelation();
4783b11a16aSHongbin Zheng 
479ae2d83ecSTobias Grosser   assert(isl_map_has_equal_space(CurrentAccessRelation, NewAccessRelation) &&
480ae2d83ecSTobias Grosser          "Current and new access function use different spaces");
4813b11a16aSHongbin Zheng 
4823b11a16aSHongbin Zheng   Value *NewPointer;
4833b11a16aSHongbin Zheng 
4843b11a16aSHongbin Zheng   if (!NewAccessRelation) {
485*9d10fffaSSebastian Pop     NewPointer = getNewValue(Pointer, BBMap, GlobalMap, LTS);
4863b11a16aSHongbin Zheng   } else {
4873b11a16aSHongbin Zheng     Value *BaseAddress = const_cast<Value *>(Access.getBaseAddr());
488c14582f2STobias Grosser     NewPointer =
489*9d10fffaSSebastian Pop         getNewAccessOperand(NewAccessRelation, BaseAddress, BBMap, GlobalMap,
490*9d10fffaSSebastian Pop                             LTS);
4913b11a16aSHongbin Zheng   }
4923b11a16aSHongbin Zheng 
4933b11a16aSHongbin Zheng   isl_map_free(CurrentAccessRelation);
4943b11a16aSHongbin Zheng   isl_map_free(NewAccessRelation);
4953b11a16aSHongbin Zheng   return NewPointer;
4963b11a16aSHongbin Zheng }
4973b11a16aSHongbin Zheng 
498c14582f2STobias Grosser Value *BlockGenerator::generateScalarLoad(
499*9d10fffaSSebastian Pop     const LoadInst *Load, ValueMapT &BBMap, ValueMapT &GlobalMap,
500*9d10fffaSSebastian Pop     LoopToScevMapT &LTS) {
5013b11a16aSHongbin Zheng   const Value *Pointer = Load->getPointerOperand();
5023b11a16aSHongbin Zheng   const Instruction *Inst = dyn_cast<Instruction>(Load);
503*9d10fffaSSebastian Pop   Value *NewPointer = generateLocationAccessed(Inst, Pointer, BBMap, GlobalMap,
504*9d10fffaSSebastian Pop                                                LTS);
505c14582f2STobias Grosser   Value *ScalarLoad =
506c14582f2STobias Grosser       Builder.CreateLoad(NewPointer, Load->getName() + "_p_scalar_");
5073b11a16aSHongbin Zheng   return ScalarLoad;
5083b11a16aSHongbin Zheng }
5093b11a16aSHongbin Zheng 
510c14582f2STobias Grosser Value *BlockGenerator::generateScalarStore(
511*9d10fffaSSebastian Pop     const StoreInst *Store, ValueMapT &BBMap, ValueMapT &GlobalMap,
512*9d10fffaSSebastian Pop     LoopToScevMapT &LTS) {
5133b11a16aSHongbin Zheng   const Value *Pointer = Store->getPointerOperand();
514c14582f2STobias Grosser   Value *NewPointer =
515*9d10fffaSSebastian Pop       generateLocationAccessed(Store, Pointer, BBMap, GlobalMap, LTS);
516*9d10fffaSSebastian Pop   Value *ValueOperand = getNewValue(Store->getValueOperand(), BBMap, GlobalMap,
517*9d10fffaSSebastian Pop                                     LTS);
5183b11a16aSHongbin Zheng 
5193b11a16aSHongbin Zheng   return Builder.CreateStore(ValueOperand, NewPointer);
5203b11a16aSHongbin Zheng }
5213b11a16aSHongbin Zheng 
5221bb59b0dSTobias Grosser void BlockGenerator::copyInstruction(const Instruction *Inst, ValueMapT &BBMap,
523*9d10fffaSSebastian Pop                                      ValueMapT &GlobalMap,
524*9d10fffaSSebastian Pop                                      LoopToScevMapT &LTS) {
5253b11a16aSHongbin Zheng   // Terminator instructions control the control flow. They are explicitly
5263b11a16aSHongbin Zheng   // expressed in the clast and do not need to be copied.
5273b11a16aSHongbin Zheng   if (Inst->isTerminator())
5283b11a16aSHongbin Zheng     return;
5293b11a16aSHongbin Zheng 
530e71c6ab5STobias Grosser   if (isSCEVIgnore(Inst))
531e71c6ab5STobias Grosser     return;
532e71c6ab5STobias Grosser 
5333b11a16aSHongbin Zheng   if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
534*9d10fffaSSebastian Pop     BBMap[Load] = generateScalarLoad(Load, BBMap, GlobalMap, LTS);
5353b11a16aSHongbin Zheng     return;
5363b11a16aSHongbin Zheng   }
5373b11a16aSHongbin Zheng 
5383b11a16aSHongbin Zheng   if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
539*9d10fffaSSebastian Pop     BBMap[Store] = generateScalarStore(Store, BBMap, GlobalMap, LTS);
5403b11a16aSHongbin Zheng     return;
5413b11a16aSHongbin Zheng   }
5423b11a16aSHongbin Zheng 
543*9d10fffaSSebastian Pop   copyInstScalar(Inst, BBMap, GlobalMap, LTS);
5443b11a16aSHongbin Zheng }
5453b11a16aSHongbin Zheng 
546*9d10fffaSSebastian Pop void BlockGenerator::copyBB(ValueMapT &GlobalMap, LoopToScevMapT &LTS) {
5473b11a16aSHongbin Zheng   BasicBlock *BB = Statement.getBasicBlock();
548c14582f2STobias Grosser   BasicBlock *CopyBB =
549c14582f2STobias Grosser       SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), P);
5503b11a16aSHongbin Zheng   CopyBB->setName("polly.stmt." + BB->getName());
5513b11a16aSHongbin Zheng   Builder.SetInsertPoint(CopyBB->begin());
5523b11a16aSHongbin Zheng 
5533b11a16aSHongbin Zheng   ValueMapT BBMap;
5543b11a16aSHongbin Zheng 
5553b11a16aSHongbin Zheng   for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE;
5563b11a16aSHongbin Zheng        ++II)
557*9d10fffaSSebastian Pop     copyInstruction(II, BBMap, GlobalMap, LTS);
5583b11a16aSHongbin Zheng }
5593b11a16aSHongbin Zheng 
560c14582f2STobias Grosser VectorBlockGenerator::VectorBlockGenerator(
561*9d10fffaSSebastian Pop     IRBuilder<> &B, VectorValueMapT &GlobalMaps, std::vector<LoopToScevMapT> &VLTS,
562*9d10fffaSSebastian Pop     ScopStmt &Stmt, __isl_keep isl_map *Schedule, Pass *P)
563*9d10fffaSSebastian Pop     : BlockGenerator(B, Stmt, P), GlobalMaps(GlobalMaps), VLTS(VLTS),
564*9d10fffaSSebastian Pop       Schedule(Schedule) {
5653b11a16aSHongbin Zheng   assert(GlobalMaps.size() > 1 && "Only one vector lane found");
566a00a0291SSebastian Pop   assert(Schedule && "No statement domain provided");
5673b11a16aSHongbin Zheng }
5683b11a16aSHongbin Zheng 
569c14582f2STobias Grosser Value *VectorBlockGenerator::getVectorValue(
570c14582f2STobias Grosser     const Value *Old, ValueMapT &VectorMap, VectorValueMapT &ScalarMaps) {
5713b11a16aSHongbin Zheng   if (VectorMap.count(Old))
5723b11a16aSHongbin Zheng     return VectorMap[Old];
5733b11a16aSHongbin Zheng 
5743b11a16aSHongbin Zheng   int Width = getVectorWidth();
5753b11a16aSHongbin Zheng 
5763b11a16aSHongbin Zheng   Value *Vector = UndefValue::get(VectorType::get(Old->getType(), Width));
5773b11a16aSHongbin Zheng 
5783b11a16aSHongbin Zheng   for (int Lane = 0; Lane < Width; Lane++)
579c14582f2STobias Grosser     Vector = Builder.CreateInsertElement(
580*9d10fffaSSebastian Pop         Vector, getNewValue(Old, ScalarMaps[Lane], GlobalMaps[Lane],
581*9d10fffaSSebastian Pop                             VLTS[Lane]), Builder.getInt32(Lane));
5823b11a16aSHongbin Zheng 
5833b11a16aSHongbin Zheng   VectorMap[Old] = Vector;
5843b11a16aSHongbin Zheng 
5853b11a16aSHongbin Zheng   return Vector;
5863b11a16aSHongbin Zheng }
5873b11a16aSHongbin Zheng 
5883b11a16aSHongbin Zheng Type *VectorBlockGenerator::getVectorPtrTy(const Value *Val, int Width) {
5893b11a16aSHongbin Zheng   PointerType *PointerTy = dyn_cast<PointerType>(Val->getType());
5903b11a16aSHongbin Zheng   assert(PointerTy && "PointerType expected");
5913b11a16aSHongbin Zheng 
5923b11a16aSHongbin Zheng   Type *ScalarType = PointerTy->getElementType();
5933b11a16aSHongbin Zheng   VectorType *VectorType = VectorType::get(ScalarType, Width);
5943b11a16aSHongbin Zheng 
5953b11a16aSHongbin Zheng   return PointerType::getUnqual(VectorType);
5963b11a16aSHongbin Zheng }
5973b11a16aSHongbin Zheng 
5983b11a16aSHongbin Zheng Value *VectorBlockGenerator::generateStrideOneLoad(const LoadInst *Load,
5993b11a16aSHongbin Zheng                                                    ValueMapT &BBMap) {
6003b11a16aSHongbin Zheng   const Value *Pointer = Load->getPointerOperand();
6013b11a16aSHongbin Zheng   Type *VectorPtrType = getVectorPtrTy(Pointer, getVectorWidth());
602*9d10fffaSSebastian Pop   Value *NewPointer = getNewValue(Pointer, BBMap, GlobalMaps[0], VLTS[0]);
603c14582f2STobias Grosser   Value *VectorPtr =
604c14582f2STobias Grosser       Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
605c14582f2STobias Grosser   LoadInst *VecLoad =
606c14582f2STobias Grosser       Builder.CreateLoad(VectorPtr, Load->getName() + "_p_vec_full");
6073b11a16aSHongbin Zheng   if (!Aligned)
6083b11a16aSHongbin Zheng     VecLoad->setAlignment(8);
6093b11a16aSHongbin Zheng 
6103b11a16aSHongbin Zheng   return VecLoad;
6113b11a16aSHongbin Zheng }
6123b11a16aSHongbin Zheng 
6133b11a16aSHongbin Zheng Value *VectorBlockGenerator::generateStrideZeroLoad(const LoadInst *Load,
6143b11a16aSHongbin Zheng                                                     ValueMapT &BBMap) {
6153b11a16aSHongbin Zheng   const Value *Pointer = Load->getPointerOperand();
6163b11a16aSHongbin Zheng   Type *VectorPtrType = getVectorPtrTy(Pointer, 1);
617*9d10fffaSSebastian Pop   Value *NewPointer = getNewValue(Pointer, BBMap, GlobalMaps[0], VLTS[0]);
6183b11a16aSHongbin Zheng   Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
6193b11a16aSHongbin Zheng                                            Load->getName() + "_p_vec_p");
620c14582f2STobias Grosser   LoadInst *ScalarLoad =
621c14582f2STobias Grosser       Builder.CreateLoad(VectorPtr, Load->getName() + "_p_splat_one");
6223b11a16aSHongbin Zheng 
6233b11a16aSHongbin Zheng   if (!Aligned)
6243b11a16aSHongbin Zheng     ScalarLoad->setAlignment(8);
6253b11a16aSHongbin Zheng 
626c14582f2STobias Grosser   Constant *SplatVector = Constant::getNullValue(
627c14582f2STobias Grosser       VectorType::get(Builder.getInt32Ty(), getVectorWidth()));
6283b11a16aSHongbin Zheng 
629c14582f2STobias Grosser   Value *VectorLoad = Builder.CreateShuffleVector(
630c14582f2STobias Grosser       ScalarLoad, ScalarLoad, SplatVector, Load->getName() + "_p_splat");
6313b11a16aSHongbin Zheng   return VectorLoad;
6323b11a16aSHongbin Zheng }
6333b11a16aSHongbin Zheng 
634c14582f2STobias Grosser Value *VectorBlockGenerator::generateUnknownStrideLoad(
635c14582f2STobias Grosser     const LoadInst *Load, VectorValueMapT &ScalarMaps) {
6363b11a16aSHongbin Zheng   int VectorWidth = getVectorWidth();
6373b11a16aSHongbin Zheng   const Value *Pointer = Load->getPointerOperand();
6383b11a16aSHongbin Zheng   VectorType *VectorType = VectorType::get(
6393b11a16aSHongbin Zheng       dyn_cast<PointerType>(Pointer->getType())->getElementType(), VectorWidth);
6403b11a16aSHongbin Zheng 
6413b11a16aSHongbin Zheng   Value *Vector = UndefValue::get(VectorType);
6423b11a16aSHongbin Zheng 
6433b11a16aSHongbin Zheng   for (int i = 0; i < VectorWidth; i++) {
644*9d10fffaSSebastian Pop     Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i],
645*9d10fffaSSebastian Pop                                     VLTS[i]);
646c14582f2STobias Grosser     Value *ScalarLoad =
647c14582f2STobias Grosser         Builder.CreateLoad(NewPointer, Load->getName() + "_p_scalar_");
648c14582f2STobias Grosser     Vector = Builder.CreateInsertElement(
649c14582f2STobias Grosser         Vector, ScalarLoad, Builder.getInt32(i), Load->getName() + "_p_vec_");
6503b11a16aSHongbin Zheng   }
6513b11a16aSHongbin Zheng 
6523b11a16aSHongbin Zheng   return Vector;
6533b11a16aSHongbin Zheng }
6543b11a16aSHongbin Zheng 
655c14582f2STobias Grosser void VectorBlockGenerator::generateLoad(
656c14582f2STobias Grosser     const LoadInst *Load, ValueMapT &VectorMap, VectorValueMapT &ScalarMaps) {
65768794217SHongbin Zheng   if (PollyVectorizerChoice >= VECTORIZER_FIRST_NEED_GROUPED_UNROLL ||
65868794217SHongbin Zheng       !VectorType::isValidElementType(Load->getType())) {
6593b11a16aSHongbin Zheng     for (int i = 0; i < getVectorWidth(); i++)
660c14582f2STobias Grosser       ScalarMaps[i][Load] =
661*9d10fffaSSebastian Pop           generateScalarLoad(Load, ScalarMaps[i], GlobalMaps[i], VLTS[i]);
6623b11a16aSHongbin Zheng     return;
6633b11a16aSHongbin Zheng   }
6643b11a16aSHongbin Zheng 
6653b11a16aSHongbin Zheng   MemoryAccess &Access = Statement.getAccessFor(Load);
6663b11a16aSHongbin Zheng 
6673b11a16aSHongbin Zheng   Value *NewLoad;
668a00a0291SSebastian Pop   if (Access.isStrideZero(isl_map_copy(Schedule)))
6693b11a16aSHongbin Zheng     NewLoad = generateStrideZeroLoad(Load, ScalarMaps[0]);
670a00a0291SSebastian Pop   else if (Access.isStrideOne(isl_map_copy(Schedule)))
6713b11a16aSHongbin Zheng     NewLoad = generateStrideOneLoad(Load, ScalarMaps[0]);
6723b11a16aSHongbin Zheng   else
6733b11a16aSHongbin Zheng     NewLoad = generateUnknownStrideLoad(Load, ScalarMaps);
6743b11a16aSHongbin Zheng 
6753b11a16aSHongbin Zheng   VectorMap[Load] = NewLoad;
6763b11a16aSHongbin Zheng }
6773b11a16aSHongbin Zheng 
6783b11a16aSHongbin Zheng void VectorBlockGenerator::copyUnaryInst(const UnaryInstruction *Inst,
6793b11a16aSHongbin Zheng                                          ValueMapT &VectorMap,
6803b11a16aSHongbin Zheng                                          VectorValueMapT &ScalarMaps) {
6813b11a16aSHongbin Zheng   int VectorWidth = getVectorWidth();
682c14582f2STobias Grosser   Value *NewOperand =
683c14582f2STobias Grosser       getVectorValue(Inst->getOperand(0), VectorMap, ScalarMaps);
6843b11a16aSHongbin Zheng 
6853b11a16aSHongbin Zheng   assert(isa<CastInst>(Inst) && "Can not generate vector code for instruction");
6863b11a16aSHongbin Zheng 
6873b11a16aSHongbin Zheng   const CastInst *Cast = dyn_cast<CastInst>(Inst);
6883b11a16aSHongbin Zheng   VectorType *DestType = VectorType::get(Inst->getType(), VectorWidth);
6893b11a16aSHongbin Zheng   VectorMap[Inst] = Builder.CreateCast(Cast->getOpcode(), NewOperand, DestType);
6903b11a16aSHongbin Zheng }
6913b11a16aSHongbin Zheng 
6923b11a16aSHongbin Zheng void VectorBlockGenerator::copyBinaryInst(const BinaryOperator *Inst,
6933b11a16aSHongbin Zheng                                           ValueMapT &VectorMap,
6943b11a16aSHongbin Zheng                                           VectorValueMapT &ScalarMaps) {
6953b11a16aSHongbin Zheng   Value *OpZero = Inst->getOperand(0);
6963b11a16aSHongbin Zheng   Value *OpOne = Inst->getOperand(1);
6973b11a16aSHongbin Zheng 
6983b11a16aSHongbin Zheng   Value *NewOpZero, *NewOpOne;
6993b11a16aSHongbin Zheng   NewOpZero = getVectorValue(OpZero, VectorMap, ScalarMaps);
7003b11a16aSHongbin Zheng   NewOpOne = getVectorValue(OpOne, VectorMap, ScalarMaps);
7013b11a16aSHongbin Zheng 
7021bb59b0dSTobias Grosser   Value *NewInst = Builder.CreateBinOp(Inst->getOpcode(), NewOpZero, NewOpOne,
7033b11a16aSHongbin Zheng                                        Inst->getName() + "p_vec");
7043b11a16aSHongbin Zheng   VectorMap[Inst] = NewInst;
7053b11a16aSHongbin Zheng }
7063b11a16aSHongbin Zheng 
707c14582f2STobias Grosser void VectorBlockGenerator::copyStore(
708c14582f2STobias Grosser     const StoreInst *Store, ValueMapT &VectorMap, VectorValueMapT &ScalarMaps) {
7093b11a16aSHongbin Zheng   int VectorWidth = getVectorWidth();
7103b11a16aSHongbin Zheng 
7113b11a16aSHongbin Zheng   MemoryAccess &Access = Statement.getAccessFor(Store);
7123b11a16aSHongbin Zheng 
7133b11a16aSHongbin Zheng   const Value *Pointer = Store->getPointerOperand();
714c14582f2STobias Grosser   Value *Vector =
715c14582f2STobias Grosser       getVectorValue(Store->getValueOperand(), VectorMap, ScalarMaps);
7163b11a16aSHongbin Zheng 
717a00a0291SSebastian Pop   if (Access.isStrideOne(isl_map_copy(Schedule))) {
7183b11a16aSHongbin Zheng     Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth);
719*9d10fffaSSebastian Pop     Value *NewPointer = getNewValue(Pointer, ScalarMaps[0], GlobalMaps[0],
720*9d10fffaSSebastian Pop                                     VLTS[0]);
7213b11a16aSHongbin Zheng 
722c14582f2STobias Grosser     Value *VectorPtr =
723c14582f2STobias Grosser         Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
7243b11a16aSHongbin Zheng     StoreInst *Store = Builder.CreateStore(Vector, VectorPtr);
7253b11a16aSHongbin Zheng 
7263b11a16aSHongbin Zheng     if (!Aligned)
7273b11a16aSHongbin Zheng       Store->setAlignment(8);
7283b11a16aSHongbin Zheng   } else {
7293b11a16aSHongbin Zheng     for (unsigned i = 0; i < ScalarMaps.size(); i++) {
7301bb59b0dSTobias Grosser       Value *Scalar = Builder.CreateExtractElement(Vector, Builder.getInt32(i));
731*9d10fffaSSebastian Pop       Value *NewPointer = getNewValue(Pointer, ScalarMaps[i], GlobalMaps[i],
732*9d10fffaSSebastian Pop                                       VLTS[i]);
7333b11a16aSHongbin Zheng       Builder.CreateStore(Scalar, NewPointer);
7343b11a16aSHongbin Zheng     }
7353b11a16aSHongbin Zheng   }
7363b11a16aSHongbin Zheng }
7373b11a16aSHongbin Zheng 
7383b11a16aSHongbin Zheng bool VectorBlockGenerator::hasVectorOperands(const Instruction *Inst,
7393b11a16aSHongbin Zheng                                              ValueMapT &VectorMap) {
7403b11a16aSHongbin Zheng   for (Instruction::const_op_iterator OI = Inst->op_begin(),
741c14582f2STobias Grosser                                       OE = Inst->op_end();
742c14582f2STobias Grosser        OI != OE; ++OI)
7433b11a16aSHongbin Zheng     if (VectorMap.count(*OI))
7443b11a16aSHongbin Zheng       return true;
7453b11a16aSHongbin Zheng   return false;
7463b11a16aSHongbin Zheng }
7473b11a16aSHongbin Zheng 
7483b11a16aSHongbin Zheng bool VectorBlockGenerator::extractScalarValues(const Instruction *Inst,
7493b11a16aSHongbin Zheng                                                ValueMapT &VectorMap,
7503b11a16aSHongbin Zheng                                                VectorValueMapT &ScalarMaps) {
7513b11a16aSHongbin Zheng   bool HasVectorOperand = false;
7523b11a16aSHongbin Zheng   int VectorWidth = getVectorWidth();
7533b11a16aSHongbin Zheng 
7543b11a16aSHongbin Zheng   for (Instruction::const_op_iterator OI = Inst->op_begin(),
755c14582f2STobias Grosser                                       OE = Inst->op_end();
756c14582f2STobias Grosser        OI != OE; ++OI) {
7573b11a16aSHongbin Zheng     ValueMapT::iterator VecOp = VectorMap.find(*OI);
7583b11a16aSHongbin Zheng 
7593b11a16aSHongbin Zheng     if (VecOp == VectorMap.end())
7603b11a16aSHongbin Zheng       continue;
7613b11a16aSHongbin Zheng 
7623b11a16aSHongbin Zheng     HasVectorOperand = true;
7633b11a16aSHongbin Zheng     Value *NewVector = VecOp->second;
7643b11a16aSHongbin Zheng 
7653b11a16aSHongbin Zheng     for (int i = 0; i < VectorWidth; ++i) {
7663b11a16aSHongbin Zheng       ValueMapT &SM = ScalarMaps[i];
7673b11a16aSHongbin Zheng 
7683b11a16aSHongbin Zheng       // If there is one scalar extracted, all scalar elements should have
7693b11a16aSHongbin Zheng       // already been extracted by the code here. So no need to check for the
7703b11a16aSHongbin Zheng       // existance of all of them.
7713b11a16aSHongbin Zheng       if (SM.count(*OI))
7723b11a16aSHongbin Zheng         break;
7733b11a16aSHongbin Zheng 
7743b11a16aSHongbin Zheng       SM[*OI] = Builder.CreateExtractElement(NewVector, Builder.getInt32(i));
7753b11a16aSHongbin Zheng     }
7763b11a16aSHongbin Zheng   }
7773b11a16aSHongbin Zheng 
7783b11a16aSHongbin Zheng   return HasVectorOperand;
7793b11a16aSHongbin Zheng }
7803b11a16aSHongbin Zheng 
7813b11a16aSHongbin Zheng void VectorBlockGenerator::copyInstScalarized(const Instruction *Inst,
7823b11a16aSHongbin Zheng                                               ValueMapT &VectorMap,
7833b11a16aSHongbin Zheng                                               VectorValueMapT &ScalarMaps) {
7843b11a16aSHongbin Zheng   bool HasVectorOperand;
7853b11a16aSHongbin Zheng   int VectorWidth = getVectorWidth();
7863b11a16aSHongbin Zheng 
7873b11a16aSHongbin Zheng   HasVectorOperand = extractScalarValues(Inst, VectorMap, ScalarMaps);
7883b11a16aSHongbin Zheng 
7893b11a16aSHongbin Zheng   for (int VectorLane = 0; VectorLane < getVectorWidth(); VectorLane++)
790*9d10fffaSSebastian Pop     copyInstScalar(Inst, ScalarMaps[VectorLane], GlobalMaps[VectorLane],
791*9d10fffaSSebastian Pop                    VLTS[VectorLane]);
7923b11a16aSHongbin Zheng 
7933b11a16aSHongbin Zheng   if (!VectorType::isValidElementType(Inst->getType()) || !HasVectorOperand)
7943b11a16aSHongbin Zheng     return;
7953b11a16aSHongbin Zheng 
7963b11a16aSHongbin Zheng   // Make the result available as vector value.
7973b11a16aSHongbin Zheng   VectorType *VectorType = VectorType::get(Inst->getType(), VectorWidth);
7983b11a16aSHongbin Zheng   Value *Vector = UndefValue::get(VectorType);
7993b11a16aSHongbin Zheng 
8003b11a16aSHongbin Zheng   for (int i = 0; i < VectorWidth; i++)
8013b11a16aSHongbin Zheng     Vector = Builder.CreateInsertElement(Vector, ScalarMaps[i][Inst],
8023b11a16aSHongbin Zheng                                          Builder.getInt32(i));
8033b11a16aSHongbin Zheng 
8043b11a16aSHongbin Zheng   VectorMap[Inst] = Vector;
8053b11a16aSHongbin Zheng }
8063b11a16aSHongbin Zheng 
807c14582f2STobias Grosser int VectorBlockGenerator::getVectorWidth() { return GlobalMaps.size(); }
8083b11a16aSHongbin Zheng 
8093b11a16aSHongbin Zheng void VectorBlockGenerator::copyInstruction(const Instruction *Inst,
8103b11a16aSHongbin Zheng                                            ValueMapT &VectorMap,
8113b11a16aSHongbin Zheng                                            VectorValueMapT &ScalarMaps) {
8123b11a16aSHongbin Zheng   // Terminator instructions control the control flow. They are explicitly
8133b11a16aSHongbin Zheng   // expressed in the clast and do not need to be copied.
8143b11a16aSHongbin Zheng   if (Inst->isTerminator())
8153b11a16aSHongbin Zheng     return;
8163b11a16aSHongbin Zheng 
817e71c6ab5STobias Grosser   if (isSCEVIgnore(Inst))
818e71c6ab5STobias Grosser     return;
819e71c6ab5STobias Grosser 
8203b11a16aSHongbin Zheng   if (const LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
8213b11a16aSHongbin Zheng     generateLoad(Load, VectorMap, ScalarMaps);
8223b11a16aSHongbin Zheng     return;
8233b11a16aSHongbin Zheng   }
8243b11a16aSHongbin Zheng 
8253b11a16aSHongbin Zheng   if (hasVectorOperands(Inst, VectorMap)) {
8263b11a16aSHongbin Zheng     if (const StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
8273b11a16aSHongbin Zheng       copyStore(Store, VectorMap, ScalarMaps);
8283b11a16aSHongbin Zheng       return;
8293b11a16aSHongbin Zheng     }
8303b11a16aSHongbin Zheng 
8313b11a16aSHongbin Zheng     if (const UnaryInstruction *Unary = dyn_cast<UnaryInstruction>(Inst)) {
8323b11a16aSHongbin Zheng       copyUnaryInst(Unary, VectorMap, ScalarMaps);
8333b11a16aSHongbin Zheng       return;
8343b11a16aSHongbin Zheng     }
8353b11a16aSHongbin Zheng 
8363b11a16aSHongbin Zheng     if (const BinaryOperator *Binary = dyn_cast<BinaryOperator>(Inst)) {
8373b11a16aSHongbin Zheng       copyBinaryInst(Binary, VectorMap, ScalarMaps);
8383b11a16aSHongbin Zheng       return;
8393b11a16aSHongbin Zheng     }
8403b11a16aSHongbin Zheng 
8413b11a16aSHongbin Zheng     // Falltrough: We generate scalar instructions, if we don't know how to
8423b11a16aSHongbin Zheng     // generate vector code.
8433b11a16aSHongbin Zheng   }
8443b11a16aSHongbin Zheng 
8453b11a16aSHongbin Zheng   copyInstScalarized(Inst, VectorMap, ScalarMaps);
8463b11a16aSHongbin Zheng }
8473b11a16aSHongbin Zheng 
8483b11a16aSHongbin Zheng void VectorBlockGenerator::copyBB() {
8493b11a16aSHongbin Zheng   BasicBlock *BB = Statement.getBasicBlock();
850c14582f2STobias Grosser   BasicBlock *CopyBB =
851c14582f2STobias Grosser       SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), P);
8523b11a16aSHongbin Zheng   CopyBB->setName("polly.stmt." + BB->getName());
8533b11a16aSHongbin Zheng   Builder.SetInsertPoint(CopyBB->begin());
8543b11a16aSHongbin Zheng 
8553b11a16aSHongbin Zheng   // Create two maps that store the mapping from the original instructions of
8563b11a16aSHongbin Zheng   // the old basic block to their copies in the new basic block. Those maps
8573b11a16aSHongbin Zheng   // are basic block local.
8583b11a16aSHongbin Zheng   //
8593b11a16aSHongbin Zheng   // As vector code generation is supported there is one map for scalar values
8603b11a16aSHongbin Zheng   // and one for vector values.
8613b11a16aSHongbin Zheng   //
8623b11a16aSHongbin Zheng   // In case we just do scalar code generation, the vectorMap is not used and
8633b11a16aSHongbin Zheng   // the scalarMap has just one dimension, which contains the mapping.
8643b11a16aSHongbin Zheng   //
8653b11a16aSHongbin Zheng   // In case vector code generation is done, an instruction may either appear
8663b11a16aSHongbin Zheng   // in the vector map once (as it is calculating >vectorwidth< values at a
8673b11a16aSHongbin Zheng   // time. Or (if the values are calculated using scalar operations), it
8683b11a16aSHongbin Zheng   // appears once in every dimension of the scalarMap.
8693b11a16aSHongbin Zheng   VectorValueMapT ScalarBlockMap(getVectorWidth());
8703b11a16aSHongbin Zheng   ValueMapT VectorBlockMap;
8713b11a16aSHongbin Zheng 
872c14582f2STobias Grosser   for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE;
873c14582f2STobias Grosser        ++II)
8743b11a16aSHongbin Zheng     copyInstruction(II, VectorBlockMap, ScalarBlockMap);
8753b11a16aSHongbin Zheng }
876