15c0c51a9SNicolas Vasilache //===- VectorToLLVM.cpp - Conversion from Vector to the LLVM dialect ------===//
25c0c51a9SNicolas Vasilache //
35c0c51a9SNicolas Vasilache // Copyright 2019 The MLIR Authors.
45c0c51a9SNicolas Vasilache //
55c0c51a9SNicolas Vasilache // Licensed under the Apache License, Version 2.0 (the "License");
65c0c51a9SNicolas Vasilache // you may not use this file except in compliance with the License.
75c0c51a9SNicolas Vasilache // You may obtain a copy of the License at
85c0c51a9SNicolas Vasilache //
95c0c51a9SNicolas Vasilache //   http://www.apache.org/licenses/LICENSE-2.0
105c0c51a9SNicolas Vasilache //
115c0c51a9SNicolas Vasilache // Unless required by applicable law or agreed to in writing, software
125c0c51a9SNicolas Vasilache // distributed under the License is distributed on an "AS IS" BASIS,
135c0c51a9SNicolas Vasilache // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
145c0c51a9SNicolas Vasilache // See the License for the specific language governing permissions and
155c0c51a9SNicolas Vasilache // limitations under the License.
165c0c51a9SNicolas Vasilache // =============================================================================
175c0c51a9SNicolas Vasilache 
185c0c51a9SNicolas Vasilache #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"
195c0c51a9SNicolas Vasilache #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h"
205c0c51a9SNicolas Vasilache #include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h"
215c0c51a9SNicolas Vasilache #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
225c0c51a9SNicolas Vasilache #include "mlir/Dialect/VectorOps/VectorOps.h"
235c0c51a9SNicolas Vasilache #include "mlir/IR/Attributes.h"
245c0c51a9SNicolas Vasilache #include "mlir/IR/Builders.h"
255c0c51a9SNicolas Vasilache #include "mlir/IR/MLIRContext.h"
265c0c51a9SNicolas Vasilache #include "mlir/IR/Module.h"
275c0c51a9SNicolas Vasilache #include "mlir/IR/Operation.h"
285c0c51a9SNicolas Vasilache #include "mlir/IR/PatternMatch.h"
295c0c51a9SNicolas Vasilache #include "mlir/IR/StandardTypes.h"
305c0c51a9SNicolas Vasilache #include "mlir/IR/Types.h"
315c0c51a9SNicolas Vasilache #include "mlir/Pass/Pass.h"
325c0c51a9SNicolas Vasilache #include "mlir/Pass/PassManager.h"
335c0c51a9SNicolas Vasilache #include "mlir/Transforms/DialectConversion.h"
345c0c51a9SNicolas Vasilache #include "mlir/Transforms/Passes.h"
355c0c51a9SNicolas Vasilache 
365c0c51a9SNicolas Vasilache #include "llvm/IR/DerivedTypes.h"
375c0c51a9SNicolas Vasilache #include "llvm/IR/Module.h"
385c0c51a9SNicolas Vasilache #include "llvm/IR/Type.h"
395c0c51a9SNicolas Vasilache #include "llvm/Support/Allocator.h"
405c0c51a9SNicolas Vasilache #include "llvm/Support/ErrorHandling.h"
415c0c51a9SNicolas Vasilache 
425c0c51a9SNicolas Vasilache using namespace mlir;
435c0c51a9SNicolas Vasilache 
445c0c51a9SNicolas Vasilache template <typename T>
455c0c51a9SNicolas Vasilache static LLVM::LLVMType getPtrToElementType(T containerType,
465c0c51a9SNicolas Vasilache                                           LLVMTypeConverter &lowering) {
475c0c51a9SNicolas Vasilache   return lowering.convertType(containerType.getElementType())
485c0c51a9SNicolas Vasilache       .template cast<LLVM::LLVMType>()
495c0c51a9SNicolas Vasilache       .getPointerTo();
505c0c51a9SNicolas Vasilache }
515c0c51a9SNicolas Vasilache 
52*b36aaeafSAart Bik class VectorBroadcastOpConversion : public LLVMOpLowering {
53*b36aaeafSAart Bik public:
54*b36aaeafSAart Bik   explicit VectorBroadcastOpConversion(MLIRContext *context,
55*b36aaeafSAart Bik                                        LLVMTypeConverter &typeConverter)
56*b36aaeafSAart Bik       : LLVMOpLowering(vector::BroadcastOp::getOperationName(), context,
57*b36aaeafSAart Bik                        typeConverter) {}
58*b36aaeafSAart Bik 
59*b36aaeafSAart Bik   PatternMatchResult
60*b36aaeafSAart Bik   matchAndRewrite(Operation *op, ArrayRef<Value *> operands,
61*b36aaeafSAart Bik                   ConversionPatternRewriter &rewriter) const override {
62*b36aaeafSAart Bik     auto broadcastOp = cast<vector::BroadcastOp>(op);
63*b36aaeafSAart Bik     VectorType dstVectorType = broadcastOp.getVectorType();
64*b36aaeafSAart Bik     if (lowering.convertType(dstVectorType) == nullptr)
65*b36aaeafSAart Bik       return matchFailure();
66*b36aaeafSAart Bik     // Rewrite when the full vector type can be lowered (which
67*b36aaeafSAart Bik     // implies all 'reduced' types can be lowered too).
68*b36aaeafSAart Bik     VectorType srcVectorType =
69*b36aaeafSAart Bik         broadcastOp.getSourceType().dyn_cast<VectorType>();
70*b36aaeafSAart Bik     rewriter.replaceOp(
71*b36aaeafSAart Bik         op, expandRanks(operands[0],  // source value to be expanded
72*b36aaeafSAart Bik                         op->getLoc(), // location of original broadcast
73*b36aaeafSAart Bik                         srcVectorType, dstVectorType, rewriter));
74*b36aaeafSAart Bik     return matchSuccess();
75*b36aaeafSAart Bik   }
76*b36aaeafSAart Bik 
77*b36aaeafSAart Bik private:
78*b36aaeafSAart Bik   // Expands the given source value over all the ranks, as defined
79*b36aaeafSAart Bik   // by the source and destination type (a null source type denotes
80*b36aaeafSAart Bik   // expansion from a scalar value into a vector).
81*b36aaeafSAart Bik   //
82*b36aaeafSAart Bik   // TODO(ajcbik): consider replacing this one-pattern lowering
83*b36aaeafSAart Bik   //               with a two-pattern lowering using other vector
84*b36aaeafSAart Bik   //               ops once all insert/extract/shuffle operations
85*b36aaeafSAart Bik   //               are available with lowering implemention.
86*b36aaeafSAart Bik   //
87*b36aaeafSAart Bik   Value *expandRanks(Value *value, Location loc, VectorType srcVectorType,
88*b36aaeafSAart Bik                      VectorType dstVectorType,
89*b36aaeafSAart Bik                      ConversionPatternRewriter &rewriter) const {
90*b36aaeafSAart Bik     assert((dstVectorType != nullptr) && "invalid result type in broadcast");
91*b36aaeafSAart Bik     // Determine rank of source and destination.
92*b36aaeafSAart Bik     int64_t srcRank = srcVectorType ? srcVectorType.getRank() : 0;
93*b36aaeafSAart Bik     int64_t dstRank = dstVectorType.getRank();
94*b36aaeafSAart Bik     int64_t curDim = dstVectorType.getDimSize(0);
95*b36aaeafSAart Bik     if (srcRank < dstRank)
96*b36aaeafSAart Bik       // Duplicate this rank.
97*b36aaeafSAart Bik       return duplicateOneRank(value, loc, srcVectorType, dstVectorType, dstRank,
98*b36aaeafSAart Bik                               curDim, rewriter);
99*b36aaeafSAart Bik     // If all trailing dimensions are the same, the broadcast consists of
100*b36aaeafSAart Bik     // simply passing through the source value and we are done. Otherwise,
101*b36aaeafSAart Bik     // any non-matching dimension forces a stretch along this rank.
102*b36aaeafSAart Bik     assert((srcVectorType != nullptr) && (srcRank > 0) &&
103*b36aaeafSAart Bik            (srcRank == dstRank) && "invalid rank in broadcast");
104*b36aaeafSAart Bik     for (int64_t r = 0; r < dstRank; r++) {
105*b36aaeafSAart Bik       if (srcVectorType.getDimSize(r) != dstVectorType.getDimSize(r)) {
106*b36aaeafSAart Bik         return stretchOneRank(value, loc, srcVectorType, dstVectorType, dstRank,
107*b36aaeafSAart Bik                               curDim, rewriter);
108*b36aaeafSAart Bik       }
109*b36aaeafSAart Bik     }
110*b36aaeafSAart Bik     return value;
111*b36aaeafSAart Bik   }
112*b36aaeafSAart Bik 
113*b36aaeafSAart Bik   // Picks the best way to duplicate a single rank. For the 1-D case, a
114*b36aaeafSAart Bik   // single insert-elt/shuffle is the most efficient expansion. For higher
115*b36aaeafSAart Bik   // dimensions, however, we need dim x insert-values on a new broadcast
116*b36aaeafSAart Bik   // with one less leading dimension, which will be lowered "recursively"
117*b36aaeafSAart Bik   // to matching LLVM IR.
118*b36aaeafSAart Bik   // For example:
119*b36aaeafSAart Bik   //   v = broadcast s : f32 to vector<4x2xf32>
120*b36aaeafSAart Bik   // becomes:
121*b36aaeafSAart Bik   //   x = broadcast s : f32 to vector<2xf32>
122*b36aaeafSAart Bik   //   v = [x,x,x,x]
123*b36aaeafSAart Bik   // becomes:
124*b36aaeafSAart Bik   //   x = [s,s]
125*b36aaeafSAart Bik   //   v = [x,x,x,x]
126*b36aaeafSAart Bik   Value *duplicateOneRank(Value *value, Location loc, VectorType srcVectorType,
127*b36aaeafSAart Bik                           VectorType dstVectorType, int64_t rank, int64_t dim,
128*b36aaeafSAart Bik                           ConversionPatternRewriter &rewriter) const {
129*b36aaeafSAart Bik     Type llvmType = lowering.convertType(dstVectorType);
130*b36aaeafSAart Bik     assert((llvmType != nullptr) && "unlowerable vector type");
131*b36aaeafSAart Bik     if (rank == 1) {
132*b36aaeafSAart Bik       Value *undef = rewriter.create<LLVM::UndefOp>(loc, llvmType);
133*b36aaeafSAart Bik       Value *expand = insertOne(undef, value, loc, llvmType, rank, 0, rewriter);
134*b36aaeafSAart Bik       SmallVector<int32_t, 4> zeroValues(dim, 0);
135*b36aaeafSAart Bik       return rewriter.create<LLVM::ShuffleVectorOp>(
136*b36aaeafSAart Bik           loc, expand, undef, rewriter.getI32ArrayAttr(zeroValues));
137*b36aaeafSAart Bik     }
138*b36aaeafSAart Bik     Value *expand = expandRanks(value, loc, srcVectorType,
139*b36aaeafSAart Bik                                 reducedVectorType(dstVectorType), rewriter);
140*b36aaeafSAart Bik     Value *result = rewriter.create<LLVM::UndefOp>(loc, llvmType);
141*b36aaeafSAart Bik     for (int64_t d = 0; d < dim; ++d) {
142*b36aaeafSAart Bik       result = insertOne(result, expand, loc, llvmType, rank, d, rewriter);
143*b36aaeafSAart Bik     }
144*b36aaeafSAart Bik     return result;
145*b36aaeafSAart Bik   }
146*b36aaeafSAart Bik 
147*b36aaeafSAart Bik   // Picks the best way to stretch a single rank. For the 1-D case, a
148*b36aaeafSAart Bik   // single insert-elt/shuffle is the most efficient expansion when at
149*b36aaeafSAart Bik   // a stretch. Otherwise, every dimension needs to be expanded
150*b36aaeafSAart Bik   // individually and individually inserted in the resulting vector.
151*b36aaeafSAart Bik   // For example:
152*b36aaeafSAart Bik   //   v = broadcast w : vector<4x1x2xf32> to vector<4x2x2xf32>
153*b36aaeafSAart Bik   // becomes:
154*b36aaeafSAart Bik   //   a = broadcast w[0] : vector<1x2xf32> to vector<2x2xf32>
155*b36aaeafSAart Bik   //   b = broadcast w[1] : vector<1x2xf32> to vector<2x2xf32>
156*b36aaeafSAart Bik   //   c = broadcast w[2] : vector<1x2xf32> to vector<2x2xf32>
157*b36aaeafSAart Bik   //   d = broadcast w[3] : vector<1x2xf32> to vector<2x2xf32>
158*b36aaeafSAart Bik   //   v = [a,b,c,d]
159*b36aaeafSAart Bik   // becomes:
160*b36aaeafSAart Bik   //   x = broadcast w[0][0] : vector<2xf32> to vector <2x2xf32>
161*b36aaeafSAart Bik   //   y = broadcast w[1][0] : vector<2xf32> to vector <2x2xf32>
162*b36aaeafSAart Bik   //   a = [x, y]
163*b36aaeafSAart Bik   //   etc.
164*b36aaeafSAart Bik   Value *stretchOneRank(Value *value, Location loc, VectorType srcVectorType,
165*b36aaeafSAart Bik                         VectorType dstVectorType, int64_t rank, int64_t dim,
166*b36aaeafSAart Bik                         ConversionPatternRewriter &rewriter) const {
167*b36aaeafSAart Bik     Type llvmType = lowering.convertType(dstVectorType);
168*b36aaeafSAart Bik     assert((llvmType != nullptr) && "unlowerable vector type");
169*b36aaeafSAart Bik     Value *result = rewriter.create<LLVM::UndefOp>(loc, llvmType);
170*b36aaeafSAart Bik     bool atStretch = dim != srcVectorType.getDimSize(0);
171*b36aaeafSAart Bik     if (rank == 1) {
172*b36aaeafSAart Bik       Type redLlvmType = lowering.convertType(dstVectorType.getElementType());
173*b36aaeafSAart Bik       if (atStretch) {
174*b36aaeafSAart Bik         Value *one = extractOne(value, loc, redLlvmType, rank, 0, rewriter);
175*b36aaeafSAart Bik         Value *expand =
176*b36aaeafSAart Bik             insertOne(result, one, loc, llvmType, rank, 0, rewriter);
177*b36aaeafSAart Bik         SmallVector<int32_t, 4> zeroValues(dim, 0);
178*b36aaeafSAart Bik         return rewriter.create<LLVM::ShuffleVectorOp>(
179*b36aaeafSAart Bik             loc, expand, result, rewriter.getI32ArrayAttr(zeroValues));
180*b36aaeafSAart Bik       }
181*b36aaeafSAart Bik       for (int64_t d = 0; d < dim; ++d) {
182*b36aaeafSAart Bik         Value *one = extractOne(value, loc, redLlvmType, rank, d, rewriter);
183*b36aaeafSAart Bik         result = insertOne(result, one, loc, llvmType, rank, d, rewriter);
184*b36aaeafSAart Bik       }
185*b36aaeafSAart Bik     } else {
186*b36aaeafSAart Bik       VectorType redSrcType = reducedVectorType(srcVectorType);
187*b36aaeafSAart Bik       VectorType redDstType = reducedVectorType(dstVectorType);
188*b36aaeafSAart Bik       Type redLlvmType = lowering.convertType(redSrcType);
189*b36aaeafSAart Bik       for (int64_t d = 0; d < dim; ++d) {
190*b36aaeafSAart Bik         int64_t pos = atStretch ? 0 : d;
191*b36aaeafSAart Bik         Value *one = extractOne(value, loc, redLlvmType, rank, pos, rewriter);
192*b36aaeafSAart Bik         Value *expand = expandRanks(one, loc, redSrcType, redDstType, rewriter);
193*b36aaeafSAart Bik         result = insertOne(result, expand, loc, llvmType, rank, d, rewriter);
194*b36aaeafSAart Bik       }
195*b36aaeafSAart Bik     }
196*b36aaeafSAart Bik     return result;
197*b36aaeafSAart Bik   }
198*b36aaeafSAart Bik 
199*b36aaeafSAart Bik   // Picks the proper sequence for inserting.
200*b36aaeafSAart Bik   Value *insertOne(Value *val1, Value *val2, Location loc, Type llvmType,
201*b36aaeafSAart Bik                    int64_t rank, int64_t pos,
202*b36aaeafSAart Bik                    ConversionPatternRewriter &rewriter) const {
203*b36aaeafSAart Bik     if (rank == 1) {
204*b36aaeafSAart Bik       auto idxType = rewriter.getIndexType();
205*b36aaeafSAart Bik       auto constant = rewriter.create<LLVM::ConstantOp>(
206*b36aaeafSAart Bik           loc, lowering.convertType(idxType),
207*b36aaeafSAart Bik           rewriter.getIntegerAttr(idxType, pos));
208*b36aaeafSAart Bik       return rewriter.create<LLVM::InsertElementOp>(loc, llvmType, val1, val2,
209*b36aaeafSAart Bik                                                     constant);
210*b36aaeafSAart Bik     }
211*b36aaeafSAart Bik     return rewriter.create<LLVM::InsertValueOp>(loc, llvmType, val1, val2,
212*b36aaeafSAart Bik                                                 rewriter.getI64ArrayAttr(pos));
213*b36aaeafSAart Bik   }
214*b36aaeafSAart Bik 
215*b36aaeafSAart Bik   // Picks the proper sequence for extracting.
216*b36aaeafSAart Bik   Value *extractOne(Value *value, Location loc, Type llvmType, int64_t rank,
217*b36aaeafSAart Bik                     int64_t pos, ConversionPatternRewriter &rewriter) const {
218*b36aaeafSAart Bik     if (rank == 1) {
219*b36aaeafSAart Bik       auto idxType = rewriter.getIndexType();
220*b36aaeafSAart Bik       auto constant = rewriter.create<LLVM::ConstantOp>(
221*b36aaeafSAart Bik           loc, lowering.convertType(idxType),
222*b36aaeafSAart Bik           rewriter.getIntegerAttr(idxType, pos));
223*b36aaeafSAart Bik       return rewriter.create<LLVM::ExtractElementOp>(loc, llvmType, value,
224*b36aaeafSAart Bik                                                      constant);
225*b36aaeafSAart Bik     }
226*b36aaeafSAart Bik     return rewriter.create<LLVM::ExtractValueOp>(loc, llvmType, value,
227*b36aaeafSAart Bik                                                  rewriter.getI64ArrayAttr(pos));
228*b36aaeafSAart Bik   }
229*b36aaeafSAart Bik 
230*b36aaeafSAart Bik   // Helper to reduce vector type by one rank.
231*b36aaeafSAart Bik   static VectorType reducedVectorType(VectorType tp) {
232*b36aaeafSAart Bik     assert((tp.getRank() > 1) && "unlowerable vector type");
233*b36aaeafSAart Bik     return VectorType::get(tp.getShape().drop_front(), tp.getElementType());
234*b36aaeafSAart Bik   }
235*b36aaeafSAart Bik };
236*b36aaeafSAart Bik 
2375c0c51a9SNicolas Vasilache class VectorExtractElementOpConversion : public LLVMOpLowering {
2385c0c51a9SNicolas Vasilache public:
2395c0c51a9SNicolas Vasilache   explicit VectorExtractElementOpConversion(MLIRContext *context,
2405c0c51a9SNicolas Vasilache                                             LLVMTypeConverter &typeConverter)
2415c0c51a9SNicolas Vasilache       : LLVMOpLowering(vector::ExtractElementOp::getOperationName(), context,
2425c0c51a9SNicolas Vasilache                        typeConverter) {}
2435c0c51a9SNicolas Vasilache 
2445c0c51a9SNicolas Vasilache   PatternMatchResult
2455c0c51a9SNicolas Vasilache   matchAndRewrite(Operation *op, ArrayRef<Value *> operands,
2465c0c51a9SNicolas Vasilache                   ConversionPatternRewriter &rewriter) const override {
2475c0c51a9SNicolas Vasilache     auto loc = op->getLoc();
2485c0c51a9SNicolas Vasilache     auto adaptor = vector::ExtractElementOpOperandAdaptor(operands);
2495c0c51a9SNicolas Vasilache     auto extractOp = cast<vector::ExtractElementOp>(op);
2505c0c51a9SNicolas Vasilache     auto vectorType = extractOp.vector()->getType().cast<VectorType>();
2515c0c51a9SNicolas Vasilache     auto resultType = extractOp.getResult()->getType();
2525c0c51a9SNicolas Vasilache     auto llvmResultType = lowering.convertType(resultType);
2535c0c51a9SNicolas Vasilache 
2545c0c51a9SNicolas Vasilache     auto positionArrayAttr = extractOp.position();
2555c0c51a9SNicolas Vasilache     // One-shot extraction of vector from array (only requires extractvalue).
2565c0c51a9SNicolas Vasilache     if (resultType.isa<VectorType>()) {
2575c0c51a9SNicolas Vasilache       Value *extracted = rewriter.create<LLVM::ExtractValueOp>(
2585c0c51a9SNicolas Vasilache           loc, llvmResultType, adaptor.vector(), positionArrayAttr);
2595c0c51a9SNicolas Vasilache       rewriter.replaceOp(op, extracted);
2605c0c51a9SNicolas Vasilache       return matchSuccess();
2615c0c51a9SNicolas Vasilache     }
2625c0c51a9SNicolas Vasilache 
2635c0c51a9SNicolas Vasilache     // Potential extraction of 1-D vector from struct.
2645c0c51a9SNicolas Vasilache     auto *context = op->getContext();
2655c0c51a9SNicolas Vasilache     Value *extracted = adaptor.vector();
2665c0c51a9SNicolas Vasilache     auto positionAttrs = positionArrayAttr.getValue();
2675c0c51a9SNicolas Vasilache     auto i32Type = rewriter.getIntegerType(32);
2685c0c51a9SNicolas Vasilache     if (positionAttrs.size() > 1) {
2695c0c51a9SNicolas Vasilache       auto nDVectorType = vectorType;
2705c0c51a9SNicolas Vasilache       auto oneDVectorType = VectorType::get(nDVectorType.getShape().take_back(),
2715c0c51a9SNicolas Vasilache                                             nDVectorType.getElementType());
2725c0c51a9SNicolas Vasilache       auto nMinusOnePositionAttrs =
2735c0c51a9SNicolas Vasilache           ArrayAttr::get(positionAttrs.drop_back(), context);
2745c0c51a9SNicolas Vasilache       extracted = rewriter.create<LLVM::ExtractValueOp>(
2755c0c51a9SNicolas Vasilache           loc, lowering.convertType(oneDVectorType), extracted,
2765c0c51a9SNicolas Vasilache           nMinusOnePositionAttrs);
2775c0c51a9SNicolas Vasilache     }
2785c0c51a9SNicolas Vasilache 
2795c0c51a9SNicolas Vasilache     // Remaining extraction of element from 1-D LLVM vector
2805c0c51a9SNicolas Vasilache     auto position = positionAttrs.back().cast<IntegerAttr>();
2815c0c51a9SNicolas Vasilache     auto constant = rewriter.create<LLVM::ConstantOp>(
2825c0c51a9SNicolas Vasilache         loc, lowering.convertType(i32Type), position);
2835c0c51a9SNicolas Vasilache     extracted =
2845c0c51a9SNicolas Vasilache         rewriter.create<LLVM::ExtractElementOp>(loc, extracted, constant);
2855c0c51a9SNicolas Vasilache     rewriter.replaceOp(op, extracted);
2865c0c51a9SNicolas Vasilache 
2875c0c51a9SNicolas Vasilache     return matchSuccess();
2885c0c51a9SNicolas Vasilache   }
2895c0c51a9SNicolas Vasilache };
2905c0c51a9SNicolas Vasilache 
2915c0c51a9SNicolas Vasilache class VectorOuterProductOpConversion : public LLVMOpLowering {
2925c0c51a9SNicolas Vasilache public:
2935c0c51a9SNicolas Vasilache   explicit VectorOuterProductOpConversion(MLIRContext *context,
2945c0c51a9SNicolas Vasilache                                           LLVMTypeConverter &typeConverter)
2955c0c51a9SNicolas Vasilache       : LLVMOpLowering(vector::OuterProductOp::getOperationName(), context,
2965c0c51a9SNicolas Vasilache                        typeConverter) {}
2975c0c51a9SNicolas Vasilache 
2985c0c51a9SNicolas Vasilache   PatternMatchResult
2995c0c51a9SNicolas Vasilache   matchAndRewrite(Operation *op, ArrayRef<Value *> operands,
3005c0c51a9SNicolas Vasilache                   ConversionPatternRewriter &rewriter) const override {
3015c0c51a9SNicolas Vasilache     auto loc = op->getLoc();
3025c0c51a9SNicolas Vasilache     auto adaptor = vector::OuterProductOpOperandAdaptor(operands);
3035c0c51a9SNicolas Vasilache     auto *ctx = op->getContext();
3045c0c51a9SNicolas Vasilache     auto vLHS = adaptor.lhs()->getType().cast<LLVM::LLVMType>();
3055c0c51a9SNicolas Vasilache     auto vRHS = adaptor.rhs()->getType().cast<LLVM::LLVMType>();
3065c0c51a9SNicolas Vasilache     auto rankLHS = vLHS.getUnderlyingType()->getVectorNumElements();
3075c0c51a9SNicolas Vasilache     auto rankRHS = vRHS.getUnderlyingType()->getVectorNumElements();
3085c0c51a9SNicolas Vasilache     auto llvmArrayOfVectType = lowering.convertType(
3095c0c51a9SNicolas Vasilache         cast<vector::OuterProductOp>(op).getResult()->getType());
3105c0c51a9SNicolas Vasilache     Value *desc = rewriter.create<LLVM::UndefOp>(loc, llvmArrayOfVectType);
3115c0c51a9SNicolas Vasilache     Value *a = adaptor.lhs(), *b = adaptor.rhs();
3125c0c51a9SNicolas Vasilache     Value *acc = adaptor.acc().empty() ? nullptr : adaptor.acc().front();
3135c0c51a9SNicolas Vasilache     SmallVector<Value *, 8> lhs, accs;
3145c0c51a9SNicolas Vasilache     lhs.reserve(rankLHS);
3155c0c51a9SNicolas Vasilache     accs.reserve(rankLHS);
3165c0c51a9SNicolas Vasilache     for (unsigned d = 0, e = rankLHS; d < e; ++d) {
3175c0c51a9SNicolas Vasilache       // shufflevector explicitly requires i32.
3185c0c51a9SNicolas Vasilache       auto attr = rewriter.getI32IntegerAttr(d);
3195c0c51a9SNicolas Vasilache       SmallVector<Attribute, 4> bcastAttr(rankRHS, attr);
3205c0c51a9SNicolas Vasilache       auto bcastArrayAttr = ArrayAttr::get(bcastAttr, ctx);
3215c0c51a9SNicolas Vasilache       Value *aD = nullptr, *accD = nullptr;
3225c0c51a9SNicolas Vasilache       // 1. Broadcast the element a[d] into vector aD.
3235c0c51a9SNicolas Vasilache       aD = rewriter.create<LLVM::ShuffleVectorOp>(loc, a, a, bcastArrayAttr);
3245c0c51a9SNicolas Vasilache       // 2. If acc is present, extract 1-d vector acc[d] into accD.
3255c0c51a9SNicolas Vasilache       if (acc)
3265c0c51a9SNicolas Vasilache         accD = rewriter.create<LLVM::ExtractValueOp>(
3275c0c51a9SNicolas Vasilache             loc, vRHS, acc, rewriter.getI64ArrayAttr(d));
3285c0c51a9SNicolas Vasilache       // 3. Compute aD outer b (plus accD, if relevant).
3295c0c51a9SNicolas Vasilache       Value *aOuterbD =
3305c0c51a9SNicolas Vasilache           accD ? rewriter.create<LLVM::FMulAddOp>(loc, vRHS, aD, b, accD)
3315c0c51a9SNicolas Vasilache                      .getResult()
3325c0c51a9SNicolas Vasilache                : rewriter.create<LLVM::FMulOp>(loc, aD, b).getResult();
3335c0c51a9SNicolas Vasilache       // 4. Insert as value `d` in the descriptor.
3345c0c51a9SNicolas Vasilache       desc = rewriter.create<LLVM::InsertValueOp>(loc, llvmArrayOfVectType,
3355c0c51a9SNicolas Vasilache                                                   desc, aOuterbD,
3365c0c51a9SNicolas Vasilache                                                   rewriter.getI64ArrayAttr(d));
3375c0c51a9SNicolas Vasilache     }
3385c0c51a9SNicolas Vasilache     rewriter.replaceOp(op, desc);
3395c0c51a9SNicolas Vasilache     return matchSuccess();
3405c0c51a9SNicolas Vasilache   }
3415c0c51a9SNicolas Vasilache };
3425c0c51a9SNicolas Vasilache 
3435c0c51a9SNicolas Vasilache class VectorTypeCastOpConversion : public LLVMOpLowering {
3445c0c51a9SNicolas Vasilache public:
3455c0c51a9SNicolas Vasilache   explicit VectorTypeCastOpConversion(MLIRContext *context,
3465c0c51a9SNicolas Vasilache                                       LLVMTypeConverter &typeConverter)
3475c0c51a9SNicolas Vasilache       : LLVMOpLowering(vector::TypeCastOp::getOperationName(), context,
3485c0c51a9SNicolas Vasilache                        typeConverter) {}
3495c0c51a9SNicolas Vasilache 
3505c0c51a9SNicolas Vasilache   PatternMatchResult
3515c0c51a9SNicolas Vasilache   matchAndRewrite(Operation *op, ArrayRef<Value *> operands,
3525c0c51a9SNicolas Vasilache                   ConversionPatternRewriter &rewriter) const override {
3535c0c51a9SNicolas Vasilache     auto loc = op->getLoc();
3545c0c51a9SNicolas Vasilache     vector::TypeCastOp castOp = cast<vector::TypeCastOp>(op);
3555c0c51a9SNicolas Vasilache     MemRefType sourceMemRefType =
3565c0c51a9SNicolas Vasilache         castOp.getOperand()->getType().cast<MemRefType>();
3575c0c51a9SNicolas Vasilache     MemRefType targetMemRefType =
3585c0c51a9SNicolas Vasilache         castOp.getResult()->getType().cast<MemRefType>();
3595c0c51a9SNicolas Vasilache 
3605c0c51a9SNicolas Vasilache     // Only static shape casts supported atm.
3615c0c51a9SNicolas Vasilache     if (!sourceMemRefType.hasStaticShape() ||
3625c0c51a9SNicolas Vasilache         !targetMemRefType.hasStaticShape())
3635c0c51a9SNicolas Vasilache       return matchFailure();
3645c0c51a9SNicolas Vasilache 
3655c0c51a9SNicolas Vasilache     auto llvmSourceDescriptorTy =
3665c0c51a9SNicolas Vasilache         operands[0]->getType().dyn_cast<LLVM::LLVMType>();
3675c0c51a9SNicolas Vasilache     if (!llvmSourceDescriptorTy || !llvmSourceDescriptorTy.isStructTy())
3685c0c51a9SNicolas Vasilache       return matchFailure();
3695c0c51a9SNicolas Vasilache     MemRefDescriptor sourceMemRef(operands[0]);
3705c0c51a9SNicolas Vasilache 
3715c0c51a9SNicolas Vasilache     auto llvmTargetDescriptorTy = lowering.convertType(targetMemRefType)
3725c0c51a9SNicolas Vasilache                                       .dyn_cast_or_null<LLVM::LLVMType>();
3735c0c51a9SNicolas Vasilache     if (!llvmTargetDescriptorTy || !llvmTargetDescriptorTy.isStructTy())
3745c0c51a9SNicolas Vasilache       return matchFailure();
3755c0c51a9SNicolas Vasilache 
3765c0c51a9SNicolas Vasilache     int64_t offset;
3775c0c51a9SNicolas Vasilache     SmallVector<int64_t, 4> strides;
3785c0c51a9SNicolas Vasilache     auto successStrides =
3795c0c51a9SNicolas Vasilache         getStridesAndOffset(sourceMemRefType, strides, offset);
3805c0c51a9SNicolas Vasilache     bool isContiguous = (strides.back() == 1);
3815c0c51a9SNicolas Vasilache     if (isContiguous) {
3825c0c51a9SNicolas Vasilache       auto sizes = sourceMemRefType.getShape();
3835c0c51a9SNicolas Vasilache       for (int index = 0, e = strides.size() - 2; index < e; ++index) {
3845c0c51a9SNicolas Vasilache         if (strides[index] != strides[index + 1] * sizes[index + 1]) {
3855c0c51a9SNicolas Vasilache           isContiguous = false;
3865c0c51a9SNicolas Vasilache           break;
3875c0c51a9SNicolas Vasilache         }
3885c0c51a9SNicolas Vasilache       }
3895c0c51a9SNicolas Vasilache     }
3905c0c51a9SNicolas Vasilache     // Only contiguous source tensors supported atm.
3915c0c51a9SNicolas Vasilache     if (failed(successStrides) || !isContiguous)
3925c0c51a9SNicolas Vasilache       return matchFailure();
3935c0c51a9SNicolas Vasilache 
3945c0c51a9SNicolas Vasilache     auto int64Ty = LLVM::LLVMType::getInt64Ty(lowering.getDialect());
3955c0c51a9SNicolas Vasilache 
3965c0c51a9SNicolas Vasilache     // Create descriptor.
3975c0c51a9SNicolas Vasilache     auto desc = MemRefDescriptor::undef(rewriter, loc, llvmTargetDescriptorTy);
3985c0c51a9SNicolas Vasilache     Type llvmTargetElementTy = desc.getElementType();
3995c0c51a9SNicolas Vasilache     // Set allocated ptr.
4005c0c51a9SNicolas Vasilache     Value *allocated = sourceMemRef.allocatedPtr(rewriter, loc);
4015c0c51a9SNicolas Vasilache     allocated =
4025c0c51a9SNicolas Vasilache         rewriter.create<LLVM::BitcastOp>(loc, llvmTargetElementTy, allocated);
4035c0c51a9SNicolas Vasilache     desc.setAllocatedPtr(rewriter, loc, allocated);
4045c0c51a9SNicolas Vasilache     // Set aligned ptr.
4055c0c51a9SNicolas Vasilache     Value *ptr = sourceMemRef.alignedPtr(rewriter, loc);
4065c0c51a9SNicolas Vasilache     ptr = rewriter.create<LLVM::BitcastOp>(loc, llvmTargetElementTy, ptr);
4075c0c51a9SNicolas Vasilache     desc.setAlignedPtr(rewriter, loc, ptr);
4085c0c51a9SNicolas Vasilache     // Fill offset 0.
4095c0c51a9SNicolas Vasilache     auto attr = rewriter.getIntegerAttr(rewriter.getIndexType(), 0);
4105c0c51a9SNicolas Vasilache     auto zero = rewriter.create<LLVM::ConstantOp>(loc, int64Ty, attr);
4115c0c51a9SNicolas Vasilache     desc.setOffset(rewriter, loc, zero);
4125c0c51a9SNicolas Vasilache 
4135c0c51a9SNicolas Vasilache     // Fill size and stride descriptors in memref.
4145c0c51a9SNicolas Vasilache     for (auto indexedSize : llvm::enumerate(targetMemRefType.getShape())) {
4155c0c51a9SNicolas Vasilache       int64_t index = indexedSize.index();
4165c0c51a9SNicolas Vasilache       auto sizeAttr =
4175c0c51a9SNicolas Vasilache           rewriter.getIntegerAttr(rewriter.getIndexType(), indexedSize.value());
4185c0c51a9SNicolas Vasilache       auto size = rewriter.create<LLVM::ConstantOp>(loc, int64Ty, sizeAttr);
4195c0c51a9SNicolas Vasilache       desc.setSize(rewriter, loc, index, size);
4205c0c51a9SNicolas Vasilache       auto strideAttr =
4215c0c51a9SNicolas Vasilache           rewriter.getIntegerAttr(rewriter.getIndexType(), strides[index]);
4225c0c51a9SNicolas Vasilache       auto stride = rewriter.create<LLVM::ConstantOp>(loc, int64Ty, strideAttr);
4235c0c51a9SNicolas Vasilache       desc.setStride(rewriter, loc, index, stride);
4245c0c51a9SNicolas Vasilache     }
4255c0c51a9SNicolas Vasilache 
4265c0c51a9SNicolas Vasilache     rewriter.replaceOp(op, {desc});
4275c0c51a9SNicolas Vasilache     return matchSuccess();
4285c0c51a9SNicolas Vasilache   }
4295c0c51a9SNicolas Vasilache };
4305c0c51a9SNicolas Vasilache 
4315c0c51a9SNicolas Vasilache /// Populate the given list with patterns that convert from Vector to LLVM.
4325c0c51a9SNicolas Vasilache void mlir::populateVectorToLLVMConversionPatterns(
4335c0c51a9SNicolas Vasilache     LLVMTypeConverter &converter, OwningRewritePatternList &patterns) {
434*b36aaeafSAart Bik   patterns.insert<VectorBroadcastOpConversion, VectorExtractElementOpConversion,
4355c0c51a9SNicolas Vasilache                   VectorOuterProductOpConversion, VectorTypeCastOpConversion>(
4365c0c51a9SNicolas Vasilache       converter.getDialect()->getContext(), converter);
4375c0c51a9SNicolas Vasilache }
4385c0c51a9SNicolas Vasilache 
4395c0c51a9SNicolas Vasilache namespace {
4405c0c51a9SNicolas Vasilache struct LowerVectorToLLVMPass : public ModulePass<LowerVectorToLLVMPass> {
4415c0c51a9SNicolas Vasilache   void runOnModule() override;
4425c0c51a9SNicolas Vasilache };
4435c0c51a9SNicolas Vasilache } // namespace
4445c0c51a9SNicolas Vasilache 
4455c0c51a9SNicolas Vasilache void LowerVectorToLLVMPass::runOnModule() {
4465c0c51a9SNicolas Vasilache   // Convert to the LLVM IR dialect using the converter defined above.
4475c0c51a9SNicolas Vasilache   OwningRewritePatternList patterns;
4485c0c51a9SNicolas Vasilache   LLVMTypeConverter converter(&getContext());
4495c0c51a9SNicolas Vasilache   populateVectorToLLVMConversionPatterns(converter, patterns);
4505c0c51a9SNicolas Vasilache   populateStdToLLVMConversionPatterns(converter, patterns);
4515c0c51a9SNicolas Vasilache 
4525c0c51a9SNicolas Vasilache   ConversionTarget target(getContext());
4535c0c51a9SNicolas Vasilache   target.addLegalDialect<LLVM::LLVMDialect>();
4545c0c51a9SNicolas Vasilache   target.addDynamicallyLegalOp<FuncOp>(
4555c0c51a9SNicolas Vasilache       [&](FuncOp op) { return converter.isSignatureLegal(op.getType()); });
4565c0c51a9SNicolas Vasilache   if (failed(
4575c0c51a9SNicolas Vasilache           applyPartialConversion(getModule(), target, patterns, &converter))) {
4585c0c51a9SNicolas Vasilache     signalPassFailure();
4595c0c51a9SNicolas Vasilache   }
4605c0c51a9SNicolas Vasilache }
4615c0c51a9SNicolas Vasilache 
4625c0c51a9SNicolas Vasilache OpPassBase<ModuleOp> *mlir::createLowerVectorToLLVMPass() {
4635c0c51a9SNicolas Vasilache   return new LowerVectorToLLVMPass();
4645c0c51a9SNicolas Vasilache }
4655c0c51a9SNicolas Vasilache 
4665c0c51a9SNicolas Vasilache static PassRegistration<LowerVectorToLLVMPass>
4675c0c51a9SNicolas Vasilache     pass("convert-vector-to-llvm",
4685c0c51a9SNicolas Vasilache          "Lower the operations from the vector dialect into the LLVM dialect");
469