1 //===- LegalizeForLLVMExport.cpp - Prepare AMX for LLVM translation ----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "mlir/Dialect/AMX/Transforms.h"
10
11 #include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
12 #include "mlir/Conversion/LLVMCommon/Pattern.h"
13 #include "mlir/Dialect/AMX/AMXDialect.h"
14 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
15 #include "mlir/IR/BuiltinOps.h"
16 #include "mlir/IR/PatternMatch.h"
17
18 using namespace mlir;
19 using namespace mlir::amx;
20
21 namespace {
22
23 /// Maps the 2-dim vector shape to the two 16-bit tile sizes. The first
24 /// dimension directly translates into the number of rows of the tiles.
25 /// The second dimensions needs to be scaled by the number of bytes.
getTileSizes(ConversionPatternRewriter & rewriter,LLVMTypeConverter & typeConverter,VectorType vType,Location loc)26 std::pair<Value, Value> getTileSizes(ConversionPatternRewriter &rewriter,
27 LLVMTypeConverter &typeConverter,
28 VectorType vType, Location loc) {
29 Type llvmInt16Type = IntegerType::get(&typeConverter.getContext(), 16);
30 unsigned width = vType.getElementType().getIntOrFloatBitWidth();
31 assert(llvm::isPowerOf2_64(width) && width >= 8);
32 unsigned bytes = width >> 3;
33 auto mattr = rewriter.getI16IntegerAttr(vType.getDimSize(0));
34 auto nattr = rewriter.getI16IntegerAttr(vType.getDimSize(1) * bytes);
35 return std::make_pair(
36 rewriter.create<LLVM::ConstantOp>(loc, llvmInt16Type, mattr),
37 rewriter.create<LLVM::ConstantOp>(loc, llvmInt16Type, nattr));
38 }
39
40 /// Verifies if the stride matches proper tile access.
verifyStride(MemRefType mType)41 LogicalResult verifyStride(MemRefType mType) {
42 if (mType.getRank() < 2)
43 return failure();
44 int64_t last = mType.getRank() - 1;
45 int64_t offset;
46 SmallVector<int64_t, 4> strides;
47 if (failed(getStridesAndOffset(mType, strides, offset)) || strides[last] != 1)
48 return failure();
49 return success();
50 }
51
52 /// Maps the 2-dim memref shape to the 64-bit stride. Note that the buffer
53 /// shape may "envelop" the actual tile shape, and may be dynamically sized.
getStride(ConversionPatternRewriter & rewriter,LLVMTypeConverter & typeConverter,MemRefType mType,Value base,Location loc)54 Value getStride(ConversionPatternRewriter &rewriter,
55 LLVMTypeConverter &typeConverter, MemRefType mType, Value base,
56 Location loc) {
57 assert(mType.getRank() >= 2);
58 int64_t last = mType.getRank() - 1;
59 Type llvmInt64Type = IntegerType::get(&typeConverter.getContext(), 64);
60 unsigned width = mType.getElementType().getIntOrFloatBitWidth();
61 assert(llvm::isPowerOf2_64(width) && width >= 8);
62 unsigned bytes = width >> 3;
63 if (mType.isDynamicDim(last)) {
64 // Dynamic size needs code to compute the stride at runtime.
65 MemRefDescriptor memrefDescriptor(base);
66 auto attr = rewriter.getI64IntegerAttr(bytes);
67 Value scale = rewriter.create<LLVM::ConstantOp>(loc, llvmInt64Type, attr);
68 return rewriter.create<LLVM::MulOp>(
69 loc, llvmInt64Type, scale, memrefDescriptor.size(rewriter, loc, last));
70 }
71 // Use direct constant for static size.
72 auto attr = rewriter.getI64IntegerAttr(mType.getDimSize(last) * bytes);
73 return rewriter.create<LLVM::ConstantOp>(loc, llvmInt64Type, attr);
74 }
75
76 /// Cast any pointer to the !llvm.ptr<i8> pointer type.
castPtr(ConversionPatternRewriter & rewriter,Location loc,Value ptr)77 Value castPtr(ConversionPatternRewriter &rewriter, Location loc, Value ptr) {
78 auto i8Ptr =
79 LLVM::LLVMPointerType::get(IntegerType::get(ptr.getContext(), 8));
80 return rewriter.create<LLVM::BitcastOp>(loc, i8Ptr, ptr);
81 }
82
83 struct TileZeroConversion : public ConvertOpToLLVMPattern<TileZeroOp> {
84 using ConvertOpToLLVMPattern<TileZeroOp>::ConvertOpToLLVMPattern;
85 LogicalResult
matchAndRewrite__anona88611220111::TileZeroConversion86 matchAndRewrite(TileZeroOp op, OpAdaptor adaptor,
87 ConversionPatternRewriter &rewriter) const override {
88 VectorType vType = op.getVectorType();
89 // Determine m x n tile sizes.
90 std::pair<Value, Value> tsz =
91 getTileSizes(rewriter, *getTypeConverter(), vType, op.getLoc());
92 // Replace operation with intrinsic.
93 Type resType = typeConverter->convertType(vType);
94 rewriter.replaceOpWithNewOp<amx::x86_amx_tilezero>(op, resType, tsz.first,
95 tsz.second);
96 return success();
97 }
98 };
99
100 struct TileLoadConversion : public ConvertOpToLLVMPattern<TileLoadOp> {
101 using ConvertOpToLLVMPattern<TileLoadOp>::ConvertOpToLLVMPattern;
102
103 LogicalResult
matchAndRewrite__anona88611220111::TileLoadConversion104 matchAndRewrite(TileLoadOp op, OpAdaptor adaptor,
105 ConversionPatternRewriter &rewriter) const override {
106 MemRefType mType = op.getMemRefType();
107 VectorType vType = op.getVectorType();
108 // Determine m x n tile sizes.
109 std::pair<Value, Value> tsz =
110 getTileSizes(rewriter, *getTypeConverter(), vType, op.getLoc());
111 // Determine stride.
112 if (failed(verifyStride(mType)))
113 return failure();
114 Value stride = getStride(rewriter, *getTypeConverter(), mType,
115 adaptor.getBase(), op.getLoc());
116 // Replace operation with intrinsic.
117 Value ptr = getStridedElementPtr(op.getLoc(), mType, adaptor.getBase(),
118 adaptor.getIndices(), rewriter);
119 ptr = castPtr(rewriter, op.getLoc(), ptr);
120 Type resType = typeConverter->convertType(vType);
121 rewriter.replaceOpWithNewOp<amx::x86_amx_tileloadd64>(
122 op, resType, tsz.first, tsz.second, ptr, stride);
123 return success();
124 }
125 };
126
127 struct TileStoreConversion : public ConvertOpToLLVMPattern<TileStoreOp> {
128 using ConvertOpToLLVMPattern<TileStoreOp>::ConvertOpToLLVMPattern;
129
130 LogicalResult
matchAndRewrite__anona88611220111::TileStoreConversion131 matchAndRewrite(TileStoreOp op, OpAdaptor adaptor,
132 ConversionPatternRewriter &rewriter) const override {
133 MemRefType mType = op.getMemRefType();
134 VectorType vType = op.getVectorType();
135 // Determine m x n tile sizes.
136 std::pair<Value, Value> tsz =
137 getTileSizes(rewriter, *getTypeConverter(), vType, op.getLoc());
138 // Determine stride.
139 if (failed(verifyStride(mType)))
140 return failure();
141 Value stride = getStride(rewriter, *getTypeConverter(), mType,
142 adaptor.getBase(), op.getLoc());
143 // Replace operation with intrinsic.
144 Value ptr = getStridedElementPtr(op.getLoc(), mType, adaptor.getBase(),
145 adaptor.getIndices(), rewriter);
146 ptr = castPtr(rewriter, op.getLoc(), ptr);
147 rewriter.replaceOpWithNewOp<amx::x86_amx_tilestored64>(
148 op, tsz.first, tsz.second, ptr, stride, adaptor.getVal());
149 return success();
150 }
151 };
152
153 struct TileMulFConversion : public ConvertOpToLLVMPattern<TileMulFOp> {
154 using ConvertOpToLLVMPattern<TileMulFOp>::ConvertOpToLLVMPattern;
155 LogicalResult
matchAndRewrite__anona88611220111::TileMulFConversion156 matchAndRewrite(TileMulFOp op, OpAdaptor adaptor,
157 ConversionPatternRewriter &rewriter) const override {
158 VectorType aType = op.getLhsVectorType();
159 VectorType bType = op.getRhsVectorType();
160 VectorType cType = op.getVectorType();
161 // Determine m x n x k tile sizes.
162 std::pair<Value, Value> tsza =
163 getTileSizes(rewriter, *getTypeConverter(), aType, op.getLoc());
164 std::pair<Value, Value> tszb =
165 getTileSizes(rewriter, *getTypeConverter(), bType, op.getLoc());
166 // Replace operation with intrinsic.
167 Type resType = typeConverter->convertType(cType);
168 rewriter.replaceOpWithNewOp<amx::x86_amx_tdpbf16ps>(
169 op, resType, tsza.first, tszb.second, tsza.second, adaptor.getAcc(),
170 adaptor.getLhs(), adaptor.getRhs());
171 return success();
172 }
173 };
174
175 struct TileMulIConversion : public ConvertOpToLLVMPattern<TileMulIOp> {
176 using ConvertOpToLLVMPattern<TileMulIOp>::ConvertOpToLLVMPattern;
177 LogicalResult
matchAndRewrite__anona88611220111::TileMulIConversion178 matchAndRewrite(TileMulIOp op, OpAdaptor adaptor,
179 ConversionPatternRewriter &rewriter) const override {
180 VectorType aType = op.getLhsVectorType();
181 VectorType bType = op.getRhsVectorType();
182 VectorType cType = op.getVectorType();
183 // Determine m x n x k tile sizes.
184 std::pair<Value, Value> tsza =
185 getTileSizes(rewriter, *getTypeConverter(), aType, op.getLoc());
186 std::pair<Value, Value> tszb =
187 getTileSizes(rewriter, *getTypeConverter(), bType, op.getLoc());
188 // Replace operation with intrinsic.
189 Type resType = typeConverter->convertType(cType);
190 bool zexta = op.getIsZextLhs();
191 bool zextb = op.getIsZextRhs();
192 if (zexta && zextb)
193 rewriter.replaceOpWithNewOp<amx::x86_amx_tdpbuud>(
194 op, resType, tsza.first, tszb.second, tsza.second, adaptor.getAcc(),
195 adaptor.getLhs(), adaptor.getRhs());
196 else if (zexta && !zextb)
197 rewriter.replaceOpWithNewOp<amx::x86_amx_tdpbusd>(
198 op, resType, tsza.first, tszb.second, tsza.second, adaptor.getAcc(),
199 adaptor.getLhs(), adaptor.getRhs());
200 else if (!zexta && zextb)
201 rewriter.replaceOpWithNewOp<amx::x86_amx_tdpbsud>(
202 op, resType, tsza.first, tszb.second, tsza.second, adaptor.getAcc(),
203 adaptor.getLhs(), adaptor.getRhs());
204 else
205 rewriter.replaceOpWithNewOp<amx::x86_amx_tdpbssd>(
206 op, resType, tsza.first, tszb.second, tsza.second, adaptor.getAcc(),
207 adaptor.getLhs(), adaptor.getRhs());
208 return success();
209 }
210 };
211
212 } // namespace
213
populateAMXLegalizeForLLVMExportPatterns(LLVMTypeConverter & converter,RewritePatternSet & patterns)214 void mlir::populateAMXLegalizeForLLVMExportPatterns(
215 LLVMTypeConverter &converter, RewritePatternSet &patterns) {
216 patterns.add<TileZeroConversion, TileLoadConversion, TileStoreConversion,
217 TileMulFConversion, TileMulIConversion>(converter);
218 }
219
configureAMXLegalizeForExportTarget(LLVMConversionTarget & target)220 void mlir::configureAMXLegalizeForExportTarget(LLVMConversionTarget &target) {
221 target.addLegalOp<x86_amx_tilezero, x86_amx_tileloadd64, x86_amx_tilestored64,
222 x86_amx_tdpbf16ps, x86_amx_tdpbssd, x86_amx_tdpbsud,
223 x86_amx_tdpbusd, x86_amx_tdpbuud>();
224 target.addIllegalOp<TileZeroOp, TileLoadOp, TileStoreOp, TileMulIOp,
225 TileMulFOp>();
226 }
227