16ad7b97eSAart Bik //===- LegalizeForLLVMExport.cpp - Prepare AMX for LLVM translation ----===//
26ad7b97eSAart Bik //
36ad7b97eSAart Bik // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
46ad7b97eSAart Bik // See https://llvm.org/LICENSE.txt for license information.
56ad7b97eSAart Bik // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
66ad7b97eSAart Bik //
76ad7b97eSAart Bik //===----------------------------------------------------------------------===//
86ad7b97eSAart Bik 
96ad7b97eSAart Bik #include "mlir/Dialect/AMX/Transforms.h"
106ad7b97eSAart Bik 
1175e5f0aaSAlex Zinenko #include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
1275e5f0aaSAlex Zinenko #include "mlir/Conversion/LLVMCommon/Pattern.h"
136ad7b97eSAart Bik #include "mlir/Dialect/AMX/AMXDialect.h"
146ad7b97eSAart Bik #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
156ad7b97eSAart Bik #include "mlir/IR/BuiltinOps.h"
166ad7b97eSAart Bik #include "mlir/IR/PatternMatch.h"
176ad7b97eSAart Bik 
186ad7b97eSAart Bik using namespace mlir;
196ad7b97eSAart Bik using namespace mlir::amx;
206ad7b97eSAart Bik 
216ad7b97eSAart Bik namespace {
226ad7b97eSAart Bik 
236ad7b97eSAart Bik /// Maps the 2-dim vector shape to the two 16-bit tile sizes. The first
246ad7b97eSAart Bik /// dimension directly translates into the number of rows of the tiles.
256ad7b97eSAart Bik /// The second dimensions needs to be scaled by the number of bytes.
getTileSizes(ConversionPatternRewriter & rewriter,LLVMTypeConverter & typeConverter,VectorType vType,Location loc)266ad7b97eSAart Bik std::pair<Value, Value> getTileSizes(ConversionPatternRewriter &rewriter,
276ad7b97eSAart Bik                                      LLVMTypeConverter &typeConverter,
286ad7b97eSAart Bik                                      VectorType vType, Location loc) {
296ad7b97eSAart Bik   Type llvmInt16Type = IntegerType::get(&typeConverter.getContext(), 16);
306ad7b97eSAart Bik   unsigned width = vType.getElementType().getIntOrFloatBitWidth();
316ad7b97eSAart Bik   assert(llvm::isPowerOf2_64(width) && width >= 8);
326ad7b97eSAart Bik   unsigned bytes = width >> 3;
336ad7b97eSAart Bik   auto mattr = rewriter.getI16IntegerAttr(vType.getDimSize(0));
346ad7b97eSAart Bik   auto nattr = rewriter.getI16IntegerAttr(vType.getDimSize(1) * bytes);
356ad7b97eSAart Bik   return std::make_pair(
366ad7b97eSAart Bik       rewriter.create<LLVM::ConstantOp>(loc, llvmInt16Type, mattr),
376ad7b97eSAart Bik       rewriter.create<LLVM::ConstantOp>(loc, llvmInt16Type, nattr));
386ad7b97eSAart Bik }
396ad7b97eSAart Bik 
406ad7b97eSAart Bik /// Verifies if the stride matches proper tile access.
verifyStride(MemRefType mType)416ad7b97eSAart Bik LogicalResult verifyStride(MemRefType mType) {
426ad7b97eSAart Bik   if (mType.getRank() < 2)
436ad7b97eSAart Bik     return failure();
446ad7b97eSAart Bik   int64_t last = mType.getRank() - 1;
456ad7b97eSAart Bik   int64_t offset;
466ad7b97eSAart Bik   SmallVector<int64_t, 4> strides;
476ad7b97eSAart Bik   if (failed(getStridesAndOffset(mType, strides, offset)) || strides[last] != 1)
486ad7b97eSAart Bik     return failure();
496ad7b97eSAart Bik   return success();
506ad7b97eSAart Bik }
516ad7b97eSAart Bik 
526ad7b97eSAart Bik /// Maps the 2-dim memref shape to the 64-bit stride. Note that the buffer
536ad7b97eSAart Bik /// shape may "envelop" the actual tile shape, and may be dynamically sized.
getStride(ConversionPatternRewriter & rewriter,LLVMTypeConverter & typeConverter,MemRefType mType,Value base,Location loc)546ad7b97eSAart Bik Value getStride(ConversionPatternRewriter &rewriter,
556ad7b97eSAart Bik                 LLVMTypeConverter &typeConverter, MemRefType mType, Value base,
566ad7b97eSAart Bik                 Location loc) {
576ad7b97eSAart Bik   assert(mType.getRank() >= 2);
586ad7b97eSAart Bik   int64_t last = mType.getRank() - 1;
596ad7b97eSAart Bik   Type llvmInt64Type = IntegerType::get(&typeConverter.getContext(), 64);
606ad7b97eSAart Bik   unsigned width = mType.getElementType().getIntOrFloatBitWidth();
616ad7b97eSAart Bik   assert(llvm::isPowerOf2_64(width) && width >= 8);
626ad7b97eSAart Bik   unsigned bytes = width >> 3;
636ad7b97eSAart Bik   if (mType.isDynamicDim(last)) {
646ad7b97eSAart Bik     // Dynamic size needs code to compute the stride at runtime.
656ad7b97eSAart Bik     MemRefDescriptor memrefDescriptor(base);
666ad7b97eSAart Bik     auto attr = rewriter.getI64IntegerAttr(bytes);
676ad7b97eSAart Bik     Value scale = rewriter.create<LLVM::ConstantOp>(loc, llvmInt64Type, attr);
686ad7b97eSAart Bik     return rewriter.create<LLVM::MulOp>(
696ad7b97eSAart Bik         loc, llvmInt64Type, scale, memrefDescriptor.size(rewriter, loc, last));
706ad7b97eSAart Bik   }
716ad7b97eSAart Bik   // Use direct constant for static size.
726ad7b97eSAart Bik   auto attr = rewriter.getI64IntegerAttr(mType.getDimSize(last) * bytes);
736ad7b97eSAart Bik   return rewriter.create<LLVM::ConstantOp>(loc, llvmInt64Type, attr);
746ad7b97eSAart Bik }
756ad7b97eSAart Bik 
766ad7b97eSAart Bik /// Cast any pointer to the !llvm.ptr<i8> pointer type.
castPtr(ConversionPatternRewriter & rewriter,Location loc,Value ptr)776ad7b97eSAart Bik Value castPtr(ConversionPatternRewriter &rewriter, Location loc, Value ptr) {
786ad7b97eSAart Bik   auto i8Ptr =
796ad7b97eSAart Bik       LLVM::LLVMPointerType::get(IntegerType::get(ptr.getContext(), 8));
806ad7b97eSAart Bik   return rewriter.create<LLVM::BitcastOp>(loc, i8Ptr, ptr);
816ad7b97eSAart Bik }
826ad7b97eSAart Bik 
836ad7b97eSAart Bik struct TileZeroConversion : public ConvertOpToLLVMPattern<TileZeroOp> {
846ad7b97eSAart Bik   using ConvertOpToLLVMPattern<TileZeroOp>::ConvertOpToLLVMPattern;
856ad7b97eSAart Bik   LogicalResult
matchAndRewrite__anona88611220111::TileZeroConversion86b54c724bSRiver Riddle   matchAndRewrite(TileZeroOp op, OpAdaptor adaptor,
876ad7b97eSAart Bik                   ConversionPatternRewriter &rewriter) const override {
886ad7b97eSAart Bik     VectorType vType = op.getVectorType();
896ad7b97eSAart Bik     // Determine m x n tile sizes.
906ad7b97eSAart Bik     std::pair<Value, Value> tsz =
916ad7b97eSAart Bik         getTileSizes(rewriter, *getTypeConverter(), vType, op.getLoc());
926ad7b97eSAart Bik     // Replace operation with intrinsic.
936ad7b97eSAart Bik     Type resType = typeConverter->convertType(vType);
946ad7b97eSAart Bik     rewriter.replaceOpWithNewOp<amx::x86_amx_tilezero>(op, resType, tsz.first,
956ad7b97eSAart Bik                                                        tsz.second);
966ad7b97eSAart Bik     return success();
976ad7b97eSAart Bik   }
986ad7b97eSAart Bik };
996ad7b97eSAart Bik 
1006ad7b97eSAart Bik struct TileLoadConversion : public ConvertOpToLLVMPattern<TileLoadOp> {
1016ad7b97eSAart Bik   using ConvertOpToLLVMPattern<TileLoadOp>::ConvertOpToLLVMPattern;
1026ad7b97eSAart Bik 
1036ad7b97eSAart Bik   LogicalResult
matchAndRewrite__anona88611220111::TileLoadConversion104b54c724bSRiver Riddle   matchAndRewrite(TileLoadOp op, OpAdaptor adaptor,
1056ad7b97eSAart Bik                   ConversionPatternRewriter &rewriter) const override {
1066ad7b97eSAart Bik     MemRefType mType = op.getMemRefType();
1076ad7b97eSAart Bik     VectorType vType = op.getVectorType();
1086ad7b97eSAart Bik     // Determine m x n tile sizes.
1096ad7b97eSAart Bik     std::pair<Value, Value> tsz =
1106ad7b97eSAart Bik         getTileSizes(rewriter, *getTypeConverter(), vType, op.getLoc());
1116ad7b97eSAart Bik     // Determine stride.
1126ad7b97eSAart Bik     if (failed(verifyStride(mType)))
1136ad7b97eSAart Bik       return failure();
1146ad7b97eSAart Bik     Value stride = getStride(rewriter, *getTypeConverter(), mType,
115*8df54a6aSJacques Pienaar                              adaptor.getBase(), op.getLoc());
1166ad7b97eSAart Bik     // Replace operation with intrinsic.
117*8df54a6aSJacques Pienaar     Value ptr = getStridedElementPtr(op.getLoc(), mType, adaptor.getBase(),
118*8df54a6aSJacques Pienaar                                      adaptor.getIndices(), rewriter);
1196ad7b97eSAart Bik     ptr = castPtr(rewriter, op.getLoc(), ptr);
1206ad7b97eSAart Bik     Type resType = typeConverter->convertType(vType);
1216ad7b97eSAart Bik     rewriter.replaceOpWithNewOp<amx::x86_amx_tileloadd64>(
1226ad7b97eSAart Bik         op, resType, tsz.first, tsz.second, ptr, stride);
1236ad7b97eSAart Bik     return success();
1246ad7b97eSAart Bik   }
1256ad7b97eSAart Bik };
1266ad7b97eSAart Bik 
1276ad7b97eSAart Bik struct TileStoreConversion : public ConvertOpToLLVMPattern<TileStoreOp> {
1286ad7b97eSAart Bik   using ConvertOpToLLVMPattern<TileStoreOp>::ConvertOpToLLVMPattern;
1296ad7b97eSAart Bik 
1306ad7b97eSAart Bik   LogicalResult
matchAndRewrite__anona88611220111::TileStoreConversion131b54c724bSRiver Riddle   matchAndRewrite(TileStoreOp op, OpAdaptor adaptor,
1326ad7b97eSAart Bik                   ConversionPatternRewriter &rewriter) const override {
1336ad7b97eSAart Bik     MemRefType mType = op.getMemRefType();
1346ad7b97eSAart Bik     VectorType vType = op.getVectorType();
1356ad7b97eSAart Bik     // Determine m x n tile sizes.
1366ad7b97eSAart Bik     std::pair<Value, Value> tsz =
1376ad7b97eSAart Bik         getTileSizes(rewriter, *getTypeConverter(), vType, op.getLoc());
1386ad7b97eSAart Bik     // Determine stride.
1396ad7b97eSAart Bik     if (failed(verifyStride(mType)))
1406ad7b97eSAart Bik       return failure();
1416ad7b97eSAart Bik     Value stride = getStride(rewriter, *getTypeConverter(), mType,
142*8df54a6aSJacques Pienaar                              adaptor.getBase(), op.getLoc());
1436ad7b97eSAart Bik     // Replace operation with intrinsic.
144*8df54a6aSJacques Pienaar     Value ptr = getStridedElementPtr(op.getLoc(), mType, adaptor.getBase(),
145*8df54a6aSJacques Pienaar                                      adaptor.getIndices(), rewriter);
1466ad7b97eSAart Bik     ptr = castPtr(rewriter, op.getLoc(), ptr);
1476ad7b97eSAart Bik     rewriter.replaceOpWithNewOp<amx::x86_amx_tilestored64>(
148*8df54a6aSJacques Pienaar         op, tsz.first, tsz.second, ptr, stride, adaptor.getVal());
1496ad7b97eSAart Bik     return success();
1506ad7b97eSAart Bik   }
1516ad7b97eSAart Bik };
1526ad7b97eSAart Bik 
1536ad7b97eSAart Bik struct TileMulFConversion : public ConvertOpToLLVMPattern<TileMulFOp> {
1546ad7b97eSAart Bik   using ConvertOpToLLVMPattern<TileMulFOp>::ConvertOpToLLVMPattern;
1556ad7b97eSAart Bik   LogicalResult
matchAndRewrite__anona88611220111::TileMulFConversion156b54c724bSRiver Riddle   matchAndRewrite(TileMulFOp op, OpAdaptor adaptor,
1576ad7b97eSAart Bik                   ConversionPatternRewriter &rewriter) const override {
1586ad7b97eSAart Bik     VectorType aType = op.getLhsVectorType();
1596ad7b97eSAart Bik     VectorType bType = op.getRhsVectorType();
1606ad7b97eSAart Bik     VectorType cType = op.getVectorType();
1616ad7b97eSAart Bik     // Determine m x n x k tile sizes.
1626ad7b97eSAart Bik     std::pair<Value, Value> tsza =
1636ad7b97eSAart Bik         getTileSizes(rewriter, *getTypeConverter(), aType, op.getLoc());
1646ad7b97eSAart Bik     std::pair<Value, Value> tszb =
1656ad7b97eSAart Bik         getTileSizes(rewriter, *getTypeConverter(), bType, op.getLoc());
1666ad7b97eSAart Bik     // Replace operation with intrinsic.
1676ad7b97eSAart Bik     Type resType = typeConverter->convertType(cType);
1686ad7b97eSAart Bik     rewriter.replaceOpWithNewOp<amx::x86_amx_tdpbf16ps>(
169*8df54a6aSJacques Pienaar         op, resType, tsza.first, tszb.second, tsza.second, adaptor.getAcc(),
170*8df54a6aSJacques Pienaar         adaptor.getLhs(), adaptor.getRhs());
1716ad7b97eSAart Bik     return success();
1726ad7b97eSAart Bik   }
1736ad7b97eSAart Bik };
1746ad7b97eSAart Bik 
1756ad7b97eSAart Bik struct TileMulIConversion : public ConvertOpToLLVMPattern<TileMulIOp> {
1766ad7b97eSAart Bik   using ConvertOpToLLVMPattern<TileMulIOp>::ConvertOpToLLVMPattern;
1776ad7b97eSAart Bik   LogicalResult
matchAndRewrite__anona88611220111::TileMulIConversion178b54c724bSRiver Riddle   matchAndRewrite(TileMulIOp op, OpAdaptor adaptor,
1796ad7b97eSAart Bik                   ConversionPatternRewriter &rewriter) const override {
1806ad7b97eSAart Bik     VectorType aType = op.getLhsVectorType();
1816ad7b97eSAart Bik     VectorType bType = op.getRhsVectorType();
1826ad7b97eSAart Bik     VectorType cType = op.getVectorType();
1836ad7b97eSAart Bik     // Determine m x n x k tile sizes.
1846ad7b97eSAart Bik     std::pair<Value, Value> tsza =
1856ad7b97eSAart Bik         getTileSizes(rewriter, *getTypeConverter(), aType, op.getLoc());
1866ad7b97eSAart Bik     std::pair<Value, Value> tszb =
1876ad7b97eSAart Bik         getTileSizes(rewriter, *getTypeConverter(), bType, op.getLoc());
1886ad7b97eSAart Bik     // Replace operation with intrinsic.
1896ad7b97eSAart Bik     Type resType = typeConverter->convertType(cType);
190*8df54a6aSJacques Pienaar     bool zexta = op.getIsZextLhs();
191*8df54a6aSJacques Pienaar     bool zextb = op.getIsZextRhs();
1926ad7b97eSAart Bik     if (zexta && zextb)
1936ad7b97eSAart Bik       rewriter.replaceOpWithNewOp<amx::x86_amx_tdpbuud>(
194*8df54a6aSJacques Pienaar           op, resType, tsza.first, tszb.second, tsza.second, adaptor.getAcc(),
195*8df54a6aSJacques Pienaar           adaptor.getLhs(), adaptor.getRhs());
1966ad7b97eSAart Bik     else if (zexta && !zextb)
1976ad7b97eSAart Bik       rewriter.replaceOpWithNewOp<amx::x86_amx_tdpbusd>(
198*8df54a6aSJacques Pienaar           op, resType, tsza.first, tszb.second, tsza.second, adaptor.getAcc(),
199*8df54a6aSJacques Pienaar           adaptor.getLhs(), adaptor.getRhs());
2006ad7b97eSAart Bik     else if (!zexta && zextb)
2016ad7b97eSAart Bik       rewriter.replaceOpWithNewOp<amx::x86_amx_tdpbsud>(
202*8df54a6aSJacques Pienaar           op, resType, tsza.first, tszb.second, tsza.second, adaptor.getAcc(),
203*8df54a6aSJacques Pienaar           adaptor.getLhs(), adaptor.getRhs());
2046ad7b97eSAart Bik     else
2056ad7b97eSAart Bik       rewriter.replaceOpWithNewOp<amx::x86_amx_tdpbssd>(
206*8df54a6aSJacques Pienaar           op, resType, tsza.first, tszb.second, tsza.second, adaptor.getAcc(),
207*8df54a6aSJacques Pienaar           adaptor.getLhs(), adaptor.getRhs());
2086ad7b97eSAart Bik     return success();
2096ad7b97eSAart Bik   }
2106ad7b97eSAart Bik };
2116ad7b97eSAart Bik 
2126ad7b97eSAart Bik } // namespace
2136ad7b97eSAart Bik 
populateAMXLegalizeForLLVMExportPatterns(LLVMTypeConverter & converter,RewritePatternSet & patterns)2146ad7b97eSAart Bik void mlir::populateAMXLegalizeForLLVMExportPatterns(
215dc4e913bSChris Lattner     LLVMTypeConverter &converter, RewritePatternSet &patterns) {
216dc4e913bSChris Lattner   patterns.add<TileZeroConversion, TileLoadConversion, TileStoreConversion,
2176ad7b97eSAart Bik                TileMulFConversion, TileMulIConversion>(converter);
2186ad7b97eSAart Bik }
2196ad7b97eSAart Bik 
configureAMXLegalizeForExportTarget(LLVMConversionTarget & target)2206ad7b97eSAart Bik void mlir::configureAMXLegalizeForExportTarget(LLVMConversionTarget &target) {
2216ad7b97eSAart Bik   target.addLegalOp<x86_amx_tilezero, x86_amx_tileloadd64, x86_amx_tilestored64,
2226ad7b97eSAart Bik                     x86_amx_tdpbf16ps, x86_amx_tdpbssd, x86_amx_tdpbsud,
2236ad7b97eSAart Bik                     x86_amx_tdpbusd, x86_amx_tdpbuud>();
2246ad7b97eSAart Bik   target.addIllegalOp<TileZeroOp, TileLoadOp, TileStoreOp, TileMulIOp,
2256ad7b97eSAart Bik                       TileMulFOp>();
2266ad7b97eSAart Bik }
227