"""This file contains benchmarks for sparse tensors. In particular, it
contains benchmarks for both the MLIR sparse tensor dialect and numpy so that
they can be compared against each other.
"""
5fa90c9d5SSaurabh Jhaimport ctypes
6fa90c9d5SSaurabh Jhaimport numpy as np
7fa90c9d5SSaurabh Jhaimport os
8fa90c9d5SSaurabh Jhaimport re
9fa90c9d5SSaurabh Jhaimport time
10fa90c9d5SSaurabh Jha
11fa90c9d5SSaurabh Jhafrom mlir import ir
12fa90c9d5SSaurabh Jhafrom mlir import runtime as rt
13*5da5483fSIngo Müllerfrom mlir.dialects import func
14fa90c9d5SSaurabh Jhafrom mlir.dialects.linalg.opdsl import lang as dsl
15fa90c9d5SSaurabh Jhafrom mlir.execution_engine import ExecutionEngine
16fa90c9d5SSaurabh Jha
17fa90c9d5SSaurabh Jhafrom common import create_sparse_np_tensor
18fa90c9d5SSaurabh Jhafrom common import emit_timer_func
19fa90c9d5SSaurabh Jhafrom common import emit_benchmark_wrapped_main_func
20fa90c9d5SSaurabh Jhafrom common import get_kernel_func_from_module
21fa90c9d5SSaurabh Jhafrom common import setup_passes
22fa90c9d5SSaurabh Jha
23fa90c9d5SSaurabh Jha
@dsl.linalg_structured_op
def matmul_dsl(
    A=dsl.TensorDef(dsl.T, dsl.S.M, dsl.S.K),
    B=dsl.TensorDef(dsl.T, dsl.S.K, dsl.S.N),
    C=dsl.TensorDef(dsl.T, dsl.S.M, dsl.S.N, output=True),
):
    """Helper function for mlir sparse matrix multiplication benchmark."""
    # Iteration dimensions: `m` and `n` index the output C, while `k` appears
    # only on the right-hand side and is therefore the contracted dimension.
    m, n, k = dsl.D.m, dsl.D.n, dsl.D.k
    # Accumulate the product of A (M x K) and B (K x N) into C (M x N).
    C[m, n] += A[m, k] * B[k, n]
32fa90c9d5SSaurabh Jha
33fa90c9d5SSaurabh Jha
def benchmark_sparse_mlir_multiplication():
    """Benchmark for MLIR sparse matrix multiplication.

    Because it's an MLIR benchmark, both a `compiler` function and a `runner`
    function are returned.

    Returns:
        A `(compiler, runner)` tuple. `compiler()` assembles the benchmark
        module (timer function, wrapped `main` and the kernel), runs
        `setup_passes` on it, wraps it in an `ExecutionEngine` and returns
        the engine's `invoke` callable. `runner(engine_invoke)` invokes
        `main` once and returns, as an `int`, the value the compiled code
        left in the timer buffer (presumably nanoseconds, going by the
        buffer's name).
    """
    with ir.Context(), ir.Location.unknown():
        module = ir.Module.create()
        f64 = ir.F64Type.get()
        param1_type = ir.RankedTensorType.get([1000, 1500], f64)
        param2_type = ir.RankedTensorType.get([1500, 2000], f64)
        result_type = ir.RankedTensorType.get([1000, 2000], f64)
        with ir.InsertionPoint(module.body):

            @func.FuncOp.from_py_func(param1_type, param2_type, result_type)
            def sparse_kernel(x, y, z):
                return matmul_dsl(x, y, outs=[z])

    def _as_memref_arg(array):
        """Wrap a numpy array as a pointer-to-pointer memref descriptor."""
        return ctypes.pointer(
            ctypes.pointer(rt.get_ranked_memref_descriptor(array))
        )

    def compiler():
        with ir.Context(), ir.Location.unknown():
            kernel_func = get_kernel_func_from_module(module)
            timer_func = emit_timer_func()
            wrapped_func = emit_benchmark_wrapped_main_func(
                kernel_func,
                timer_func
            )
            # The final module is rebuilt from the textual form of its three
            # functions.
            main_module_with_benchmark = ir.Module.parse(
                str(timer_func) + str(wrapped_func) + str(kernel_func)
            )
            setup_passes(main_module_with_benchmark)

            # Both runner-utils shared libraries are located through
            # environment variables and must exist before the engine is built.
            c_runner_utils = os.getenv("MLIR_C_RUNNER_UTILS", "")
            assert os.path.exists(c_runner_utils), (
                f"{c_runner_utils} does not exist."
                " Please pass a valid value for"
                " MLIR_C_RUNNER_UTILS environment variable."
            )
            runner_utils = os.getenv("MLIR_RUNNER_UTILS", "")
            assert os.path.exists(runner_utils), (
                f"{runner_utils} does not exist."
                " Please pass a valid value for MLIR_RUNNER_UTILS"
                " environment variable."
            )

            # NOTE(review): the positional `3` is presumably the optimization
            # level accepted by ExecutionEngine -- confirm against the
            # bindings in use.
            engine = ExecutionEngine(
                main_module_with_benchmark,
                3,
                shared_libs=[c_runner_utils, runner_utils]
            )
            return engine.invoke

    def runner(engine_invoke):
        # The compiled code only receives raw pointers into the numpy
        # buffers, so hold an explicit reference to every buffer until
        # `main` has returned instead of relying on descriptor internals.
        live_buffers = []
        compiled_program_args = []
        # NOTE(review): the leading result-typed entry is presumably the slot
        # for `main`'s return value -- confirm against
        # emit_benchmark_wrapped_main_func.
        for argument_type in (
            result_type, param1_type, param2_type, result_type
        ):
            # The shape is recovered from the printed type: strip `tensor`,
            # `<` and `>`, split on `x`, and drop the trailing element type.
            dimensions_str = re.sub("<|>|tensor", "", str(argument_type))
            dimensions = [int(dim) for dim in dimensions_str.split("x")[:-1]]
            if argument_type == result_type:
                argument = np.zeros(dimensions, np.float64)
            else:
                # NOTE(review): 1000 is presumably the number of non-zero
                # elements -- confirm against create_sparse_np_tensor.
                argument = create_sparse_np_tensor(dimensions, 1000)
            live_buffers.append(argument)
            compiled_program_args.append(_as_memref_arg(argument))

        # Single-element buffer that receives the measurement from `main`.
        np_timers_ns = np.array([0], dtype=np.int64)
        compiled_program_args.append(_as_memref_arg(np_timers_ns))
        engine_invoke("main", *compiled_program_args)
        return int(np_timers_ns[0])

    return compiler, runner
107fa90c9d5SSaurabh Jha
108fa90c9d5SSaurabh Jha
def benchmark_np_matrix_multiplication():
    """Benchmark for numpy matrix multiplication.

    Because it's a python benchmark, there is no `compiler` function to
    return; `None` is returned in its place alongside the `runner` function.

    Returns:
        A `(None, runner)` tuple. `runner()` multiplies a freshly generated
        1000x1500 matrix by a 1500x2000 matrix and returns the elapsed time
        of the `np.matmul` call in nanoseconds as an `int`.
    """

    def runner():
        # Operands are generated before the clock starts, so their creation
        # is not part of the measurement.
        argument1 = np.random.uniform(low=0.0, high=100.0, size=(1000, 1500))
        argument2 = np.random.uniform(low=0.0, high=100.0, size=(1500, 2000))
        # Use the monotonic performance counter rather than the wall clock:
        # wall-clock time can jump when the system clock is adjusted, which
        # would corrupt the measured interval.
        start_time = time.perf_counter_ns()
        np.matmul(argument1, argument2)
        return time.perf_counter_ns() - start_time

    return None, runner
122