1// RUN: mlir-opt %s \
2// RUN:   --sparsification --sparse-tensor-conversion \
3// RUN:   --convert-vector-to-scf --convert-scf-to-std \
4// RUN:   --func-bufferize --tensor-constant-bufferize --tensor-bufferize \
5// RUN:   --std-bufferize --finalizing-bufferize --lower-affine \
6// RUN:   --convert-vector-to-llvm --convert-memref-to-llvm --convert-std-to-llvm --reconcile-unrealized-casts | \
7// RUN: mlir-cpu-runner \
8// RUN:  -e entry -entry-point-result=void  \
9// RUN:  -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
10// RUN: FileCheck %s
11//
12// Do the same run, but now with SIMDization as well. This should not change the outcome.
13//
14// RUN: mlir-opt %s \
15// RUN:   --sparsification="vectorization-strategy=2 vl=4" --sparse-tensor-conversion \
16// RUN:   --convert-vector-to-scf --convert-scf-to-std \
17// RUN:   --func-bufferize --tensor-constant-bufferize --tensor-bufferize \
18// RUN:   --std-bufferize --finalizing-bufferize --lower-affine \
19// RUN:   --convert-vector-to-llvm --convert-memref-to-llvm --convert-std-to-llvm --reconcile-unrealized-casts | \
20// RUN: mlir-cpu-runner \
21// RUN:  -e entry -entry-point-result=void  \
22// RUN:  -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
23// RUN: FileCheck %s
24
// Sparse storage scheme: dense outer (row) dimension, compressed inner
// (column) dimension — i.e. classic CSR storage for a 2-d tensor.
#CSR = #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>

// Trait for an element-wise kernel that reads and writes the single
// output operand X in place (no separate input operands).
#trait_scale = {
  indexing_maps = [
    affine_map<(i,j) -> (i,j)>   // X (out)
  ],
  iterator_types = ["parallel", "parallel"],
  doc = "X(i,j) = X(i,j) * 2"
}
34
35//
36// Integration test that lowers a kernel annotated as sparse to actual sparse
37// code, initializes a matching sparse storage scheme from a dense tensor,
38// and runs the resulting code with the JIT compiler.
39//
40module {
41  //
42  // A kernel that scales a sparse matrix A by a factor of 2.0.
43  //
44  func @sparse_scale(%argx: tensor<8x8xf32, #CSR>
45                     {linalg.inplaceable = true}) -> tensor<8x8xf32, #CSR> {
46    %c = arith.constant 2.0 : f32
47    %0 = linalg.generic #trait_scale
48      outs(%argx: tensor<8x8xf32, #CSR>) {
49        ^bb(%x: f32):
50          %1 = arith.mulf %x, %c : f32
51          linalg.yield %1 : f32
52    } -> tensor<8x8xf32, #CSR>
53    return %0 : tensor<8x8xf32, #CSR>
54  }
55
56  //
57  // Main driver that converts a dense tensor into a sparse tensor
58  // and then calls the sparse scaling kernel with the sparse tensor
59  // as input argument.
60  //
  func @entry() {
    %c0 = arith.constant 0 : index
    // Padding value for the vector.transfer_read below.
    %f0 = arith.constant 0.0 : f32

    // Initialize a dense 8x8 tensor; it holds 16 nonzero entries, which
    // become the compacted values array of the CSR tensor after conversion.
    %0 = arith.constant dense<[
       [1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0],
       [0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
       [0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0],
       [0.0, 0.0, 0.0, 4.0, 0.0, 0.0, 0.0, 0.0],
       [0.0, 1.0, 0.0, 0.0, 5.0, 0.0, 0.0, 0.0],
       [0.0, 1.0, 1.0, 0.0, 0.0, 6.0, 0.0, 0.0],
       [0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 7.0, 1.0],
       [0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 8.0]
    ]> : tensor<8x8xf32>

    // Convert dense tensor to sparse tensor and call sparse kernel.
    %1 = sparse_tensor.convert %0 : tensor<8x8xf32> to tensor<8x8xf32, #CSR>
    %2 = call @sparse_scale(%1)
      : (tensor<8x8xf32, #CSR>) -> tensor<8x8xf32, #CSR>

    // Print the resulting compacted values for verification: each of the
    // 16 stored entries above, doubled, in row-major order.
    //
    // CHECK: ( 2, 2, 2, 4, 6, 8, 2, 10, 2, 2, 12, 2, 14, 2, 2, 16 )
    //
    %m = sparse_tensor.values %2 : tensor<8x8xf32, #CSR> to memref<?xf32>
    %v = vector.transfer_read %m[%c0], %f0: memref<?xf32>, vector<16xf32>
    vector.print %v : vector<16xf32>

    // Release the resources.
    // NOTE(review): only %1 is released; presumably %2 aliases %1 because
    // the kernel argument is marked linalg.inplaceable — confirm no leak.
    sparse_tensor.release %1 : tensor<8x8xf32, #CSR>

    return
  }
95}
96