// RUN: mlir-opt %s --sparse-compiler | \
// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
// RUN: FileCheck %s
//
// Repeat the same pipeline with vectorization switched on. The printed
// result is verified against the very same expectations, so it must be
// identical to the scalar run.
//
// RUN: mlir-opt %s --sparse-compiler="vectorization-strategy=2 vl=2" | \
// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
// RUN: FileCheck %s

// Two-dimensional sparse storage in which both levels are compressed.
#DCSR = #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>

// Quantized sparse matmul example. The sparse operand uses a zero offset of
// 0 and the dense operand a zero offset of 2, so the kernel evaluates
//   x(i,j) += (ext(a(i,k)) - 2) * ext(b(k,j))
// which, thanks to the zero offset on the sparse side, lets the sparse
// compiler generate very efficient code.
module {

  // Multiplies the dense 5x3 i8 matrix %input1 by the sparse 3x6 i8 matrix
  // %input2, accumulating in i32 on top of %output, and returns the
  // resulting 5x6 tensor.
  func.func @quantized_matmul(%input1: tensor<5x3xi8>,
                              %input2: tensor<3x6xi8, #DCSR>,
                              %output: tensor<5x6xi32>) -> tensor<5x6xi32> {
    // Zero offsets: 2 for the dense operand, 0 for the sparse operand.
    %rhs_zp = arith.constant 0 : i32
    %lhs_zp = arith.constant 2 : i32
    %product = linalg.quantized_matmul
      ins(%input1, %input2, %lhs_zp, %rhs_zp : tensor<5x3xi8>, tensor<3x6xi8, #DCSR>, i32, i32)
      outs(%output : tensor<5x6xi32>) -> tensor<5x6xi32>
    return %product : tensor<5x6xi32>
  }

  // Test driver: builds the operands, runs the kernel, and prints the
  // result for verification.
  func.func @entry() {
    %idx0 = arith.constant 0 : index
    // Padding value handed to the vector read below.
    %pad = arith.constant 0 : i32

    // Dense left-hand operand, including the extreme i8 values.
    %lhs = arith.constant dense<[
      [  -128,   3,  127 ],
      [     0,   0,    0 ],
      [    11,   1,    0 ],
      [     0,   5,   -1 ],
      [    13,   0,    3 ]
    ]> : tensor<5x3xi8>

    // Right-hand operand, written densely and then converted to DCSR.
    %rhs_dense = arith.constant dense<[
      [ 127,   0, -128,    0,   0,   3 ],
      [   0,   0,    0,    0,   0,   0 ],
      [   0,   0,    0,  100,  10,   0 ]
    ]> : tensor<3x6xi8>

    %rhs = sparse_tensor.convert %rhs_dense : tensor<3x6xi8> to tensor<3x6xi8, #DCSR>

    // Run the kernel on a zero-initialized accumulator.
    %init = arith.constant dense<0> : tensor<5x6xi32>
    %result = call @quantized_matmul(%lhs, %rhs, %init)
       : (tensor<5x3xi8>,
          tensor<3x6xi8, #DCSR>,
          tensor<5x6xi32>) -> tensor<5x6xi32>

    //
    // Verify the output.
    //
    // CHECK:    ( ( -16510, 0, 16640, 12500, 1250, -390 ),
    // CHECK-SAME: ( -254, 0, 256, -200, -20, -6 ),
    // CHECK-SAME: ( 1143, 0, -1152, -200, -20, 27 ),
    // CHECK-SAME: ( -254, 0, 256, -300, -30, -6 ),
    // CHECK-SAME: ( 1397, 0, -1408, 100, 10, 33 ) )
    //
    %vec = vector.transfer_read %result[%idx0, %idx0], %pad
      : tensor<5x6xi32>, vector<5x6xi32>
    vector.print %vec : vector<5x6xi32>

    // Free the sparse operand created by the conversion above.
    bufferization.dealloc_tensor %rhs : tensor<3x6xi8, #DCSR>

    return
  }
}