// RUN: mlir-opt %s \
// RUN: --linalg-generalize-named-ops --linalg-fuse-elementwise-ops \
// RUN: --sparse-compiler | \
// RUN: mlir-cpu-runner \
// RUN: -e entry -entry-point-result=void \
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
// RUN: FileCheck %s
//
// Second pipeline: the same steps, but with vectorization switched on
// (vectorization-strategy=2, vl=2). The printed result must be identical.
//
// RUN: mlir-opt %s \
// RUN: --linalg-generalize-named-ops --linalg-fuse-elementwise-ops \
// RUN: --sparse-compiler="vectorization-strategy=2 vl=2" | \
// RUN: mlir-cpu-runner \
// RUN: -e entry -entry-point-result=void \
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
// RUN: FileCheck %s

// Both dimensions of the annotated tensor use the "compressed" level type.
#DCSR = #sparse_tensor.encoding<{
  dimLevelType = [ "compressed", "compressed" ]
}>

// Quantized matmul with a sparse right-hand side. The operation computed is
//   x(i,j) += (ext(a(i,k)) - 2) * ext(b(k,j))
// i.e. the dense operand has zero point 2 while the sparse operand has zero
// point 0; with that zero offset on the sparse input the sparse compiler
// generates very efficient code for the kernel.
module {

  // Kernel under test: multiplies the dense 5x3 i8 matrix by the sparse 3x6
  // i8 matrix and accumulates into the 5x6 i32 output tensor.
  func @quantized_matmul(%input1: tensor<5x3xi8>,
                         %input2: tensor<3x6xi8, #DCSR>,
                         %output: tensor<5x6xi32>) -> tensor<5x6xi32> {
    // Zero points, passed in the order (dense operand, sparse operand).
    %zp_dense = arith.constant 2 : i32
    %zp_sparse = arith.constant 0 : i32
    %product = linalg.quantized_matmul
      ins(%input1, %input2, %zp_dense, %zp_sparse
          : tensor<5x3xi8>, tensor<3x6xi8, #DCSR>, i32, i32)
      outs(%output : tensor<5x6xi32>) -> tensor<5x6xi32>
    return %product : tensor<5x6xi32>
  }

  // Test driver: builds the operands, runs the kernel and prints the result
  // so that FileCheck can compare it against the expected matrix.
  func @entry() {
    // Dense left-hand side.
    %lhs = arith.constant dense<[
      [ -128, 3, 127 ],
      [    0, 0,   0 ],
      [   11, 1,   0 ],
      [    0, 5,  -1 ],
      [   13, 0,   3 ]
    ]> : tensor<5x3xi8>

    // Right-hand side, written densely and then converted to #DCSR storage.
    %rhs_dense = arith.constant dense<[
      [ 127, 0, -128,   0,  0, 3 ],
      [   0, 0,    0,   0,  0, 0 ],
      [   0, 0,    0, 100, 10, 0 ]
    ]> : tensor<3x6xi8>
    %rhs = sparse_tensor.convert %rhs_dense
      : tensor<3x6xi8> to tensor<3x6xi8, #DCSR>

    // Run the kernel on a zero-initialized accumulator.
    %init = arith.constant dense<0> : tensor<5x6xi32>
    %result = call @quantized_matmul(%lhs, %rhs, %init)
      : (tensor<5x3xi8>, tensor<3x6xi8, #DCSR>, tensor<5x6xi32>)
        -> tensor<5x6xi32>

    //
    // Expected output: entry (i,j) is sum_k (lhs(i,k) - 2) * rhs(k,j).
    //
    // CHECK: ( ( -16510, 0, 16640, 12500, 1250, -390 ),
    // CHECK-SAME: ( -254, 0, 256, -200, -20, -6 ),
    // CHECK-SAME: ( 1143, 0, -1152, -200, -20, 27 ),
    // CHECK-SAME: ( -254, 0, 256, -300, -30, -6 ),
    // CHECK-SAME: ( 1397, 0, -1408, 100, 10, 33 ) )
    //
    // Read the whole result as one 5x6 vector starting at [0, 0]; %pad is the
    // padding operand of the transfer read.
    %idx0 = arith.constant 0 : index
    %pad = arith.constant 0 : i32
    %buf = bufferization.to_memref %result : memref<5x6xi32>
    %vec = vector.transfer_read %buf[%idx0, %idx0], %pad
      : memref<5x6xi32>, vector<5x6xi32>
    vector.print %vec : vector<5x6xi32>

    // Free the sparse tensor storage and the result buffer.
    sparse_tensor.release %rhs : tensor<3x6xi8, #DCSR>
    memref.dealloc %buf : memref<5x6xi32>

    return
  }
}