// RUN: mlir-opt %s \
// RUN:   --sparsification --sparse-tensor-conversion \
// RUN:   --convert-vector-to-scf --convert-scf-to-std \
// RUN:   --func-bufferize --tensor-constant-bufferize --tensor-bufferize \
// RUN:   --std-bufferize --finalizing-bufferize --lower-affine \
// RUN:   --convert-vector-to-llvm --convert-memref-to-llvm \
// RUN:   --convert-std-to-llvm --reconcile-unrealized-casts | \
// RUN: mlir-cpu-runner \
// RUN:  -e entry -entry-point-result=void \
// RUN:  -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
// RUN: FileCheck %s
//
// Do the same run, but now with SIMDization as well. This should not change the outcome.
//
// RUN: mlir-opt %s \
// RUN:   --sparsification="vectorization-strategy=2 vl=2" --sparse-tensor-conversion \
// RUN:   --convert-vector-to-scf --convert-scf-to-std \
// RUN:   --func-bufferize --tensor-constant-bufferize --tensor-bufferize \
// RUN:   --std-bufferize --finalizing-bufferize --lower-affine \
// RUN:   --convert-vector-to-llvm --convert-memref-to-llvm \
// RUN:   --convert-std-to-llvm --reconcile-unrealized-casts | \
// RUN: mlir-cpu-runner \
// RUN:  -e entry -entry-point-result=void \
// RUN:  -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
// RUN: FileCheck %s

#SV = #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>

#trait_cast = {
  indexing_maps = [
    affine_map<(i) -> (i)>,  // A (in)
    affine_map<(i) -> (i)>   // X (out)
  ],
  iterator_types = ["parallel"],
  doc = "X(i) = cast A(i)"
}

//
// Integration test that lowers a kernel annotated as sparse to actual sparse
// code, initializes a matching sparse storage scheme from a dense vector,
// and runs the resulting code with the JIT compiler.
//
module {
  //
  // Various kernels that cast a sparse vector from one type to another.
  // The arith dialect supports the following casts:
  //   sitofp
  //   uitofp
  //   fptosi
  //   fptoui
  //   extf
  //   truncf
  //   extsi
  //   extui
  //   trunci
  //   bitcast
  // Since all casts are "zero preserving" unary operations, lattice computation
  // and conversion to sparse code is straightforward.
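  // ("Zero preserving" means that casting a zero operand yields a zero result,
  // e.g. sitofp 0 : i32 gives 0.0 and trunci 0 : i32 gives 0 : i8, so the
  // implicit zeros of the sparse input never need to be materialized and the
  // generated code only visits the stored entries.)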
  //
  func @sparse_cast_s32_to_f32(%arga: tensor<10xi32, #SV>) -> tensor<10xf32> {
    %argx = arith.constant dense<0.0> : tensor<10xf32>
    %0 = linalg.generic #trait_cast
       ins(%arga: tensor<10xi32, #SV>)
      outs(%argx: tensor<10xf32>) {
      ^bb(%a: i32, %x : f32):
        %cst = arith.sitofp %a : i32 to f32
        linalg.yield %cst : f32
    } -> tensor<10xf32>
    return %0 : tensor<10xf32>
  }
  func @sparse_cast_u32_to_f32(%arga: tensor<10xi32, #SV>) -> tensor<10xf32> {
    %argx = arith.constant dense<0.0> : tensor<10xf32>
    %0 = linalg.generic #trait_cast
       ins(%arga: tensor<10xi32, #SV>)
      outs(%argx: tensor<10xf32>) {
      ^bb(%a: i32, %x : f32):
        %cst = arith.uitofp %a : i32 to f32
        linalg.yield %cst : f32
    } -> tensor<10xf32>
    return %0 : tensor<10xf32>
  }
  func @sparse_cast_f32_to_s32(%arga: tensor<10xf32, #SV>) -> tensor<10xi32> {
    %argx = arith.constant dense<0> : tensor<10xi32>
    %0 = linalg.generic #trait_cast
       ins(%arga: tensor<10xf32, #SV>)
      outs(%argx: tensor<10xi32>) {
      ^bb(%a: f32, %x : i32):
        %cst = arith.fptosi %a : f32 to i32
        linalg.yield %cst : i32
    } -> tensor<10xi32>
    return %0 : tensor<10xi32>
  }
  func @sparse_cast_f64_to_u32(%arga: tensor<10xf64, #SV>) -> tensor<10xi32> {
    %argx = arith.constant dense<0> : tensor<10xi32>
    %0 = linalg.generic #trait_cast
       ins(%arga: tensor<10xf64, #SV>)
      outs(%argx: tensor<10xi32>) {
      ^bb(%a: f64, %x : i32):
        %cst = arith.fptoui %a : f64 to i32
        linalg.yield %cst : i32
    } -> tensor<10xi32>
    return %0 : tensor<10xi32>
  }
  func @sparse_cast_f32_to_f64(%arga: tensor<10xf32, #SV>) -> tensor<10xf64> {
    %argx = arith.constant dense<0.0> : tensor<10xf64>
    %0 = linalg.generic #trait_cast
       ins(%arga: tensor<10xf32, #SV>)
      outs(%argx: tensor<10xf64>) {
      ^bb(%a: f32, %x : f64):
        %cst = arith.extf %a : f32 to f64
        linalg.yield %cst : f64
    } -> tensor<10xf64>
    return %0 : tensor<10xf64>
  }
  func @sparse_cast_f64_to_f32(%arga: tensor<10xf64, #SV>) -> tensor<10xf32> {
    %argx = arith.constant dense<0.0> : tensor<10xf32>
    %0 = linalg.generic #trait_cast
       ins(%arga: tensor<10xf64, #SV>)
      outs(%argx: tensor<10xf32>) {
      ^bb(%a: f64, %x : f32):
        %cst = arith.truncf %a : f64 to f32
        linalg.yield %cst : f32
    } -> tensor<10xf32>
    return %0 : tensor<10xf32>
  }
  func @sparse_cast_s32_to_u64(%arga: tensor<10xi32, #SV>) -> tensor<10xi64> {
    %argx = arith.constant dense<0> : tensor<10xi64>
    %0 = linalg.generic #trait_cast
       ins(%arga: tensor<10xi32, #SV>)
      outs(%argx: tensor<10xi64>) {
      ^bb(%a: i32, %x : i64):
        %cst = arith.extsi %a : i32 to i64
        linalg.yield %cst : i64
    } -> tensor<10xi64>
    return %0 : tensor<10xi64>
  }
  func @sparse_cast_u32_to_s64(%arga: tensor<10xi32, #SV>) -> tensor<10xi64> {
    %argx = arith.constant dense<0> : tensor<10xi64>
    %0 = linalg.generic #trait_cast
       ins(%arga: tensor<10xi32, #SV>)
      outs(%argx: tensor<10xi64>) {
      ^bb(%a: i32, %x : i64):
        %cst = arith.extui %a : i32 to i64
        linalg.yield %cst : i64
    } -> tensor<10xi64>
    return %0 : tensor<10xi64>
  }
  func @sparse_cast_i32_to_i8(%arga: tensor<10xi32, #SV>) -> tensor<10xi8> {
    %argx = arith.constant dense<0> : tensor<10xi8>
    %0 = linalg.generic #trait_cast
       ins(%arga: tensor<10xi32, #SV>)
      outs(%argx: tensor<10xi8>) {
      ^bb(%a: i32, %x : i8):
        %cst = arith.trunci %a : i32 to i8
        linalg.yield %cst : i8
    } -> tensor<10xi8>
    return %0 : tensor<10xi8>
  }
  func @sparse_cast_f32_as_s32(%arga: tensor<10xf32, #SV>) -> tensor<10xi32> {
    %argx = arith.constant dense<0> : tensor<10xi32>
    %0 = linalg.generic #trait_cast
       ins(%arga: tensor<10xf32, #SV>)
      outs(%argx: tensor<10xi32>) {
      ^bb(%a: f32, %x : i32):
        %cst = arith.bitcast %a : f32 to i32
        linalg.yield %cst : i32
    } -> tensor<10xi32>
    return %0 : tensor<10xi32>
  }

  //
  // Main driver that converts a dense tensor into a sparse tensor
  // and then calls the sparse casting kernel.
  //
  func @entry() {
    %z = arith.constant 0 : index
    %b = arith.constant 0 : i8
    %i = arith.constant 0 : i32
    %l = arith.constant 0 : i64
    %f = arith.constant 0.0 : f32
    %d = arith.constant 0.0 : f64

    // Initialize dense tensors, convert to sparse vectors.
    %0 = arith.constant dense<[ -4, -3, -2, -1, 0, 1, 2, 3, 4, 305 ]> : tensor<10xi32>
    %1 = sparse_tensor.convert %0 : tensor<10xi32> to tensor<10xi32, #SV>
    %2 = arith.constant dense<[ -4.4, -3.3, -2.2, -1.1, 0.0, 1.1, 2.2, 3.3, 4.4, 305.5 ]> : tensor<10xf32>
    %3 = sparse_tensor.convert %2 : tensor<10xf32> to tensor<10xf32, #SV>
    %4 = arith.constant dense<[ -4.4, -3.3, -2.2, -1.1, 0.0, 1.1, 2.2, 3.3, 4.4, 305.5 ]> : tensor<10xf64>
    %5 = sparse_tensor.convert %4 : tensor<10xf64> to tensor<10xf64, #SV>
    %6 = arith.constant dense<[ 4294967295.0, 4294967294.0, 4294967293.0, 4294967292.0,
                                0.0, 1.1, 2.2, 3.3, 4.4, 305.5 ]> : tensor<10xf64>
    %7 = sparse_tensor.convert %6 : tensor<10xf64> to tensor<10xf64, #SV>

    //
    // CHECK: ( -4, -3, -2, -1, 0, 1, 2, 3, 4, 305 )
    //
    %c0 = call @sparse_cast_s32_to_f32(%1) : (tensor<10xi32, #SV>) -> tensor<10xf32>
    %m0 = bufferization.to_memref %c0 : memref<10xf32>
    %v0 = vector.transfer_read %m0[%z], %f: memref<10xf32>, vector<10xf32>
    vector.print %v0 : vector<10xf32>

    //
    // CHECK: ( 4.29497e+09, 4.29497e+09, 4.29497e+09, 4.29497e+09, 0, 1, 2, 3, 4, 305 )
    //
    %c1 = call @sparse_cast_u32_to_f32(%1) : (tensor<10xi32, #SV>) -> tensor<10xf32>
    %m1 = bufferization.to_memref %c1 : memref<10xf32>
    %v1 = vector.transfer_read %m1[%z], %f: memref<10xf32>, vector<10xf32>
    vector.print %v1 : vector<10xf32>

    //
    // CHECK: ( -4, -3, -2, -1, 0, 1, 2, 3, 4, 305 )
    //
    %c2 = call @sparse_cast_f32_to_s32(%3) : (tensor<10xf32, #SV>) -> tensor<10xi32>
    %m2 = bufferization.to_memref %c2 : memref<10xi32>
    %v2 = vector.transfer_read %m2[%z], %i: memref<10xi32>, vector<10xi32>
    vector.print %v2 : vector<10xi32>

    //
    // CHECK: ( 4294967295, 4294967294, 4294967293, 4294967292, 0, 1, 2, 3, 4, 305 )
    //
    %c3 = call @sparse_cast_f64_to_u32(%7) : (tensor<10xf64, #SV>) -> tensor<10xi32>
    %m3 = bufferization.to_memref %c3 : memref<10xi32>
    %v3 = vector.transfer_read %m3[%z], %i: memref<10xi32>, vector<10xi32>
    %vu = vector.bitcast %v3 : vector<10xi32> to vector<10xui32>
    vector.print %vu : vector<10xui32>

    //
    // CHECK: ( -4.4, -3.3, -2.2, -1.1, 0, 1.1, 2.2, 3.3, 4.4, 305.5 )
    //
    %c4 = call @sparse_cast_f32_to_f64(%3) : (tensor<10xf32, #SV>) -> tensor<10xf64>
    %m4 = bufferization.to_memref %c4 : memref<10xf64>
    %v4 = vector.transfer_read %m4[%z], %d: memref<10xf64>, vector<10xf64>
    vector.print %v4 : vector<10xf64>

    //
    // CHECK: ( -4.4, -3.3, -2.2, -1.1, 0, 1.1, 2.2, 3.3, 4.4, 305.5 )
    //
    %c5 = call @sparse_cast_f64_to_f32(%5) : (tensor<10xf64, #SV>) -> tensor<10xf32>
    %m5 = bufferization.to_memref %c5 : memref<10xf32>
    %v5 = vector.transfer_read %m5[%z], %f: memref<10xf32>, vector<10xf32>
    vector.print %v5 : vector<10xf32>

    //
    // CHECK: ( -4, -3, -2, -1, 0, 1, 2, 3, 4, 305 )
    //
    %c6 = call @sparse_cast_s32_to_u64(%1) : (tensor<10xi32, #SV>) -> tensor<10xi64>
    %m6 = bufferization.to_memref %c6 : memref<10xi64>
    %v6 = vector.transfer_read %m6[%z], %l: memref<10xi64>, vector<10xi64>
    vector.print %v6 : vector<10xi64>

    //
    // CHECK: ( 4294967292, 4294967293, 4294967294, 4294967295, 0, 1, 2, 3, 4, 305 )
    //
    %c7 = call @sparse_cast_u32_to_s64(%1) : (tensor<10xi32, #SV>) -> tensor<10xi64>
    %m7 = bufferization.to_memref %c7 : memref<10xi64>
    %v7 = vector.transfer_read %m7[%z], %l: memref<10xi64>, vector<10xi64>
    vector.print %v7 : vector<10xi64>

    //
    // CHECK: ( -4, -3, -2, -1, 0, 1, 2, 3, 4, 49 )
    //
    %c8 = call @sparse_cast_i32_to_i8(%1) : (tensor<10xi32, #SV>) -> tensor<10xi8>
    %m8 = bufferization.to_memref %c8 : memref<10xi8>
    %v8 = vector.transfer_read %m8[%z], %b: memref<10xi8>, vector<10xi8>
    vector.print %v8 : vector<10xi8>

    //
    // CHECK: ( -1064514355, -1068289229, -1072902963, -1081291571, 0, 1066192077, 1074580685, 1079194419, 1082969293, 1134084096 )
    //
    %c9 = call @sparse_cast_f32_as_s32(%3) : (tensor<10xf32, #SV>) -> tensor<10xi32>
    %m9 = bufferization.to_memref %c9 : memref<10xi32>
    %v9 = vector.transfer_read %m9[%z], %i: memref<10xi32>, vector<10xi32>
    vector.print %v9 : vector<10xi32>

    // Release the resources.
    sparse_tensor.release %1 : tensor<10xi32, #SV>
    sparse_tensor.release %3 : tensor<10xf32, #SV>
    sparse_tensor.release %5 : tensor<10xf64, #SV>
    sparse_tensor.release %7 : tensor<10xf64, #SV>
    memref.dealloc %m0 : memref<10xf32>
    memref.dealloc %m1 : memref<10xf32>
    memref.dealloc %m2 : memref<10xi32>
    memref.dealloc %m3 : memref<10xi32>
    memref.dealloc %m4 : memref<10xf64>
    memref.dealloc %m5 : memref<10xf32>
    memref.dealloc %m6 : memref<10xi64>
    memref.dealloc %m7 : memref<10xi64>
    memref.dealloc %m8 : memref<10xi8>
    memref.dealloc %m9 : memref<10xi32>

    return
  }
}