// NOTE: Assertions have been autogenerated by utils/generate-test-checks.py
// RUN: mlir-opt %s -sparsification | FileCheck %s

#DV = #sparse_tensor.encoding<{ dimLevelType = [ "dense"      ] }>
#SV = #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>

#trait1 = {
  indexing_maps = [
    affine_map<(i) -> (i)>,  // a
    affine_map<(i) -> (i)>   // x (out)
  ],
  iterator_types = ["parallel"],
  doc = "x(i) = a(i) OP b"
}

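// The tests @add_d, @add_d_init, and @mul_d below use a dense storage level,
// so each kernel lowers to a single scf.for over all 32 stored values.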
// CHECK-LABEL:   func @add_d(
// CHECK-SAME:      %[[VAL_0:.*]]: tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense" ], pointerBitWidth = 0, indexBitWidth = 0 }>>,
// CHECK-SAME:      %[[VAL_1:.*]]: f32,
// CHECK-SAME:      %[[VAL_2:.*]]: tensor<32xf32>) -> tensor<32xf32> {
// CHECK-DAG:       %[[VAL_3:.*]] = arith.constant 32 : index
// CHECK-DAG:       %[[VAL_4:.*]] = arith.constant 0 : index
// CHECK-DAG:       %[[VAL_5:.*]] = arith.constant 1 : index
// CHECK-DAG:       %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
// CHECK-DAG:       %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_2]]
// CHECK:           linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_8]] : memref<32xf32>)
// CHECK:           scf.for %[[VAL_9:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] {
// CHECK:             %[[VAL_10:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_9]]] : memref<?xf32>
// CHECK:             %[[VAL_11:.*]] = arith.addf %[[VAL_10]], %[[VAL_1]] : f32
// CHECK:             memref.store %[[VAL_11]], %[[VAL_8]]{{\[}}%[[VAL_9]]] : memref<32xf32>
// CHECK:           }
// CHECK:           %[[VAL_12:.*]] = bufferization.to_tensor %[[VAL_8]] : memref<32xf32>
// CHECK:           return %[[VAL_12]] : tensor<32xf32>
// CHECK:         }
func.func @add_d(%arga: tensor<32xf32, #DV>, %argb: f32, %argx: tensor<32xf32>) -> tensor<32xf32> {
  %0 = linalg.generic #trait1
     ins(%arga: tensor<32xf32, #DV>)
    outs(%argx: tensor<32xf32>) {
      ^bb(%a: f32, %x: f32):
        %0 = arith.addf %a, %argb : f32
        linalg.yield %0 : f32
  } -> tensor<32xf32>
  return %0 : tensor<32xf32>
}

// CHECK-LABEL:   func @add_d_init(
// CHECK-SAME:      %[[VAL_0:.*]]: tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense" ], pointerBitWidth = 0, indexBitWidth = 0 }>>,
// CHECK-SAME:      %[[VAL_1:.*]]: f32) -> tensor<32xf32> {
// CHECK:           %[[VAL_2:.*]] = arith.constant 32 : index
// CHECK:           %[[VAL_3:.*]] = arith.constant 0.000000e+00 : f32
// CHECK:           %[[VAL_4:.*]] = arith.constant 0 : index
// CHECK:           %[[VAL_5:.*]] = arith.constant 1 : index
// CHECK:           %[[VAL_INITTENSOR:.*]] = linalg.init_tensor [32] : tensor<32xf32>
// CHECK:           %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
// CHECK:           %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_INITTENSOR]] : memref<32xf32>
// CHECK:           linalg.fill ins(%[[VAL_3]] : f32) outs(%[[VAL_7]] : memref<32xf32>)
// CHECK:           scf.for %[[VAL_8:.*]] = %[[VAL_4]] to %[[VAL_2]] step %[[VAL_5]] {
// CHECK:             %[[VAL_9:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_8]]] : memref<?xf32>
// CHECK:             %[[VAL_10:.*]] = arith.addf %[[VAL_9]], %[[VAL_1]] : f32
// CHECK:             memref.store %[[VAL_10]], %[[VAL_7]]{{\[}}%[[VAL_8]]] : memref<32xf32>
// CHECK:           }
// CHECK:           %[[VAL_11:.*]] = bufferization.to_tensor %[[VAL_7]] : memref<32xf32>
// CHECK:           return %[[VAL_11]] : tensor<32xf32>
// CHECK:         }
func.func @add_d_init(%arga: tensor<32xf32, #DV>, %argb: f32) -> tensor<32xf32> {
  %u = linalg.init_tensor [32] : tensor<32xf32>
  %0 = linalg.generic #trait1
     ins(%arga: tensor<32xf32, #DV>)
    outs(%u: tensor<32xf32>) {
      ^bb(%a: f32, %x: f32):
        %0 = arith.addf %a, %argb : f32
        linalg.yield %0 : f32
  } -> tensor<32xf32>
  return %0 : tensor<32xf32>
}

// CHECK-LABEL:   func @mul_d(
// CHECK-SAME:      %[[VAL_0:.*]]: tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense" ], pointerBitWidth = 0, indexBitWidth = 0 }>>,
// CHECK-SAME:      %[[VAL_1:.*]]: f32,
// CHECK-SAME:      %[[VAL_2:.*]]: tensor<32xf32>) -> tensor<32xf32> {
// CHECK-DAG:       %[[VAL_3:.*]] = arith.constant 32 : index
// CHECK-DAG:       %[[VAL_4:.*]] = arith.constant 0 : index
// CHECK-DAG:       %[[VAL_5:.*]] = arith.constant 1 : index
// CHECK-DAG:       %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
// CHECK-DAG:       %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_2]]
// CHECK:           linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_8]] : memref<32xf32>)
// CHECK:           scf.for %[[VAL_9:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] {
// CHECK:             %[[VAL_10:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_9]]] : memref<?xf32>
// CHECK:             %[[VAL_11:.*]] = arith.mulf %[[VAL_10]], %[[VAL_1]] : f32
// CHECK:             memref.store %[[VAL_11]], %[[VAL_8]]{{\[}}%[[VAL_9]]] : memref<32xf32>
// CHECK:           }
// CHECK:           %[[VAL_12:.*]] = bufferization.to_tensor %[[VAL_8]] : memref<32xf32>
// CHECK:           return %[[VAL_12]] : tensor<32xf32>
// CHECK:         }
func.func @mul_d(%arga: tensor<32xf32, #DV>, %argb: f32, %argx: tensor<32xf32>) -> tensor<32xf32> {
  %0 = linalg.generic #trait1
     ins(%arga: tensor<32xf32, #DV>)
    outs(%argx: tensor<32xf32>) {
      ^bb(%a: f32, %x: f32):
        %0 = arith.mulf %a, %argb : f32
        linalg.yield %0 : f32
  } -> tensor<32xf32>
  return %0 : tensor<32xf32>
}

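// Addition with a compressed operand and a dense output is a disjunction:
// every output position must be written. The sparsifier therefore emits an
// scf.while that co-iterates the stored entries with the running dense index,
// guards each step with scf.if, and appends a cleanup scf.for for the tail.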
// CHECK-LABEL:   func @add_s(
// CHECK-SAME:      %[[VAL_0:.*]]: tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>>,
// CHECK-SAME:      %[[VAL_1:.*]]: f32,
// CHECK-SAME:      %[[VAL_2:.*]]: tensor<32xf32>) -> tensor<32xf32> {
// CHECK-DAG:       %[[VAL_3:.*]] = arith.constant 32 : index
// CHECK-DAG:       %[[VAL_4:.*]] = arith.constant 0 : index
// CHECK-DAG:       %[[VAL_5:.*]] = arith.constant true
// CHECK-DAG:       %[[VAL_6:.*]] = arith.constant 1 : index
// CHECK-DAG:       %[[VAL_7:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_4]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK-DAG:       %[[VAL_8:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_4]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK-DAG:       %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
// CHECK-DAG:       %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]]
// CHECK:           linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_11]] : memref<32xf32>)
// CHECK:           %[[VAL_12:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK:           %[[VAL_13:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_6]]] : memref<?xindex>
// CHECK:           %[[VAL_14:.*]]:2 = scf.while (%[[VAL_15:.*]] = %[[VAL_12]], %[[VAL_16:.*]] = %[[VAL_4]]) : (index, index) -> (index, index) {
// CHECK:             %[[VAL_17:.*]] = arith.cmpi ult, %[[VAL_15]], %[[VAL_13]] : index
// CHECK:             scf.condition(%[[VAL_17]]) %[[VAL_15]], %[[VAL_16]] : index, index
// CHECK:           } do {
// CHECK:           ^bb0(%[[VAL_18:.*]]: index, %[[VAL_19:.*]]: index):
// CHECK:             %[[VAL_20:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_18]]] : memref<?xindex>
// CHECK:             %[[VAL_21:.*]] = arith.cmpi eq, %[[VAL_20]], %[[VAL_19]] : index
// CHECK:             scf.if %[[VAL_21]] {
// CHECK:               %[[VAL_22:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_18]]] : memref<?xf32>
// CHECK:               %[[VAL_23:.*]] = arith.addf %[[VAL_22]], %[[VAL_1]] : f32
// CHECK:               memref.store %[[VAL_23]], %[[VAL_11]]{{\[}}%[[VAL_19]]] : memref<32xf32>
// CHECK:             } else {
// CHECK:               scf.if %[[VAL_5]] {
// CHECK:                 memref.store %[[VAL_1]], %[[VAL_11]]{{\[}}%[[VAL_19]]] : memref<32xf32>
// CHECK:               } else {
// CHECK:               }
// CHECK:             }
// CHECK:             %[[VAL_24:.*]] = arith.cmpi eq, %[[VAL_20]], %[[VAL_19]] : index
// CHECK:             %[[VAL_25:.*]] = arith.addi %[[VAL_18]], %[[VAL_6]] : index
// CHECK:             %[[VAL_26:.*]] = arith.select %[[VAL_24]], %[[VAL_25]], %[[VAL_18]] : index
// CHECK:             %[[VAL_27:.*]] = arith.addi %[[VAL_19]], %[[VAL_6]] : index
// CHECK:             scf.yield %[[VAL_26]], %[[VAL_27]] : index, index
// CHECK:           }
// CHECK:           scf.for %[[VAL_28:.*]] = %[[VAL_29:.*]]#1 to %[[VAL_3]] step %[[VAL_6]] {
// CHECK:             memref.store %[[VAL_1]], %[[VAL_11]]{{\[}}%[[VAL_28]]] : memref<32xf32>
// CHECK:           }
// CHECK:           %[[VAL_30:.*]] = bufferization.to_tensor %[[VAL_11]] : memref<32xf32>
// CHECK:           return %[[VAL_30]] : tensor<32xf32>
// CHECK:         }
func.func @add_s(%arga: tensor<32xf32, #SV>, %argb: f32, %argx: tensor<32xf32>) -> tensor<32xf32> {
  %0 = linalg.generic #trait1
     ins(%arga: tensor<32xf32, #SV>)
    outs(%argx: tensor<32xf32>) {
      ^bb(%a: f32, %x: f32):
        %0 = arith.addf %a, %argb : f32
        linalg.yield %0 : f32
  } -> tensor<32xf32>
  return %0 : tensor<32xf32>
}

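// Adding a sparse tensor to itself only touches its own stored entries, so
// even with the repeated operand a single scf.for over those entries
// suffices (with one load per use of %a).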
// CHECK-LABEL:   func @repeated_add_s(
// CHECK-SAME:      %[[VAL_0:.*]]: tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>>,
// CHECK-SAME:      %[[VAL_1:.*]]: tensor<32xf32>) -> tensor<32xf32> {
// CHECK-DAG:       %[[VAL_2:.*]] = arith.constant 0 : index
// CHECK-DAG:       %[[VAL_3:.*]] = arith.constant 1 : index
// CHECK-DAG:       %[[VAL_4:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_2]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK-DAG:       %[[VAL_5:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_2]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK-DAG:       %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
// CHECK-DAG:       %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]]
// CHECK:           linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_8]] : memref<32xf32>)
// CHECK:           %[[VAL_9:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref<?xindex>
// CHECK:           %[[VAL_10:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_3]]] : memref<?xindex>
// CHECK:           scf.for %[[VAL_11:.*]] = %[[VAL_9]] to %[[VAL_10]] step %[[VAL_3]] {
// CHECK:             %[[VAL_12:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_11]]] : memref<?xindex>
// CHECK:             %[[VAL_13:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_11]]] : memref<?xf32>
// CHECK:             %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_11]]] : memref<?xf32>
// CHECK:             %[[VAL_15:.*]] = arith.addf %[[VAL_13]], %[[VAL_14]] : f32
// CHECK:             %[[VAL_16:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_11]]] : memref<?xf32>
// CHECK:             %[[VAL_17:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_11]]] : memref<?xf32>
// CHECK:             %[[VAL_18:.*]] = arith.addf %[[VAL_16]], %[[VAL_17]] : f32
// CHECK:             %[[VAL_19:.*]] = arith.addf %[[VAL_15]], %[[VAL_18]] : f32
// CHECK:             memref.store %[[VAL_19]], %[[VAL_8]]{{\[}}%[[VAL_12]]] : memref<32xf32>
// CHECK:           }
// CHECK:           %[[VAL_20:.*]] = bufferization.to_tensor %[[VAL_8]] : memref<32xf32>
// CHECK:           return %[[VAL_20]] : tensor<32xf32>
// CHECK:         }
func.func @repeated_add_s(%arga: tensor<32xf32, #SV>, %argx: tensor<32xf32>) -> tensor<32xf32> {
  %0 = linalg.generic #trait1
     ins(%arga: tensor<32xf32, #SV>)
    outs(%argx: tensor<32xf32>) {
      ^bb(%a: f32, %x: f32):
        %0 = arith.addf %a, %a : f32  // same tensor
        %1 = arith.addf %a, %a : f32  // should yield
        %2 = arith.addf %0, %1 : f32  // one guard
        linalg.yield %2 : f32
  } -> tensor<32xf32>
  return %0 : tensor<32xf32>
}

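// Multiplication by a sparse operand is a conjunction: implicit zeros
// annihilate the product, so a plain scf.for over the stored entries
// suffices, scattering results through the loaded coordinates.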
// CHECK-LABEL:   func @mul_s(
// CHECK-SAME:      %[[VAL_0:.*]]: tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>>,
// CHECK-SAME:      %[[VAL_1:.*]]: f32,
// CHECK-SAME:      %[[VAL_2:.*]]: tensor<32xf32>) -> tensor<32xf32> {
// CHECK-DAG:       %[[VAL_3:.*]] = arith.constant 0 : index
// CHECK-DAG:       %[[VAL_4:.*]] = arith.constant 1 : index
// CHECK-DAG:       %[[VAL_5:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_3]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK-DAG:       %[[VAL_6:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_3]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK-DAG:       %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
// CHECK-DAG:       %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_2]]
// CHECK:           linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_9]] : memref<32xf32>)
// CHECK:           %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
// CHECK:           %[[VAL_11:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK:           scf.for %[[VAL_12:.*]] = %[[VAL_10]] to %[[VAL_11]] step %[[VAL_4]] {
// CHECK:             %[[VAL_13:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_12]]] : memref<?xindex>
// CHECK:             %[[VAL_14:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_12]]] : memref<?xf32>
// CHECK:             %[[VAL_15:.*]] = arith.mulf %[[VAL_14]], %[[VAL_1]] : f32
// CHECK:             memref.store %[[VAL_15]], %[[VAL_9]]{{\[}}%[[VAL_13]]] : memref<32xf32>
// CHECK:           }
// CHECK:           %[[VAL_16:.*]] = bufferization.to_tensor %[[VAL_9]] : memref<32xf32>
// CHECK:           return %[[VAL_16]] : tensor<32xf32>
// CHECK:         }
func.func @mul_s(%arga: tensor<32xf32, #SV>, %argb: f32, %argx: tensor<32xf32>) -> tensor<32xf32> {
  %0 = linalg.generic #trait1
     ins(%arga: tensor<32xf32, #SV>)
    outs(%argx: tensor<32xf32>) {
      ^bb(%a: f32, %x: f32):
        %0 = arith.mulf %a, %argb : f32
        linalg.yield %0 : f32
  } -> tensor<32xf32>
  return %0 : tensor<32xf32>
}

#trait2 = {
  indexing_maps = [
    affine_map<(i) -> (i)>,  // a
    affine_map<(i) -> (i)>,  // b
    affine_map<(i) -> (i)>   // x (out)
  ],
  iterator_types = ["parallel"],
  doc = "x(i) = a(i) OP b(i)"
}

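// The binary kernels below pair every storage combination (dd, ds, sd, ss)
// with addition, a disjunction over the union of stored entries, and with
// multiplication, a conjunction over their intersection.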
// CHECK-LABEL:   func @add_dd(
// CHECK-SAME:      %[[VAL_0:.*]]: tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense" ], pointerBitWidth = 0, indexBitWidth = 0 }>>,
// CHECK-SAME:      %[[VAL_1:.*]]: tensor<32xf32>,
// CHECK-SAME:      %[[VAL_2:.*]]: tensor<32xf32>) -> tensor<32xf32> {
// CHECK-DAG:       %[[VAL_3:.*]] = arith.constant 32 : index
// CHECK-DAG:       %[[VAL_4:.*]] = arith.constant 0 : index
// CHECK-DAG:       %[[VAL_5:.*]] = arith.constant 1 : index
// CHECK-DAG:       %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
// CHECK-DAG:       %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf32>
// CHECK-DAG:       %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_2]]
// CHECK:           linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_9]] : memref<32xf32>)
// CHECK:           scf.for %[[VAL_10:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] {
// CHECK:             %[[VAL_11:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_10]]] : memref<?xf32>
// CHECK:             %[[VAL_12:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_10]]] : memref<32xf32>
// CHECK:             %[[VAL_13:.*]] = arith.addf %[[VAL_11]], %[[VAL_12]] : f32
// CHECK:             memref.store %[[VAL_13]], %[[VAL_9]]{{\[}}%[[VAL_10]]] : memref<32xf32>
// CHECK:           }
// CHECK:           %[[VAL_14:.*]] = bufferization.to_tensor %[[VAL_9]] : memref<32xf32>
// CHECK:           return %[[VAL_14]] : tensor<32xf32>
// CHECK:         }
func.func @add_dd(%arga: tensor<32xf32, #DV>, %argb: tensor<32xf32>, %argx: tensor<32xf32>) -> tensor<32xf32> {
  %0 = linalg.generic #trait2
     ins(%arga, %argb: tensor<32xf32, #DV>, tensor<32xf32>)
    outs(%argx: tensor<32xf32>) {
      ^bb(%a: f32, %b: f32, %x: f32):
        %0 = arith.addf %a, %b : f32
        linalg.yield %0 : f32
  } -> tensor<32xf32>
  return %0 : tensor<32xf32>
}

// CHECK-LABEL:   func @mul_dd(
// CHECK-SAME:      %[[VAL_0:.*]]: tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense" ], pointerBitWidth = 0, indexBitWidth = 0 }>>,
// CHECK-SAME:      %[[VAL_1:.*]]: tensor<32xf32>,
// CHECK-SAME:      %[[VAL_2:.*]]: tensor<32xf32>) -> tensor<32xf32> {
// CHECK-DAG:       %[[VAL_3:.*]] = arith.constant 32 : index
// CHECK-DAG:       %[[VAL_4:.*]] = arith.constant 0 : index
// CHECK-DAG:       %[[VAL_5:.*]] = arith.constant 1 : index
// CHECK-DAG:       %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
// CHECK-DAG:       %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf32>
// CHECK-DAG:       %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_2]]
// CHECK:           linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_9]] : memref<32xf32>)
// CHECK:           scf.for %[[VAL_10:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] {
// CHECK:             %[[VAL_11:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_10]]] : memref<?xf32>
// CHECK:             %[[VAL_12:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_10]]] : memref<32xf32>
// CHECK:             %[[VAL_13:.*]] = arith.mulf %[[VAL_11]], %[[VAL_12]] : f32
// CHECK:             memref.store %[[VAL_13]], %[[VAL_9]]{{\[}}%[[VAL_10]]] : memref<32xf32>
// CHECK:           }
// CHECK:           %[[VAL_14:.*]] = bufferization.to_tensor %[[VAL_9]] : memref<32xf32>
// CHECK:           return %[[VAL_14]] : tensor<32xf32>
// CHECK:         }
func.func @mul_dd(%arga: tensor<32xf32, #DV>, %argb: tensor<32xf32>, %argx: tensor<32xf32>) -> tensor<32xf32> {
  %0 = linalg.generic #trait2
     ins(%arga, %argb: tensor<32xf32, #DV>, tensor<32xf32>)
    outs(%argx: tensor<32xf32>) {
      ^bb(%a: f32, %b: f32, %x: f32):
        %0 = arith.mulf %a, %b : f32
        linalg.yield %0 : f32
  } -> tensor<32xf32>
  return %0 : tensor<32xf32>
}

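// Dense + sparse addition co-iterates the compressed operand with the dense
// index: where an entry is stored the sum is taken, otherwise the dense
// value is copied through, and a final scf.for copies the tail.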
// CHECK-LABEL:   func @add_ds(
// CHECK-SAME:      %[[VAL_0:.*]]: tensor<32xf32>,
// CHECK-SAME:      %[[VAL_1:.*]]: tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>>,
// CHECK-SAME:      %[[VAL_2:.*]]: tensor<32xf32>) -> tensor<32xf32> {
// CHECK-DAG:       %[[VAL_3:.*]] = arith.constant 32 : index
// CHECK-DAG:       %[[VAL_4:.*]] = arith.constant 0 : index
// CHECK-DAG:       %[[VAL_5:.*]] = arith.constant true
// CHECK-DAG:       %[[VAL_6:.*]] = arith.constant 1 : index
// CHECK-DAG:       %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_0]] : memref<32xf32>
// CHECK-DAG:       %[[VAL_8:.*]] = sparse_tensor.pointers %[[VAL_1]], %[[VAL_4]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK-DAG:       %[[VAL_9:.*]] = sparse_tensor.indices %[[VAL_1]], %[[VAL_4]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK-DAG:       %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
// CHECK-DAG:       %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]]
// CHECK:           linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_12]] : memref<32xf32>)
// CHECK:           %[[VAL_13:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK:           %[[VAL_14:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_6]]] : memref<?xindex>
// CHECK:           %[[VAL_15:.*]]:2 = scf.while (%[[VAL_16:.*]] = %[[VAL_13]], %[[VAL_17:.*]] = %[[VAL_4]]) : (index, index) -> (index, index) {
// CHECK:             %[[VAL_18:.*]] = arith.cmpi ult, %[[VAL_16]], %[[VAL_14]] : index
// CHECK:             scf.condition(%[[VAL_18]]) %[[VAL_16]], %[[VAL_17]] : index, index
// CHECK:           } do {
// CHECK:           ^bb0(%[[VAL_19:.*]]: index, %[[VAL_20:.*]]: index):
// CHECK:             %[[VAL_21:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_19]]] : memref<?xindex>
// CHECK:             %[[VAL_22:.*]] = arith.cmpi eq, %[[VAL_21]], %[[VAL_20]] : index
// CHECK:             scf.if %[[VAL_22]] {
// CHECK:               %[[VAL_23:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_20]]] : memref<32xf32>
// CHECK:               %[[VAL_24:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_19]]] : memref<?xf32>
// CHECK:               %[[VAL_25:.*]] = arith.addf %[[VAL_23]], %[[VAL_24]] : f32
// CHECK:               memref.store %[[VAL_25]], %[[VAL_12]]{{\[}}%[[VAL_20]]] : memref<32xf32>
// CHECK:             } else {
// CHECK:               scf.if %[[VAL_5]] {
// CHECK:                 %[[VAL_26:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_20]]] : memref<32xf32>
// CHECK:                 memref.store %[[VAL_26]], %[[VAL_12]]{{\[}}%[[VAL_20]]] : memref<32xf32>
// CHECK:               } else {
// CHECK:               }
// CHECK:             }
// CHECK:             %[[VAL_27:.*]] = arith.cmpi eq, %[[VAL_21]], %[[VAL_20]] : index
// CHECK:             %[[VAL_28:.*]] = arith.addi %[[VAL_19]], %[[VAL_6]] : index
// CHECK:             %[[VAL_29:.*]] = arith.select %[[VAL_27]], %[[VAL_28]], %[[VAL_19]] : index
// CHECK:             %[[VAL_30:.*]] = arith.addi %[[VAL_20]], %[[VAL_6]] : index
// CHECK:             scf.yield %[[VAL_29]], %[[VAL_30]] : index, index
// CHECK:           }
// CHECK:           scf.for %[[VAL_31:.*]] = %[[VAL_32:.*]]#1 to %[[VAL_3]] step %[[VAL_6]] {
// CHECK:             %[[VAL_33:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_31]]] : memref<32xf32>
// CHECK:             memref.store %[[VAL_33]], %[[VAL_12]]{{\[}}%[[VAL_31]]] : memref<32xf32>
// CHECK:           }
// CHECK:           %[[VAL_34:.*]] = bufferization.to_tensor %[[VAL_12]] : memref<32xf32>
// CHECK:           return %[[VAL_34]] : tensor<32xf32>
// CHECK:         }
func.func @add_ds(%arga: tensor<32xf32>, %argb: tensor<32xf32, #SV>, %argx: tensor<32xf32>) -> tensor<32xf32> {
  %0 = linalg.generic #trait2
     ins(%arga, %argb: tensor<32xf32>, tensor<32xf32, #SV>)
    outs(%argx: tensor<32xf32>) {
      ^bb(%a: f32, %b: f32, %x: f32):
        %0 = arith.addf %a, %b : f32
        linalg.yield %0 : f32
  } -> tensor<32xf32>
  return %0 : tensor<32xf32>
}

// CHECK-LABEL:   func @mul_ds(
// CHECK-SAME:      %[[VAL_0:.*]]: tensor<32xf32>,
// CHECK-SAME:      %[[VAL_1:.*]]: tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>>,
// CHECK-SAME:      %[[VAL_2:.*]]: tensor<32xf32>) -> tensor<32xf32> {
// CHECK-DAG:       %[[VAL_3:.*]] = arith.constant 0 : index
// CHECK-DAG:       %[[VAL_4:.*]] = arith.constant 1 : index
// CHECK-DAG:       %[[VAL_5:.*]] = bufferization.to_memref %[[VAL_0]] : memref<32xf32>
// CHECK-DAG:       %[[VAL_6:.*]] = sparse_tensor.pointers %[[VAL_1]], %[[VAL_3]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK-DAG:       %[[VAL_7:.*]] = sparse_tensor.indices %[[VAL_1]], %[[VAL_3]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK-DAG:       %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
// CHECK-DAG:       %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]]
// CHECK:           linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_10]] : memref<32xf32>)
// CHECK:           %[[VAL_11:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_3]]] : memref<?xindex>
// CHECK:           %[[VAL_12:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK:           scf.for %[[VAL_13:.*]] = %[[VAL_11]] to %[[VAL_12]] step %[[VAL_4]] {
// CHECK:             %[[VAL_14:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_13]]] : memref<?xindex>
// CHECK:             %[[VAL_15:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_14]]] : memref<32xf32>
// CHECK:             %[[VAL_16:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_13]]] : memref<?xf32>
// CHECK:             %[[VAL_17:.*]] = arith.mulf %[[VAL_15]], %[[VAL_16]] : f32
// CHECK:             memref.store %[[VAL_17]], %[[VAL_10]]{{\[}}%[[VAL_14]]] : memref<32xf32>
// CHECK:           }
// CHECK:           %[[VAL_18:.*]] = bufferization.to_tensor %[[VAL_10]] : memref<32xf32>
// CHECK:           return %[[VAL_18]] : tensor<32xf32>
// CHECK:         }
func.func @mul_ds(%arga: tensor<32xf32>, %argb: tensor<32xf32, #SV>, %argx: tensor<32xf32>) -> tensor<32xf32> {
  %0 = linalg.generic #trait2
     ins(%arga, %argb: tensor<32xf32>, tensor<32xf32, #SV>)
    outs(%argx: tensor<32xf32>) {
      ^bb(%a: f32, %b: f32, %x: f32):
        %0 = arith.mulf %a, %b : f32
        linalg.yield %0 : f32
  } -> tensor<32xf32>
  return %0 : tensor<32xf32>
}

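// The sparse + dense cases below mirror @add_ds and @mul_ds with the
// operand roles swapped.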
// CHECK-LABEL:   func @add_sd(
// CHECK-SAME:      %[[VAL_0:.*]]: tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>>,
// CHECK-SAME:      %[[VAL_1:.*]]: tensor<32xf32>,
// CHECK-SAME:      %[[VAL_2:.*]]: tensor<32xf32>) -> tensor<32xf32> {
// CHECK-DAG:       %[[VAL_3:.*]] = arith.constant 32 : index
// CHECK-DAG:       %[[VAL_4:.*]] = arith.constant 0 : index
// CHECK-DAG:       %[[VAL_5:.*]] = arith.constant true
// CHECK-DAG:       %[[VAL_6:.*]] = arith.constant 1 : index
// CHECK-DAG:       %[[VAL_7:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_4]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK-DAG:       %[[VAL_8:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_4]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK-DAG:       %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
// CHECK-DAG:       %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf32>
// CHECK-DAG:       %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]]
// CHECK:           linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_12]] : memref<32xf32>)
// CHECK:           %[[VAL_13:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK:           %[[VAL_14:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_6]]] : memref<?xindex>
// CHECK:           %[[VAL_15:.*]]:2 = scf.while (%[[VAL_16:.*]] = %[[VAL_13]], %[[VAL_17:.*]] = %[[VAL_4]]) : (index, index) -> (index, index) {
// CHECK:             %[[VAL_18:.*]] = arith.cmpi ult, %[[VAL_16]], %[[VAL_14]] : index
// CHECK:             scf.condition(%[[VAL_18]]) %[[VAL_16]], %[[VAL_17]] : index, index
// CHECK:           } do {
// CHECK:           ^bb0(%[[VAL_19:.*]]: index, %[[VAL_20:.*]]: index):
// CHECK:             %[[VAL_21:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_19]]] : memref<?xindex>
// CHECK:             %[[VAL_22:.*]] = arith.cmpi eq, %[[VAL_21]], %[[VAL_20]] : index
// CHECK:             scf.if %[[VAL_22]] {
// CHECK:               %[[VAL_23:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_19]]] : memref<?xf32>
// CHECK:               %[[VAL_24:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_20]]] : memref<32xf32>
// CHECK:               %[[VAL_25:.*]] = arith.addf %[[VAL_23]], %[[VAL_24]] : f32
// CHECK:               memref.store %[[VAL_25]], %[[VAL_12]]{{\[}}%[[VAL_20]]] : memref<32xf32>
// CHECK:             } else {
// CHECK:               scf.if %[[VAL_5]] {
// CHECK:                 %[[VAL_26:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_20]]] : memref<32xf32>
// CHECK:                 memref.store %[[VAL_26]], %[[VAL_12]]{{\[}}%[[VAL_20]]] : memref<32xf32>
// CHECK:               } else {
// CHECK:               }
// CHECK:             }
// CHECK:             %[[VAL_27:.*]] = arith.cmpi eq, %[[VAL_21]], %[[VAL_20]] : index
// CHECK:             %[[VAL_28:.*]] = arith.addi %[[VAL_19]], %[[VAL_6]] : index
// CHECK:             %[[VAL_29:.*]] = arith.select %[[VAL_27]], %[[VAL_28]], %[[VAL_19]] : index
// CHECK:             %[[VAL_30:.*]] = arith.addi %[[VAL_20]], %[[VAL_6]] : index
// CHECK:             scf.yield %[[VAL_29]], %[[VAL_30]] : index, index
// CHECK:           }
// CHECK:           scf.for %[[VAL_31:.*]] = %[[VAL_32:.*]]#1 to %[[VAL_3]] step %[[VAL_6]] {
// CHECK:             %[[VAL_33:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_31]]] : memref<32xf32>
// CHECK:             memref.store %[[VAL_33]], %[[VAL_12]]{{\[}}%[[VAL_31]]] : memref<32xf32>
// CHECK:           }
// CHECK:           %[[VAL_34:.*]] = bufferization.to_tensor %[[VAL_12]] : memref<32xf32>
// CHECK:           return %[[VAL_34]] : tensor<32xf32>
// CHECK:         }
func.func @add_sd(%arga: tensor<32xf32, #SV>, %argb: tensor<32xf32>, %argx: tensor<32xf32>) -> tensor<32xf32> {
  %0 = linalg.generic #trait2
     ins(%arga, %argb: tensor<32xf32, #SV>, tensor<32xf32>)
    outs(%argx: tensor<32xf32>) {
      ^bb(%a: f32, %b: f32, %x: f32):
        %0 = arith.addf %a, %b : f32
        linalg.yield %0 : f32
  } -> tensor<32xf32>
  return %0 : tensor<32xf32>
}

// CHECK-LABEL:   func @mul_sd(
// CHECK-SAME:      %[[VAL_0:.*]]: tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>>,
// CHECK-SAME:      %[[VAL_1:.*]]: tensor<32xf32>,
// CHECK-SAME:      %[[VAL_2:.*]]: tensor<32xf32>) -> tensor<32xf32> {
// CHECK-DAG:       %[[VAL_3:.*]] = arith.constant 0 : index
// CHECK-DAG:       %[[VAL_4:.*]] = arith.constant 1 : index
// CHECK-DAG:       %[[VAL_5:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_3]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK-DAG:       %[[VAL_6:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_3]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK-DAG:       %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
// CHECK-DAG:       %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf32>
// CHECK-DAG:       %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]]
// CHECK:           linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_10]] : memref<32xf32>)
// CHECK:           %[[VAL_11:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
// CHECK:           %[[VAL_12:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK:           scf.for %[[VAL_13:.*]] = %[[VAL_11]] to %[[VAL_12]] step %[[VAL_4]] {
// CHECK:             %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_13]]] : memref<?xindex>
// CHECK:             %[[VAL_15:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_13]]] : memref<?xf32>
// CHECK:             %[[VAL_16:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_14]]] : memref<32xf32>
// CHECK:             %[[VAL_17:.*]] = arith.mulf %[[VAL_15]], %[[VAL_16]] : f32
// CHECK:             memref.store %[[VAL_17]], %[[VAL_10]]{{\[}}%[[VAL_14]]] : memref<32xf32>
// CHECK:           }
// CHECK:           %[[VAL_18:.*]] = bufferization.to_tensor %[[VAL_10]] : memref<32xf32>
// CHECK:           return %[[VAL_18]] : tensor<32xf32>
// CHECK:         }
func.func @mul_sd(%arga: tensor<32xf32, #SV>, %argb: tensor<32xf32>, %argx: tensor<32xf32>) -> tensor<32xf32> {
  %0 = linalg.generic #trait2
     ins(%arga, %argb: tensor<32xf32, #SV>, tensor<32xf32>)
    outs(%argx: tensor<32xf32>) {
      ^bb(%a: f32, %b: f32, %x: f32):
        %0 = arith.mulf %a, %b : f32
        linalg.yield %0 : f32
  } -> tensor<32xf32>
  return %0 : tensor<32xf32>
}

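// With two compressed operands, addition becomes a two-way merge: the
// scf.while advances whichever operand holds the smaller coordinate, and
// two trailing scf.for loops flush the entries left in either operand.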
// CHECK-LABEL:   func @add_ss(
// CHECK-SAME:      %[[VAL_0:.*0]]: tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>>,
// CHECK-SAME:      %[[VAL_1:.*1]]: tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>>,
// CHECK-SAME:      %[[VAL_2:.*2]]: tensor<32xf32>) -> tensor<32xf32> {
// CHECK-DAG:       %[[VAL_3:.*]] = arith.constant 0 : index
// CHECK-DAG:       %[[VAL_4:.*]] = arith.constant 1 : index
// CHECK-DAG:       %[[VAL_5:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_3]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK-DAG:       %[[VAL_6:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_3]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK-DAG:       %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
// CHECK-DAG:       %[[VAL_8:.*]] = sparse_tensor.pointers %[[VAL_1]], %[[VAL_3]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK-DAG:       %[[VAL_9:.*]] = sparse_tensor.indices %[[VAL_1]], %[[VAL_3]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK-DAG:       %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
// CHECK-DAG:       %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]]
// CHECK:           linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_12]] : memref<32xf32>)
// CHECK:           %[[VAL_13:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
// CHECK:           %[[VAL_14:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK:           %[[VAL_15:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_3]]] : memref<?xindex>
// CHECK:           %[[VAL_16:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK:           %[[VAL_17:.*]]:2 = scf.while (%[[VAL_18:.*]] = %[[VAL_13]], %[[VAL_19:.*]] = %[[VAL_15]]) : (index, index) -> (index, index) {
// CHECK:             %[[VAL_20:.*]] = arith.cmpi ult, %[[VAL_18]], %[[VAL_14]] : index
// CHECK:             %[[VAL_21:.*]] = arith.cmpi ult, %[[VAL_19]], %[[VAL_16]] : index
// CHECK:             %[[VAL_22:.*]] = arith.andi %[[VAL_20]], %[[VAL_21]] : i1
// CHECK:             scf.condition(%[[VAL_22]]) %[[VAL_18]], %[[VAL_19]] : index, index
// CHECK:           } do {
// CHECK:           ^bb0(%[[VAL_23:.*]]: index, %[[VAL_24:.*]]: index):
// CHECK:             %[[VAL_25:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_23]]] : memref<?xindex>
// CHECK:             %[[VAL_26:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_24]]] : memref<?xindex>
// CHECK:             %[[VAL_27:.*]] = arith.cmpi ult, %[[VAL_26]], %[[VAL_25]] : index
// CHECK:             %[[VAL_28:.*]] = arith.select %[[VAL_27]], %[[VAL_26]], %[[VAL_25]] : index
// CHECK:             %[[VAL_29:.*]] = arith.cmpi eq, %[[VAL_25]], %[[VAL_28]] : index
// CHECK:             %[[VAL_30:.*]] = arith.cmpi eq, %[[VAL_26]], %[[VAL_28]] : index
// CHECK:             %[[VAL_31:.*]] = arith.andi %[[VAL_29]], %[[VAL_30]] : i1
// CHECK:             scf.if %[[VAL_31]] {
// CHECK:               %[[VAL_32:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_23]]] : memref<?xf32>
// CHECK:               %[[VAL_33:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_24]]] : memref<?xf32>
// CHECK:               %[[VAL_34:.*]] = arith.addf %[[VAL_32]], %[[VAL_33]] : f32
// CHECK:               memref.store %[[VAL_34]], %[[VAL_12]]{{\[}}%[[VAL_28]]] : memref<32xf32>
// CHECK:             } else {
// CHECK:               %[[VAL_35:.*]] = arith.cmpi eq, %[[VAL_25]], %[[VAL_28]] : index
// CHECK:               scf.if %[[VAL_35]] {
// CHECK:                 %[[VAL_36:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_23]]] : memref<?xf32>
// CHECK:                 memref.store %[[VAL_36]], %[[VAL_12]]{{\[}}%[[VAL_28]]] : memref<32xf32>
// CHECK:               } else {
// CHECK:                 %[[VAL_37:.*]] = arith.cmpi eq, %[[VAL_26]], %[[VAL_28]] : index
// CHECK:                 scf.if %[[VAL_37]] {
// CHECK:                   %[[VAL_38:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_24]]] : memref<?xf32>
// CHECK:                   memref.store %[[VAL_38]], %[[VAL_12]]{{\[}}%[[VAL_28]]] : memref<32xf32>
// CHECK:                 } else {
// CHECK:                 }
// CHECK:               }
// CHECK:             }
// CHECK:             %[[VAL_39:.*]] = arith.cmpi eq, %[[VAL_25]], %[[VAL_28]] : index
// CHECK:             %[[VAL_40:.*]] = arith.addi %[[VAL_23]], %[[VAL_4]] : index
// CHECK:             %[[VAL_41:.*]] = arith.select %[[VAL_39]], %[[VAL_40]], %[[VAL_23]] : index
// CHECK:             %[[VAL_42:.*]] = arith.cmpi eq, %[[VAL_26]], %[[VAL_28]] : index
// CHECK:             %[[VAL_43:.*]] = arith.addi %[[VAL_24]], %[[VAL_4]] : index
// CHECK:             %[[VAL_44:.*]] = arith.select %[[VAL_42]], %[[VAL_43]], %[[VAL_24]] : index
// CHECK:             scf.yield %[[VAL_41]], %[[VAL_44]] : index, index
// CHECK:           }
// CHECK:           scf.for %[[VAL_45:.*]] = %[[VAL_46:.*]]#0 to %[[VAL_14]] step %[[VAL_4]] {
// CHECK:             %[[VAL_47:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_45]]] : memref<?xindex>
// CHECK:             %[[VAL_48:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_45]]] : memref<?xf32>
// CHECK:             memref.store %[[VAL_48]], %[[VAL_12]]{{\[}}%[[VAL_47]]] : memref<32xf32>
// CHECK:           }
// CHECK:           scf.for %[[VAL_49:.*]] = %[[VAL_50:.*]]#1 to %[[VAL_16]] step %[[VAL_4]] {
// CHECK:             %[[VAL_51:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_49]]] : memref<?xindex>
// CHECK:             %[[VAL_52:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_49]]] : memref<?xf32>
// CHECK:             memref.store %[[VAL_52]], %[[VAL_12]]{{\[}}%[[VAL_51]]] : memref<32xf32>
// CHECK:           }
// CHECK:           %[[VAL_53:.*]] = bufferization.to_tensor %[[VAL_12]] : memref<32xf32>
// CHECK:           return %[[VAL_53]] : tensor<32xf32>
// CHECK:         }
func.func @add_ss(%arga: tensor<32xf32, #SV>, %argb: tensor<32xf32, #SV>, %argx: tensor<32xf32>) -> tensor<32xf32> {
  %0 = linalg.generic #trait2
     ins(%arga, %argb: tensor<32xf32, #SV>, tensor<32xf32, #SV>)
    outs(%argx: tensor<32xf32>) {
      ^bb(%a: f32, %b: f32, %x: f32):
        %0 = arith.addf %a, %b : f32
        linalg.yield %0 : f32
  } -> tensor<32xf32>
  return %0 : tensor<32xf32>
}

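// Multiplying two compressed operands only involves the intersection of
// their coordinates, so the merge loop stores a product where both match
// and no cleanup loops are needed.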
// CHECK-LABEL:   func @mul_ss(
// CHECK-SAME:      %[[VAL_0:.*0]]: tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>>,
// CHECK-SAME:      %[[VAL_1:.*1]]: tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>>,
// CHECK-SAME:      %[[VAL_2:.*2]]: tensor<32xf32>) -> tensor<32xf32> {
// CHECK-DAG:       %[[VAL_3:.*]] = arith.constant 0 : index
// CHECK-DAG:       %[[VAL_4:.*]] = arith.constant 1 : index
// CHECK-DAG:       %[[VAL_5:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_3]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK-DAG:       %[[VAL_6:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_3]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK-DAG:       %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
// CHECK-DAG:       %[[VAL_8:.*]] = sparse_tensor.pointers %[[VAL_1]], %[[VAL_3]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK-DAG:       %[[VAL_9:.*]] = sparse_tensor.indices %[[VAL_1]], %[[VAL_3]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK-DAG:       %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
// CHECK-DAG:       %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]]
// CHECK:           linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_12]] : memref<32xf32>)
// CHECK:           %[[VAL_13:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
// CHECK:           %[[VAL_14:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK:           %[[VAL_15:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_3]]] : memref<?xindex>
// CHECK:           %[[VAL_16:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK:           %[[VAL_17:.*]]:2 = scf.while (%[[VAL_18:.*]] = %[[VAL_13]], %[[VAL_19:.*]] = %[[VAL_15]]) : (index, index) -> (index, index) {
// CHECK:             %[[VAL_20:.*]] = arith.cmpi ult, %[[VAL_18]], %[[VAL_14]] : index
// CHECK:             %[[VAL_21:.*]] = arith.cmpi ult, %[[VAL_19]], %[[VAL_16]] : index
// CHECK:             %[[VAL_22:.*]] = arith.andi %[[VAL_20]], %[[VAL_21]] : i1
// CHECK:             scf.condition(%[[VAL_22]]) %[[VAL_18]], %[[VAL_19]] : index, index
// CHECK:           } do {
// CHECK:           ^bb0(%[[VAL_23:.*]]: index, %[[VAL_24:.*]]: index):
// CHECK:             %[[VAL_25:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_23]]] : memref<?xindex>
// CHECK:             %[[VAL_26:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_24]]] : memref<?xindex>
// CHECK:             %[[VAL_27:.*]] = arith.cmpi ult, %[[VAL_26]], %[[VAL_25]] : index
// CHECK:             %[[VAL_28:.*]] = arith.select %[[VAL_27]], %[[VAL_26]], %[[VAL_25]] : index
// CHECK:             %[[VAL_29:.*]] = arith.cmpi eq, %[[VAL_25]], %[[VAL_28]] : index
// CHECK:             %[[VAL_30:.*]] = arith.cmpi eq, %[[VAL_26]], %[[VAL_28]] : index
// CHECK:             %[[VAL_31:.*]] = arith.andi %[[VAL_29]], %[[VAL_30]] : i1
// CHECK:             scf.if %[[VAL_31]] {
// CHECK:               %[[VAL_32:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_23]]] : memref<?xf32>
// CHECK:               %[[VAL_33:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_24]]] : memref<?xf32>
// CHECK:               %[[VAL_34:.*]] = arith.mulf %[[VAL_32]], %[[VAL_33]] : f32
// CHECK:               memref.store %[[VAL_34]], %[[VAL_12]]{{\[}}%[[VAL_28]]] : memref<32xf32>
// CHECK:             } else {
// CHECK:             }
// CHECK:             %[[VAL_35:.*]] = arith.cmpi eq, %[[VAL_25]], %[[VAL_28]] : index
// CHECK:             %[[VAL_36:.*]] = arith.addi %[[VAL_23]], %[[VAL_4]] : index
// CHECK:             %[[VAL_37:.*]] = arith.select %[[VAL_35]], %[[VAL_36]], %[[VAL_23]] : index
// CHECK:             %[[VAL_38:.*]] = arith.cmpi eq, %[[VAL_26]], %[[VAL_28]] : index
// CHECK:             %[[VAL_39:.*]] = arith.addi %[[VAL_24]], %[[VAL_4]] : index
// CHECK:             %[[VAL_40:.*]] = arith.select %[[VAL_38]], %[[VAL_39]], %[[VAL_24]] : index
// CHECK:             scf.yield %[[VAL_37]], %[[VAL_40]] : index, index
// CHECK:           }
// CHECK:           %[[VAL_41:.*]] = bufferization.to_tensor %[[VAL_12]] : memref<32xf32>
// CHECK:           return %[[VAL_41]] : tensor<32xf32>
// CHECK:         }
func.func @mul_ss(%arga: tensor<32xf32, #SV>, %argb: tensor<32xf32, #SV>, %argx: tensor<32xf32>) -> tensor<32xf32> {
  %0 = linalg.generic #trait2
     ins(%arga, %argb: tensor<32xf32, #SV>, tensor<32xf32, #SV>)
    outs(%argx: tensor<32xf32>) {
      ^bb(%a: f32, %b: f32, %x: f32):
        %0 = arith.mulf %a, %b : f32
        linalg.yield %0 : f32
  } -> tensor<32xf32>
  return %0 : tensor<32xf32>
}

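// Kernel "x(i) = a(i) * c + b(i) * c": the loop-invariant scalar c is
// multiplied into every branch of the two-way merge, including both
// cleanup loops.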
// CHECK-LABEL:   func @two_way_inv(
// CHECK-SAME:      %[[VAL_0:.*0]]: tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>>,
// CHECK-SAME:      %[[VAL_1:.*1]]: tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>>,
// CHECK-SAME:      %[[VAL_2:.*2]]: f32,
// CHECK-SAME:      %[[VAL_3:.*3]]: tensor<16xf32>) -> tensor<16xf32> {
// CHECK-DAG:       %[[VAL_4:.*]] = arith.constant 0 : index
// CHECK-DAG:       %[[VAL_5:.*]] = arith.constant 1 : index
// CHECK-DAG:       %[[VAL_6:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_4]] : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK-DAG:       %[[VAL_7:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_4]] : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK-DAG:       %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
// CHECK-DAG:       %[[VAL_9:.*]] = sparse_tensor.pointers %[[VAL_1]], %[[VAL_4]] : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK-DAG:       %[[VAL_10:.*]] = sparse_tensor.indices %[[VAL_1]], %[[VAL_4]] : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK-DAG:       %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
// CHECK-DAG:       %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_3]]
// CHECK:           linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_13]] : memref<16xf32>)
// CHECK:           %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK:           %[[VAL_15:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref<?xindex>
// CHECK:           %[[VAL_16:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK:           %[[VAL_17:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_5]]] : memref<?xindex>
// CHECK:           %[[VAL_18:.*]]:2 = scf.while (%[[VAL_19:.*]] = %[[VAL_14]], %[[VAL_20:.*]] = %[[VAL_16]]) : (index, index) -> (index, index) {
// CHECK:             %[[VAL_21:.*]] = arith.cmpi ult, %[[VAL_19]], %[[VAL_15]] : index
// CHECK:             %[[VAL_22:.*]] = arith.cmpi ult, %[[VAL_20]], %[[VAL_17]] : index
// CHECK:             %[[VAL_23:.*]] = arith.andi %[[VAL_21]], %[[VAL_22]] : i1
// CHECK:             scf.condition(%[[VAL_23]]) %[[VAL_19]], %[[VAL_20]] : index, index
// CHECK:           } do {
// CHECK:           ^bb0(%[[VAL_24:.*]]: index, %[[VAL_25:.*]]: index):
// CHECK:             %[[VAL_26:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_24]]] : memref<?xindex>
// CHECK:             %[[VAL_27:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_25]]] : memref<?xindex>
// CHECK:             %[[VAL_28:.*]] = arith.cmpi ult, %[[VAL_27]], %[[VAL_26]] : index
// CHECK:             %[[VAL_29:.*]] = arith.select %[[VAL_28]], %[[VAL_27]], %[[VAL_26]] : index
// CHECK:             %[[VAL_30:.*]] = arith.cmpi eq, %[[VAL_26]], %[[VAL_29]] : index
// CHECK:             %[[VAL_31:.*]] = arith.cmpi eq, %[[VAL_27]], %[[VAL_29]] : index
// CHECK:             %[[VAL_32:.*]] = arith.andi %[[VAL_30]], %[[VAL_31]] : i1
// CHECK:             scf.if %[[VAL_32]] {
// CHECK:               %[[VAL_33:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_24]]] : memref<?xf32>
// CHECK:               %[[VAL_34:.*]] = arith.mulf %[[VAL_33]], %[[VAL_2]] : f32
// CHECK:               %[[VAL_35:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_25]]] : memref<?xf32>
// CHECK:               %[[VAL_36:.*]] = arith.mulf %[[VAL_35]], %[[VAL_2]] : f32
// CHECK:               %[[VAL_37:.*]] = arith.addf %[[VAL_34]], %[[VAL_36]] : f32
// CHECK:               memref.store %[[VAL_37]], %[[VAL_13]]{{\[}}%[[VAL_29]]] : memref<16xf32>
// CHECK:             } else {
// CHECK:               %[[VAL_38:.*]] = arith.cmpi eq, %[[VAL_26]], %[[VAL_29]] : index
// CHECK:               scf.if %[[VAL_38]] {
// CHECK:                 %[[VAL_39:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_24]]] : memref<?xf32>
// CHECK:                 %[[VAL_40:.*]] = arith.mulf %[[VAL_39]], %[[VAL_2]] : f32
// CHECK:                 memref.store %[[VAL_40]], %[[VAL_13]]{{\[}}%[[VAL_29]]] : memref<16xf32>
// CHECK:               } else {
// CHECK:                 %[[VAL_41:.*]] = arith.cmpi eq, %[[VAL_27]], %[[VAL_29]] : index
// CHECK:                 scf.if %[[VAL_41]] {
// CHECK:                   %[[VAL_42:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_25]]] : memref<?xf32>
// CHECK:                   %[[VAL_43:.*]] = arith.mulf %[[VAL_42]], %[[VAL_2]] : f32
// CHECK:                   memref.store %[[VAL_43]], %[[VAL_13]]{{\[}}%[[VAL_29]]] : memref<16xf32>
// CHECK:                 } else {
// CHECK:                 }
// CHECK:               }
// CHECK:             }
// CHECK:             %[[VAL_44:.*]] = arith.cmpi eq, %[[VAL_26]], %[[VAL_29]] : index
// CHECK:             %[[VAL_45:.*]] = arith.addi %[[VAL_24]], %[[VAL_5]] : index
// CHECK:             %[[VAL_46:.*]] = arith.select %[[VAL_44]], %[[VAL_45]], %[[VAL_24]] : index
// CHECK:             %[[VAL_47:.*]] = arith.cmpi eq, %[[VAL_27]], %[[VAL_29]] : index
// CHECK:             %[[VAL_48:.*]] = arith.addi %[[VAL_25]], %[[VAL_5]] : index
// CHECK:             %[[VAL_49:.*]] = arith.select %[[VAL_47]], %[[VAL_48]], %[[VAL_25]] : index
// CHECK:             scf.yield %[[VAL_46]], %[[VAL_49]] : index, index
// CHECK:           }
// CHECK:           scf.for %[[VAL_50:.*]] = %[[VAL_51:.*]]#0 to %[[VAL_15]] step %[[VAL_5]] {
// CHECK:             %[[VAL_52:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_50]]] : memref<?xindex>
// CHECK:             %[[VAL_53:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_50]]] : memref<?xf32>
// CHECK:             %[[VAL_54:.*]] = arith.mulf %[[VAL_53]], %[[VAL_2]] : f32
// CHECK:             memref.store %[[VAL_54]], %[[VAL_13]]{{\[}}%[[VAL_52]]] : memref<16xf32>
// CHECK:           }
// CHECK:           scf.for %[[VAL_55:.*]] = %[[VAL_56:.*]]#1 to %[[VAL_17]] step %[[VAL_5]] {
// CHECK:             %[[VAL_57:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_55]]] : memref<?xindex>
// CHECK:             %[[VAL_58:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_55]]] : memref<?xf32>
// CHECK:             %[[VAL_59:.*]] = arith.mulf %[[VAL_58]], %[[VAL_2]] : f32
// CHECK:             memref.store %[[VAL_59]], %[[VAL_13]]{{\[}}%[[VAL_57]]] : memref<16xf32>
// CHECK:           }
// CHECK:           %[[VAL_60:.*]] = bufferization.to_tensor %[[VAL_13]] : memref<16xf32>
// CHECK:           return %[[VAL_60]] : tensor<16xf32>
// CHECK:         }
func.func @two_way_inv(%arga: tensor<16xf32, #SV>, %argb: tensor<16xf32, #SV>, %argc: f32, %argx: tensor<16xf32>) -> tensor<16xf32> {
  // Kernel "x(i) = a(i) * c + b(i) * c".
  %0 = linalg.generic #trait2
    ins(%arga, %argb: tensor<16xf32, #SV>, tensor<16xf32, #SV>)
    outs(%argx: tensor<16xf32>) {
      ^bb(%a: f32, %b: f32, %x: f32):
        %0 = arith.mulf %a, %argc : f32
        %1 = arith.mulf %b, %argc : f32
        %2 = arith.addf %0, %1 : f32
        linalg.yield %2 : f32
  } -> tensor<16xf32>
  return %0 : tensor<16xf32>
}

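// Alternate formulation "x(i) = (a(i) + b(i)) * c": where both entries are
// present the sum is formed first and multiplied by c once; the
// single-operand branches match @two_way_inv.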
// CHECK-LABEL:   func @two_way_inv_alt(
// CHECK-SAME:      %[[VAL_0:.*0]]: tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>>,
// CHECK-SAME:      %[[VAL_1:.*1]]: tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>>,
// CHECK-SAME:      %[[VAL_2:.*2]]: f32,
// CHECK-SAME:      %[[VAL_3:.*3]]: tensor<16xf32>) -> tensor<16xf32> {
// CHECK-DAG:       %[[VAL_4:.*]] = arith.constant 0 : index
// CHECK-DAG:       %[[VAL_5:.*]] = arith.constant 1 : index
// CHECK-DAG:       %[[VAL_6:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_4]] : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK-DAG:       %[[VAL_7:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_4]] : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK-DAG:       %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
// CHECK-DAG:       %[[VAL_9:.*]] = sparse_tensor.pointers %[[VAL_1]], %[[VAL_4]] : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK-DAG:       %[[VAL_10:.*]] = sparse_tensor.indices %[[VAL_1]], %[[VAL_4]] : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK-DAG:       %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
// CHECK-DAG:       %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_3]]
// CHECK:           linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_13]] : memref<16xf32>)
// CHECK:           %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK:           %[[VAL_15:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref<?xindex>
// CHECK:           %[[VAL_16:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK:           %[[VAL_17:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_5]]] : memref<?xindex>
// CHECK:           %[[VAL_18:.*]]:2 = scf.while (%[[VAL_19:.*]] = %[[VAL_14]], %[[VAL_20:.*]] = %[[VAL_16]]) : (index, index) -> (index, index) {
// CHECK:             %[[VAL_21:.*]] = arith.cmpi ult, %[[VAL_19]], %[[VAL_15]] : index
// CHECK:             %[[VAL_22:.*]] = arith.cmpi ult, %[[VAL_20]], %[[VAL_17]] : index
// CHECK:             %[[VAL_23:.*]] = arith.andi %[[VAL_21]], %[[VAL_22]] : i1
// CHECK:             scf.condition(%[[VAL_23]]) %[[VAL_19]], %[[VAL_20]] : index, index
// CHECK:           } do {
// CHECK:           ^bb0(%[[VAL_24:.*]]: index, %[[VAL_25:.*]]: index):
// CHECK:             %[[VAL_26:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_24]]] : memref<?xindex>
// CHECK:             %[[VAL_27:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_25]]] : memref<?xindex>
// CHECK:             %[[VAL_28:.*]] = arith.cmpi ult, %[[VAL_27]], %[[VAL_26]] : index
// CHECK:             %[[VAL_29:.*]] = arith.select %[[VAL_28]], %[[VAL_27]], %[[VAL_26]] : index
// CHECK:             %[[VAL_30:.*]] = arith.cmpi eq, %[[VAL_26]], %[[VAL_29]] : index
// CHECK:             %[[VAL_31:.*]] = arith.cmpi eq, %[[VAL_27]], %[[VAL_29]] : index
// CHECK:             %[[VAL_32:.*]] = arith.andi %[[VAL_30]], %[[VAL_31]] : i1
// CHECK:             scf.if %[[VAL_32]] {
// CHECK:               %[[VAL_33:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_24]]] : memref<?xf32>
// CHECK:               %[[VAL_34:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_25]]] : memref<?xf32>
// CHECK:               %[[VAL_35:.*]] = arith.addf %[[VAL_33]], %[[VAL_34]] : f32
// CHECK:               %[[VAL_36:.*]] = arith.mulf %[[VAL_35]], %[[VAL_2]] : f32
// CHECK:               memref.store %[[VAL_36]], %[[VAL_13]]{{\[}}%[[VAL_29]]] : memref<16xf32>
// CHECK:             } else {
// CHECK:               %[[VAL_37:.*]] = arith.cmpi eq, %[[VAL_26]], %[[VAL_29]] : index
// CHECK:               scf.if %[[VAL_37]] {
// CHECK:                 %[[VAL_38:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_24]]] : memref<?xf32>
// CHECK:                 %[[VAL_39:.*]] = arith.mulf %[[VAL_38]], %[[VAL_2]] : f32
// CHECK:                 memref.store %[[VAL_39]], %[[VAL_13]]{{\[}}%[[VAL_29]]] : memref<16xf32>
// CHECK:               } else {
// CHECK:                 %[[VAL_40:.*]] = arith.cmpi eq, %[[VAL_27]], %[[VAL_29]] : index
// CHECK:                 scf.if %[[VAL_40]] {
// CHECK:                   %[[VAL_41:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_25]]] : memref<?xf32>
// CHECK:                   %[[VAL_42:.*]] = arith.mulf %[[VAL_41]], %[[VAL_2]] : f32
// CHECK:                   memref.store %[[VAL_42]], %[[VAL_13]]{{\[}}%[[VAL_29]]] : memref<16xf32>
// CHECK:                 } else {
// CHECK:                 }
// CHECK:               }
// CHECK:             }
// CHECK:             %[[VAL_43:.*]] = arith.cmpi eq, %[[VAL_26]], %[[VAL_29]] : index
// CHECK:             %[[VAL_44:.*]] = arith.addi %[[VAL_24]], %[[VAL_5]] : index
// CHECK:             %[[VAL_45:.*]] = arith.select %[[VAL_43]], %[[VAL_44]], %[[VAL_24]] : index
// CHECK:             %[[VAL_46:.*]] = arith.cmpi eq, %[[VAL_27]], %[[VAL_29]] : index
788// CHECK:             %[[VAL_47:.*]] = arith.addi %[[VAL_25]], %[[VAL_5]] : index
789// CHECK:             %[[VAL_48:.*]] = arith.select %[[VAL_46]], %[[VAL_47]], %[[VAL_25]] : index
790// CHECK:             scf.yield %[[VAL_45]], %[[VAL_48]] : index, index
791// CHECK:           }
792// CHECK:           scf.for %[[VAL_49:.*]] = %[[VAL_50:.*]]#0 to %[[VAL_15]] step %[[VAL_5]] {
793// CHECK:             %[[VAL_51:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_49]]] : memref<?xindex>
794// CHECK:             %[[VAL_52:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_49]]] : memref<?xf32>
795// CHECK:             %[[VAL_53:.*]] = arith.mulf %[[VAL_52]], %[[VAL_2]] : f32
796// CHECK:             memref.store %[[VAL_53]], %[[VAL_13]]{{\[}}%[[VAL_51]]] : memref<16xf32>
797// CHECK:           }
798// CHECK:           scf.for %[[VAL_54:.*]] = %[[VAL_55:.*]]#1 to %[[VAL_17]] step %[[VAL_5]] {
799// CHECK:             %[[VAL_56:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_54]]] : memref<?xindex>
800// CHECK:             %[[VAL_57:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_54]]] : memref<?xf32>
801// CHECK:             %[[VAL_58:.*]] = arith.mulf %[[VAL_57]], %[[VAL_2]] : f32
802// CHECK:             memref.store %[[VAL_58]], %[[VAL_13]]{{\[}}%[[VAL_56]]] : memref<16xf32>
803// CHECK:           }
804// CHECK:           %[[VAL_59:.*]] = bufferization.to_tensor %[[VAL_13]] : memref<16xf32>
805// CHECK:           return %[[VAL_59]] : tensor<16xf32>
806// CHECK:         }
func.func @two_way_inv_alt(%arga: tensor<16xf32, #SV>,
                           %argb: tensor<16xf32, #SV>, %argc: f32, %argx: tensor<16xf32>) -> tensor<16xf32> {
  // Same kernel, but now expressed as "x(i) = (a(i) + b(i)) * c".
  %0 = linalg.generic #trait2
    ins(%arga, %argb: tensor<16xf32, #SV>, tensor<16xf32, #SV>)
    outs(%argx: tensor<16xf32>) {
      ^bb(%a: f32, %b: f32, %x: f32):
        %0 = arith.addf %a, %b : f32
        %1 = arith.mulf %0, %argc : f32
        linalg.yield %1 : f32
  } -> tensor<16xf32>
  return %0 : tensor<16xf32>
}
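
// Note: this algebraically equivalent form lowers to the same while/for
// co-iteration as @two_way_inv above; only the scalar expression in each
// case changes (e.g. the overlap case computes (va[ia] + vb[ib]) * c).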

#trait_sum_reduction = {
  indexing_maps = [
    affine_map<(i) -> (i)>,  // a
    affine_map<(i) -> ()>    // x (scalar out)
  ],
  iterator_types = ["reduction"],
  doc = "x += SUM_i a(i)"
}

// CHECK-LABEL:   func @sum_reduction(
// CHECK-SAME:      %[[VAL_0:.*]]: tensor<?xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>>,
// CHECK-SAME:      %[[VAL_1:.*]]: tensor<f32>) -> tensor<f32> {
// CHECK-DAG:       %[[VAL_2:.*]] = arith.constant 0 : index
// CHECK-DAG:       %[[VAL_3:.*]] = arith.constant 1 : index
// CHECK-DAG:       %[[VAL_4:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_2]] : tensor<?xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK-DAG:       %[[VAL_5:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<?xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
// CHECK-DAG:       %[[VAL_6:.*]] = bufferization.to_memref %[[VAL_1]] : memref<f32>
// CHECK-DAG:       %[[VAL_8:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref<?xindex>
// CHECK-DAG:       %[[VAL_9:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_3]]] : memref<?xindex>
// CHECK-DAG:       %[[VAL_10:.*]] = memref.load %[[VAL_6]][] : memref<f32>
// CHECK:           %[[VAL_11:.*]] = scf.for %[[VAL_12:.*]] = %[[VAL_8]] to %[[VAL_9]] step %[[VAL_3]] iter_args(%[[VAL_13:.*]] = %[[VAL_10]]) -> (f32) {
// CHECK:             %[[VAL_14:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_12]]] : memref<?xf32>
// CHECK:             %[[VAL_15:.*]] = arith.addf %[[VAL_13]], %[[VAL_14]] : f32
// CHECK:             scf.yield %[[VAL_15]] : f32
// CHECK:           }
// CHECK:           memref.store %[[VAL_11]], %[[VAL_6]][] : memref<f32>
// CHECK:           %[[VAL_17:.*]] = bufferization.to_tensor %[[VAL_6]] : memref<f32>
// CHECK:           return %[[VAL_17]] : tensor<f32>
// CHECK:         }
func.func @sum_reduction(%arga: tensor<?xf32, #SV>, %argx: tensor<f32>) -> tensor<f32> {
  %0 = linalg.generic #trait_sum_reduction
    ins(%arga: tensor<?xf32, #SV>)
    outs(%argx: tensor<f32>) {
      ^bb(%a: f32, %x: f32):
        %0 = arith.addf %x, %a : f32
        linalg.yield %0 : f32
  } -> tensor<f32>
  return %0 : tensor<f32>
}
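
// Lowering note: a reduction from a sparse vector into a 0-d tensor
// becomes a single scf.for over the stored values, with the scalar
// accumulator threaded through iter_args and stored back once at the
// end. Roughly, in C-style pseudocode (illustrative names only):
//
//   float s = *x;
//   for (size_t ia = pa[0]; ia < pa[1]; ia++)
//     s += va[ia];
//   *x = s;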

#trait_sum_reduction2 = {
  indexing_maps = [
    affine_map<(i) -> (i)>, // a
    affine_map<(i) -> (i)>, // b
    affine_map<(i) -> ()>   // x (scalar out)
  ],
  iterator_types = ["reduction"],
  doc = "x += SUM_i a(i) + b(i)"
}

// CHECK-LABEL:   func @sum_reduction_ss(
// CHECK-SAME:      %[[VAL_0:.*0]]: tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>>,
// CHECK-SAME:      %[[VAL_1:.*1]]: tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>>,
// CHECK-SAME:      %[[VAL_2:.*2]]: tensor<f32>) -> tensor<f32> {
// CHECK-DAG:       %[[VAL_3:.*]] = arith.constant 0 : index
// CHECK-DAG:       %[[VAL_4:.*]] = arith.constant 1 : index
// CHECK-DAG:       %[[VAL_5:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_3]] : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK-DAG:       %[[VAL_6:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_3]] : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK-DAG:       %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
// CHECK-DAG:       %[[VAL_8:.*]] = sparse_tensor.pointers %[[VAL_1]], %[[VAL_3]] : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK-DAG:       %[[VAL_9:.*]] = sparse_tensor.indices %[[VAL_1]], %[[VAL_3]] : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK-DAG:       %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
// CHECK-DAG:       %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<f32>
// CHECK:           %[[VAL_13:.*]] = memref.load %[[VAL_11]][] : memref<f32>
// CHECK:           %[[VAL_14:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
// CHECK:           %[[VAL_15:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK:           %[[VAL_16:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_3]]] : memref<?xindex>
// CHECK:           %[[VAL_17:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK:           %[[VAL_18:.*]]:3 = scf.while (%[[VAL_19:.*]] = %[[VAL_14]], %[[VAL_20:.*]] = %[[VAL_16]], %[[VAL_21:.*]] = %[[VAL_13]]) : (index, index, f32) -> (index, index, f32) {
// CHECK:             %[[VAL_22:.*]] = arith.cmpi ult, %[[VAL_19]], %[[VAL_15]] : index
// CHECK:             %[[VAL_23:.*]] = arith.cmpi ult, %[[VAL_20]], %[[VAL_17]] : index
// CHECK:             %[[VAL_24:.*]] = arith.andi %[[VAL_22]], %[[VAL_23]] : i1
// CHECK:             scf.condition(%[[VAL_24]]) %[[VAL_19]], %[[VAL_20]], %[[VAL_21]] : index, index, f32
// CHECK:           } do {
// CHECK:           ^bb0(%[[VAL_25:.*]]: index, %[[VAL_26:.*]]: index, %[[VAL_27:.*]]: f32):
// CHECK:             %[[VAL_28:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_25]]] : memref<?xindex>
// CHECK:             %[[VAL_29:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_26]]] : memref<?xindex>
// CHECK:             %[[VAL_30:.*]] = arith.cmpi ult, %[[VAL_29]], %[[VAL_28]] : index
// CHECK:             %[[VAL_31:.*]] = arith.select %[[VAL_30]], %[[VAL_29]], %[[VAL_28]] : index
// CHECK:             %[[VAL_32:.*]] = arith.cmpi eq, %[[VAL_28]], %[[VAL_31]] : index
// CHECK:             %[[VAL_33:.*]] = arith.cmpi eq, %[[VAL_29]], %[[VAL_31]] : index
// CHECK:             %[[VAL_34:.*]] = arith.andi %[[VAL_32]], %[[VAL_33]] : i1
// CHECK:             %[[VAL_35:.*]] = scf.if %[[VAL_34]] -> (f32) {
// CHECK:               %[[VAL_36:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_25]]] : memref<?xf32>
// CHECK:               %[[VAL_37:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_26]]] : memref<?xf32>
// CHECK:               %[[VAL_38:.*]] = arith.addf %[[VAL_36]], %[[VAL_37]] : f32
// CHECK:               %[[VAL_39:.*]] = arith.addf %[[VAL_27]], %[[VAL_38]] : f32
// CHECK:               scf.yield %[[VAL_39]] : f32
// CHECK:             } else {
// CHECK:               %[[VAL_40:.*]] = arith.cmpi eq, %[[VAL_28]], %[[VAL_31]] : index
// CHECK:               %[[VAL_41:.*]] = scf.if %[[VAL_40]] -> (f32) {
// CHECK:                 %[[VAL_42:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_25]]] : memref<?xf32>
// CHECK:                 %[[VAL_43:.*]] = arith.addf %[[VAL_27]], %[[VAL_42]] : f32
// CHECK:                 scf.yield %[[VAL_43]] : f32
// CHECK:               } else {
// CHECK:                 %[[VAL_44:.*]] = arith.cmpi eq, %[[VAL_29]], %[[VAL_31]] : index
// CHECK:                 %[[VAL_45:.*]] = scf.if %[[VAL_44]] -> (f32) {
// CHECK:                   %[[VAL_46:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_26]]] : memref<?xf32>
// CHECK:                   %[[VAL_47:.*]] = arith.addf %[[VAL_27]], %[[VAL_46]] : f32
// CHECK:                   scf.yield %[[VAL_47]] : f32
// CHECK:                 } else {
// CHECK:                   scf.yield %[[VAL_27]] : f32
// CHECK:                 }
// CHECK:                 scf.yield %[[VAL_48:.*]] : f32
// CHECK:               }
// CHECK:               scf.yield %[[VAL_49:.*]] : f32
// CHECK:             }
// CHECK:             %[[VAL_50:.*]] = arith.cmpi eq, %[[VAL_28]], %[[VAL_31]] : index
// CHECK:             %[[VAL_51:.*]] = arith.addi %[[VAL_25]], %[[VAL_4]] : index
// CHECK:             %[[VAL_52:.*]] = arith.select %[[VAL_50]], %[[VAL_51]], %[[VAL_25]] : index
// CHECK:             %[[VAL_53:.*]] = arith.cmpi eq, %[[VAL_29]], %[[VAL_31]] : index
// CHECK:             %[[VAL_54:.*]] = arith.addi %[[VAL_26]], %[[VAL_4]] : index
// CHECK:             %[[VAL_55:.*]] = arith.select %[[VAL_53]], %[[VAL_54]], %[[VAL_26]] : index
// CHECK:             scf.yield %[[VAL_52]], %[[VAL_55]], %[[VAL_56:.*]] : index, index, f32
// CHECK:           }
// CHECK:           %[[VAL_57:.*]] = scf.for %[[VAL_58:.*]] = %[[VAL_59:.*]]#0 to %[[VAL_15]] step %[[VAL_4]] iter_args(%[[VAL_60:.*]] = %[[VAL_59]]#2) -> (f32) {
// CHECK:             %[[VAL_61:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_58]]] : memref<?xf32>
// CHECK:             %[[VAL_62:.*]] = arith.addf %[[VAL_60]], %[[VAL_61]] : f32
// CHECK:             scf.yield %[[VAL_62]] : f32
// CHECK:           }
// CHECK:           %[[VAL_63:.*]] = scf.for %[[VAL_64:.*]] = %[[VAL_65:.*]]#1 to %[[VAL_17]] step %[[VAL_4]] iter_args(%[[VAL_66:.*]] = %[[VAL_67:.*]]) -> (f32) {
// CHECK:             %[[VAL_68:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_64]]] : memref<?xf32>
// CHECK:             %[[VAL_69:.*]] = arith.addf %[[VAL_66]], %[[VAL_68]] : f32
// CHECK:             scf.yield %[[VAL_69]] : f32
// CHECK:           }
// CHECK:           memref.store %[[VAL_70:.*]], %[[VAL_11]][] : memref<f32>
// CHECK:           %[[VAL_71:.*]] = bufferization.to_tensor %[[VAL_11]] : memref<f32>
// CHECK:           return %[[VAL_71]] : tensor<f32>
// CHECK:         }
func.func @sum_reduction_ss(%arga: tensor<16xf32, #SV>,
                            %argb: tensor<16xf32, #SV>,
                            %argx: tensor<f32>) -> tensor<f32> {
  // Just for testing. This case would be better expressed
  // as two separate reduction kernels.
  %0 = linalg.generic #trait_sum_reduction2
    ins(%arga, %argb: tensor<16xf32, #SV>, tensor<16xf32, #SV>)
    outs(%argx: tensor<f32>) {
      ^bb(%a: f32, %b: f32, %x: f32):
        %0 = arith.addf %a, %b : f32
        %1 = arith.addf %x, %0 : f32
        linalg.yield %1 : f32
  } -> tensor<f32>
  return %0 : tensor<f32>
}
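
// Lowering note: with two sparse inputs the reduction state rides along
// the co-iteration itself; the scf.while above carries (ia, ib, s) and
// every case of the if-ladder yields an updated s, followed by two
// scalar tail reductions. Sketch:
//
//   while (ia < pa_end && ib < pb_end) { /* s += ... per case */ }
//   for (; ia < pa_end; ia++) s += va[ia];
//   for (; ib < pb_end; ib++) s += vb[ib];
//   *x = s;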

#trait_sum_reduction_inv = {
  indexing_maps = [
    affine_map<(i) -> (i)>, // a
    affine_map<(i) -> ()>,  // b
    affine_map<(i) -> (i)>, // c
    affine_map<(i) -> ()>   // x (out)
  ],
  iterator_types = ["reduction"],
  doc = "x += SUM_i a(i) * b + c(i)"
}

// CHECK-LABEL:   func @sum_reduction_inv(
// CHECK-SAME:      %[[VAL_0:.*0]]: tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>>,
// CHECK-SAME:      %[[VAL_1:.*1]]: tensor<f32>,
// CHECK-SAME:      %[[VAL_2:.*2]]: tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>>,
// CHECK-SAME:      %[[VAL_3:.*3]]: tensor<f32>) -> tensor<f32> {
// CHECK-DAG:       %[[VAL_4:.*]] = arith.constant 0 : index
// CHECK-DAG:       %[[VAL_5:.*]] = arith.constant 1 : index
// CHECK-DAG:       %[[VAL_6:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_4]] : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK-DAG:       %[[VAL_7:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_4]] : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK-DAG:       %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
// CHECK-DAG:       %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<f32>
// CHECK-DAG:       %[[VAL_10:.*]] = sparse_tensor.pointers %[[VAL_2]], %[[VAL_4]] : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK-DAG:       %[[VAL_11:.*]] = sparse_tensor.indices %[[VAL_2]], %[[VAL_4]] : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK-DAG:       %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_2]] : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32>
// CHECK-DAG:       %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_3]] : memref<f32>
// CHECK:           %[[VAL_15:.*]] = memref.load %[[VAL_13]][] : memref<f32>
// CHECK:           %[[VAL_16:.*]] = memref.load %[[VAL_9]][] : memref<f32>
// CHECK:           %[[VAL_17:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK:           %[[VAL_18:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref<?xindex>
// CHECK:           %[[VAL_19:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK:           %[[VAL_20:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_5]]] : memref<?xindex>
// CHECK:           %[[VAL_21:.*]]:3 = scf.while (%[[VAL_22:.*]] = %[[VAL_17]], %[[VAL_23:.*]] = %[[VAL_19]], %[[VAL_24:.*]] = %[[VAL_15]]) : (index, index, f32) -> (index, index, f32) {
// CHECK:             %[[VAL_25:.*]] = arith.cmpi ult, %[[VAL_22]], %[[VAL_18]] : index
// CHECK:             %[[VAL_26:.*]] = arith.cmpi ult, %[[VAL_23]], %[[VAL_20]] : index
// CHECK:             %[[VAL_27:.*]] = arith.andi %[[VAL_25]], %[[VAL_26]] : i1
// CHECK:             scf.condition(%[[VAL_27]]) %[[VAL_22]], %[[VAL_23]], %[[VAL_24]] : index, index, f32
// CHECK:           } do {
// CHECK:           ^bb0(%[[VAL_28:.*]]: index, %[[VAL_29:.*]]: index, %[[VAL_30:.*]]: f32):
// CHECK:             %[[VAL_31:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_28]]] : memref<?xindex>
// CHECK:             %[[VAL_32:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_29]]] : memref<?xindex>
// CHECK:             %[[VAL_33:.*]] = arith.cmpi ult, %[[VAL_32]], %[[VAL_31]] : index
// CHECK:             %[[VAL_34:.*]] = arith.select %[[VAL_33]], %[[VAL_32]], %[[VAL_31]] : index
// CHECK:             %[[VAL_35:.*]] = arith.cmpi eq, %[[VAL_31]], %[[VAL_34]] : index
// CHECK:             %[[VAL_36:.*]] = arith.cmpi eq, %[[VAL_32]], %[[VAL_34]] : index
// CHECK:             %[[VAL_37:.*]] = arith.andi %[[VAL_35]], %[[VAL_36]] : i1
// CHECK:             %[[VAL_38:.*]] = scf.if %[[VAL_37]] -> (f32) {
// CHECK:               %[[VAL_39:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_28]]] : memref<?xf32>
// CHECK:               %[[VAL_40:.*]] = arith.mulf %[[VAL_39]], %[[VAL_16]] : f32
// CHECK:               %[[VAL_41:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_29]]] : memref<?xf32>
// CHECK:               %[[VAL_42:.*]] = arith.addf %[[VAL_40]], %[[VAL_41]] : f32
// CHECK:               %[[VAL_43:.*]] = arith.addf %[[VAL_30]], %[[VAL_42]] : f32
// CHECK:               scf.yield %[[VAL_43]] : f32
// CHECK:             } else {
// CHECK:               %[[VAL_44:.*]] = arith.cmpi eq, %[[VAL_31]], %[[VAL_34]] : index
// CHECK:               %[[VAL_45:.*]] = scf.if %[[VAL_44]] -> (f32) {
// CHECK:                 %[[VAL_46:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_28]]] : memref<?xf32>
// CHECK:                 %[[VAL_47:.*]] = arith.mulf %[[VAL_46]], %[[VAL_16]] : f32
// CHECK:                 %[[VAL_48:.*]] = arith.addf %[[VAL_30]], %[[VAL_47]] : f32
// CHECK:                 scf.yield %[[VAL_48]] : f32
// CHECK:               } else {
// CHECK:                 %[[VAL_49:.*]] = arith.cmpi eq, %[[VAL_32]], %[[VAL_34]] : index
// CHECK:                 %[[VAL_50:.*]] = scf.if %[[VAL_49]] -> (f32) {
// CHECK:                   %[[VAL_51:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_29]]] : memref<?xf32>
// CHECK:                   %[[VAL_52:.*]] = arith.addf %[[VAL_30]], %[[VAL_51]] : f32
// CHECK:                   scf.yield %[[VAL_52]] : f32
// CHECK:                 } else {
// CHECK:                   scf.yield %[[VAL_30]] : f32
// CHECK:                 }
// CHECK:                 scf.yield %[[VAL_53:.*]] : f32
// CHECK:               }
// CHECK:               scf.yield %[[VAL_54:.*]] : f32
// CHECK:             }
// CHECK:             %[[VAL_55:.*]] = arith.cmpi eq, %[[VAL_31]], %[[VAL_34]] : index
// CHECK:             %[[VAL_56:.*]] = arith.addi %[[VAL_28]], %[[VAL_5]] : index
// CHECK:             %[[VAL_57:.*]] = arith.select %[[VAL_55]], %[[VAL_56]], %[[VAL_28]] : index
// CHECK:             %[[VAL_58:.*]] = arith.cmpi eq, %[[VAL_32]], %[[VAL_34]] : index
// CHECK:             %[[VAL_59:.*]] = arith.addi %[[VAL_29]], %[[VAL_5]] : index
// CHECK:             %[[VAL_60:.*]] = arith.select %[[VAL_58]], %[[VAL_59]], %[[VAL_29]] : index
// CHECK:             scf.yield %[[VAL_57]], %[[VAL_60]], %[[VAL_61:.*]] : index, index, f32
// CHECK:           }
// CHECK:           %[[VAL_62:.*]] = scf.for %[[VAL_63:.*]] = %[[VAL_64:.*]]#0 to %[[VAL_18]] step %[[VAL_5]] iter_args(%[[VAL_65:.*]] = %[[VAL_64]]#2) -> (f32) {
// CHECK:             %[[VAL_66:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_63]]] : memref<?xf32>
// CHECK:             %[[VAL_67:.*]] = arith.mulf %[[VAL_66]], %[[VAL_16]] : f32
// CHECK:             %[[VAL_68:.*]] = arith.addf %[[VAL_65]], %[[VAL_67]] : f32
// CHECK:             scf.yield %[[VAL_68]] : f32
// CHECK:           }
// CHECK:           %[[VAL_69:.*]] = scf.for %[[VAL_70:.*]] = %[[VAL_71:.*]]#1 to %[[VAL_20]] step %[[VAL_5]] iter_args(%[[VAL_72:.*]] = %[[VAL_73:.*]]) -> (f32) {
// CHECK:             %[[VAL_74:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_70]]] : memref<?xf32>
// CHECK:             %[[VAL_75:.*]] = arith.addf %[[VAL_72]], %[[VAL_74]] : f32
// CHECK:             scf.yield %[[VAL_75]] : f32
// CHECK:           }
// CHECK:           memref.store %[[VAL_76:.*]], %[[VAL_13]][] : memref<f32>
// CHECK:           %[[VAL_77:.*]] = bufferization.to_tensor %[[VAL_13]] : memref<f32>
// CHECK:           return %[[VAL_77]] : tensor<f32>
// CHECK:         }
func.func @sum_reduction_inv(%arga: tensor<16xf32, #SV>,
                             %argb: tensor<f32>,
                             %argc: tensor<16xf32, #SV>,
                             %argx: tensor<f32>) -> tensor<f32> {
  // Just for testing. This case would be better expressed
  // as two separate reduction kernels.
  %0 = linalg.generic #trait_sum_reduction_inv
    ins(%arga, %argb, %argc : tensor<16xf32, #SV>, tensor<f32>, tensor<16xf32, #SV>)
    outs(%argx: tensor<f32>) {
      ^bb(%a: f32, %b: f32, %c: f32, %x: f32):
        %0 = arith.mulf %a, %b : f32
        %1 = arith.addf %0, %c : f32
        %2 = arith.addf %x, %1 : f32
        linalg.yield %2 : f32
  } -> tensor<f32>
  return %0 : tensor<f32>
}
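
// Lowering note: the scalar operand b is loop-invariant, so it is hoisted
// out of the co-iteration; the CHECK lines above load it once from its
// 0-d buffer (%VAL_16) before the scf.while begins, and every case then
// multiplies by that value instead of reloading b per element.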

#trait_four_tensors = {
  indexing_maps = [
    affine_map<(i) -> (i)>,  // A
    affine_map<(i) -> (i)>,  // B
    affine_map<(i) -> (i)>,  // C
    affine_map<(i) -> (i)>,  // D
    affine_map<(i) -> (i)>   // X (out)
  ],
  iterator_types = ["parallel"],
  doc = "X(i) = A(i) + B(i) + C(i) + D(i)"
}

// CHECK-LABEL:   func @four_tensors_op(
// CHECK-SAME:      %[[VAL_0:.*0]]: tensor<?xf64>,
// CHECK-SAME:      %[[VAL_1:.*1]]: tensor<?xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>>,
// CHECK-SAME:      %[[VAL_2:.*2]]: tensor<?xf64>,
// CHECK-SAME:      %[[VAL_3:.*3]]: tensor<?xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>>,
// CHECK-SAME:      %[[VAL_4:.*]]: tensor<?xf64>) -> tensor<?xf64> {
// CHECK-DAG:       %[[VAL_5:.*]] = arith.constant 0 : index
// CHECK-DAG:       %[[VAL_6:.*]] = arith.constant true
// CHECK-DAG:       %[[VAL_7:.*]] = arith.constant 1 : index
// CHECK-DAG:       %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_0]] : memref<?xf64>
// CHECK-DAG:       %[[VAL_9:.*]] = sparse_tensor.pointers %[[VAL_1]], %[[VAL_5]] : tensor<?xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK-DAG:       %[[VAL_10:.*]] = sparse_tensor.indices %[[VAL_1]], %[[VAL_5]] : tensor<?xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK-DAG:       %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<?xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf64>
// CHECK-DAG:       %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : memref<?xf64>
// CHECK-DAG:       %[[VAL_13:.*]] = sparse_tensor.pointers %[[VAL_3]], %[[VAL_5]] : tensor<?xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK-DAG:       %[[VAL_14:.*]] = sparse_tensor.indices %[[VAL_3]], %[[VAL_5]] : tensor<?xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// CHECK-DAG:       %[[VAL_15:.*]] = sparse_tensor.values %[[VAL_3]] : tensor<?xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf64>
// CHECK-DAG:       %[[VAL_16:.*]] = tensor.dim %[[VAL_4]], %[[VAL_5]] : tensor<?xf64>
// CHECK-DAG:       %[[VAL_18:.*]] = bufferization.to_memref %[[VAL_4]]
// CHECK:           linalg.fill ins(%{{.*}} : f64) outs(%[[VAL_18]] : memref<?xf64>)
// CHECK:           %[[VAL_19:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_5]]] : memref<?xindex>
// CHECK:           %[[VAL_20:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_7]]] : memref<?xindex>
// CHECK:           %[[VAL_21:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_5]]] : memref<?xindex>
// CHECK:           %[[VAL_22:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_7]]] : memref<?xindex>
// CHECK:           %[[VAL_23:.*]]:3 = scf.while (%[[VAL_24:.*]] = %[[VAL_19]], %[[VAL_25:.*]] = %[[VAL_21]], %[[VAL_26:.*]] = %[[VAL_5]]) : (index, index, index) -> (index, index, index) {
// CHECK:             %[[VAL_27:.*]] = arith.cmpi ult, %[[VAL_24]], %[[VAL_20]] : index
// CHECK:             %[[VAL_28:.*]] = arith.cmpi ult, %[[VAL_25]], %[[VAL_22]] : index
// CHECK:             %[[VAL_29:.*]] = arith.andi %[[VAL_27]], %[[VAL_28]] : i1
// CHECK:             scf.condition(%[[VAL_29]]) %[[VAL_24]], %[[VAL_25]], %[[VAL_26]] : index, index, index
// CHECK:           } do {
// CHECK:           ^bb0(%[[VAL_30:.*]]: index, %[[VAL_31:.*]]: index, %[[VAL_32:.*]]: index):
// CHECK:             %[[VAL_33:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_30]]] : memref<?xindex>
// CHECK:             %[[VAL_34:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_31]]] : memref<?xindex>
// CHECK:             %[[VAL_35:.*]] = arith.cmpi eq, %[[VAL_33]], %[[VAL_32]] : index
// CHECK:             %[[VAL_36:.*]] = arith.cmpi eq, %[[VAL_34]], %[[VAL_32]] : index
// CHECK:             %[[VAL_37:.*]] = arith.andi %[[VAL_35]], %[[VAL_36]] : i1
// CHECK:             scf.if %[[VAL_37]] {
// CHECK:               %[[VAL_38:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_32]]] : memref<?xf64>
// CHECK:               %[[VAL_39:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_30]]] : memref<?xf64>
// CHECK:               %[[VAL_40:.*]] = arith.addf %[[VAL_38]], %[[VAL_39]] : f64
// CHECK:               %[[VAL_41:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_32]]] : memref<?xf64>
// CHECK:               %[[VAL_42:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_31]]] : memref<?xf64>
// CHECK:               %[[VAL_43:.*]] = arith.addf %[[VAL_41]], %[[VAL_42]] : f64
// CHECK:               %[[VAL_44:.*]] = arith.addf %[[VAL_40]], %[[VAL_43]] : f64
// CHECK:               memref.store %[[VAL_44]], %[[VAL_18]]{{\[}}%[[VAL_32]]] : memref<?xf64>
// CHECK:             } else {
// CHECK:               %[[VAL_45:.*]] = arith.cmpi eq, %[[VAL_33]], %[[VAL_32]] : index
// CHECK:               scf.if %[[VAL_45]] {
// CHECK:                 %[[VAL_46:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_32]]] : memref<?xf64>
// CHECK:                 %[[VAL_47:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_30]]] : memref<?xf64>
// CHECK:                 %[[VAL_48:.*]] = arith.addf %[[VAL_46]], %[[VAL_47]] : f64
// CHECK:                 %[[VAL_49:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_32]]] : memref<?xf64>
// CHECK:                 %[[VAL_50:.*]] = arith.addf %[[VAL_48]], %[[VAL_49]] : f64
// CHECK:                 memref.store %[[VAL_50]], %[[VAL_18]]{{\[}}%[[VAL_32]]] : memref<?xf64>
// CHECK:               } else {
// CHECK:                 %[[VAL_51:.*]] = arith.cmpi eq, %[[VAL_34]], %[[VAL_32]] : index
// CHECK:                 scf.if %[[VAL_51]] {
// CHECK:                   %[[VAL_52:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_32]]] : memref<?xf64>
// CHECK:                   %[[VAL_53:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_32]]] : memref<?xf64>
// CHECK:                   %[[VAL_54:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_31]]] : memref<?xf64>
// CHECK:                   %[[VAL_55:.*]] = arith.addf %[[VAL_53]], %[[VAL_54]] : f64
// CHECK:                   %[[VAL_56:.*]] = arith.addf %[[VAL_52]], %[[VAL_55]] : f64
// CHECK:                   memref.store %[[VAL_56]], %[[VAL_18]]{{\[}}%[[VAL_32]]] : memref<?xf64>
// CHECK:                 } else {
// CHECK:                   scf.if %[[VAL_6]] {
// CHECK:                     %[[VAL_57:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_32]]] : memref<?xf64>
// CHECK:                     %[[VAL_58:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_32]]] : memref<?xf64>
// CHECK:                     %[[VAL_59:.*]] = arith.addf %[[VAL_57]], %[[VAL_58]] : f64
// CHECK:                     memref.store %[[VAL_59]], %[[VAL_18]]{{\[}}%[[VAL_32]]] : memref<?xf64>
// CHECK:                   } else {
// CHECK:                   }
// CHECK:                 }
// CHECK:               }
// CHECK:             }
// CHECK:             %[[VAL_60:.*]] = arith.cmpi eq, %[[VAL_33]], %[[VAL_32]] : index
// CHECK:             %[[VAL_61:.*]] = arith.addi %[[VAL_30]], %[[VAL_7]] : index
// CHECK:             %[[VAL_62:.*]] = arith.select %[[VAL_60]], %[[VAL_61]], %[[VAL_30]] : index
// CHECK:             %[[VAL_63:.*]] = arith.cmpi eq, %[[VAL_34]], %[[VAL_32]] : index
// CHECK:             %[[VAL_64:.*]] = arith.addi %[[VAL_31]], %[[VAL_7]] : index
// CHECK:             %[[VAL_65:.*]] = arith.select %[[VAL_63]], %[[VAL_64]], %[[VAL_31]] : index
// CHECK:             %[[VAL_66:.*]] = arith.addi %[[VAL_32]], %[[VAL_7]] : index
// CHECK:             scf.yield %[[VAL_62]], %[[VAL_65]], %[[VAL_66]] : index, index, index
// CHECK:           }
// CHECK:           %[[VAL_67:.*]]:2 = scf.while (%[[VAL_68:.*]] = %[[VAL_69:.*]]#0, %[[VAL_70:.*]] = %[[VAL_69]]#2) : (index, index) -> (index, index) {
// CHECK:             %[[VAL_71:.*]] = arith.cmpi ult, %[[VAL_68]], %[[VAL_20]] : index
// CHECK:             scf.condition(%[[VAL_71]]) %[[VAL_68]], %[[VAL_70]] : index, index
// CHECK:           } do {
// CHECK:           ^bb0(%[[VAL_72:.*]]: index, %[[VAL_73:.*]]: index):
// CHECK:             %[[VAL_74:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_72]]] : memref<?xindex>
// CHECK:             %[[VAL_75:.*]] = arith.cmpi eq, %[[VAL_74]], %[[VAL_73]] : index
// CHECK:             scf.if %[[VAL_75]] {
// CHECK:               %[[VAL_76:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_73]]] : memref<?xf64>
// CHECK:               %[[VAL_77:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_72]]] : memref<?xf64>
// CHECK:               %[[VAL_78:.*]] = arith.addf %[[VAL_76]], %[[VAL_77]] : f64
// CHECK:               %[[VAL_79:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_73]]] : memref<?xf64>
// CHECK:               %[[VAL_80:.*]] = arith.addf %[[VAL_78]], %[[VAL_79]] : f64
// CHECK:               memref.store %[[VAL_80]], %[[VAL_18]]{{\[}}%[[VAL_73]]] : memref<?xf64>
// CHECK:             } else {
// CHECK:               scf.if %[[VAL_6]] {
// CHECK:                 %[[VAL_81:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_73]]] : memref<?xf64>
// CHECK:                 %[[VAL_82:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_73]]] : memref<?xf64>
// CHECK:                 %[[VAL_83:.*]] = arith.addf %[[VAL_81]], %[[VAL_82]] : f64
// CHECK:                 memref.store %[[VAL_83]], %[[VAL_18]]{{\[}}%[[VAL_73]]] : memref<?xf64>
// CHECK:               } else {
// CHECK:               }
// CHECK:             }
// CHECK:             %[[VAL_84:.*]] = arith.cmpi eq, %[[VAL_74]], %[[VAL_73]] : index
// CHECK:             %[[VAL_85:.*]] = arith.addi %[[VAL_72]], %[[VAL_7]] : index
// CHECK:             %[[VAL_86:.*]] = arith.select %[[VAL_84]], %[[VAL_85]], %[[VAL_72]] : index
// CHECK:             %[[VAL_87:.*]] = arith.addi %[[VAL_73]], %[[VAL_7]] : index
// CHECK:             scf.yield %[[VAL_86]], %[[VAL_87]] : index, index
// CHECK:           }
// CHECK:           %[[VAL_88:.*]]:2 = scf.while (%[[VAL_89:.*]] = %[[VAL_90:.*]]#1, %[[VAL_91:.*]] = %[[VAL_92:.*]]#1) : (index, index) -> (index, index) {
// CHECK:             %[[VAL_93:.*]] = arith.cmpi ult, %[[VAL_89]], %[[VAL_22]] : index
// CHECK:             scf.condition(%[[VAL_93]]) %[[VAL_89]], %[[VAL_91]] : index, index
// CHECK:           } do {
// CHECK:           ^bb0(%[[VAL_94:.*]]: index, %[[VAL_95:.*]]: index):
// CHECK:             %[[VAL_96:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_94]]] : memref<?xindex>
// CHECK:             %[[VAL_97:.*]] = arith.cmpi eq, %[[VAL_96]], %[[VAL_95]] : index
// CHECK:             scf.if %[[VAL_97]] {
// CHECK:               %[[VAL_98:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_95]]] : memref<?xf64>
// CHECK:               %[[VAL_99:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_95]]] : memref<?xf64>
// CHECK:               %[[VAL_100:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_94]]] : memref<?xf64>
// CHECK:               %[[VAL_101:.*]] = arith.addf %[[VAL_99]], %[[VAL_100]] : f64
// CHECK:               %[[VAL_102:.*]] = arith.addf %[[VAL_98]], %[[VAL_101]] : f64
// CHECK:               memref.store %[[VAL_102]], %[[VAL_18]]{{\[}}%[[VAL_95]]] : memref<?xf64>
// CHECK:             } else {
// CHECK:               scf.if %[[VAL_6]] {
// CHECK:                 %[[VAL_103:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_95]]] : memref<?xf64>
// CHECK:                 %[[VAL_104:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_95]]] : memref<?xf64>
// CHECK:                 %[[VAL_105:.*]] = arith.addf %[[VAL_103]], %[[VAL_104]] : f64
// CHECK:                 memref.store %[[VAL_105]], %[[VAL_18]]{{\[}}%[[VAL_95]]] : memref<?xf64>
// CHECK:               } else {
// CHECK:               }
// CHECK:             }
// CHECK:             %[[VAL_106:.*]] = arith.cmpi eq, %[[VAL_96]], %[[VAL_95]] : index
// CHECK:             %[[VAL_107:.*]] = arith.addi %[[VAL_94]], %[[VAL_7]] : index
// CHECK:             %[[VAL_108:.*]] = arith.select %[[VAL_106]], %[[VAL_107]], %[[VAL_94]] : index
// CHECK:             %[[VAL_109:.*]] = arith.addi %[[VAL_95]], %[[VAL_7]] : index
// CHECK:             scf.yield %[[VAL_108]], %[[VAL_109]] : index, index
// CHECK:           }
// CHECK:           scf.for %[[VAL_110:.*]] = %[[VAL_111:.*]]#1 to %[[VAL_16]] step %[[VAL_7]] {
// CHECK:             %[[VAL_112:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_110]]] : memref<?xf64>
// CHECK:             %[[VAL_113:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_110]]] : memref<?xf64>
// CHECK:             %[[VAL_114:.*]] = arith.addf %[[VAL_112]], %[[VAL_113]] : f64
// CHECK:             memref.store %[[VAL_114]], %[[VAL_18]]{{\[}}%[[VAL_110]]] : memref<?xf64>
// CHECK:           }
// CHECK:           %[[VAL_115:.*]] = bufferization.to_tensor %[[VAL_18]] : memref<?xf64>
// CHECK:           return %[[VAL_115]] : tensor<?xf64>
// CHECK:         }
func.func @four_tensors_op(%arga: tensor<?xf64>,
                           %argb: tensor<?xf64, #SV>,
                           %argc: tensor<?xf64>,
                           %argd: tensor<?xf64, #SV>,
                           %argx: tensor<?xf64>) -> tensor<?xf64> {
  %r = linalg.generic #trait_four_tensors
    ins(%arga, %argb, %argc, %argd: tensor<?xf64>, tensor<?xf64, #SV>, tensor<?xf64>, tensor<?xf64, #SV>)
    outs(%argx: tensor<?xf64>) {
      ^bb(%a: f64, %b: f64, %c: f64, %d: f64, %x: f64):
        %0 = arith.addf %a, %b : f64
        %1 = arith.addf %c, %d : f64
        %2 = arith.addf %0, %1 : f64
        linalg.yield %2 : f64
  } -> tensor<?xf64>
  return %r : tensor<?xf64>
}
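
// Lowering note: the dense operands A and C are indexed directly at the
// current coordinate while B and D are co-iterated, and since the dense
// inputs contribute at every i, even the "neither sparse operand present"
// case runs (guarded by the `arith.constant true` above). The overall
// shape, roughly:
//
//   while (ib < pb_end && id < pd_end) { ... }  // B and D co-iterated
//   while (ib < pb_end) { ... }                 // B remains; A, C still read
//   while (id < pd_end) { ... }                 // D remains
//   for (; i < n; i++) x[i] = A[i] + C[i];      // dense remainder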

#trait_red3s = {
  indexing_maps = [
    affine_map<(i) -> (i)>,
    affine_map<(i) -> (i)>,
    affine_map<(i) -> (i)>,
    affine_map<(i) -> ()>
  ],
  iterator_types = ["reduction"],
  doc = "x += a(i) + b(i) + c(i)"
}
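
// The CHECK lines below exercise a three-way disjunction: one scf.while
// co-iterates all three index lists with a seven-case if-ladder, then
// pairwise scf.while loops and scalar scf.for tails drain the remaining
// combinations, all threading the f64 accumulator as loop state.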
1269
1270// CHECK-LABEL:   func @red3s(
1271// CHECK-SAME:      %[[VAL_0:.*0]]: tensor<?xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>>,
1272// CHECK-SAME:      %[[VAL_1:.*1]]: tensor<?xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>>,
1273// CHECK-SAME:      %[[VAL_2:.*2]]: tensor<?xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>>,
1274// CHECK-SAME:      %[[VAL_3:.*3]]: tensor<f64>) -> tensor<f64> {
1275// CHECK-DAG:       %[[VAL_4:.*]] = arith.constant 0 : index
1276// CHECK-DAG:       %[[VAL_5:.*]] = arith.constant 1 : index
1277// CHECK-DAG:       %[[VAL_6:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_4]] : tensor<?xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
1278// CHECK-DAG:       %[[VAL_7:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_4]] : tensor<?xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
1279// CHECK-DAG:       %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<?xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf64>
1280// CHECK-DAG:       %[[VAL_9:.*]] = sparse_tensor.pointers %[[VAL_1]], %[[VAL_4]] : tensor<?xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
1281// CHECK-DAG:       %[[VAL_10:.*]] = sparse_tensor.indices %[[VAL_1]], %[[VAL_4]] : tensor<?xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
1282// CHECK-DAG:       %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<?xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf64>
1283// CHECK-DAG:       %[[VAL_12:.*]] = sparse_tensor.pointers %[[VAL_2]], %[[VAL_4]] : tensor<?xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
1284// CHECK-DAG:       %[[VAL_13:.*]] = sparse_tensor.indices %[[VAL_2]], %[[VAL_4]] : tensor<?xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
1285// CHECK-DAG:       %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_2]] : tensor<?xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf64>
1286// CHECK-DAG:       %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_3]] : memref<f64>
1287// CHECK:           %[[VAL_17:.*]] = memref.load %[[VAL_15]][] : memref<f64>
1288// CHECK:           %[[VAL_18:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<?xindex>
1289// CHECK:           %[[VAL_19:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref<?xindex>
1290// CHECK:           %[[VAL_20:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_4]]] : memref<?xindex>
1291// CHECK:           %[[VAL_21:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_5]]] : memref<?xindex>
1292// CHECK:           %[[VAL_22:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_4]]] : memref<?xindex>
1293// CHECK:           %[[VAL_23:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_5]]] : memref<?xindex>
1294// CHECK:           %[[VAL_24:.*]]:4 = scf.while (%[[VAL_25:.*]] = %[[VAL_18]], %[[VAL_26:.*]] = %[[VAL_20]], %[[VAL_27:.*]] = %[[VAL_22]], %[[VAL_28:.*]] = %[[VAL_17]]) : (index, index, index, f64) -> (index, index, index, f64) {
1295// CHECK:             %[[VAL_29:.*]] = arith.cmpi ult, %[[VAL_25]], %[[VAL_19]] : index
1296// CHECK:             %[[VAL_30:.*]] = arith.cmpi ult, %[[VAL_26]], %[[VAL_21]] : index
1297// CHECK:             %[[VAL_31:.*]] = arith.andi %[[VAL_29]], %[[VAL_30]] : i1
1298// CHECK:             %[[VAL_32:.*]] = arith.cmpi ult, %[[VAL_27]], %[[VAL_23]] : index
1299// CHECK:             %[[VAL_33:.*]] = arith.andi %[[VAL_31]], %[[VAL_32]] : i1
1300// CHECK:             scf.condition(%[[VAL_33]]) %[[VAL_25]], %[[VAL_26]], %[[VAL_27]], %[[VAL_28]] : index, index, index, f64
1301// CHECK:           } do {
1302// CHECK:           ^bb0(%[[VAL_34:.*]]: index, %[[VAL_35:.*]]: index, %[[VAL_36:.*]]: index, %[[VAL_37:.*]]: f64):
1303// CHECK:             %[[VAL_38:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_34]]] : memref<?xindex>
1304// CHECK:             %[[VAL_39:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_35]]] : memref<?xindex>
1305// CHECK:             %[[VAL_40:.*]] = arith.cmpi ult, %[[VAL_39]], %[[VAL_38]] : index
1306// CHECK:             %[[VAL_41:.*]] = arith.select %[[VAL_40]], %[[VAL_39]], %[[VAL_38]] : index
1307// CHECK:             %[[VAL_42:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_36]]] : memref<?xindex>
1308// CHECK:             %[[VAL_43:.*]] = arith.cmpi ult, %[[VAL_42]], %[[VAL_41]] : index
1309// CHECK:             %[[VAL_44:.*]] = arith.select %[[VAL_43]], %[[VAL_42]], %[[VAL_41]] : index
1310// CHECK:             %[[VAL_45:.*]] = arith.cmpi eq, %[[VAL_38]], %[[VAL_44]] : index
1311// CHECK:             %[[VAL_46:.*]] = arith.cmpi eq, %[[VAL_39]], %[[VAL_44]] : index
1312// CHECK:             %[[VAL_47:.*]] = arith.andi %[[VAL_45]], %[[VAL_46]] : i1
1313// CHECK:             %[[VAL_48:.*]] = arith.cmpi eq, %[[VAL_42]], %[[VAL_44]] : index
1314// CHECK:             %[[VAL_49:.*]] = arith.andi %[[VAL_47]], %[[VAL_48]] : i1
1315// CHECK:             %[[VAL_50:.*]] = scf.if %[[VAL_49]] -> (f64) {
1316// CHECK:               %[[VAL_51:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_34]]] : memref<?xf64>
1317// CHECK:               %[[VAL_52:.*]] = arith.addf %[[VAL_37]], %[[VAL_51]] : f64
1318// CHECK:               %[[VAL_53:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_35]]] : memref<?xf64>
1319// CHECK:               %[[VAL_54:.*]] = arith.addf %[[VAL_52]], %[[VAL_53]] : f64
1320// CHECK:               %[[VAL_55:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_36]]] : memref<?xf64>
1321// CHECK:               %[[VAL_56:.*]] = arith.addf %[[VAL_54]], %[[VAL_55]] : f64
1322// CHECK:               scf.yield %[[VAL_56]] : f64
1323// CHECK:             } else {
1324// CHECK:               %[[VAL_57:.*]] = arith.cmpi eq, %[[VAL_39]], %[[VAL_44]] : index
1325// CHECK:               %[[VAL_58:.*]] = arith.cmpi eq, %[[VAL_42]], %[[VAL_44]] : index
1326// CHECK:               %[[VAL_59:.*]] = arith.andi %[[VAL_57]], %[[VAL_58]] : i1
1327// CHECK:               %[[VAL_60:.*]] = scf.if %[[VAL_59]] -> (f64) {
1328// CHECK:                 %[[VAL_61:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_35]]] : memref<?xf64>
1329// CHECK:                 %[[VAL_62:.*]] = arith.addf %[[VAL_37]], %[[VAL_61]] : f64
1330// CHECK:                 %[[VAL_63:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_36]]] : memref<?xf64>
1331// CHECK:                 %[[VAL_64:.*]] = arith.addf %[[VAL_62]], %[[VAL_63]] : f64
1332// CHECK:                 scf.yield %[[VAL_64]] : f64
1333// CHECK:               } else {
1334// CHECK:                 %[[VAL_65:.*]] = arith.cmpi eq, %[[VAL_38]], %[[VAL_44]] : index
1335// CHECK:                 %[[VAL_66:.*]] = arith.cmpi eq, %[[VAL_42]], %[[VAL_44]] : index
1336// CHECK:                 %[[VAL_67:.*]] = arith.andi %[[VAL_65]], %[[VAL_66]] : i1
1337// CHECK:                 %[[VAL_68:.*]] = scf.if %[[VAL_67]] -> (f64) {
1338// CHECK:                   %[[VAL_69:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_34]]] : memref<?xf64>
1339// CHECK:                   %[[VAL_70:.*]] = arith.addf %[[VAL_37]], %[[VAL_69]] : f64
1340// CHECK:                   %[[VAL_71:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_36]]] : memref<?xf64>
1341// CHECK:                   %[[VAL_72:.*]] = arith.addf %[[VAL_70]], %[[VAL_71]] : f64
1342// CHECK:                   scf.yield %[[VAL_72]] : f64
1343// CHECK:                 } else {
1344// CHECK:                   %[[VAL_73:.*]] = arith.cmpi eq, %[[VAL_42]], %[[VAL_44]] : index
1345// CHECK:                   %[[VAL_74:.*]] = scf.if %[[VAL_73]] -> (f64) {
1346// CHECK:                     %[[VAL_75:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_36]]] : memref<?xf64>
1347// CHECK:                     %[[VAL_76:.*]] = arith.addf %[[VAL_37]], %[[VAL_75]] : f64
1348// CHECK:                     scf.yield %[[VAL_76]] : f64
1349// CHECK:                   } else {
1350// CHECK:                     %[[VAL_77:.*]] = arith.cmpi eq, %[[VAL_38]], %[[VAL_44]] : index
1351// CHECK:                     %[[VAL_78:.*]] = arith.cmpi eq, %[[VAL_39]], %[[VAL_44]] : index
1352// CHECK:                     %[[VAL_79:.*]] = arith.andi %[[VAL_77]], %[[VAL_78]] : i1
1353// CHECK:                     %[[VAL_80:.*]] = scf.if %[[VAL_79]] -> (f64) {
1354// CHECK:                       %[[VAL_81:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_34]]] : memref<?xf64>
1355// CHECK:                       %[[VAL_82:.*]] = arith.addf %[[VAL_37]], %[[VAL_81]] : f64
1356// CHECK:                       %[[VAL_83:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_35]]] : memref<?xf64>
1357// CHECK:                       %[[VAL_84:.*]] = arith.addf %[[VAL_82]], %[[VAL_83]] : f64
1358// CHECK:                       scf.yield %[[VAL_84]] : f64
1359// CHECK:                     } else {
1360// CHECK:                       %[[VAL_85:.*]] = arith.cmpi eq, %[[VAL_39]], %[[VAL_44]] : index
1361// CHECK:                       %[[VAL_86:.*]] = scf.if %[[VAL_85]] -> (f64) {
1362// CHECK:                         %[[VAL_87:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_35]]] : memref<?xf64>
1363// CHECK:                         %[[VAL_88:.*]] = arith.addf %[[VAL_37]], %[[VAL_87]] : f64
1364// CHECK:                         scf.yield %[[VAL_88]] : f64
1365// CHECK:                       } else {
1366// CHECK:                         %[[VAL_89:.*]] = arith.cmpi eq, %[[VAL_38]], %[[VAL_44]] : index
1367// CHECK:                         %[[VAL_90:.*]] = scf.if %[[VAL_89]] -> (f64) {
1368// CHECK:                           %[[VAL_91:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_34]]] : memref<?xf64>
1369// CHECK:                           %[[VAL_92:.*]] = arith.addf %[[VAL_37]], %[[VAL_91]] : f64
1370// CHECK:                           scf.yield %[[VAL_92]] : f64
1371// CHECK:                         } else {
1372// CHECK:                           scf.yield %[[VAL_37]] : f64
1373// CHECK:                         }
1374// CHECK:                         scf.yield %[[VAL_93:.*]] : f64
1375// CHECK:                       }
1376// CHECK:                       scf.yield %[[VAL_94:.*]] : f64
1377// CHECK:                     }
1378// CHECK:                     scf.yield %[[VAL_95:.*]] : f64
1379// CHECK:                   }
1380// CHECK:                   scf.yield %[[VAL_96:.*]] : f64
1381// CHECK:                 }
1382// CHECK:                 scf.yield %[[VAL_97:.*]] : f64
1383// CHECK:               }
1384// CHECK:               scf.yield %[[VAL_98:.*]] : f64
1385// CHECK:             }
1386// CHECK:             %[[VAL_99:.*]] = arith.cmpi eq, %[[VAL_38]], %[[VAL_44]] : index
1387// CHECK:             %[[VAL_100:.*]] = arith.addi %[[VAL_34]], %[[VAL_5]] : index
1388// CHECK:             %[[VAL_101:.*]] = arith.select %[[VAL_99]], %[[VAL_100]], %[[VAL_34]] : index
1389// CHECK:             %[[VAL_102:.*]] = arith.cmpi eq, %[[VAL_39]], %[[VAL_44]] : index
1390// CHECK:             %[[VAL_103:.*]] = arith.addi %[[VAL_35]], %[[VAL_5]] : index
1391// CHECK:             %[[VAL_104:.*]] = arith.select %[[VAL_102]], %[[VAL_103]], %[[VAL_35]] : index
1392// CHECK:             %[[VAL_105:.*]] = arith.cmpi eq, %[[VAL_42]], %[[VAL_44]] : index
1393// CHECK:             %[[VAL_106:.*]] = arith.addi %[[VAL_36]], %[[VAL_5]] : index
1394// CHECK:             %[[VAL_107:.*]] = arith.select %[[VAL_105]], %[[VAL_106]], %[[VAL_36]] : index
1395// CHECK:             scf.yield %[[VAL_101]], %[[VAL_104]], %[[VAL_107]], %[[VAL_108:.*]] : index, index, index, f64
1396// CHECK:           }
1397// CHECK:           %[[VAL_109:.*]]:3 = scf.while (%[[VAL_110:.*]] = %[[VAL_111:.*]]#1, %[[VAL_112:.*]] = %[[VAL_111]]#2, %[[VAL_113:.*]] = %[[VAL_111]]#3) : (index, index, f64) -> (index, index, f64) {
1398// CHECK:             %[[VAL_114:.*]] = arith.cmpi ult, %[[VAL_110]], %[[VAL_21]] : index
1399// CHECK:             %[[VAL_115:.*]] = arith.cmpi ult, %[[VAL_112]], %[[VAL_23]] : index
1400// CHECK:             %[[VAL_116:.*]] = arith.andi %[[VAL_114]], %[[VAL_115]] : i1
1401// CHECK:             scf.condition(%[[VAL_116]]) %[[VAL_110]], %[[VAL_112]], %[[VAL_113]] : index, index, f64
1402// CHECK:           } do {
1403// CHECK:           ^bb0(%[[VAL_117:.*]]: index, %[[VAL_118:.*]]: index, %[[VAL_119:.*]]: f64):
1404// CHECK:             %[[VAL_120:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_117]]] : memref<?xindex>
1405// CHECK:             %[[VAL_121:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_118]]] : memref<?xindex>
1406// CHECK:             %[[VAL_122:.*]] = arith.cmpi ult, %[[VAL_121]], %[[VAL_120]] : index
1407// CHECK:             %[[VAL_123:.*]] = arith.select %[[VAL_122]], %[[VAL_121]], %[[VAL_120]] : index
1408// CHECK:             %[[VAL_124:.*]] = arith.cmpi eq, %[[VAL_120]], %[[VAL_123]] : index
1409// CHECK:             %[[VAL_125:.*]] = arith.cmpi eq, %[[VAL_121]], %[[VAL_123]] : index
1410// CHECK:             %[[VAL_126:.*]] = arith.andi %[[VAL_124]], %[[VAL_125]] : i1
1411// CHECK:             %[[VAL_127:.*]] = scf.if %[[VAL_126]] -> (f64) {
1412// CHECK:               %[[VAL_128:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_117]]] : memref<?xf64>
1413// CHECK:               %[[VAL_129:.*]] = arith.addf %[[VAL_119]], %[[VAL_128]] : f64
1414// CHECK:               %[[VAL_130:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_118]]] : memref<?xf64>
1415// CHECK:               %[[VAL_131:.*]] = arith.addf %[[VAL_129]], %[[VAL_130]] : f64
1416// CHECK:               scf.yield %[[VAL_131]] : f64
1417// CHECK:             } else {
1418// CHECK:               %[[VAL_132:.*]] = arith.cmpi eq, %[[VAL_121]], %[[VAL_123]] : index
1419// CHECK:               %[[VAL_133:.*]] = scf.if %[[VAL_132]] -> (f64) {
1420// CHECK:                 %[[VAL_134:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_118]]] : memref<?xf64>
1421// CHECK:                 %[[VAL_135:.*]] = arith.addf %[[VAL_119]], %[[VAL_134]] : f64
1422// CHECK:                 scf.yield %[[VAL_135]] : f64
1423// CHECK:               } else {
1424// CHECK:                 %[[VAL_136:.*]] = arith.cmpi eq, %[[VAL_120]], %[[VAL_123]] : index
1425// CHECK:                 %[[VAL_137:.*]] = scf.if %[[VAL_136]] -> (f64) {
1426// CHECK:                   %[[VAL_138:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_117]]] : memref<?xf64>
1427// CHECK:                   %[[VAL_139:.*]] = arith.addf %[[VAL_119]], %[[VAL_138]] : f64
1428// CHECK:                   scf.yield %[[VAL_139]] : f64
1429// CHECK:                 } else {
1430// CHECK:                   scf.yield %[[VAL_119]] : f64
1431// CHECK:                 }
1432// CHECK:                 scf.yield %[[VAL_140:.*]] : f64
1433// CHECK:               }
1434// CHECK:               scf.yield %[[VAL_141:.*]] : f64
1435// CHECK:             }
1436// CHECK:             %[[VAL_142:.*]] = arith.cmpi eq, %[[VAL_120]], %[[VAL_123]] : index
1437// CHECK:             %[[VAL_143:.*]] = arith.addi %[[VAL_117]], %[[VAL_5]] : index
1438// CHECK:             %[[VAL_144:.*]] = arith.select %[[VAL_142]], %[[VAL_143]], %[[VAL_117]] : index
1439// CHECK:             %[[VAL_145:.*]] = arith.cmpi eq, %[[VAL_121]], %[[VAL_123]] : index
1440// CHECK:             %[[VAL_146:.*]] = arith.addi %[[VAL_118]], %[[VAL_5]] : index
1441// CHECK:             %[[VAL_147:.*]] = arith.select %[[VAL_145]], %[[VAL_146]], %[[VAL_118]] : index
1442// CHECK:             scf.yield %[[VAL_144]], %[[VAL_147]], %[[VAL_148:.*]] : index, index, f64
1443// CHECK:           }
// CHECK:           %[[VAL_149:.*]]:3 = scf.while (%[[VAL_150:.*]] = %[[VAL_151:.*]]#0, %[[VAL_152:.*]] = %[[VAL_153:.*]]#1, %[[VAL_154:.*]] = %[[VAL_153]]#2) : (index, index, f64) -> (index, index, f64) {
// CHECK:             %[[VAL_155:.*]] = arith.cmpi ult, %[[VAL_150]], %[[VAL_19]] : index
// CHECK:             %[[VAL_156:.*]] = arith.cmpi ult, %[[VAL_152]], %[[VAL_23]] : index
// CHECK:             %[[VAL_157:.*]] = arith.andi %[[VAL_155]], %[[VAL_156]] : i1
// CHECK:             scf.condition(%[[VAL_157]]) %[[VAL_150]], %[[VAL_152]], %[[VAL_154]] : index, index, f64
// CHECK:           } do {
// CHECK:           ^bb0(%[[VAL_158:.*]]: index, %[[VAL_159:.*]]: index, %[[VAL_160:.*]]: f64):
// CHECK:             %[[VAL_161:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_158]]] : memref<?xindex>
// CHECK:             %[[VAL_162:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_159]]] : memref<?xindex>
// CHECK:             %[[VAL_163:.*]] = arith.cmpi ult, %[[VAL_162]], %[[VAL_161]] : index
// CHECK:             %[[VAL_164:.*]] = arith.select %[[VAL_163]], %[[VAL_162]], %[[VAL_161]] : index
// CHECK:             %[[VAL_165:.*]] = arith.cmpi eq, %[[VAL_161]], %[[VAL_164]] : index
// CHECK:             %[[VAL_166:.*]] = arith.cmpi eq, %[[VAL_162]], %[[VAL_164]] : index
// CHECK:             %[[VAL_167:.*]] = arith.andi %[[VAL_165]], %[[VAL_166]] : i1
// CHECK:             %[[VAL_168:.*]] = scf.if %[[VAL_167]] -> (f64) {
// CHECK:               %[[VAL_169:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_158]]] : memref<?xf64>
// CHECK:               %[[VAL_170:.*]] = arith.addf %[[VAL_160]], %[[VAL_169]] : f64
// CHECK:               %[[VAL_171:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_159]]] : memref<?xf64>
// CHECK:               %[[VAL_172:.*]] = arith.addf %[[VAL_170]], %[[VAL_171]] : f64
// CHECK:               scf.yield %[[VAL_172]] : f64
// CHECK:             } else {
// CHECK:               %[[VAL_173:.*]] = arith.cmpi eq, %[[VAL_162]], %[[VAL_164]] : index
// CHECK:               %[[VAL_174:.*]] = scf.if %[[VAL_173]] -> (f64) {
// CHECK:                 %[[VAL_175:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_159]]] : memref<?xf64>
// CHECK:                 %[[VAL_176:.*]] = arith.addf %[[VAL_160]], %[[VAL_175]] : f64
// CHECK:                 scf.yield %[[VAL_176]] : f64
// CHECK:               } else {
// CHECK:                 %[[VAL_177:.*]] = arith.cmpi eq, %[[VAL_161]], %[[VAL_164]] : index
// CHECK:                 %[[VAL_178:.*]] = scf.if %[[VAL_177]] -> (f64) {
// CHECK:                   %[[VAL_179:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_158]]] : memref<?xf64>
// CHECK:                   %[[VAL_180:.*]] = arith.addf %[[VAL_160]], %[[VAL_179]] : f64
// CHECK:                   scf.yield %[[VAL_180]] : f64
// CHECK:                 } else {
// CHECK:                   scf.yield %[[VAL_160]] : f64
// CHECK:                 }
// CHECK:                 scf.yield %[[VAL_181:.*]] : f64
// CHECK:               }
// CHECK:               scf.yield %[[VAL_182:.*]] : f64
// CHECK:             }
// CHECK:             %[[VAL_183:.*]] = arith.cmpi eq, %[[VAL_161]], %[[VAL_164]] : index
// CHECK:             %[[VAL_184:.*]] = arith.addi %[[VAL_158]], %[[VAL_5]] : index
// CHECK:             %[[VAL_185:.*]] = arith.select %[[VAL_183]], %[[VAL_184]], %[[VAL_158]] : index
// CHECK:             %[[VAL_186:.*]] = arith.cmpi eq, %[[VAL_162]], %[[VAL_164]] : index
// CHECK:             %[[VAL_187:.*]] = arith.addi %[[VAL_159]], %[[VAL_5]] : index
// CHECK:             %[[VAL_188:.*]] = arith.select %[[VAL_186]], %[[VAL_187]], %[[VAL_159]] : index
// CHECK:             scf.yield %[[VAL_185]], %[[VAL_188]], %[[VAL_189:.*]] : index, index, f64
// CHECK:           }
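// Tail loop that folds the leftover entries of c into the reduction.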
// CHECK:           %[[VAL_190:.*]] = scf.for %[[VAL_191:.*]] = %[[VAL_192:.*]]#1 to %[[VAL_23]] step %[[VAL_5]] iter_args(%[[VAL_193:.*]] = %[[VAL_192]]#2) -> (f64) {
// CHECK:             %[[VAL_194:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_191]]] : memref<?xf64>
// CHECK:             %[[VAL_195:.*]] = arith.addf %[[VAL_193]], %[[VAL_194]] : f64
// CHECK:             scf.yield %[[VAL_195]] : f64
// CHECK:           }
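// Pairwise co-iteration over the remaining entries of a and b.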
// CHECK:           %[[VAL_196:.*]]:3 = scf.while (%[[VAL_197:.*]] = %[[VAL_198:.*]]#0, %[[VAL_199:.*]] = %[[VAL_200:.*]]#0, %[[VAL_201:.*]] = %[[VAL_202:.*]]) : (index, index, f64) -> (index, index, f64) {
// CHECK:             %[[VAL_203:.*]] = arith.cmpi ult, %[[VAL_197]], %[[VAL_19]] : index
// CHECK:             %[[VAL_204:.*]] = arith.cmpi ult, %[[VAL_199]], %[[VAL_21]] : index
// CHECK:             %[[VAL_205:.*]] = arith.andi %[[VAL_203]], %[[VAL_204]] : i1
// CHECK:             scf.condition(%[[VAL_205]]) %[[VAL_197]], %[[VAL_199]], %[[VAL_201]] : index, index, f64
// CHECK:           } do {
// CHECK:           ^bb0(%[[VAL_206:.*]]: index, %[[VAL_207:.*]]: index, %[[VAL_208:.*]]: f64):
// CHECK:             %[[VAL_209:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_206]]] : memref<?xindex>
// CHECK:             %[[VAL_210:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_207]]] : memref<?xindex>
// CHECK:             %[[VAL_211:.*]] = arith.cmpi ult, %[[VAL_210]], %[[VAL_209]] : index
// CHECK:             %[[VAL_212:.*]] = arith.select %[[VAL_211]], %[[VAL_210]], %[[VAL_209]] : index
// CHECK:             %[[VAL_213:.*]] = arith.cmpi eq, %[[VAL_209]], %[[VAL_212]] : index
// CHECK:             %[[VAL_214:.*]] = arith.cmpi eq, %[[VAL_210]], %[[VAL_212]] : index
// CHECK:             %[[VAL_215:.*]] = arith.andi %[[VAL_213]], %[[VAL_214]] : i1
// CHECK:             %[[VAL_216:.*]] = scf.if %[[VAL_215]] -> (f64) {
// CHECK:               %[[VAL_217:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_206]]] : memref<?xf64>
// CHECK:               %[[VAL_218:.*]] = arith.addf %[[VAL_208]], %[[VAL_217]] : f64
// CHECK:               %[[VAL_219:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_207]]] : memref<?xf64>
// CHECK:               %[[VAL_220:.*]] = arith.addf %[[VAL_218]], %[[VAL_219]] : f64
// CHECK:               scf.yield %[[VAL_220]] : f64
// CHECK:             } else {
// CHECK:               %[[VAL_221:.*]] = arith.cmpi eq, %[[VAL_210]], %[[VAL_212]] : index
// CHECK:               %[[VAL_222:.*]] = scf.if %[[VAL_221]] -> (f64) {
// CHECK:                 %[[VAL_223:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_207]]] : memref<?xf64>
// CHECK:                 %[[VAL_224:.*]] = arith.addf %[[VAL_208]], %[[VAL_223]] : f64
// CHECK:                 scf.yield %[[VAL_224]] : f64
// CHECK:               } else {
// CHECK:                 %[[VAL_225:.*]] = arith.cmpi eq, %[[VAL_209]], %[[VAL_212]] : index
// CHECK:                 %[[VAL_226:.*]] = scf.if %[[VAL_225]] -> (f64) {
// CHECK:                   %[[VAL_227:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_206]]] : memref<?xf64>
// CHECK:                   %[[VAL_228:.*]] = arith.addf %[[VAL_208]], %[[VAL_227]] : f64
// CHECK:                   scf.yield %[[VAL_228]] : f64
// CHECK:                 } else {
// CHECK:                   scf.yield %[[VAL_208]] : f64
// CHECK:                 }
// CHECK:                 scf.yield %[[VAL_229:.*]] : f64
// CHECK:               }
// CHECK:               scf.yield %[[VAL_230:.*]] : f64
// CHECK:             }
// CHECK:             %[[VAL_231:.*]] = arith.cmpi eq, %[[VAL_209]], %[[VAL_212]] : index
// CHECK:             %[[VAL_232:.*]] = arith.addi %[[VAL_206]], %[[VAL_5]] : index
// CHECK:             %[[VAL_233:.*]] = arith.select %[[VAL_231]], %[[VAL_232]], %[[VAL_206]] : index
// CHECK:             %[[VAL_234:.*]] = arith.cmpi eq, %[[VAL_210]], %[[VAL_212]] : index
// CHECK:             %[[VAL_235:.*]] = arith.addi %[[VAL_207]], %[[VAL_5]] : index
// CHECK:             %[[VAL_236:.*]] = arith.select %[[VAL_234]], %[[VAL_235]], %[[VAL_207]] : index
// CHECK:             scf.yield %[[VAL_233]], %[[VAL_236]], %[[VAL_237:.*]] : index, index, f64
// CHECK:           }
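// Tail loop for the leftover entries of b.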
// CHECK:           %[[VAL_238:.*]] = scf.for %[[VAL_239:.*]] = %[[VAL_240:.*]]#1 to %[[VAL_21]] step %[[VAL_5]] iter_args(%[[VAL_241:.*]] = %[[VAL_240]]#2) -> (f64) {
// CHECK:             %[[VAL_242:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_239]]] : memref<?xf64>
// CHECK:             %[[VAL_243:.*]] = arith.addf %[[VAL_241]], %[[VAL_242]] : f64
// CHECK:             scf.yield %[[VAL_243]] : f64
// CHECK:           }
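// Tail loop for the leftover entries of a.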
// CHECK:           %[[VAL_244:.*]] = scf.for %[[VAL_245:.*]] = %[[VAL_246:.*]]#0 to %[[VAL_19]] step %[[VAL_5]] iter_args(%[[VAL_247:.*]] = %[[VAL_248:.*]]) -> (f64) {
// CHECK:             %[[VAL_249:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_245]]] : memref<?xf64>
// CHECK:             %[[VAL_250:.*]] = arith.addf %[[VAL_247]], %[[VAL_249]] : f64
// CHECK:             scf.yield %[[VAL_250]] : f64
// CHECK:           }
// CHECK:           memref.store %[[VAL_251:.*]], %[[VAL_15]][] : memref<f64>
// CHECK:           %[[VAL_252:.*]] = bufferization.to_tensor %[[VAL_15]] : memref<f64>
// CHECK:           return %[[VAL_252]] : tensor<f64>
// CHECK:         }
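// Scalar reduction x += a(i) + b(i) + c(i) over three sparse vectors,
// exercising co-iteration while-loops plus per-operand tail for-loops.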
func.func @red3s(%arga: tensor<?xf64, #SV>,
                 %argb: tensor<?xf64, #SV>,
                 %argc: tensor<?xf64, #SV>,
                 %argx: tensor<f64>) -> tensor<f64> {
  %0 = linalg.generic #trait_red3s
     ins(%arga, %argb, %argc: tensor<?xf64, #SV>, tensor<?xf64, #SV>, tensor<?xf64, #SV>)
    outs(%argx: tensor<f64>) {
      ^bb(%a: f64, %b: f64, %c: f64, %x: f64):
        %0 = arith.addf %x, %a : f64
        %1 = arith.addf %0, %b : f64
        %2 = arith.addf %1, %c : f64
        linalg.yield %2 : f64
  } -> tensor<f64>
  return %0 : tensor<f64>
}