1# RUN: %PYTHON %s 2>&1 | FileCheck %s
2
3import ctypes
4import sys
5from mlir.ir import *
6from mlir.dialects import builtin
7from mlir.dialects import func
8from mlir.dialects import linalg
9from mlir.passmanager import *
10from mlir.execution_engine import *
11
12from mlir.dialects.linalg.opdsl.lang import *
13
14
15# Log everything to stderr and flush so that we have a unified stream to match
16# errors/info emitted by MLIR to stderr.
def log(*args):
  """Print `args` to stderr and flush right away.

  Takes the same positional arguments as `print`. Flushing on every call
  keeps this output interleaved in order with whatever else is written to
  stderr.
  """
  print(*args, file=sys.stderr, flush=True)
20
21
# MLIR `main` driver for the elementwise tests. It allocates a 0-d f32 `lhs`
# filled with 1.0, a 4x8 f32 `rhs` filled with 2.0 and two zero-filled 4x8
# outputs, calls @elemwise_exp_add_on_buffers and @elemwise_log_mul_on_buffers
# (generated by the tests), and returns the sum of element [0, 0] of both
# outputs.
elemwise_boiler = """
func.func @main() -> f32 attributes {llvm.emit_c_interface} {
  %v0 = arith.constant 0.0 : f32
  %v1 = arith.constant 1.0 : f32
  %v2 = arith.constant 2.0 : f32

  %lhs = memref.alloc() : memref<f32>
  %rhs = memref.alloc() : memref<4x8xf32>
  %O0 = memref.alloc() : memref<4x8xf32>
  %O1 = memref.alloc() : memref<4x8xf32>
  linalg.fill ins(%v1 : f32) outs(%lhs : memref<f32>)
  linalg.fill ins(%v2 : f32) outs(%rhs : memref<4x8xf32>)
  linalg.fill ins(%v0 : f32) outs(%O0 : memref<4x8xf32>)
  linalg.fill ins(%v0 : f32) outs(%O1 : memref<4x8xf32>)

  call @elemwise_exp_add_on_buffers(%lhs, %rhs, %O0) :
    (memref<f32>, memref<4x8xf32>, memref<4x8xf32>) -> ()
  call @elemwise_log_mul_on_buffers(%lhs, %rhs, %O1) :
    (memref<f32>, memref<4x8xf32>, memref<4x8xf32>) -> ()

  %c0 = arith.constant 0 : index
  %res0 = memref.load %O0[%c0, %c0] : memref<4x8xf32>
  %res1 = memref.load %O1[%c0, %c0] : memref<4x8xf32>

  %0 = arith.addf %res0, %res1 : f32

  // TODO: FFI-based solution to allow testing and printing with python code.
  return %0 : f32
}
"""
52
# MLIR `main` driver for the matmul tests. `A` is a 4x16 i8 buffer filled with
# -1, `B` a 16x8 f32 buffer filled with 2.0, and `C0`/`C1` are zero-filled 4x8
# f32 outputs. It calls @matmul_signed_on_buffers and
# @matmul_unsigned_on_buffers (generated by the tests) and returns
# C0[0, 0] + C1[0, 0].
matmul_boiler = """
func.func @main() -> f32 attributes {llvm.emit_c_interface} {
  %v0 = arith.constant 0.0 : f32
  %v1 = arith.constant -1 : i8
  %v2 = arith.constant 2.0 : f32

  %A = memref.alloc() : memref<4x16xi8>
  %B = memref.alloc() : memref<16x8xf32>
  %C0 = memref.alloc() : memref<4x8xf32>
  %C1 = memref.alloc() : memref<4x8xf32>
  linalg.fill ins(%v1 : i8) outs(%A : memref<4x16xi8>)
  linalg.fill ins(%v2 : f32) outs(%B : memref<16x8xf32>)
  linalg.fill ins(%v0 : f32) outs(%C0 : memref<4x8xf32>)
  linalg.fill ins(%v0 : f32) outs(%C1 : memref<4x8xf32>)

  call @matmul_signed_on_buffers(%A, %B, %C0) :
    (memref<4x16xi8>, memref<16x8xf32>, memref<4x8xf32>) -> ()
  call @matmul_unsigned_on_buffers(%A, %B, %C1) :
    (memref<4x16xi8>, memref<16x8xf32>, memref<4x8xf32>) -> ()

  %c0 = arith.constant 0 : index
  %res0 = memref.load %C0[%c0, %c0] : memref<4x8xf32>
  %res1 = memref.load %C1[%c0, %c0] : memref<4x8xf32>

  %0 = arith.addf %res0, %res1 : f32

  // TODO: FFI-based solution to allow testing and printing with python code.
  return %0 : f32
}
"""
83
# MLIR `main` driver for the fill tests. It allocates i32 buffers of rank 0,
# 1 (16) and 2 (4x16), calls @fill_0d_on_buffers, @fill_1d_on_buffers and
# @fill_2d_on_buffers (generated by the tests) with the f32 values 1.0, 2.0
# and 3.0, and returns O0[] + O1[8] + O2[2, 8] as an i32.
fill_boiler = """
func.func @main() -> i32 attributes {llvm.emit_c_interface} {
  %O0 = memref.alloc() : memref<i32>
  %O1 = memref.alloc() : memref<16xi32>
  %O2 = memref.alloc() : memref<4x16xi32>

  %val0 = arith.constant 1.0 : f32
  %val1 = arith.constant 2.0 : f32
  %val2 = arith.constant 3.0 : f32

  call @fill_0d_on_buffers(%val0, %O0) : (f32, memref<i32>) -> ()
  call @fill_1d_on_buffers(%val1, %O1) : (f32, memref<16xi32>) -> ()
  call @fill_2d_on_buffers(%val2, %O2) : (f32, memref<4x16xi32>) -> ()

  %c0 = arith.constant 0 : index
  %res0 = memref.load %O0[] : memref<i32>
  %c8 = arith.constant 8 : index
  %res1 = memref.load %O1[%c8] : memref<16xi32>
  %c2 = arith.constant 2 : index
  %res2 = memref.load %O2[%c2, %c8] : memref<4x16xi32>

  %0 = arith.addi %res0, %res1 : i32
  %1 = arith.addi %0, %res2 : i32

  // TODO: FFI-based solution to allow testing and printing with python code.
  return %1 : i32
}
"""
112
# MLIR `main` driver for the fill_rng tests. It allocates a 4x16 i32 buffer,
# calls @fill_rng_on_buffers (generated by the tests) with min = -1000.0,
# max = 1000.0 and seed = 42, and returns element [0, 0] of the buffer.
fill_rng_boiler = """
func.func @main() -> i32 attributes {llvm.emit_c_interface} {
  %O = memref.alloc() : memref<4x16xi32>
  %min = arith.constant -1000.0 : f64
  %max = arith.constant 1000.0 : f64
  %seed = arith.constant 42 : i32

  call @fill_rng_on_buffers(%min, %max, %seed, %O) :
    (f64, f64, i32, memref<4x16xi32>) -> ()

  %c0 = arith.constant 0 : index
  %0 = memref.load %O[%c0, %c0] : memref<4x16xi32>

  // TODO: FFI-based solution to allow testing and printing with python code.
  return %0 : i32
}
"""
130
# MLIR `main` driver for a convolution test. It fills a 1x4x16x1 f64 input
# with 1.0, a 2x2x1 f64 filter with 2.0 and a 1x2x4x1 i32 output with 0,
# calls @conv_on_buffers, and returns output[0, 0, 0, 0].
# NOTE(review): no test visible in this part of the file defines
# @conv_on_buffers or references `conv_boiler` -- confirm it is still needed.
conv_boiler = """
func.func @main() -> i32 attributes {llvm.emit_c_interface} {
  %v0 = arith.constant 0 : i32
  %v1 = arith.constant 1.0 : f64
  %v2 = arith.constant 2.0 : f64

  %input = memref.alloc() : memref<1x4x16x1xf64>
  %filter = memref.alloc() : memref<2x2x1xf64>
  %output = memref.alloc() : memref<1x2x4x1xi32>
  linalg.fill ins(%v1 : f64) outs(%input : memref<1x4x16x1xf64>)
  linalg.fill ins(%v2 : f64) outs(%filter : memref<2x2x1xf64>)
  linalg.fill ins(%v0 : i32) outs(%output : memref<1x2x4x1xi32>)

  call @conv_on_buffers(%input, %filter, %output) :
    (memref<1x4x16x1xf64>, memref<2x2x1xf64>, memref<1x2x4x1xi32>) -> ()

  %c0 = arith.constant 0 : index
  %0 = memref.load %output[%c0, %c0, %c0, %c0] : memref<1x2x4x1xi32>

  // TODO: FFI-based solution to allow testing and printing with python code.
  return %0 : i32
}
"""
154
# MLIR `main` driver for the pooling tests. The 1x4x16x1 f64 input is filled
# with 1.0 and then three elements are overwritten: [0, 0, 0, 0] = 42,
# [0, 0, 1, 0] = 77 and [0, 1, 0, 0] = -13. The 2x2 f64 `shape` operand is
# filled with 1.0 and the 1x2x4x1 i32 output with 0. It calls
# @pooling_on_buffers (generated by the tests) and returns
# output[0, 0, 0, 0].
pooling_boiler = """
func.func @main() -> i32 attributes {llvm.emit_c_interface} {
  %v0 = arith.constant 0 : i32
  %v42 = arith.constant 42.0 : f64
  %v77 = arith.constant 77.0 : f64
  %v-13 = arith.constant -13.0 : f64
  %v1 = arith.constant 1.0 : f64

  %input = memref.alloc() : memref<1x4x16x1xf64>
  %shape = memref.alloc() : memref<2x2xf64>
  %output = memref.alloc() : memref<1x2x4x1xi32>
  linalg.fill ins(%v1 : f64) outs(%input : memref<1x4x16x1xf64>)
  linalg.fill ins(%v1 : f64) outs(%shape : memref<2x2xf64>)
  linalg.fill ins(%v0 : i32) outs(%output : memref<1x2x4x1xi32>)

  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c2 = arith.constant 2 : index
  memref.store %v42, %input[%c0, %c0, %c0, %c0] : memref<1x4x16x1xf64>
  memref.store %v77, %input[%c0, %c0, %c1, %c0] : memref<1x4x16x1xf64>
  memref.store %v-13, %input[%c0, %c1, %c0, %c0] : memref<1x4x16x1xf64>

  call @pooling_on_buffers(%input, %shape, %output) :
    (memref<1x4x16x1xf64>, memref<2x2xf64>, memref<1x2x4x1xi32>) -> ()

  %0 = memref.load %output[%c0, %c0, %c0, %c0] : memref<1x2x4x1xi32>

  // TODO: FFI-based solution to allow testing and printing with python code.
  return %0 : i32
}
"""
186
187
def transform(module, boilerplate):
  """Combine `module` with `boilerplate` and run the lowering pipeline.

  Args:
    module: Module whose top-level operations are printed and re-parsed.
    boilerplate: MLIR text appended after the printed operations.

  Returns:
    The newly parsed module, after the pass pipeline below has run on it.
  """
  # TODO: Allow cloning functions from one module to another.
  # Atm we have to resort to string concatenation.
  top_level_ops = module.operation.regions[0].blocks[0].operations
  printed_ops = "\n".join(str(op) for op in top_level_ops)
  mod = Module.parse(printed_ops + boilerplate)

  # Adjacent literals are concatenated verbatim, so the pipeline text is
  # exactly the sum of the pieces below.
  pipeline = (
      "func.func(convert-linalg-to-loops, lower-affine, "
      "convert-math-to-llvm, convert-scf-to-cf, arith-expand, memref-expand), "
      "convert-vector-to-llvm, convert-memref-to-llvm, convert-func-to-llvm,"
      "reconcile-unrealized-casts")
  PassManager.parse(pipeline).run(mod)
  return mod
201
202
def test_elemwise_builtin():
  """Run the elementwise unary/binary ops end to end and log the result.

  Generates `elemwise_exp_add_on_buffers` (no `fun` passed, so the op
  defaults apply; exp and add per the expected-value note below) and
  `elemwise_log_mul_on_buffers` (`UnaryFn.log`, `BinaryFn.mul`), appends
  `elemwise_boiler`, lowers the module with `transform`, invokes `main`
  through the execution engine and logs the returned f32.
  """
  with Context(), Location.unknown():
    module = Module.create()
    f32 = F32Type.get()
    with InsertionPoint(module.body):

      @func.FuncOp.from_py_func(
          MemRefType.get((), f32), MemRefType.get((4, 8), f32),
          MemRefType.get((4, 8), f32))
      def elemwise_exp_add_on_buffers(lhs, rhs, out):
        linalg.elemwise_unary(lhs, outs=[out])
        linalg.elemwise_binary(out, rhs, outs=[out])

      @func.FuncOp.from_py_func(
          MemRefType.get((), f32), MemRefType.get((4, 8), f32),
          MemRefType.get((4, 8), f32))
      def elemwise_log_mul_on_buffers(lhs, rhs, out):
        linalg.elemwise_unary(lhs, outs=[out], fun=UnaryFn.log)
        linalg.elemwise_binary(out, rhs, outs=[out], fun=BinaryFn.mul)

    execution_engine = ExecutionEngine(transform(module, elemwise_boiler))

    # TODO: FFI-based solution to allow testing and printing with python code.
    # Prepare arguments: one result f32.
    # Arguments must be passed as pointers.
    c_float_p = ctypes.c_float * 1
    res = c_float_p(-1.)
    execution_engine.invoke("main", res)

    log("RESULT: ", res[0])
    # elemwise_exp_add_on_buffers: exp(1.0) + 2.0 = 4.71828182846
    # elemwise_log_mul_on_buffers: log(1.0) * 2.0 = 0.0
    # CHECK: RESULT: 4.71828


test_elemwise_builtin()
240
241
def test_elemwise_generic():
  """Run the elementwise ops with `emit_generic=True` and log the result.

  Generates `elemwise_exp_add_on_buffers` (no `fun` passed, so the op
  defaults apply; exp and add per the expected-value note below) and
  `elemwise_log_mul_on_buffers` (`UnaryFn.log`, `BinaryFn.mul`), every op
  built with `emit_generic=True`. It then appends `elemwise_boiler`, lowers
  the module with `transform`, invokes `main` through the execution engine
  and logs the returned f32.
  """
  with Context(), Location.unknown():
    module = Module.create()
    f32 = F32Type.get()
    with InsertionPoint(module.body):

      @func.FuncOp.from_py_func(
          MemRefType.get((), f32), MemRefType.get((4, 8), f32),
          MemRefType.get((4, 8), f32))
      def elemwise_exp_add_on_buffers(lhs, rhs, out):
        linalg.elemwise_unary(lhs, outs=[out], emit_generic=True)
        linalg.elemwise_binary(out, rhs, outs=[out], emit_generic=True)

      @func.FuncOp.from_py_func(
          MemRefType.get((), f32), MemRefType.get((4, 8), f32),
          MemRefType.get((4, 8), f32))
      def elemwise_log_mul_on_buffers(lhs, rhs, out):
        linalg.elemwise_unary(
            lhs, outs=[out], fun=UnaryFn.log, emit_generic=True)
        linalg.elemwise_binary(
            out, rhs, outs=[out], fun=BinaryFn.mul, emit_generic=True)

    execution_engine = ExecutionEngine(transform(module, elemwise_boiler))

    # TODO: FFI-based solution to allow testing and printing with python code.
    # Prepare arguments: one result f32.
    # Arguments must be passed as pointers.
    c_float_p = ctypes.c_float * 1
    res = c_float_p(-1.)
    execution_engine.invoke("main", res)

    log("RESULT: ", res[0])
    # elemwise_exp_add_on_buffers: exp(1.0) + 2.0 = 4.71828182846
    # elemwise_log_mul_on_buffers: log(1.0) * 2.0 = 0.0
    # CHECK: RESULT: 4.71828


test_elemwise_generic()
281
282
def test_matmul_builtin():
  """Run `linalg.matmul` with the default and the unsigned cast.

  Both generated functions multiply an i8 4x16 buffer by an f32 16x8 buffer
  into an f32 4x8 buffer; the second passes `cast=TypeFn.cast_unsigned`.
  The module is combined with `matmul_boiler`, lowered, and the f32 returned
  by `main` is logged.
  """
  with Context(), Location.unknown():
    module = Module.create()
    f32 = F32Type.get()
    i8 = IntegerType.get_signless(8)
    # Both generated functions share the same signature.
    lhs_type = MemRefType.get((4, 16), i8)
    rhs_type = MemRefType.get((16, 8), f32)
    out_type = MemRefType.get((4, 8), f32)
    with InsertionPoint(module.body):

      @func.FuncOp.from_py_func(lhs_type, rhs_type, out_type)
      def matmul_signed_on_buffers(lhs, rhs, out):
        linalg.matmul(lhs, rhs, outs=[out])

      @func.FuncOp.from_py_func(lhs_type, rhs_type, out_type)
      def matmul_unsigned_on_buffers(lhs, rhs, out):
        linalg.matmul(lhs, rhs, outs=[out], cast=TypeFn.cast_unsigned)

    engine = ExecutionEngine(transform(module, matmul_boiler))

    # TODO: FFI-based solution to allow testing and printing with python code.
    # The single f32 result comes back through a pointer argument, so hand
    # the engine a one-element ctypes array.
    result = (ctypes.c_float * 1)(-1.)
    engine.invoke("main", result)

    log("RESULT: ", result[0])
    # matmul_signed_on_buffers: -1 * 2.0 * 16 = -32
    # matmul_unsigned_on_buffers: (2^8-1) * 2.0 * 16 = 8160
    # CHECK: RESULT: 8128


test_matmul_builtin()
318
319
def test_matmul_generic():
  """Run `linalg.matmul` with `emit_generic=True`, both cast variants.

  Both generated functions multiply an i8 4x16 buffer by an f32 16x8 buffer
  into an f32 4x8 buffer; the second passes `cast=TypeFn.cast_unsigned`.
  The module is combined with `matmul_boiler`, lowered, and the f32 returned
  by `main` is logged.
  """
  with Context(), Location.unknown():
    module = Module.create()
    f32 = F32Type.get()
    i8 = IntegerType.get_signless(8)
    # Both generated functions share the same signature.
    lhs_type = MemRefType.get((4, 16), i8)
    rhs_type = MemRefType.get((16, 8), f32)
    out_type = MemRefType.get((4, 8), f32)
    with InsertionPoint(module.body):

      @func.FuncOp.from_py_func(lhs_type, rhs_type, out_type)
      def matmul_signed_on_buffers(lhs, rhs, out):
        linalg.matmul(lhs, rhs, outs=[out], emit_generic=True)

      @func.FuncOp.from_py_func(lhs_type, rhs_type, out_type)
      def matmul_unsigned_on_buffers(lhs, rhs, out):
        linalg.matmul(
            lhs, rhs, outs=[out], cast=TypeFn.cast_unsigned, emit_generic=True)

    engine = ExecutionEngine(transform(module, matmul_boiler))

    # TODO: FFI-based solution to allow testing and printing with python code.
    # The single f32 result comes back through a pointer argument, so hand
    # the engine a one-element ctypes array.
    result = (ctypes.c_float * 1)(-1.)
    engine.invoke("main", result)

    log("RESULT: ", result[0])
    # matmul_signed_on_buffers = -1 * 2.0 * 16 = -32
    # matmul_unsigned_on_buffers = (2^8-1) * 2.0 * 16 = 8160
    # CHECK: RESULT: 8128


test_matmul_generic()
356
357
def test_fill_builtin():
  """Run `linalg.fill` on rank-0, rank-1 and rank-2 i32 buffers.

  Each generated function takes an f32 value and an i32 buffer. The module
  is combined with `fill_boiler`, lowered, and the i32 returned by `main`
  is logged.
  """
  with Context(), Location.unknown():
    module = Module.create()
    f32 = F32Type.get()
    i32 = IntegerType.get_signless(32)
    # One output buffer type per tested rank.
    scalar_buf = MemRefType.get([], i32)
    vector_buf = MemRefType.get([16], i32)
    matrix_buf = MemRefType.get([4, 16], i32)
    with InsertionPoint(module.body):

      @func.FuncOp.from_py_func(f32, scalar_buf)
      def fill_0d_on_buffers(value, out):
        linalg.fill(value, outs=[out])

      @func.FuncOp.from_py_func(f32, vector_buf)
      def fill_1d_on_buffers(value, out):
        linalg.fill(value, outs=[out])

      @func.FuncOp.from_py_func(f32, matrix_buf)
      def fill_2d_on_buffers(value, out):
        linalg.fill(value, outs=[out])

    engine = ExecutionEngine(transform(module, fill_boiler))

    # TODO: FFI-based solution to allow testing and printing with python code.
    # The single i32 result comes back through a pointer argument, so hand
    # the engine a one-element ctypes array.
    result = (ctypes.c_int * 1)(-1)
    engine.invoke("main", result)

    log("RESULT: ", result[0])
    # CHECK: RESULT: 6


test_fill_builtin()
391
392
def test_fill_generic():
  """Run `linalg.fill` with `emit_generic=True` on rank-0/1/2 i32 buffers.

  Each generated function takes an f32 value and an i32 buffer. The module
  is combined with `fill_boiler`, lowered, and the i32 returned by `main`
  is logged.
  """
  with Context(), Location.unknown():
    module = Module.create()
    f32 = F32Type.get()
    i32 = IntegerType.get_signless(32)
    # One output buffer type per tested rank.
    scalar_buf = MemRefType.get([], i32)
    vector_buf = MemRefType.get([16], i32)
    matrix_buf = MemRefType.get([4, 16], i32)
    with InsertionPoint(module.body):

      @func.FuncOp.from_py_func(f32, scalar_buf)
      def fill_0d_on_buffers(value, out):
        linalg.fill(value, outs=[out], emit_generic=True)

      @func.FuncOp.from_py_func(f32, vector_buf)
      def fill_1d_on_buffers(value, out):
        linalg.fill(value, outs=[out], emit_generic=True)

      @func.FuncOp.from_py_func(f32, matrix_buf)
      def fill_2d_on_buffers(value, out):
        linalg.fill(value, outs=[out], emit_generic=True)

    engine = ExecutionEngine(transform(module, fill_boiler))

    # TODO: FFI-based solution to allow testing and printing with python code.
    # The single i32 result comes back through a pointer argument, so hand
    # the engine a one-element ctypes array.
    result = (ctypes.c_int * 1)(-1)
    engine.invoke("main", result)

    log("RESULT: ", result[0])
    # CHECK: RESULT: 6


test_fill_generic()
426
427
def test_fill_rng_builtin():
  """Run `linalg.fill_rng_2d` on a 4x16 i32 buffer and log the result.

  The generated function takes two f64 bounds, an i32 seed and the output
  buffer. The module is combined with `fill_rng_boiler`, lowered, and the
  i32 returned by `main` is logged.
  """
  with Context(), Location.unknown():
    module = Module.create()
    f64 = F64Type.get()
    i32 = IntegerType.get_signless(32)
    out_type = MemRefType.get((4, 16), i32)
    with InsertionPoint(module.body):

      @func.FuncOp.from_py_func(f64, f64, i32, out_type)
      def fill_rng_on_buffers(min_value, max_value, seed, out):
        linalg.fill_rng_2d(min_value, max_value, seed, outs=[out])

    engine = ExecutionEngine(transform(module, fill_rng_boiler))

    # TODO: FFI-based solution to allow testing and printing with python code.
    # The single i32 result comes back through a pointer argument, so hand
    # the engine a one-element ctypes array.
    result = (ctypes.c_int * 1)(-1)
    engine.invoke("main", result)

    log("RESULT: ", result[0])
    # CHECK: RESULT: -480


test_fill_rng_builtin()
453
454
def test_fill_rng_generic():
  """Run `linalg.fill_rng_2d` with `emit_generic=True` and log the result.

  The generated function takes two f64 bounds, an i32 seed and the 4x16 i32
  output buffer. The module is combined with `fill_rng_boiler`, lowered, and
  the i32 returned by `main` is logged.
  """
  with Context(), Location.unknown():
    module = Module.create()
    f64 = F64Type.get()
    i32 = IntegerType.get_signless(32)
    out_type = MemRefType.get((4, 16), i32)
    with InsertionPoint(module.body):

      @func.FuncOp.from_py_func(f64, f64, i32, out_type)
      def fill_rng_on_buffers(min_value, max_value, seed, out):
        linalg.fill_rng_2d(
            min_value, max_value, seed, outs=[out], emit_generic=True)

    engine = ExecutionEngine(transform(module, fill_rng_boiler))

    # TODO: FFI-based solution to allow testing and printing with python code.
    # The single i32 result comes back through a pointer argument, so hand
    # the engine a one-element ctypes array.
    result = (ctypes.c_int * 1)(-1)
    engine.invoke("main", result)

    log("RESULT: ", result[0])
    # CHECK: RESULT: -480


test_fill_rng_generic()
480
481
def test_max_pooling_builtin():
  """Run `linalg.pooling_nhwc_max` with strides [2, 4], dilations [1, 2].

  The generated function takes a 1x4x16x1 f64 input, a 2x2 f64 shape operand
  and a 1x2x4x1 i32 output. The module is combined with `pooling_boiler`,
  lowered, and the i32 returned by `main` is logged.
  """
  with Context(), Location.unknown():
    module = Module.create()
    f64 = F64Type.get()
    i32 = IntegerType.get_signless(32)
    input_type = MemRefType.get((1, 4, 16, 1), f64)
    shape_type = MemRefType.get((2, 2), f64)
    output_type = MemRefType.get((1, 2, 4, 1), i32)
    with InsertionPoint(module.body):

      @func.FuncOp.from_py_func(input_type, shape_type, output_type)
      def pooling_on_buffers(input_buf, shape_buf, output_buf):
        linalg.pooling_nhwc_max(
            input_buf,
            shape_buf,
            outs=[output_buf],
            strides=[2, 4],
            dilations=[1, 2])

    engine = ExecutionEngine(transform(module, pooling_boiler))

    # TODO: FFI-based solution to allow testing and printing with python code.
    # The single i32 result comes back through a pointer argument, so hand
    # the engine a one-element ctypes array.
    result = (ctypes.c_int * 1)(-1)
    engine.invoke("main", result)

    log("RESULT: ", result[0])
    # 77 is not selected due to the dilation 2 in the second dimension.
    # CHECK: RESULT: 42


test_max_pooling_builtin()
511
512
def test_max_pooling_generic():
  """Run `linalg.pooling_nhwc_max` with `emit_generic=True`.

  Uses strides [2, 4] and dilations [1, 2]. The generated function takes a
  1x4x16x1 f64 input, a 2x2 f64 shape operand and a 1x2x4x1 i32 output. The
  module is combined with `pooling_boiler`, lowered, and the i32 returned by
  `main` is logged.
  """
  with Context(), Location.unknown():
    module = Module.create()
    f64 = F64Type.get()
    i32 = IntegerType.get_signless(32)
    input_type = MemRefType.get((1, 4, 16, 1), f64)
    shape_type = MemRefType.get((2, 2), f64)
    output_type = MemRefType.get((1, 2, 4, 1), i32)
    with InsertionPoint(module.body):

      @func.FuncOp.from_py_func(input_type, shape_type, output_type)
      def pooling_on_buffers(input_buf, shape_buf, output_buf):
        linalg.pooling_nhwc_max(
            input_buf, shape_buf, outs=[output_buf], strides=[2, 4],
            dilations=[1, 2], emit_generic=True)

    engine = ExecutionEngine(transform(module, pooling_boiler))

    # TODO: FFI-based solution to allow testing and printing with python code.
    # The single i32 result comes back through a pointer argument, so hand
    # the engine a one-element ctypes array.
    result = (ctypes.c_int * 1)(-1)
    engine.invoke("main", result)

    log("RESULT: ", result[0])
    # 77 is not selected due to the dilation 2 in the second dimension.
    # CHECK: RESULT: 42


test_max_pooling_generic()
547
548
def test_min_pooling_builtin():
  """Run `linalg.pooling_nhwc_min` with strides [2, 4], default dilations.

  The generated function takes a 1x4x16x1 f64 input, a 2x2 f64 shape operand
  and a 1x2x4x1 i32 output. The module is combined with `pooling_boiler`,
  lowered, and the i32 returned by `main` is logged.
  """
  with Context(), Location.unknown():
    module = Module.create()
    f64 = F64Type.get()
    i32 = IntegerType.get_signless(32)
    input_type = MemRefType.get((1, 4, 16, 1), f64)
    shape_type = MemRefType.get((2, 2), f64)
    output_type = MemRefType.get((1, 2, 4, 1), i32)
    with InsertionPoint(module.body):

      @func.FuncOp.from_py_func(input_type, shape_type, output_type)
      def pooling_on_buffers(input_buf, shape_buf, output_buf):
        # Set the strides and use the default dilations.
        linalg.pooling_nhwc_min(
            input_buf, shape_buf, outs=[output_buf], strides=[2, 4])

    engine = ExecutionEngine(transform(module, pooling_boiler))

    # TODO: FFI-based solution to allow testing and printing with python code.
    # The single i32 result comes back through a pointer argument, so hand
    # the engine a one-element ctypes array.
    result = (ctypes.c_int * 1)(-1)
    engine.invoke("main", result)

    log("RESULT: ", result[0])
    # CHECK: RESULT: -13


test_min_pooling_builtin()
577
578
def test_min_pooling_generic():
  """Run `linalg.pooling_nhwc_min` with `emit_generic=True`.

  Uses strides [2, 4] and the default dilations. The generated function
  takes a 1x4x16x1 f64 input, a 2x2 f64 shape operand and a 1x2x4x1 i32
  output. The module is combined with `pooling_boiler`, lowered, and the i32
  returned by `main` is logged.
  """
  with Context(), Location.unknown():
    module = Module.create()
    f64 = F64Type.get()
    i32 = IntegerType.get_signless(32)
    input_type = MemRefType.get((1, 4, 16, 1), f64)
    shape_type = MemRefType.get((2, 2), f64)
    output_type = MemRefType.get((1, 2, 4, 1), i32)
    with InsertionPoint(module.body):

      @func.FuncOp.from_py_func(input_type, shape_type, output_type)
      def pooling_on_buffers(input_buf, shape_buf, output_buf):
        # Set the strides and use the default dilations.
        linalg.pooling_nhwc_min(
            input_buf,
            shape_buf,
            outs=[output_buf],
            strides=[2, 4],
            emit_generic=True)

    engine = ExecutionEngine(transform(module, pooling_boiler))

    # TODO: FFI-based solution to allow testing and printing with python code.
    # The single i32 result comes back through a pointer argument, so hand
    # the engine a one-element ctypes array.
    result = (ctypes.c_int * 1)(-1)
    engine.invoke("main", result)

    log("RESULT: ", result[0])
    # CHECK: RESULT: -13


test_min_pooling_generic()
608