// RUN: mlir-opt -convert-parallel-loops-to-gpu -split-input-file -verify-diagnostics %s | FileCheck %s

// 2-d parallel loop mapped to block.y and block.x

// Copies %buf[%i0, %i1] into %res[%i1, %i0] (indices swapped on the store)
// over a 2-d scf.parallel. The step of dimension 0 is the function argument
// %arg4; the step of dimension 1 is the constant 2.
func.func @parallel_loop_bidy_bidx(%arg0 : index, %arg1 : index, %arg2 : index,
                              %arg3 : index, %arg4 : index,
                              %buf : memref<?x?xf32>,
                              %res : memref<?x?xf32>) {
  %step = arith.constant 2 : index
  // Per the mapping attribute on this loop, dimension 0 (%i0) is assigned to
  // block_y and dimension 1 (%i1) to block_x, each with identity map/bound.
  scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
                                          step (%arg4, %step)  {
    %val = memref.load %buf[%i0, %i1] : memref<?x?xf32>
    memref.store %val, %res[%i1, %i0] : memref<?x?xf32>
  } { mapping = [#gpu.loop_dim_map<processor = block_y, map = (d0) -> (d0), bound = (d0) -> (d0)>, #gpu.loop_dim_map<processor = block_x, map = (d0) -> (d0), bound = (d0) -> (d0)>] }
  return
}

// CHECK:       #[[$MAP0:.*]] = affine_map<(d0)[s0, s1] -> ((d0 - s0) ceildiv s1)>
// CHECK:       #[[$MAP1:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s0 + s1)>

// CHECK:       module {
// CHECK-LABEL:   func @parallel_loop_bidy_bidx(
// CHECK-SAME:                                  [[VAL_0:%.*]]: index, [[VAL_1:%.*]]: index, [[VAL_2:%.*]]: index, [[VAL_3:%.*]]: index, [[VAL_4:%.*]]: index, [[VAL_5:%.*]]: memref<?x?xf32>, [[VAL_6:%.*]]: memref<?x?xf32>) {
// CHECK:           [[VAL_7:%.*]] = arith.constant 2 : index
// CHECK:           [[VAL_8:%.*]] = arith.constant 1 : index
// CHECK:           [[VAL_9:%.*]] = affine.apply #[[$MAP0]]([[VAL_2]]){{\[}}[[VAL_0]], [[VAL_4]]]
// CHECK:           [[VAL_10:%.*]] = affine.apply #[[$MAP0]]([[VAL_3]]){{\[}}[[VAL_1]], [[VAL_7]]]
// CHECK:           gpu.launch blocks([[VAL_11:%.*]], [[VAL_12:%.*]], [[VAL_13:%.*]]) in ([[VAL_14:%.*]] = [[VAL_10]], [[VAL_15:%.*]] = [[VAL_9]], [[VAL_16:%.*]] = [[VAL_8]]) threads([[VAL_17:%.*]], [[VAL_18:%.*]], [[VAL_19:%.*]]) in ([[VAL_20:%.*]] = [[VAL_8]], [[VAL_21:%.*]] = [[VAL_8]], [[VAL_22:%.*]] = [[VAL_8]]) {
// CHECK:             [[VAL_23:%.*]] = affine.apply #[[$MAP1]]([[VAL_12]]){{\[}}[[VAL_4]], [[VAL_0]]]
// CHECK:             [[VAL_24:%.*]] = affine.apply #[[$MAP1]]([[VAL_11]]){{\[}}[[VAL_7]], [[VAL_1]]]
// CHECK:             [[VAL_25:%.*]] = memref.load [[VAL_5]]{{\[}}[[VAL_23]], [[VAL_24]]] : memref<?x?xf32>
// CHECK:             memref.store [[VAL_25]], [[VAL_6]]{{\[}}[[VAL_24]], [[VAL_23]]] : memref<?x?xf32>
// CHECK:             gpu.terminator
// CHECK:           }
// CHECK:           return
// CHECK:         }
// CHECK:       }

// -----

// tiled 2-d parallel loop mapped to block.y and block.x and thread.y and thread.x.

// Two nested 2-d scf.parallel loops. The outer loop steps by 4 in both
// dimensions; the inner loop runs from 0 to 4 with step 1 in both dimensions.
// The body loads %buf at (outer + inner) indices and stores to %res with the
// two indices swapped.
func.func @parallel_loop_tiled(%arg0 : index, %arg1 : index, %arg2 : index,
                        %arg3 : index,
                        %buf : memref<?x?xf32>,
                        %res : memref<?x?xf32>) {
  %zero = arith.constant 0 : index
  %one = arith.constant 1 : index
  %four = arith.constant 4 : index
  // Outer loop: mapped to block_y (dimension 0) and block_x (dimension 1).
  scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
                                          step (%four, %four)  {
    // Inner loop: mapped to thread_y (dimension 0) and thread_x (dimension 1).
    scf.parallel (%si0, %si1) = (%zero, %zero) to (%four, %four)
                                            step (%one, %one)  {
      // Combine the outer and inner induction variables per dimension.
      %idx0 = arith.addi %i0, %si0 : index
      %idx1 = arith.addi %i1, %si1 : index
      %val = memref.load %buf[%idx0, %idx1] : memref<?x?xf32>
      memref.store %val, %res[%idx1, %idx0] : memref<?x?xf32>
    } { mapping = [
        #gpu.loop_dim_map<processor = thread_y, map = (d0) -> (d0), bound = (d0) -> (d0)>,
        #gpu.loop_dim_map<processor = thread_x, map = (d0) -> (d0), bound = (d0) -> (d0)>
     ] }
  } { mapping = [
      #gpu.loop_dim_map<processor = block_y, map = (d0) -> (d0), bound = (d0) -> (d0)>,
      #gpu.loop_dim_map<processor = block_x, map = (d0) -> (d0), bound = (d0) -> (d0)>
    ] }
  return
}

// CHECK:       #[[$MAP0:.*]] = affine_map<(d0)[s0, s1] -> ((d0 - s0) ceildiv s1)>
// CHECK:       #[[$MAP1:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s0 + s1)>

// CHECK:       module {
// CHECK-LABEL:   func @parallel_loop_tiled(
// CHECK-SAME:                              [[VAL_26:%.*]]: index, [[VAL_27:%.*]]: index, [[VAL_28:%.*]]: index, [[VAL_29:%.*]]: index, [[VAL_30:%.*]]: memref<?x?xf32>, [[VAL_31:%.*]]: memref<?x?xf32>) {
// CHECK:           [[VAL_32:%.*]] = arith.constant 0 : index
// CHECK:           [[VAL_33:%.*]] = arith.constant 1 : index
// CHECK:           [[VAL_34:%.*]] = arith.constant 4 : index
// CHECK:           [[VAL_35:%.*]] = arith.constant 1 : index
// CHECK:           [[VAL_36:%.*]] = affine.apply #[[$MAP0]]([[VAL_28]]){{\[}}[[VAL_26]], [[VAL_34]]]
// CHECK:           [[VAL_37:%.*]] = affine.apply #[[$MAP0]]([[VAL_29]]){{\[}}[[VAL_27]], [[VAL_34]]]
// CHECK:           [[VAL_38:%.*]] = affine.apply #[[$MAP0]]([[VAL_34]]){{\[}}[[VAL_32]], [[VAL_33]]]
// CHECK:           [[VAL_39:%.*]] = affine.apply #[[$MAP0]]([[VAL_34]]){{\[}}[[VAL_32]], [[VAL_33]]]
// CHECK:           gpu.launch blocks([[VAL_40:%.*]], [[VAL_41:%.*]], [[VAL_42:%.*]]) in ([[VAL_43:%.*]] = [[VAL_37]], [[VAL_44:%.*]] = [[VAL_36]], [[VAL_45:%.*]] = [[VAL_35]]) threads([[VAL_46:%.*]], [[VAL_47:%.*]], [[VAL_48:%.*]]) in ([[VAL_49:%.*]] = [[VAL_39]], [[VAL_50:%.*]] = [[VAL_38]], [[VAL_51:%.*]] = [[VAL_35]]) {
// CHECK:             [[VAL_52:%.*]] = affine.apply #[[$MAP1]]([[VAL_41]]){{\[}}[[VAL_34]], [[VAL_26]]]
// CHECK:             [[VAL_53:%.*]] = affine.apply #[[$MAP1]]([[VAL_40]]){{\[}}[[VAL_34]], [[VAL_27]]]
// CHECK:             [[VAL_54:%.*]] = affine.apply #[[$MAP1]]([[VAL_47]]){{\[}}[[VAL_33]], [[VAL_32]]]
// CHECK:             [[VAL_55:%.*]] = affine.apply #[[$MAP1]]([[VAL_46]]){{\[}}[[VAL_33]], [[VAL_32]]]
// CHECK:             [[VAL_56:%.*]] = arith.addi [[VAL_52]], [[VAL_54]] : index
// CHECK:             [[VAL_57:%.*]] = arith.addi [[VAL_53]], [[VAL_55]] : index
// CHECK:             [[VAL_58:%.*]] = memref.load [[VAL_30]]{{\[}}[[VAL_56]], [[VAL_57]]] : memref<?x?xf32>
// CHECK:             memref.store [[VAL_58]], [[VAL_31]]{{\[}}[[VAL_57]], [[VAL_56]]] : memref<?x?xf32>
// CHECK:             gpu.terminator
// CHECK:           }
// CHECK:           return
// CHECK:         }
// CHECK:       }

// -----

// 2-d parallel loop mapped to block.y and sequential

// Same loop body and bounds as a plain 2-d index-swapping copy, but only
// dimension 0 is assigned to a GPU processor; dimension 1 is marked
// `sequential` in the mapping attribute.
func.func @parallel_loop_bidy_seq(%arg0 : index, %arg1 : index, %arg2 : index,
                             %arg3 : index, %arg4 : index,
                             %buf : memref<?x?xf32>,
                             %res : memref<?x?xf32>) {
  %step = arith.constant 2 : index
  // Dimension 0 steps by %arg4, dimension 1 by the constant 2.
  scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
                                          step (%arg4, %step)  {
    %val = memref.load %buf[%i0, %i1] : memref<?x?xf32>
    memref.store %val, %res[%i1, %i0] : memref<?x?xf32>
  } { mapping = [
      #gpu.loop_dim_map<processor = block_y, map = (d0) -> (d0), bound = (d0) -> (d0)>,
      #gpu.loop_dim_map<processor = sequential, map = (d0) -> (d0), bound = (d0) -> (d0)>
    ] }
  return
}

// CHECK:       #[[$MAP0:.*]] = affine_map<(d0)[s0, s1] -> ((d0 - s0) ceildiv s1)>
// CHECK:       #[[$MAP1:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s0 + s1)>

// CHECK:       module {
// CHECK-LABEL:   func @parallel_loop_bidy_seq(
// CHECK-SAME:                                 [[VAL_59:%.*]]: index, [[VAL_60:%.*]]: index, [[VAL_61:%.*]]: index, [[VAL_62:%.*]]: index, [[VAL_63:%.*]]: index, [[VAL_64:%.*]]: memref<?x?xf32>, [[VAL_65:%.*]]: memref<?x?xf32>) {
// CHECK:           [[VAL_66:%.*]] = arith.constant 2 : index
// CHECK:           [[VAL_67:%.*]] = arith.constant 1 : index
// CHECK:           [[VAL_68:%.*]] = affine.apply #[[$MAP0]]([[VAL_61]]){{\[}}[[VAL_59]], [[VAL_63]]]
// CHECK:           gpu.launch blocks([[VAL_69:%.*]], [[VAL_70:%.*]], [[VAL_71:%.*]]) in ([[VAL_72:%.*]] = [[VAL_67]], [[VAL_73:%.*]] = [[VAL_68]], [[VAL_74:%.*]] = [[VAL_67]]) threads([[VAL_75:%.*]], [[VAL_76:%.*]], [[VAL_77:%.*]]) in ([[VAL_78:%.*]] = [[VAL_67]], [[VAL_79:%.*]] = [[VAL_67]], [[VAL_80:%.*]] = [[VAL_67]]) {
// CHECK:             [[VAL_81:%.*]] = affine.apply #[[$MAP1]]([[VAL_70]]){{\[}}[[VAL_63]], [[VAL_59]]]
// CHECK:             scf.for [[VAL_82:%.*]] = [[VAL_60]] to [[VAL_62]] step [[VAL_66]] {
// CHECK:               [[VAL_83:%.*]] = memref.load [[VAL_64]]{{\[}}[[VAL_81]], [[VAL_82]]] : memref<?x?xf32>
// CHECK:               memref.store [[VAL_83]], [[VAL_65]]{{\[}}[[VAL_82]], [[VAL_81]]] : memref<?x?xf32>
// CHECK:             }
// CHECK:             gpu.terminator
// CHECK:           }
// CHECK:           return
// CHECK:         }
// CHECK:       }

// -----

// tiled 2-d parallel loop mapped to block.y and seq. and thread.y and seq.

// Two nested 2-d scf.parallel loops where, at each nesting level, only
// dimension 0 is assigned to a GPU processor and dimension 1 is marked
// `sequential`. The body loads %buf at (outer + inner) indices and stores to
// %res with the two indices swapped.
func.func @parallel_loop_tiled_seq(%arg0 : index, %arg1 : index, %arg2 : index,
                              %arg3 : index,
                              %buf : memref<?x?xf32>,
                              %res : memref<?x?xf32>) {
  %zero = arith.constant 0 : index
  %one = arith.constant 1 : index
  %four = arith.constant 4 : index
  // Outer loop (step 4): dimension 0 -> block_y, dimension 1 -> sequential.
  scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
                                          step (%four, %four)  {
    // Inner loop (0 to 4, step 1): dimension 0 -> thread_y,
    // dimension 1 -> sequential.
    scf.parallel (%si0, %si1) = (%zero, %zero) to (%four, %four)
                                            step (%one, %one)  {
      %idx0 = arith.addi %i0, %si0 : index
      %idx1 = arith.addi %i1, %si1 : index
      %val = memref.load %buf[%idx0, %idx1] : memref<?x?xf32>
      memref.store %val, %res[%idx1, %idx0] : memref<?x?xf32>
    } { mapping = [
        #gpu.loop_dim_map<processor = thread_y, map = (d0) -> (d0), bound = (d0) -> (d0)>,
        #gpu.loop_dim_map<processor = sequential, map = (d0) -> (d0), bound = (d0) -> (d0)>
      ] }
  } { mapping = [
      #gpu.loop_dim_map<processor = block_y, map = (d0) -> (d0), bound = (d0) -> (d0)>,
      #gpu.loop_dim_map<processor = sequential, map = (d0) -> (d0), bound = (d0) -> (d0)>
    ] }
  return
}

// CHECK:       #[[$MAP0:.*]] = affine_map<(d0)[s0, s1] -> ((d0 - s0) ceildiv s1)>
// CHECK:       #[[$MAP1:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s0 + s1)>

// CHECK:       module {
// CHECK-LABEL:   func @parallel_loop_tiled_seq(
// CHECK-SAME:                                  [[VAL_84:%.*]]: index, [[VAL_85:%.*]]: index, [[VAL_86:%.*]]: index, [[VAL_87:%.*]]: index, [[VAL_88:%.*]]: memref<?x?xf32>, [[VAL_89:%.*]]: memref<?x?xf32>) {
// CHECK:           [[VAL_90:%.*]] = arith.constant 0 : index
// CHECK:           [[VAL_91:%.*]] = arith.constant 1 : index
// CHECK:           [[VAL_92:%.*]] = arith.constant 4 : index
// CHECK:           [[VAL_93:%.*]] = arith.constant 1 : index
// CHECK:           [[VAL_94:%.*]] = affine.apply #[[$MAP0]]([[VAL_86]]){{\[}}[[VAL_84]], [[VAL_92]]]
// CHECK:           [[VAL_95:%.*]] = affine.apply #[[$MAP0]]([[VAL_92]]){{\[}}[[VAL_90]], [[VAL_91]]]
// CHECK:           gpu.launch blocks([[VAL_96:%.*]], [[VAL_97:%.*]], [[VAL_98:%.*]]) in ([[VAL_99:%.*]] = [[VAL_93]], [[VAL_100:%.*]] = [[VAL_94]], [[VAL_101:%.*]] = [[VAL_93]]) threads([[VAL_102:%.*]], [[VAL_103:%.*]], [[VAL_104:%.*]]) in ([[VAL_105:%.*]] = [[VAL_93]], [[VAL_106:%.*]] = [[VAL_95]], [[VAL_107:%.*]] = [[VAL_93]]) {
// CHECK:             [[VAL_108:%.*]] = affine.apply #[[$MAP1]]([[VAL_97]]){{\[}}[[VAL_92]], [[VAL_84]]]
// CHECK:             scf.for [[VAL_109:%.*]] = [[VAL_85]] to [[VAL_87]] step [[VAL_92]] {
// CHECK:               [[VAL_110:%.*]] = affine.apply #[[$MAP1]]([[VAL_103]]){{\[}}[[VAL_91]], [[VAL_90]]]
// CHECK:               scf.for [[VAL_111:%.*]] = [[VAL_90]] to [[VAL_92]] step [[VAL_91]] {
// CHECK:                 [[VAL_112:%.*]] = arith.addi [[VAL_108]], [[VAL_110]] : index
// CHECK:                 [[VAL_113:%.*]] = arith.addi [[VAL_109]], [[VAL_111]] : index
// CHECK:                 [[VAL_114:%.*]] = memref.load [[VAL_88]]{{\[}}[[VAL_112]], [[VAL_113]]] : memref<?x?xf32>
// CHECK:                 memref.store [[VAL_114]], [[VAL_89]]{{\[}}[[VAL_113]], [[VAL_112]]] : memref<?x?xf32>
// CHECK:               }
// CHECK:             }
// CHECK:             gpu.terminator
// CHECK:           }
// CHECK:           return
// CHECK:         }
// CHECK:       }

// -----

// Affine maps used by @sum below:
//   #map0 / #map3 are the layouts of the function arguments and of the
//   subviews taken from them; #map1 and #map2 feed affine.min with the
//   constants 2 and 3 respectively.
#map0 = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)>
#map1 = affine_map<(d0)[s0] -> (2, -d0 + s0)>
#map2 = affine_map<(d0)[s0] -> (3, -d0 + s0)>
#map3 = affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)>

module {
  // Tiled element-wise addition: the outer loop walks %arg0's dimensions in
  // steps of (2, 3), takes a subview of each argument, and the inner loop adds
  // the %arg0 and %arg1 tiles element by element into the %arg2 tile.
  func.func @sum(%arg0: memref<?x?xf32, #map0>, %arg1: memref<?x?xf32, #map0>, %arg2: memref<?x?xf32, #map0>) {
    %c1 = arith.constant 1 : index
    %c0 = arith.constant 0 : index
    %c3 = arith.constant 3 : index
    %c2 = arith.constant 2 : index
    %0 = memref.dim %arg0, %c0 : memref<?x?xf32, #map0>
    %1 = memref.dim %arg0, %c1 : memref<?x?xf32, #map0>
    // Outer loop: dimension 0 -> block_x, dimension 1 -> block_y (see the
    // mapping attribute at the end of this loop).
    scf.parallel (%arg3, %arg4) = (%c0, %c0) to (%0, %1) step (%c2, %c3) {
      %2 = memref.dim %arg0, %c0 : memref<?x?xf32, #map0>
      %3 = affine.min #map1(%arg3)[%2]
      // The dimension-0 tile size is squared here; the result is used both as
      // a subview size and as the inner loop's dimension-0 upper bound, so
      // that bound is a non-trivial expression computed inside the outer loop.
      %squared_min = arith.muli %3, %3 : index
      %4 = memref.dim %arg0, %c1 : memref<?x?xf32, #map0>
      %5 = affine.min #map2(%arg4)[%4]
      %6 = memref.subview %arg0[%arg3, %arg4][%squared_min, %5][%c1, %c1] : memref<?x?xf32, #map0> to memref<?x?xf32, #map3>
      %7 = memref.dim %arg1, %c0 : memref<?x?xf32, #map0>
      %8 = affine.min #map1(%arg3)[%7]
      %9 = memref.dim %arg1, %c1 : memref<?x?xf32, #map0>
      %10 = affine.min #map2(%arg4)[%9]
      %11 = memref.subview %arg1[%arg3, %arg4][%8, %10][%c1, %c1] : memref<?x?xf32, #map0> to memref<?x?xf32, #map3>
      %12 = memref.dim %arg2, %c0 : memref<?x?xf32, #map0>
      %13 = affine.min #map1(%arg3)[%12]
      %14 = memref.dim %arg2, %c1 : memref<?x?xf32, #map0>
      %15 = affine.min #map2(%arg4)[%14]
      %16 = memref.subview %arg2[%arg3, %arg4][%13, %15][%c1, %c1] : memref<?x?xf32, #map0> to memref<?x?xf32, #map3>
      // Inner loop: dimension 0 -> thread_x, dimension 1 -> thread_y. Its
      // upper bounds (%squared_min, %5) are values defined in the outer body.
      scf.parallel (%arg5, %arg6) = (%c0, %c0) to (%squared_min, %5) step (%c1, %c1) {
        %17 = memref.load %6[%arg5, %arg6] : memref<?x?xf32, #map3>
        %18 = memref.load %11[%arg5, %arg6] : memref<?x?xf32, #map3>
        // %19 is loaded from the output tile but is not an operand of the
        // addition below.
        %19 = memref.load %16[%arg5, %arg6] : memref<?x?xf32, #map3>
        %20 = arith.addf %17, %18 : f32
        memref.store %20, %16[%arg5, %arg6] : memref<?x?xf32, #map3>
        scf.yield
      } {mapping = [#gpu.loop_dim_map<bound = (d0) -> (d0), map = (d0) -> (d0), processor = thread_x>, #gpu.loop_dim_map<bound = (d0) -> (d0), map = (d0) -> (d0), processor = thread_y>]}
      scf.yield
    } {mapping = [#gpu.loop_dim_map<bound = (d0) -> (d0), map = (d0) -> (d0), processor = block_x>, #gpu.loop_dim_map<bound = (d0) -> (d0), map = (d0) -> (d0), processor = block_y>]}
    return
  }
}

// CHECK:       #[[$MAP0:.*]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)>
// CHECK:       #[[$MAP1:.*]] = affine_map<(d0)[s0, s1] -> ((d0 - s0) ceildiv s1)>
// CHECK:       #[[$MAP2:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s0 + s1)>
// CHECK:       #[[$MAP3:.*]] = affine_map<(d0)[s0] -> (2, -d0 + s0)>
// CHECK:       #[[$MAP4:.*]] = affine_map<(d0)[s0] -> (3, -d0 + s0)>
// CHECK:       #[[$MAP5:.*]] = affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)>

// CHECK:       module {
// CHECK-LABEL:   func @sum(
// CHECK-SAME:              [[VAL_0:%.*]]: memref<?x?xf32, #[[$MAP0]]>, [[VAL_1:%.*]]: memref<?x?xf32, #[[$MAP0]]>, [[VAL_2:%.*]]: memref<?x?xf32, #[[$MAP0]]>) {
// CHECK:           %[[C1:.*]] = arith.constant 1 : index
// CHECK:           %[[C0:.*]] = arith.constant 0 : index
// CHECK:           %[[C3:.*]] = arith.constant 3 : index
// CHECK:           %[[C2:.*]] = arith.constant 2 : index
// CHECK:           [[VAL_7:%.*]] = memref.dim [[VAL_0]], %[[C0]] : memref<?x?xf32, #[[$MAP0]]>
// CHECK:           [[VAL_8:%.*]] = memref.dim [[VAL_0]], %[[C1]] : memref<?x?xf32, #[[$MAP0]]>
// CHECK:           [[VAL_9:%.*]] = arith.constant 1 : index
// CHECK:           [[VAL_10:%.*]] = affine.apply #[[$MAP1]]([[VAL_7]]){{\[}}%[[C0]], %[[C2]]]
// CHECK:           [[VAL_11:%.*]] = affine.apply #[[$MAP1]]([[VAL_8]]){{\[}}%[[C0]], %[[C3]]]
// CHECK:           [[VAL_12:%.*]] = arith.constant 4 : index
// CHECK:           [[VAL_13:%.*]] = affine.apply #[[$MAP1]]([[VAL_12]]){{\[}}%[[C0]], %[[C1]]]
// CHECK:           [[VAL_14:%.*]] = arith.constant 3 : index
// CHECK:           [[VAL_15:%.*]] = affine.apply #[[$MAP1]]([[VAL_14]]){{\[}}%[[C0]], %[[C1]]]
// CHECK:           gpu.launch blocks([[VAL_16:%.*]], [[VAL_17:%.*]], [[VAL_18:%.*]]) in ([[VAL_19:%.*]] = [[VAL_10]], [[VAL_20:%.*]] = [[VAL_11]], [[VAL_21:%.*]] = [[VAL_9]]) threads([[VAL_22:%.*]], [[VAL_23:%.*]], [[VAL_24:%.*]]) in ([[VAL_25:%.*]] = [[VAL_13]], [[VAL_26:%.*]] = [[VAL_15]], [[VAL_27:%.*]] = [[VAL_9]]) {
// CHECK:             [[VAL_28:%.*]] = affine.apply #[[$MAP2]]([[VAL_16]]){{\[}}%[[C2]], %[[C0]]]
// CHECK:             [[VAL_29:%.*]] = affine.apply #[[$MAP2]]([[VAL_17]]){{\[}}%[[C3]], %[[C0]]]
// CHECK:             [[VAL_30:%.*]] = memref.dim [[VAL_0]], %[[C0]] : memref<?x?xf32, #[[$MAP0]]>
// CHECK:             [[VAL_31:%.*]] = affine.min #[[$MAP3]]([[VAL_28]]){{\[}}[[VAL_30]]]
// CHECK:             [[VAL_31_SQUARED:%.*]] = arith.muli [[VAL_31]], [[VAL_31]] : index
// CHECK:             [[VAL_32:%.*]] = memref.dim [[VAL_0]], %[[C1]] : memref<?x?xf32, #[[$MAP0]]>
// CHECK:             [[VAL_33:%.*]] = affine.min #[[$MAP4]]([[VAL_29]]){{\[}}[[VAL_32]]]
// CHECK:             [[VAL_34:%.*]] = memref.subview [[VAL_0]]{{\[}}[[VAL_28]], [[VAL_29]]] {{\[}}[[VAL_31_SQUARED]], [[VAL_33]]] {{\[}}%[[C1]], %[[C1]]] : memref<?x?xf32, #[[$MAP0]]> to memref<?x?xf32, #[[$MAP5]]>
// CHECK:             [[VAL_35:%.*]] = memref.dim [[VAL_1]], %[[C0]] : memref<?x?xf32, #[[$MAP0]]>
// CHECK:             [[VAL_36:%.*]] = affine.min #[[$MAP3]]([[VAL_28]]){{\[}}[[VAL_35]]]
// CHECK:             [[VAL_37:%.*]] = memref.dim [[VAL_1]], %[[C1]] : memref<?x?xf32, #[[$MAP0]]>
// CHECK:             [[VAL_38:%.*]] = affine.min #[[$MAP4]]([[VAL_29]]){{\[}}[[VAL_37]]]
// CHECK:             [[VAL_39:%.*]] = memref.subview [[VAL_1]]{{\[}}[[VAL_28]], [[VAL_29]]] {{\[}}[[VAL_36]], [[VAL_38]]] {{\[}}%[[C1]], %[[C1]]] : memref<?x?xf32, #[[$MAP0]]> to memref<?x?xf32, #[[$MAP5]]>
// CHECK:             [[VAL_40:%.*]] = memref.dim [[VAL_2]], %[[C0]] : memref<?x?xf32, #[[$MAP0]]>
// CHECK:             [[VAL_41:%.*]] = affine.min #[[$MAP3]]([[VAL_28]]){{\[}}[[VAL_40]]]
// CHECK:             [[VAL_42:%.*]] = memref.dim [[VAL_2]], %[[C1]] : memref<?x?xf32, #[[$MAP0]]>
// CHECK:             [[VAL_43:%.*]] = affine.min #[[$MAP4]]([[VAL_29]]){{\[}}[[VAL_42]]]
// CHECK:             [[VAL_44:%.*]] = memref.subview [[VAL_2]]{{\[}}[[VAL_28]], [[VAL_29]]] {{\[}}[[VAL_41]], [[VAL_43]]] {{\[}}%[[C1]], %[[C1]]] : memref<?x?xf32, #[[$MAP0]]> to memref<?x?xf32, #[[$MAP5]]>
// CHECK:             [[VAL_45:%.*]] = affine.apply #[[$MAP2]]([[VAL_22]]){{\[}}%[[C1]], %[[C0]]]
// CHECK:             [[VAL_46:%.*]] = arith.cmpi slt, [[VAL_45]], [[VAL_31_SQUARED]] : index
// CHECK:             scf.if [[VAL_46]] {
// CHECK:               [[VAL_47:%.*]] = affine.apply #[[$MAP2]]([[VAL_23]]){{\[}}%[[C1]], %[[C0]]]
// CHECK:               [[VAL_48:%.*]] = arith.cmpi slt, [[VAL_47]], [[VAL_33]] : index
// CHECK:               scf.if [[VAL_48]] {
// CHECK:                 [[VAL_49:%.*]] = memref.load [[VAL_34]]{{\[}}[[VAL_45]], [[VAL_47]]] : memref<?x?xf32, #[[$MAP5]]>
// CHECK:                 [[VAL_50:%.*]] = memref.load [[VAL_39]]{{\[}}[[VAL_45]], [[VAL_47]]] : memref<?x?xf32, #[[$MAP5]]>
// CHECK:                 [[VAL_51:%.*]] = memref.load [[VAL_44]]{{\[}}[[VAL_45]], [[VAL_47]]] : memref<?x?xf32, #[[$MAP5]]>
// CHECK:                 [[VAL_52:%.*]] = arith.addf [[VAL_49]], [[VAL_50]] : f32
// CHECK:                 memref.store [[VAL_52]], [[VAL_44]]{{\[}}[[VAL_45]], [[VAL_47]]] : memref<?x?xf32, #[[$MAP5]]>
// CHECK:               }
// CHECK:             }
// CHECK:             gpu.terminator
// CHECK:           }
// CHECK:           return
// CHECK:         }
// CHECK:       }

// -----

// Optional attribute lowering test

// A 1-d scf.parallel with an empty body, running from 0 to 1 with step 1.
// Besides the block_x mapping, the loop carries an extra `optional_attr = 1`
// attribute; the FileCheck directive inside the function expects that text to
// appear in the tool output.
func.func @parallel_loop_optional_attr() {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  scf.parallel (%i0) = (%c0) to (%c1) step (%c1) {
  } { mapping = [#gpu.loop_dim_map<processor = block_x, map = (d0) -> (d0), bound = (d0) -> (d0)>], optional_attr = 1 }
  // CHECK: optional_attr = 1
  return
}

// -----

// Mapping to the same processor twice. Cannot be mapped.

// A 2-d scf.parallel with an empty body whose mapping attribute assigns both
// dimensions to the same processor, block_y. %buf and %res are unused.
func.func @parallel_double_map(%arg0 : index, %arg1 : index, %arg2 : index,
                          %arg3 : index,
                          %buf : memref<?x?xf32>,
                          %res : memref<?x?xf32>) {
  %four = arith.constant 4 : index
  scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
                                          step (%four, %four)  {
  } { mapping = [
      #gpu.loop_dim_map<processor = block_y, map = (d0) -> (d0), bound = (d0) -> (d0)>,
      #gpu.loop_dim_map<processor = block_y, map = (d0) -> (d0), bound = (d0) -> (d0)>
    ] }
  return
}

// CHECK-LABEL: @parallel_double_map
// CHECK: scf.parallel

// -----

// Loop with loop-variant upper bound. Cannot be mapped.

// Two nested 2-d scf.parallel loops in which the inner loop's upper bounds are
// the outer loop's induction variables (%i0, %i1), i.e. they change on every
// outer iteration.
func.func @parallel_loop_loop_variant_bound(%arg0 : index, %arg1 : index, %arg2 : index,
                                       %arg3 : index,
                                       %buf : memref<?x?xf32>,
                                       %res : memref<?x?xf32>) {
  %zero = arith.constant 0 : index
  %one = arith.constant 1 : index
  %four = arith.constant 4 : index
  // Outer loop (step 4): dimension 0 -> block_y, dimension 1 -> sequential.
  scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
                                          step (%four, %four)  {
    // Inner loop: runs from 0 up to the current outer indices with step 1;
    // dimension 0 -> thread_y, dimension 1 -> sequential.
    scf.parallel (%si0, %si1) = (%zero, %zero) to (%i0, %i1)
                                            step (%one, %one)  {
      %idx0 = arith.addi %i0, %si0 : index
      %idx1 = arith.addi %i1, %si1 : index
      %val = memref.load %buf[%idx0, %idx1] : memref<?x?xf32>
      memref.store %val, %res[%idx1, %idx0] : memref<?x?xf32>
    } { mapping = [
        #gpu.loop_dim_map<processor = thread_y, map = (d0) -> (d0), bound = (d0) -> (d0)>,
        #gpu.loop_dim_map<processor = sequential, map = (d0) -> (d0), bound = (d0) -> (d0)>
      ] }
  } { mapping = [
      #gpu.loop_dim_map<processor = block_y, map = (d0) -> (d0), bound = (d0) -> (d0)>,
      #gpu.loop_dim_map<processor = sequential, map = (d0) -> (d0), bound = (d0) -> (d0)>
    ] }
  return
}

// CHECK-LABEL: @parallel_loop_loop_variant_bound
// CHECK: scf.parallel
// CHECK: scf.parallel

// -----

// Loop without annotations. Cannot be mapped.

// A 2-d scf.parallel with an empty body and no `mapping` attribute at all.
// %buf and %res are unused.
func.func @parallel_no_annotations(%arg0 : index, %arg1 : index, %arg2 : index,
                              %arg3 : index,
                              %buf : memref<?x?xf32>,
                              %res : memref<?x?xf32>) {
  %four = arith.constant 4 : index
  scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
                                          step (%four, %four)  {
  }
  return
}

// CHECK-LABEL: @parallel_no_annotations
// CHECK: scf.parallel