// RUN: mlir-opt -convert-parallel-loops-to-gpu -split-input-file -verify-diagnostics %s | FileCheck %s

// Tests for converting mapped scf.parallel loops into gpu.launch.
// Every loop dimension carries one `mapping` entry. Going by the per-test
// descriptions below, the `processor` ids used in this file are:
//   0 = block.x, 1 = block.y, 3 = thread.x, 4 = thread.y, 6 = sequential.

// 2-d parallel loop mapped to block.y and block.x

func @parallel_loop_bidy_bidx(%arg0 : index, %arg1 : index, %arg2 : index,
                              %arg3 : index, %arg4 : index,
                              %buf : memref<?x?xf32>,
                              %res : memref<?x?xf32>) {
  %step = constant 2 : index
  scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
                                          step (%arg4, %step) {
    %val = load %buf[%i0, %i1] : memref<?x?xf32>
    store %val, %res[%i1, %i0] : memref<?x?xf32>
  } { mapping = [{processor = 1, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>}, {processor = 0, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>}] }
  return
}

// CHECK: #[[MAP0:.*]] = affine_map<()[s0, s1, s2] -> ((s0 - s1) ceildiv s2)>
// CHECK: #[[MAP1:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s0 + s1)>

// CHECK: module {
// CHECK-LABEL: func @parallel_loop_bidy_bidx(
// CHECK-SAME: [[VAL_0:%.*]]: index, [[VAL_1:%.*]]: index, [[VAL_2:%.*]]: index, [[VAL_3:%.*]]: index, [[VAL_4:%.*]]: index, [[VAL_5:%.*]]: memref<?x?xf32>, [[VAL_6:%.*]]: memref<?x?xf32>) {
// CHECK: [[VAL_7:%.*]] = constant 2 : index
// CHECK: [[VAL_8:%.*]] = constant 1 : index
// CHECK: [[VAL_9:%.*]] = affine.apply #[[MAP0]](){{\[}}[[VAL_2]], [[VAL_0]], [[VAL_4]]]
// CHECK: [[VAL_10:%.*]] = affine.apply #[[MAP0]](){{\[}}[[VAL_3]], [[VAL_1]], [[VAL_7]]]
// CHECK: gpu.launch blocks([[VAL_11:%.*]], [[VAL_12:%.*]], [[VAL_13:%.*]]) in ([[VAL_14:%.*]] = [[VAL_10]], [[VAL_15:%.*]] = [[VAL_9]], [[VAL_16:%.*]] = [[VAL_8]]) threads([[VAL_17:%.*]], [[VAL_18:%.*]], [[VAL_19:%.*]]) in ([[VAL_20:%.*]] = [[VAL_8]], [[VAL_21:%.*]] = [[VAL_8]], [[VAL_22:%.*]] = [[VAL_8]]) {
// CHECK: [[VAL_23:%.*]] = affine.apply #[[MAP1]]([[VAL_12]]){{\[}}[[VAL_4]], [[VAL_0]]]
// CHECK: [[VAL_24:%.*]] = affine.apply #[[MAP1]]([[VAL_11]]){{\[}}[[VAL_7]], [[VAL_1]]]
// CHECK: [[VAL_25:%.*]] = load [[VAL_5]]{{\[}}[[VAL_23]], [[VAL_24]]] : memref<?x?xf32>
// CHECK: store [[VAL_25]], [[VAL_6]]{{\[}}[[VAL_24]], [[VAL_23]]] : memref<?x?xf32>
// CHECK: gpu.terminator
// CHECK: }
// CHECK: return
// CHECK: }
// CHECK: }

// -----

// tiled 2-d parallel loop mapped to block.y and block.x and thread.y and thread.x.

func @parallel_loop_tiled(%arg0 : index, %arg1 : index, %arg2 : index,
                          %arg3 : index,
                          %buf : memref<?x?xf32>,
                          %res : memref<?x?xf32>) {
  %zero = constant 0 : index
  %one = constant 1 : index
  %four = constant 4 : index
  scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
                                          step (%four, %four) {
    scf.parallel (%si0, %si1) = (%zero, %zero) to (%four, %four)
                                            step (%one, %one) {
      %idx0 = addi %i0, %si0 : index
      %idx1 = addi %i1, %si1 : index
      %val = load %buf[%idx0, %idx1] : memref<?x?xf32>
      store %val, %res[%idx1, %idx0] : memref<?x?xf32>
    } { mapping = [
        {processor = 4, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>},
        {processor = 3, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>}
      ] }
  } { mapping = [
      {processor = 1, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>},
      {processor = 0, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>}
    ] }
  return
}

// CHECK: #[[MAP0:.*]] = affine_map<()[s0, s1, s2] -> ((s0 - s1) ceildiv s2)>
// CHECK: #[[MAP1:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s0 + s1)>

// CHECK: module {
// CHECK-LABEL: func @parallel_loop_tiled(
// CHECK-SAME: [[VAL_26:%.*]]: index, [[VAL_27:%.*]]: index, [[VAL_28:%.*]]: index, [[VAL_29:%.*]]: index, [[VAL_30:%.*]]: memref<?x?xf32>, [[VAL_31:%.*]]: memref<?x?xf32>) {
// CHECK: [[VAL_32:%.*]] = constant 0 : index
// CHECK: [[VAL_33:%.*]] = constant 1 : index
// CHECK: [[VAL_34:%.*]] = constant 4 : index
// CHECK: [[VAL_35:%.*]] = constant 1 : index
// CHECK: [[VAL_36:%.*]] = affine.apply #[[MAP0]](){{\[}}[[VAL_28]], [[VAL_26]], [[VAL_34]]]
// CHECK: [[VAL_37:%.*]] = affine.apply #[[MAP0]](){{\[}}[[VAL_29]], [[VAL_27]], [[VAL_34]]]
// CHECK: [[VAL_38:%.*]] = affine.apply #[[MAP0]](){{\[}}[[VAL_34]], [[VAL_32]], [[VAL_33]]]
// CHECK: [[VAL_39:%.*]] = affine.apply #[[MAP0]](){{\[}}[[VAL_34]], [[VAL_32]], [[VAL_33]]]
// CHECK: gpu.launch blocks([[VAL_40:%.*]], [[VAL_41:%.*]], [[VAL_42:%.*]]) in ([[VAL_43:%.*]] = [[VAL_37]], [[VAL_44:%.*]] = [[VAL_36]], [[VAL_45:%.*]] = [[VAL_35]]) threads([[VAL_46:%.*]], [[VAL_47:%.*]], [[VAL_48:%.*]]) in ([[VAL_49:%.*]] = [[VAL_39]], [[VAL_50:%.*]] = [[VAL_38]], [[VAL_51:%.*]] = [[VAL_35]]) {
// CHECK: [[VAL_52:%.*]] = affine.apply #[[MAP1]]([[VAL_41]]){{\[}}[[VAL_34]], [[VAL_26]]]
// CHECK: [[VAL_53:%.*]] = affine.apply #[[MAP1]]([[VAL_40]]){{\[}}[[VAL_34]], [[VAL_27]]]
// CHECK: [[VAL_54:%.*]] = affine.apply #[[MAP1]]([[VAL_47]]){{\[}}[[VAL_33]], [[VAL_32]]]
// CHECK: [[VAL_55:%.*]] = affine.apply #[[MAP1]]([[VAL_46]]){{\[}}[[VAL_33]], [[VAL_32]]]
// CHECK: [[VAL_56:%.*]] = addi [[VAL_52]], [[VAL_54]] : index
// CHECK: [[VAL_57:%.*]] = addi [[VAL_53]], [[VAL_55]] : index
// CHECK: [[VAL_58:%.*]] = load [[VAL_30]]{{\[}}[[VAL_56]], [[VAL_57]]] : memref<?x?xf32>
// CHECK: store [[VAL_58]], [[VAL_31]]{{\[}}[[VAL_57]], [[VAL_56]]] : memref<?x?xf32>
// CHECK: gpu.terminator
// CHECK: }
// CHECK: return
// CHECK: }
// CHECK: }

// -----

// 2-d parallel loop mapped to block.y and sequential

func @parallel_loop_bidy_seq(%arg0 : index, %arg1 : index, %arg2 : index,
                             %arg3 : index, %arg4 : index,
                             %buf : memref<?x?xf32>,
                             %res : memref<?x?xf32>) {
  %step = constant 2 : index
  scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
                                          step (%arg4, %step) {
    %val = load %buf[%i0, %i1] : memref<?x?xf32>
    store %val, %res[%i1, %i0] : memref<?x?xf32>
  } { mapping = [
      {processor = 1, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>},
      {processor = 6, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>}
    ] }
  return
}

// CHECK: #[[MAP0:.*]] = affine_map<()[s0, s1, s2] -> ((s0 - s1) ceildiv s2)>
// CHECK: #[[MAP1:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s0 + s1)>

// CHECK: module {
// CHECK-LABEL: func @parallel_loop_bidy_seq(
// CHECK-SAME: [[VAL_59:%.*]]: index, [[VAL_60:%.*]]: index, [[VAL_61:%.*]]: index, [[VAL_62:%.*]]: index, [[VAL_63:%.*]]: index, [[VAL_64:%.*]]: memref<?x?xf32>, [[VAL_65:%.*]]: memref<?x?xf32>) {
// CHECK: [[VAL_66:%.*]] = constant 2 : index
// CHECK: [[VAL_67:%.*]] = constant 1 : index
// CHECK: [[VAL_68:%.*]] = affine.apply #[[MAP0]](){{\[}}[[VAL_61]], [[VAL_59]], [[VAL_63]]]
// CHECK: gpu.launch blocks([[VAL_69:%.*]], [[VAL_70:%.*]], [[VAL_71:%.*]]) in ([[VAL_72:%.*]] = [[VAL_67]], [[VAL_73:%.*]] = [[VAL_68]], [[VAL_74:%.*]] = [[VAL_67]]) threads([[VAL_75:%.*]], [[VAL_76:%.*]], [[VAL_77:%.*]]) in ([[VAL_78:%.*]] = [[VAL_67]], [[VAL_79:%.*]] = [[VAL_67]], [[VAL_80:%.*]] = [[VAL_67]]) {
// CHECK: [[VAL_81:%.*]] = affine.apply #[[MAP1]]([[VAL_70]]){{\[}}[[VAL_63]], [[VAL_59]]]
// CHECK: scf.for [[VAL_82:%.*]] = [[VAL_60]] to [[VAL_62]] step [[VAL_66]] {
// CHECK: [[VAL_83:%.*]] = load [[VAL_64]]{{\[}}[[VAL_81]], [[VAL_82]]] : memref<?x?xf32>
// CHECK: store [[VAL_83]], [[VAL_65]]{{\[}}[[VAL_82]], [[VAL_81]]] : memref<?x?xf32>
// CHECK: }
// CHECK: gpu.terminator
// CHECK: }
// CHECK: return
// CHECK: }
// CHECK: }

// -----

// tiled 2-d parallel loop mapped to block.y and seq. and thread.y and seq.
func @parallel_loop_tiled_seq(%arg0 : index, %arg1 : index, %arg2 : index,
                              %arg3 : index,
                              %buf : memref<?x?xf32>,
                              %res : memref<?x?xf32>) {
  %zero = constant 0 : index
  %one = constant 1 : index
  %four = constant 4 : index
  scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
                                          step (%four, %four) {
    scf.parallel (%si0, %si1) = (%zero, %zero) to (%four, %four)
                                            step (%one, %one) {
      %idx0 = addi %i0, %si0 : index
      %idx1 = addi %i1, %si1 : index
      %val = load %buf[%idx0, %idx1] : memref<?x?xf32>
      store %val, %res[%idx1, %idx0] : memref<?x?xf32>
    } { mapping = [
        {processor = 4, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>},
        {processor = 6, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>}
      ] }
  } { mapping = [
      {processor = 1, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>},
      {processor = 6, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>}
    ] }
  return
}

// CHECK: #[[MAP0:.*]] = affine_map<()[s0, s1, s2] -> ((s0 - s1) ceildiv s2)>
// CHECK: #[[MAP1:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s0 + s1)>

// CHECK: module {
// CHECK-LABEL: func @parallel_loop_tiled_seq(
// CHECK-SAME: [[VAL_84:%.*]]: index, [[VAL_85:%.*]]: index, [[VAL_86:%.*]]: index, [[VAL_87:%.*]]: index, [[VAL_88:%.*]]: memref<?x?xf32>, [[VAL_89:%.*]]: memref<?x?xf32>) {
// CHECK: [[VAL_90:%.*]] = constant 0 : index
// CHECK: [[VAL_91:%.*]] = constant 1 : index
// CHECK: [[VAL_92:%.*]] = constant 4 : index
// CHECK: [[VAL_93:%.*]] = constant 1 : index
// CHECK: [[VAL_94:%.*]] = affine.apply #[[MAP0]](){{\[}}[[VAL_86]], [[VAL_84]], [[VAL_92]]]
// CHECK: [[VAL_95:%.*]] = affine.apply #[[MAP0]](){{\[}}[[VAL_92]], [[VAL_90]], [[VAL_91]]]
// CHECK: gpu.launch blocks([[VAL_96:%.*]], [[VAL_97:%.*]], [[VAL_98:%.*]]) in ([[VAL_99:%.*]] = [[VAL_93]], [[VAL_100:%.*]] = [[VAL_94]], [[VAL_101:%.*]] = [[VAL_93]]) threads([[VAL_102:%.*]], [[VAL_103:%.*]], [[VAL_104:%.*]]) in ([[VAL_105:%.*]] = [[VAL_93]], [[VAL_106:%.*]] = [[VAL_95]], [[VAL_107:%.*]] = [[VAL_93]]) {
// CHECK: [[VAL_108:%.*]] = affine.apply #[[MAP1]]([[VAL_97]]){{\[}}[[VAL_92]], [[VAL_84]]]
// CHECK: scf.for [[VAL_109:%.*]] = [[VAL_85]] to [[VAL_87]] step [[VAL_92]] {
// CHECK: [[VAL_110:%.*]] = affine.apply #[[MAP1]]([[VAL_103]]){{\[}}[[VAL_91]], [[VAL_90]]]
// CHECK: scf.for [[VAL_111:%.*]] = [[VAL_90]] to [[VAL_92]] step [[VAL_91]] {
// CHECK: [[VAL_112:%.*]] = addi [[VAL_108]], [[VAL_110]] : index
// CHECK: [[VAL_113:%.*]] = addi [[VAL_109]], [[VAL_111]] : index
// CHECK: [[VAL_114:%.*]] = load [[VAL_88]]{{\[}}[[VAL_112]], [[VAL_113]]] : memref<?x?xf32>
// CHECK: store [[VAL_114]], [[VAL_89]]{{\[}}[[VAL_113]], [[VAL_112]]] : memref<?x?xf32>
// CHECK: }
// CHECK: }
// CHECK: gpu.terminator
// CHECK: }
// CHECK: return
// CHECK: }
// CHECK: }

// -----

// Tiled 2-d parallel loop whose inner upper bounds are computed inside the
// outer loop (an affine.min result, squared via muli for the first dimension).
// The outer loop uses processors 0 and 1, the inner loop processors 3 and 4.
// The expected output sizes the thread dimensions from constants (4 and 3) and
// guards the loop body with scf.if on `cmpi "slt"` against the computed bounds.

#map0 = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)>
#map1 = affine_map<(d0)[s0] -> (2, -d0 + s0)>
#map2 = affine_map<(d0)[s0] -> (3, -d0 + s0)>
#map3 = affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)>

module {
  func @sum(%arg0: memref<?x?xf32, #map0>, %arg1: memref<?x?xf32, #map0>, %arg2: memref<?x?xf32, #map0>) {
    %c1 = constant 1 : index
    %c0 = constant 0 : index
    %c3 = constant 3 : index
    %c2 = constant 2 : index
    %0 = dim %arg0, 0 : memref<?x?xf32, #map0>
    %1 = dim %arg0, 1 : memref<?x?xf32, #map0>
    scf.parallel (%arg3, %arg4) = (%c0, %c0) to (%0, %1) step (%c2, %c3) {
      %2 = dim %arg0, 0 : memref<?x?xf32, #map0>
      %3 = affine.min #map1(%arg3)[%2]
      %squared_min = muli %3, %3 : index
      %4 = dim %arg0, 1 : memref<?x?xf32, #map0>
      %5 = affine.min #map2(%arg4)[%4]
      %6 = std.subview %arg0[%arg3, %arg4][%squared_min, %5][%c1, %c1] : memref<?x?xf32, #map0> to memref<?x?xf32, #map3>
      %7 = dim %arg1, 0 : memref<?x?xf32, #map0>
      %8 = affine.min #map1(%arg3)[%7]
      %9 = dim %arg1, 1 : memref<?x?xf32, #map0>
      %10 = affine.min #map2(%arg4)[%9]
      %11 = std.subview %arg1[%arg3, %arg4][%8, %10][%c1, %c1] : memref<?x?xf32, #map0> to memref<?x?xf32, #map3>
      %12 = dim %arg2, 0 : memref<?x?xf32, #map0>
      %13 = affine.min #map1(%arg3)[%12]
      %14 = dim %arg2, 1 : memref<?x?xf32, #map0>
      %15 = affine.min #map2(%arg4)[%14]
      %16 = std.subview %arg2[%arg3, %arg4][%13, %15][%c1, %c1] : memref<?x?xf32, #map0> to memref<?x?xf32, #map3>
      scf.parallel (%arg5, %arg6) = (%c0, %c0) to (%squared_min, %5) step (%c1, %c1) {
        %17 = load %6[%arg5, %arg6] : memref<?x?xf32, #map3>
        %18 = load %11[%arg5, %arg6] : memref<?x?xf32, #map3>
        %19 = load %16[%arg5, %arg6] : memref<?x?xf32, #map3>
        %20 = addf %17, %18 : f32
        store %20, %16[%arg5, %arg6] : memref<?x?xf32, #map3>
        scf.yield
      } {mapping = [{bound = affine_map<(d0) -> (d0)>, map = affine_map<(d0) -> (d0)>, processor = 3 : i64}, {bound = affine_map<(d0) -> (d0)>, map = affine_map<(d0) -> (d0)>, processor = 4 : i64}]}
      scf.yield
    } {mapping = [{bound = affine_map<(d0) -> (d0)>, map = affine_map<(d0) -> (d0)>, processor = 0 : i64}, {bound = affine_map<(d0) -> (d0)>, map = affine_map<(d0) -> (d0)>, processor = 1 : i64}]}
    return
  }
}

// CHECK: #[[MAP0:.*]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)>
// CHECK: #[[MAP1:.*]] = affine_map<()[s0, s1, s2] -> ((s0 - s1) ceildiv s2)>
// CHECK: #[[MAP2:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s0 + s1)>
// CHECK: #[[MAP3:.*]] = affine_map<(d0)[s0] -> (2, -d0 + s0)>
// CHECK: #[[MAP4:.*]] = affine_map<(d0)[s0] -> (3, -d0 + s0)>
// CHECK: #[[MAP5:.*]] = affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)>

// CHECK: module {
// CHECK-LABEL: func @sum(
// CHECK-SAME: [[VAL_0:%.*]]: memref<?x?xf32, #[[MAP0]]>, [[VAL_1:%.*]]: memref<?x?xf32, #[[MAP0]]>, [[VAL_2:%.*]]: memref<?x?xf32, #[[MAP0]]>) {
// CHECK: [[VAL_3:%.*]] = constant 1 : index
// CHECK: [[VAL_4:%.*]] = constant 0 : index
// CHECK: [[VAL_5:%.*]] = constant 3 : index
// CHECK: [[VAL_6:%.*]] = constant 2 : index
// CHECK: [[VAL_7:%.*]] = dim [[VAL_0]], 0 : memref<?x?xf32, #[[MAP0]]>
// CHECK: [[VAL_8:%.*]] = dim [[VAL_0]], 1 : memref<?x?xf32, #[[MAP0]]>
// CHECK: [[VAL_9:%.*]] = constant 1 : index
// CHECK: [[VAL_10:%.*]] = affine.apply #[[MAP1]](){{\[}}[[VAL_7]], [[VAL_4]], [[VAL_6]]]
// CHECK: [[VAL_11:%.*]] = affine.apply #[[MAP1]](){{\[}}[[VAL_8]], [[VAL_4]], [[VAL_5]]]
// CHECK: [[VAL_12:%.*]] = constant 4 : index
// CHECK: [[VAL_13:%.*]] = affine.apply #[[MAP1]](){{\[}}[[VAL_12]], [[VAL_4]], [[VAL_3]]]
// CHECK: [[VAL_14:%.*]] = constant 3 : index
// CHECK: [[VAL_15:%.*]] = affine.apply #[[MAP1]](){{\[}}[[VAL_14]], [[VAL_4]], [[VAL_3]]]
// CHECK: gpu.launch blocks([[VAL_16:%.*]], [[VAL_17:%.*]], [[VAL_18:%.*]]) in ([[VAL_19:%.*]] = [[VAL_10]], [[VAL_20:%.*]] = [[VAL_11]], [[VAL_21:%.*]] = [[VAL_9]]) threads([[VAL_22:%.*]], [[VAL_23:%.*]], [[VAL_24:%.*]]) in ([[VAL_25:%.*]] = [[VAL_13]], [[VAL_26:%.*]] = [[VAL_15]], [[VAL_27:%.*]] = [[VAL_9]]) {
// CHECK: [[VAL_28:%.*]] = affine.apply #[[MAP2]]([[VAL_16]]){{\[}}[[VAL_6]], [[VAL_4]]]
// CHECK: [[VAL_29:%.*]] = affine.apply #[[MAP2]]([[VAL_17]]){{\[}}[[VAL_5]], [[VAL_4]]]
// CHECK: [[VAL_30:%.*]] = dim [[VAL_0]], 0 : memref<?x?xf32, #[[MAP0]]>
// CHECK: [[VAL_31:%.*]] = affine.min #[[MAP3]]([[VAL_28]]){{\[}}[[VAL_30]]]
// CHECK: [[VAL_31_SQUARED:%.*]] = muli [[VAL_31]], [[VAL_31]] : index
// CHECK: [[VAL_32:%.*]] = dim [[VAL_0]], 1 : memref<?x?xf32, #[[MAP0]]>
// CHECK: [[VAL_33:%.*]] = affine.min #[[MAP4]]([[VAL_29]]){{\[}}[[VAL_32]]]
// CHECK: [[VAL_34:%.*]] = subview [[VAL_0]]{{\[}}[[VAL_28]], [[VAL_29]]] {{\[}}[[VAL_31_SQUARED]], [[VAL_33]]] {{\[}}[[VAL_3]], [[VAL_3]]] : memref<?x?xf32, #[[MAP0]]> to memref<?x?xf32, #[[MAP5]]>
// CHECK: [[VAL_35:%.*]] = dim [[VAL_1]], 0 : memref<?x?xf32, #[[MAP0]]>
// CHECK: [[VAL_36:%.*]] = affine.min #[[MAP3]]([[VAL_28]]){{\[}}[[VAL_35]]]
// CHECK: [[VAL_37:%.*]] = dim [[VAL_1]], 1 : memref<?x?xf32, #[[MAP0]]>
// CHECK: [[VAL_38:%.*]] = affine.min #[[MAP4]]([[VAL_29]]){{\[}}[[VAL_37]]]
// CHECK: [[VAL_39:%.*]] = subview [[VAL_1]]{{\[}}[[VAL_28]], [[VAL_29]]] {{\[}}[[VAL_36]], [[VAL_38]]] {{\[}}[[VAL_3]], [[VAL_3]]] : memref<?x?xf32, #[[MAP0]]> to memref<?x?xf32, #[[MAP5]]>
// CHECK: [[VAL_40:%.*]] = dim [[VAL_2]], 0 : memref<?x?xf32, #[[MAP0]]>
// CHECK: [[VAL_41:%.*]] = affine.min #[[MAP3]]([[VAL_28]]){{\[}}[[VAL_40]]]
// CHECK: [[VAL_42:%.*]] = dim [[VAL_2]], 1 : memref<?x?xf32, #[[MAP0]]>
// CHECK: [[VAL_43:%.*]] = affine.min #[[MAP4]]([[VAL_29]]){{\[}}[[VAL_42]]]
// CHECK: [[VAL_44:%.*]] = subview [[VAL_2]]{{\[}}[[VAL_28]], [[VAL_29]]] {{\[}}[[VAL_41]], [[VAL_43]]] {{\[}}[[VAL_3]], [[VAL_3]]] : memref<?x?xf32, #[[MAP0]]> to memref<?x?xf32, #[[MAP5]]>
// CHECK: [[VAL_45:%.*]] = affine.apply #[[MAP2]]([[VAL_22]]){{\[}}[[VAL_3]], [[VAL_4]]]
// CHECK: [[VAL_46:%.*]] = cmpi "slt", [[VAL_45]], [[VAL_31_SQUARED]] : index
// CHECK: scf.if [[VAL_46]] {
// CHECK: [[VAL_47:%.*]] = affine.apply #[[MAP2]]([[VAL_23]]){{\[}}[[VAL_3]], [[VAL_4]]]
// CHECK: [[VAL_48:%.*]] = cmpi "slt", [[VAL_47]], [[VAL_33]] : index
// CHECK: scf.if [[VAL_48]] {
// CHECK: [[VAL_49:%.*]] = load [[VAL_34]]{{\[}}[[VAL_45]], [[VAL_47]]] : memref<?x?xf32, #[[MAP5]]>
// CHECK: [[VAL_50:%.*]] = load [[VAL_39]]{{\[}}[[VAL_45]], [[VAL_47]]] : memref<?x?xf32, #[[MAP5]]>
// CHECK: [[VAL_51:%.*]] = load [[VAL_44]]{{\[}}[[VAL_45]], [[VAL_47]]] : memref<?x?xf32, #[[MAP5]]>
// CHECK: [[VAL_52:%.*]] = addf [[VAL_49]], [[VAL_50]] : f32
// CHECK: store [[VAL_52]], [[VAL_44]]{{\[}}[[VAL_45]], [[VAL_47]]] : memref<?x?xf32, #[[MAP5]]>
// CHECK: }
// CHECK: }
// CHECK: gpu.terminator
// CHECK: }
// CHECK: return
// CHECK: }
// CHECK: }

// -----

// Mapping to the same processor twice.

func @parallel_double_map(%arg0 : index, %arg1 : index, %arg2 : index,
                          %arg3 : index,
                          %buf : memref<?x?xf32>,
                          %res : memref<?x?xf32>) {
  %four = constant 4 : index
  // expected-error@+2 {{cannot redefine the bound for processor 1}}
  // expected-error@+1 {{failed to legalize operation 'scf.parallel'}}
  scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
                                          step (%four, %four) {
  } { mapping = [
      {processor = 1, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>},
      {processor = 1, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>}
    ] }
  return
}

// -----

// Loop with loop-variant upper bound.

func @parallel_loop_loop_variant_bound(%arg0 : index, %arg1 : index, %arg2 : index,
                                       %arg3 : index,
                                       %buf : memref<?x?xf32>,
                                       %res : memref<?x?xf32>) {
  %zero = constant 0 : index
  %one = constant 1 : index
  %four = constant 4 : index
  // expected-error@+1 {{failed to legalize operation 'scf.parallel'}}
  scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
                                          step (%four, %four) {
    // expected-error@+1 {{cannot derive loop-invariant upper bound}}
    scf.parallel (%si0, %si1) = (%zero, %zero) to (%i0, %i1)
                                            step (%one, %one) {
      %idx0 = addi %i0, %si0 : index
      %idx1 = addi %i1, %si1 : index
      %val = load %buf[%idx0, %idx1] : memref<?x?xf32>
      store %val, %res[%idx1, %idx0] : memref<?x?xf32>
    } { mapping = [
        {processor = 4, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>},
        {processor = 6, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>}
      ] }
  } { mapping = [
      {processor = 1, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>},
      {processor = 6, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>}
    ] }
  return
}