// RUN: mlir-opt -convert-parallel-loops-to-gpu -split-input-file -verify-diagnostics %s | FileCheck %s

// 2-d parallel loop mapped to block.y and block.x
// The expected output launches with grid x taken from the second loop
// dimension (block_x) and grid y from the first (block_y); the remaining grid
// dimension and all thread dimensions use the constant 1.

func.func @parallel_loop_bidy_bidx(%arg0 : index, %arg1 : index, %arg2 : index,
                                   %arg3 : index, %arg4 : index,
                                   %buf : memref<?x?xf32>,
                                   %res : memref<?x?xf32>) {
  %step = arith.constant 2 : index
  scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
                                          step (%arg4, %step) {
    %val = memref.load %buf[%i0, %i1] : memref<?x?xf32>
    memref.store %val, %res[%i1, %i0] : memref<?x?xf32>
  } { mapping = [#gpu.loop_dim_map<processor = block_y, map = (d0) -> (d0), bound = (d0) -> (d0)>, #gpu.loop_dim_map<processor = block_x, map = (d0) -> (d0), bound = (d0) -> (d0)>] }
  return
}

// CHECK:       #[[$MAP0:.*]] = affine_map<(d0)[s0, s1] -> ((d0 - s0) ceildiv s1)>
// CHECK:       #[[$MAP1:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s0 + s1)>

// CHECK:       module {
// CHECK-LABEL:   func @parallel_loop_bidy_bidx(
// CHECK-SAME:      [[VAL_0:%.*]]: index, [[VAL_1:%.*]]: index, [[VAL_2:%.*]]: index, [[VAL_3:%.*]]: index, [[VAL_4:%.*]]: index, [[VAL_5:%.*]]: memref<?x?xf32>, [[VAL_6:%.*]]: memref<?x?xf32>) {
// CHECK:           [[VAL_7:%.*]] = arith.constant 2 : index
// CHECK:           [[VAL_8:%.*]] = arith.constant 1 : index
// CHECK:           [[VAL_9:%.*]] = affine.apply #[[$MAP0]]([[VAL_2]]){{\[}}[[VAL_0]], [[VAL_4]]]
// CHECK:           [[VAL_10:%.*]] = affine.apply #[[$MAP0]]([[VAL_3]]){{\[}}[[VAL_1]], [[VAL_7]]]
// CHECK:           gpu.launch blocks([[VAL_11:%.*]], [[VAL_12:%.*]], [[VAL_13:%.*]]) in ([[VAL_14:%.*]] = [[VAL_10]], [[VAL_15:%.*]] = [[VAL_9]], [[VAL_16:%.*]] = [[VAL_8]]) threads([[VAL_17:%.*]], [[VAL_18:%.*]], [[VAL_19:%.*]]) in ([[VAL_20:%.*]] = [[VAL_8]], [[VAL_21:%.*]] = [[VAL_8]], [[VAL_22:%.*]] = [[VAL_8]]) {
// CHECK:             [[VAL_23:%.*]] = affine.apply #[[$MAP1]]([[VAL_12]]){{\[}}[[VAL_4]], [[VAL_0]]]
// CHECK:             [[VAL_24:%.*]] = affine.apply #[[$MAP1]]([[VAL_11]]){{\[}}[[VAL_7]], [[VAL_1]]]
// CHECK:             [[VAL_25:%.*]] = memref.load [[VAL_5]]{{\[}}[[VAL_23]], [[VAL_24]]] : memref<?x?xf32>
// CHECK:             memref.store [[VAL_25]], [[VAL_6]]{{\[}}[[VAL_24]], [[VAL_23]]] : memref<?x?xf32>
// CHECK:             gpu.terminator
// CHECK:           }
// CHECK:           return
// CHECK:         }
// CHECK:       }

// -----

// tiled 2-d parallel loop mapped to block.y and block.x and thread.y and thread.x.
func.func @parallel_loop_tiled(%arg0 : index, %arg1 : index, %arg2 : index,
                               %arg3 : index,
                               %buf : memref<?x?xf32>,
                               %res : memref<?x?xf32>) {
  %zero = arith.constant 0 : index
  %one = arith.constant 1 : index
  %four = arith.constant 4 : index
  // Outer loop walks 4x4 tiles and is mapped to blocks; the inner loop walks
  // the elements of one tile and is mapped to threads.
  scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
                                          step (%four, %four) {
    scf.parallel (%si0, %si1) = (%zero, %zero) to (%four, %four)
                                            step (%one, %one) {
      %idx0 = arith.addi %i0, %si0 : index
      %idx1 = arith.addi %i1, %si1 : index
      %val = memref.load %buf[%idx0, %idx1] : memref<?x?xf32>
      memref.store %val, %res[%idx1, %idx0] : memref<?x?xf32>
    } { mapping = [
        #gpu.loop_dim_map<processor = thread_y, map = (d0) -> (d0), bound = (d0) -> (d0)>,
        #gpu.loop_dim_map<processor = thread_x, map = (d0) -> (d0), bound = (d0) -> (d0)>
      ] }
  } { mapping = [
      #gpu.loop_dim_map<processor = block_y, map = (d0) -> (d0), bound = (d0) -> (d0)>,
      #gpu.loop_dim_map<processor = block_x, map = (d0) -> (d0), bound = (d0) -> (d0)>
    ] }
  return
}

// CHECK:       #[[$MAP0:.*]] = affine_map<(d0)[s0, s1] -> ((d0 - s0) ceildiv s1)>
// CHECK:       #[[$MAP1:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s0 + s1)>

// CHECK:       module {
// CHECK-LABEL:   func @parallel_loop_tiled(
// CHECK-SAME:      [[VAL_26:%.*]]: index, [[VAL_27:%.*]]: index, [[VAL_28:%.*]]: index, [[VAL_29:%.*]]: index, [[VAL_30:%.*]]: memref<?x?xf32>, [[VAL_31:%.*]]: memref<?x?xf32>) {
// CHECK:           [[VAL_32:%.*]] = arith.constant 0 : index
// CHECK:           [[VAL_33:%.*]] = arith.constant 1 : index
// CHECK:           [[VAL_34:%.*]] = arith.constant 4 : index
// CHECK:           [[VAL_35:%.*]] = arith.constant 1 : index
// CHECK:           [[VAL_36:%.*]] = affine.apply #[[$MAP0]]([[VAL_28]]){{\[}}[[VAL_26]], [[VAL_34]]]
// CHECK:           [[VAL_37:%.*]] = affine.apply #[[$MAP0]]([[VAL_29]]){{\[}}[[VAL_27]], [[VAL_34]]]
// CHECK:           [[VAL_38:%.*]] = affine.apply #[[$MAP0]]([[VAL_34]]){{\[}}[[VAL_32]], [[VAL_33]]]
// CHECK:           [[VAL_39:%.*]] = affine.apply #[[$MAP0]]([[VAL_34]]){{\[}}[[VAL_32]], [[VAL_33]]]
// CHECK:           gpu.launch blocks([[VAL_40:%.*]], [[VAL_41:%.*]], [[VAL_42:%.*]]) in ([[VAL_43:%.*]] = [[VAL_37]], [[VAL_44:%.*]] = [[VAL_36]], [[VAL_45:%.*]] = [[VAL_35]]) threads([[VAL_46:%.*]], [[VAL_47:%.*]], [[VAL_48:%.*]]) in ([[VAL_49:%.*]] = [[VAL_39]], [[VAL_50:%.*]] = [[VAL_38]], [[VAL_51:%.*]] = [[VAL_35]]) {
// CHECK:             [[VAL_52:%.*]] = affine.apply #[[$MAP1]]([[VAL_41]]){{\[}}[[VAL_34]], [[VAL_26]]]
// CHECK:             [[VAL_53:%.*]] = affine.apply #[[$MAP1]]([[VAL_40]]){{\[}}[[VAL_34]], [[VAL_27]]]
// CHECK:             [[VAL_54:%.*]] = affine.apply #[[$MAP1]]([[VAL_47]]){{\[}}[[VAL_33]], [[VAL_32]]]
// CHECK:             [[VAL_55:%.*]] = affine.apply #[[$MAP1]]([[VAL_46]]){{\[}}[[VAL_33]], [[VAL_32]]]
// CHECK:             [[VAL_56:%.*]] = arith.addi [[VAL_52]], [[VAL_54]] : index
// CHECK:             [[VAL_57:%.*]] = arith.addi [[VAL_53]], [[VAL_55]] : index
// CHECK:             [[VAL_58:%.*]] = memref.load [[VAL_30]]{{\[}}[[VAL_56]], [[VAL_57]]] : memref<?x?xf32>
// CHECK:             memref.store [[VAL_58]], [[VAL_31]]{{\[}}[[VAL_57]], [[VAL_56]]] : memref<?x?xf32>
// CHECK:             gpu.terminator
// CHECK:           }
// CHECK:           return
// CHECK:         }
// CHECK:       }

// -----

// 2-d parallel loop mapped to block.y and sequential
// The expected output keeps the sequential dimension as an scf.for inside the
// launch body and sizes only grid y from the loop bounds.

func.func @parallel_loop_bidy_seq(%arg0 : index, %arg1 : index, %arg2 : index,
                                  %arg3 : index, %arg4 : index,
                                  %buf : memref<?x?xf32>,
                                  %res : memref<?x?xf32>) {
  %step = arith.constant 2 : index
  scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
                                          step (%arg4, %step) {
    %val = memref.load %buf[%i0, %i1] : memref<?x?xf32>
    memref.store %val, %res[%i1, %i0] : memref<?x?xf32>
  } { mapping = [
      #gpu.loop_dim_map<processor = block_y, map = (d0) -> (d0), bound = (d0) -> (d0)>,
      #gpu.loop_dim_map<processor = sequential, map = (d0) -> (d0), bound = (d0) -> (d0)>
    ] }
  return
}

// CHECK:       #[[$MAP0:.*]] = affine_map<(d0)[s0, s1] -> ((d0 - s0) ceildiv s1)>
// CHECK:       #[[$MAP1:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s0 + s1)>

// CHECK:       module {
// CHECK-LABEL:   func @parallel_loop_bidy_seq(
// CHECK-SAME:      [[VAL_59:%.*]]: index, [[VAL_60:%.*]]: index, [[VAL_61:%.*]]: index, [[VAL_62:%.*]]: index, [[VAL_63:%.*]]: index, [[VAL_64:%.*]]: memref<?x?xf32>, [[VAL_65:%.*]]: memref<?x?xf32>) {
// CHECK:           [[VAL_66:%.*]] = arith.constant 2 : index
// CHECK:           [[VAL_67:%.*]] = arith.constant 1 : index
// CHECK:           [[VAL_68:%.*]] = affine.apply #[[$MAP0]]([[VAL_61]]){{\[}}[[VAL_59]], [[VAL_63]]]
// CHECK:           gpu.launch blocks([[VAL_69:%.*]], [[VAL_70:%.*]], [[VAL_71:%.*]]) in ([[VAL_72:%.*]] = [[VAL_67]], [[VAL_73:%.*]] = [[VAL_68]], [[VAL_74:%.*]] = [[VAL_67]]) threads([[VAL_75:%.*]], [[VAL_76:%.*]], [[VAL_77:%.*]]) in ([[VAL_78:%.*]] = [[VAL_67]], [[VAL_79:%.*]] = [[VAL_67]], [[VAL_80:%.*]] = [[VAL_67]]) {
// CHECK:             [[VAL_81:%.*]] = affine.apply #[[$MAP1]]([[VAL_70]]){{\[}}[[VAL_63]], [[VAL_59]]]
// CHECK:             scf.for [[VAL_82:%.*]] = [[VAL_60]] to [[VAL_62]] step [[VAL_66]] {
// CHECK:               [[VAL_83:%.*]] = memref.load [[VAL_64]]{{\[}}[[VAL_81]], [[VAL_82]]] : memref<?x?xf32>
// CHECK:               memref.store [[VAL_83]], [[VAL_65]]{{\[}}[[VAL_82]], [[VAL_81]]] : memref<?x?xf32>
// CHECK:             }
// CHECK:             gpu.terminator
// CHECK:           }
// CHECK:           return
// CHECK:         }
// CHECK:       }

// -----

// tiled 2-d parallel loop mapped to block.y and seq. and thread.y and seq.
func.func @parallel_loop_tiled_seq(%arg0 : index, %arg1 : index, %arg2 : index,
                                   %arg3 : index,
                                   %buf : memref<?x?xf32>,
                                   %res : memref<?x?xf32>) {
  %zero = arith.constant 0 : index
  %one = arith.constant 1 : index
  %four = arith.constant 4 : index
  // In both loops only the first dimension is mapped to hardware (block_y /
  // thread_y); the second dimension is marked sequential.
  scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
                                          step (%four, %four) {
    scf.parallel (%si0, %si1) = (%zero, %zero) to (%four, %four)
                                            step (%one, %one) {
      %idx0 = arith.addi %i0, %si0 : index
      %idx1 = arith.addi %i1, %si1 : index
      %val = memref.load %buf[%idx0, %idx1] : memref<?x?xf32>
      memref.store %val, %res[%idx1, %idx0] : memref<?x?xf32>
    } { mapping = [
        #gpu.loop_dim_map<processor = thread_y, map = (d0) -> (d0), bound = (d0) -> (d0)>,
        #gpu.loop_dim_map<processor = sequential, map = (d0) -> (d0), bound = (d0) -> (d0)>
      ] }
  } { mapping = [
      #gpu.loop_dim_map<processor = block_y, map = (d0) -> (d0), bound = (d0) -> (d0)>,
      #gpu.loop_dim_map<processor = sequential, map = (d0) -> (d0), bound = (d0) -> (d0)>
    ] }
  return
}

// CHECK:       #[[$MAP0:.*]] = affine_map<(d0)[s0, s1] -> ((d0 - s0) ceildiv s1)>
// CHECK:       #[[$MAP1:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s0 + s1)>

// CHECK:       module {
// CHECK-LABEL:   func @parallel_loop_tiled_seq(
// CHECK-SAME:      [[VAL_84:%.*]]: index, [[VAL_85:%.*]]: index, [[VAL_86:%.*]]: index, [[VAL_87:%.*]]: index, [[VAL_88:%.*]]: memref<?x?xf32>, [[VAL_89:%.*]]: memref<?x?xf32>) {
// CHECK:           [[VAL_90:%.*]] = arith.constant 0 : index
// CHECK:           [[VAL_91:%.*]] = arith.constant 1 : index
// CHECK:           [[VAL_92:%.*]] = arith.constant 4 : index
// CHECK:           [[VAL_93:%.*]] = arith.constant 1 : index
// CHECK:           [[VAL_94:%.*]] = affine.apply #[[$MAP0]]([[VAL_86]]){{\[}}[[VAL_84]], [[VAL_92]]]
// CHECK:           [[VAL_95:%.*]] = affine.apply #[[$MAP0]]([[VAL_92]]){{\[}}[[VAL_90]], [[VAL_91]]]
// CHECK:           gpu.launch blocks([[VAL_96:%.*]], [[VAL_97:%.*]], [[VAL_98:%.*]]) in ([[VAL_99:%.*]] = [[VAL_93]], [[VAL_100:%.*]] = [[VAL_94]], [[VAL_101:%.*]] = [[VAL_93]]) threads([[VAL_102:%.*]], [[VAL_103:%.*]], [[VAL_104:%.*]]) in ([[VAL_105:%.*]] = [[VAL_93]], [[VAL_106:%.*]] = [[VAL_95]], [[VAL_107:%.*]] = [[VAL_93]]) {
// CHECK:             [[VAL_108:%.*]] = affine.apply #[[$MAP1]]([[VAL_97]]){{\[}}[[VAL_92]], [[VAL_84]]]
// CHECK:             scf.for [[VAL_109:%.*]] = [[VAL_85]] to [[VAL_87]] step [[VAL_92]] {
// CHECK:               [[VAL_110:%.*]] = affine.apply #[[$MAP1]]([[VAL_103]]){{\[}}[[VAL_91]], [[VAL_90]]]
// CHECK:               scf.for [[VAL_111:%.*]] = [[VAL_90]] to [[VAL_92]] step [[VAL_91]] {
// CHECK:                 [[VAL_112:%.*]] = arith.addi [[VAL_108]], [[VAL_110]] : index
// CHECK:                 [[VAL_113:%.*]] = arith.addi [[VAL_109]], [[VAL_111]] : index
// CHECK:                 [[VAL_114:%.*]] = memref.load [[VAL_88]]{{\[}}[[VAL_112]], [[VAL_113]]] : memref<?x?xf32>
// CHECK:                 memref.store [[VAL_114]], [[VAL_89]]{{\[}}[[VAL_113]], [[VAL_112]]] : memref<?x?xf32>
// CHECK:               }
// CHECK:             }
// CHECK:             gpu.terminator
// CHECK:           }
// CHECK:           return
// CHECK:         }
// CHECK:       }

// -----

// Tiled loop over strided subviews whose inner upper bounds are computed
// values (a squared affine.min and an affine.min) rather than constants.
// The expected output uses constant thread counts (4 and 3 - presumably
// derived from the affine.min constants 2, squared, and 3) and guards the
// body with scf.if comparisons against the computed bounds.

#map0 = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)>
#map1 = affine_map<(d0)[s0] -> (2, -d0 + s0)>
#map2 = affine_map<(d0)[s0] -> (3, -d0 + s0)>
#map3 = affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)>

module {
  func.func @sum(%arg0: memref<?x?xf32, #map0>, %arg1: memref<?x?xf32, #map0>, %arg2: memref<?x?xf32, #map0>) {
    %c1 = arith.constant 1 : index
    %c0 = arith.constant 0 : index
    %c3 = arith.constant 3 : index
    %c2 = arith.constant 2 : index
    %0 = memref.dim %arg0, %c0 : memref<?x?xf32, #map0>
    %1 = memref.dim %arg0, %c1 : memref<?x?xf32, #map0>
    scf.parallel (%arg3, %arg4) = (%c0, %c0) to (%0, %1) step (%c2, %c3) {
      %2 = memref.dim %arg0, %c0 : memref<?x?xf32, #map0>
      %3 = affine.min #map1(%arg3)[%2]
      %squared_min = arith.muli %3, %3 : index
      %4 = memref.dim %arg0, %c1 : memref<?x?xf32, #map0>
      %5 = affine.min #map2(%arg4)[%4]
      %6 = memref.subview %arg0[%arg3, %arg4][%squared_min, %5][%c1, %c1] : memref<?x?xf32, #map0> to memref<?x?xf32, #map3>
      %7 = memref.dim %arg1, %c0 : memref<?x?xf32, #map0>
      %8 = affine.min #map1(%arg3)[%7]
      %9 = memref.dim %arg1, %c1 : memref<?x?xf32, #map0>
      %10 = affine.min #map2(%arg4)[%9]
      %11 = memref.subview %arg1[%arg3, %arg4][%8, %10][%c1, %c1] : memref<?x?xf32, #map0> to memref<?x?xf32, #map3>
      %12 = memref.dim %arg2, %c0 : memref<?x?xf32, #map0>
      %13 = affine.min #map1(%arg3)[%12]
      %14 = memref.dim %arg2, %c1 : memref<?x?xf32, #map0>
      %15 = affine.min #map2(%arg4)[%14]
      %16 = memref.subview %arg2[%arg3, %arg4][%13, %15][%c1, %c1] : memref<?x?xf32, #map0> to memref<?x?xf32, #map3>
      scf.parallel (%arg5, %arg6) = (%c0, %c0) to (%squared_min, %5) step (%c1, %c1) {
        %17 = memref.load %6[%arg5, %arg6] : memref<?x?xf32, #map3>
        %18 = memref.load %11[%arg5, %arg6] : memref<?x?xf32, #map3>
        %19 = memref.load %16[%arg5, %arg6] : memref<?x?xf32, #map3>
        %20 = arith.addf %17, %18 : f32
        memref.store %20, %16[%arg5, %arg6] : memref<?x?xf32, #map3>
        scf.yield
      } {mapping = [#gpu.loop_dim_map<bound = (d0) -> (d0), map = (d0) -> (d0), processor = thread_x>, #gpu.loop_dim_map<bound = (d0) -> (d0), map = (d0) -> (d0), processor = thread_y>]}
      scf.yield
    } {mapping = [#gpu.loop_dim_map<bound = (d0) -> (d0), map = (d0) -> (d0), processor = block_x>, #gpu.loop_dim_map<bound = (d0) -> (d0), map = (d0) -> (d0), processor = block_y>]}
    return
  }
}

// CHECK:       #[[$MAP0:.*]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)>
// CHECK:       #[[$MAP1:.*]] = affine_map<(d0)[s0, s1] -> ((d0 - s0) ceildiv s1)>
// CHECK:       #[[$MAP2:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s0 + s1)>
// CHECK:       #[[$MAP3:.*]] = affine_map<(d0)[s0] -> (2, -d0 + s0)>
// CHECK:       #[[$MAP4:.*]] = affine_map<(d0)[s0] -> (3, -d0 + s0)>
// CHECK:       #[[$MAP5:.*]] = affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)>

// CHECK:       module {
// CHECK-LABEL:   func @sum(
// CHECK-SAME:      [[VAL_0:%.*]]: memref<?x?xf32, #[[$MAP0]]>, [[VAL_1:%.*]]: memref<?x?xf32, #[[$MAP0]]>, [[VAL_2:%.*]]: memref<?x?xf32, #[[$MAP0]]>) {
// CHECK:           %[[C1:.*]] = arith.constant 1 : index
// CHECK:           %[[C0:.*]] = arith.constant 0 : index
// CHECK:           %[[C3:.*]] = arith.constant 3 : index
// CHECK:           %[[C2:.*]] = arith.constant 2 : index
// CHECK:           [[VAL_7:%.*]] = memref.dim [[VAL_0]], %[[C0]] : memref<?x?xf32, #[[$MAP0]]>
// CHECK:           [[VAL_8:%.*]] = memref.dim [[VAL_0]], %[[C1]] : memref<?x?xf32, #[[$MAP0]]>
// CHECK:           [[VAL_9:%.*]] = arith.constant 1 : index
// CHECK:           [[VAL_10:%.*]] = affine.apply #[[$MAP1]]([[VAL_7]]){{\[}}%[[C0]], %[[C2]]]
// CHECK:           [[VAL_11:%.*]] = affine.apply #[[$MAP1]]([[VAL_8]]){{\[}}%[[C0]], %[[C3]]]
// CHECK:           [[VAL_12:%.*]] = arith.constant 4 : index
// CHECK:           [[VAL_13:%.*]] = affine.apply #[[$MAP1]]([[VAL_12]]){{\[}}%[[C0]], %[[C1]]]
// CHECK:           [[VAL_14:%.*]] = arith.constant 3 : index
// CHECK:           [[VAL_15:%.*]] = affine.apply #[[$MAP1]]([[VAL_14]]){{\[}}%[[C0]], %[[C1]]]
// CHECK:           gpu.launch blocks([[VAL_16:%.*]], [[VAL_17:%.*]], [[VAL_18:%.*]]) in ([[VAL_19:%.*]] = [[VAL_10]], [[VAL_20:%.*]] = [[VAL_11]], [[VAL_21:%.*]] = [[VAL_9]]) threads([[VAL_22:%.*]], [[VAL_23:%.*]], [[VAL_24:%.*]]) in ([[VAL_25:%.*]] = [[VAL_13]], [[VAL_26:%.*]] = [[VAL_15]], [[VAL_27:%.*]] = [[VAL_9]]) {
// CHECK:             [[VAL_28:%.*]] = affine.apply #[[$MAP2]]([[VAL_16]]){{\[}}%[[C2]], %[[C0]]]
// CHECK:             [[VAL_29:%.*]] = affine.apply #[[$MAP2]]([[VAL_17]]){{\[}}%[[C3]], %[[C0]]]
// CHECK:             [[VAL_30:%.*]] = memref.dim [[VAL_0]], %[[C0]] : memref<?x?xf32, #[[$MAP0]]>
// CHECK:             [[VAL_31:%.*]] = affine.min #[[$MAP3]]([[VAL_28]]){{\[}}[[VAL_30]]]
// CHECK:             [[VAL_31_SQUARED:%.*]] = arith.muli [[VAL_31]], [[VAL_31]] : index
// CHECK:             [[VAL_32:%.*]] = memref.dim [[VAL_0]], %[[C1]] : memref<?x?xf32, #[[$MAP0]]>
// CHECK:             [[VAL_33:%.*]] = affine.min #[[$MAP4]]([[VAL_29]]){{\[}}[[VAL_32]]]
// CHECK:             [[VAL_34:%.*]] = memref.subview [[VAL_0]]{{\[}}[[VAL_28]], [[VAL_29]]] {{\[}}[[VAL_31_SQUARED]], [[VAL_33]]] {{\[}}%[[C1]], %[[C1]]] : memref<?x?xf32, #[[$MAP0]]> to memref<?x?xf32, #[[$MAP5]]>
// CHECK:             [[VAL_35:%.*]] = memref.dim [[VAL_1]], %[[C0]] : memref<?x?xf32, #[[$MAP0]]>
// CHECK:             [[VAL_36:%.*]] = affine.min #[[$MAP3]]([[VAL_28]]){{\[}}[[VAL_35]]]
// CHECK:             [[VAL_37:%.*]] = memref.dim [[VAL_1]], %[[C1]] : memref<?x?xf32, #[[$MAP0]]>
// CHECK:             [[VAL_38:%.*]] = affine.min #[[$MAP4]]([[VAL_29]]){{\[}}[[VAL_37]]]
// CHECK:             [[VAL_39:%.*]] = memref.subview [[VAL_1]]{{\[}}[[VAL_28]], [[VAL_29]]] {{\[}}[[VAL_36]], [[VAL_38]]] {{\[}}%[[C1]], %[[C1]]] : memref<?x?xf32, #[[$MAP0]]> to memref<?x?xf32, #[[$MAP5]]>
// CHECK:             [[VAL_40:%.*]] = memref.dim [[VAL_2]], %[[C0]] : memref<?x?xf32, #[[$MAP0]]>
// CHECK:             [[VAL_41:%.*]] = affine.min #[[$MAP3]]([[VAL_28]]){{\[}}[[VAL_40]]]
// CHECK:             [[VAL_42:%.*]] = memref.dim [[VAL_2]], %[[C1]] : memref<?x?xf32, #[[$MAP0]]>
// CHECK:             [[VAL_43:%.*]] = affine.min #[[$MAP4]]([[VAL_29]]){{\[}}[[VAL_42]]]
// CHECK:             [[VAL_44:%.*]] = memref.subview [[VAL_2]]{{\[}}[[VAL_28]], [[VAL_29]]] {{\[}}[[VAL_41]], [[VAL_43]]] {{\[}}%[[C1]], %[[C1]]] : memref<?x?xf32, #[[$MAP0]]> to memref<?x?xf32, #[[$MAP5]]>
// CHECK:             [[VAL_45:%.*]] = affine.apply #[[$MAP2]]([[VAL_22]]){{\[}}%[[C1]], %[[C0]]]
// CHECK:             [[VAL_46:%.*]] = arith.cmpi slt, [[VAL_45]], [[VAL_31_SQUARED]] : index
// CHECK:             scf.if [[VAL_46]] {
// CHECK:               [[VAL_47:%.*]] = affine.apply #[[$MAP2]]([[VAL_23]]){{\[}}%[[C1]], %[[C0]]]
// CHECK:               [[VAL_48:%.*]] = arith.cmpi slt, [[VAL_47]], [[VAL_33]] : index
// CHECK:               scf.if [[VAL_48]] {
// CHECK:                 [[VAL_49:%.*]] = memref.load [[VAL_34]]{{\[}}[[VAL_45]], [[VAL_47]]] : memref<?x?xf32, #[[$MAP5]]>
// CHECK:                 [[VAL_50:%.*]] = memref.load [[VAL_39]]{{\[}}[[VAL_45]], [[VAL_47]]] : memref<?x?xf32, #[[$MAP5]]>
// CHECK:                 [[VAL_51:%.*]] = memref.load [[VAL_44]]{{\[}}[[VAL_45]], [[VAL_47]]] : memref<?x?xf32, #[[$MAP5]]>
// CHECK:                 [[VAL_52:%.*]] = arith.addf [[VAL_49]], [[VAL_50]] : f32
// CHECK:                 memref.store [[VAL_52]], [[VAL_44]]{{\[}}[[VAL_45]], [[VAL_47]]] : memref<?x?xf32, #[[$MAP5]]>
// CHECK:               }
// CHECK:             }
// CHECK:             gpu.terminator
// CHECK:           }
// CHECK:           return
// CHECK:         }
// CHECK:       }

// -----

// Optional attribute lowering test

func.func @parallel_loop_optional_attr() {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  scf.parallel (%i0) = (%c0) to (%c1) step (%c1) {
  } { mapping = [#gpu.loop_dim_map<processor = block_x, map = (d0) -> (d0), bound = (d0) -> (d0)>], optional_attr = 1 }
  // CHECK: optional_attr = 1
  return
}

// -----

// Mapping to the same processor twice. Cannot be mapped.

func.func @parallel_double_map(%arg0 : index, %arg1 : index, %arg2 : index,
                               %arg3 : index,
                               %buf : memref<?x?xf32>,
                               %res : memref<?x?xf32>) {
  %four = arith.constant 4 : index
  scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
                                          step (%four, %four) {
  } { mapping = [
      #gpu.loop_dim_map<processor = block_y, map = (d0) -> (d0), bound = (d0) -> (d0)>,
      #gpu.loop_dim_map<processor = block_y, map = (d0) -> (d0), bound = (d0) -> (d0)>
    ] }
  return
}

// CHECK-LABEL: @parallel_double_map
// CHECK: scf.parallel

// -----

// Loop with loop-variant upper bound. Cannot be mapped.
func.func @parallel_loop_loop_variant_bound(%arg0 : index, %arg1 : index, %arg2 : index,
                                            %arg3 : index,
                                            %buf : memref<?x?xf32>,
                                            %res : memref<?x?xf32>) {
  %zero = arith.constant 0 : index
  %one = arith.constant 1 : index
  %four = arith.constant 4 : index
  scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
                                          step (%four, %four) {
    // The inner upper bounds are the outer induction variables, so they
    // change on every outer iteration.
    scf.parallel (%si0, %si1) = (%zero, %zero) to (%i0, %i1)
                                            step (%one, %one) {
      %idx0 = arith.addi %i0, %si0 : index
      %idx1 = arith.addi %i1, %si1 : index
      %val = memref.load %buf[%idx0, %idx1] : memref<?x?xf32>
      memref.store %val, %res[%idx1, %idx0] : memref<?x?xf32>
    } { mapping = [
        #gpu.loop_dim_map<processor = thread_y, map = (d0) -> (d0), bound = (d0) -> (d0)>,
        #gpu.loop_dim_map<processor = sequential, map = (d0) -> (d0), bound = (d0) -> (d0)>
      ] }
  } { mapping = [
      #gpu.loop_dim_map<processor = block_y, map = (d0) -> (d0), bound = (d0) -> (d0)>,
      #gpu.loop_dim_map<processor = sequential, map = (d0) -> (d0), bound = (d0) -> (d0)>
    ] }
  return
}

// Both loops are expected to survive the pass unchanged.
// CHECK-LABEL: @parallel_loop_loop_variant_bound
// CHECK: scf.parallel
// CHECK: scf.parallel

// -----

// Loop without annotations. Cannot be mapped.
func.func @parallel_no_annotations(%arg0 : index, %arg1 : index, %arg2 : index,
                                   %arg3 : index,
                                   %buf : memref<?x?xf32>,
                                   %res : memref<?x?xf32>) {
  %four = arith.constant 4 : index
  // No mapping attribute is attached to this loop.
  scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
                                          step (%four, %four) {
  }
  return
}

// The loop is expected to remain an scf.parallel in the output.
// CHECK-LABEL: @parallel_no_annotations
// CHECK: scf.parallel