// RUN: mlir-opt %s -test-scf-pipelining -split-input-file | FileCheck %s
// RUN: mlir-opt %s -test-scf-pipelining=annotate -split-input-file | FileCheck %s --check-prefix ANNOTATE
// RUN: mlir-opt %s -test-scf-pipelining=no-epilogue-peeling -split-input-file | FileCheck %s --check-prefix NOEPILOGUE

// Tests for software pipelining of scf.for loops. The -test-scf-pipelining
// pass is driven by per-op attributes placed on the input IR:
//   __test_pipelining_loop__      marks the scf.for loop to pipeline,
//   __test_pipelining_stage__     the pipeline stage assigned to each op,
//   __test_pipelining_op_order__  the relative order of ops inside the kernel.
// Each test checks the generated prologue, kernel loop, and epilogue.

// Two-stage pipeline (stages 0 and 1): the load runs one iteration ahead of
// the add/store, so one value is carried through a single iter_arg.
// CHECK-LABEL: simple_pipeline(
// CHECK-SAME: %[[A:.*]]: memref<?xf32>, %[[R:.*]]: memref<?xf32>) {
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
// Prologue:
// CHECK: %[[L0:.*]] = memref.load %[[A]][%[[C0]]] : memref<?xf32>
// Kernel:
// CHECK-NEXT: %[[L1:.*]] = scf.for %[[IV:.*]] = %[[C0]] to %[[C3]]
// CHECK-SAME: step %[[C1]] iter_args(%[[LARG:.*]] = %[[L0]]) -> (f32) {
// CHECK-NEXT: %[[ADD0:.*]] = arith.addf %[[LARG]], %{{.*}} : f32
// CHECK-NEXT: memref.store %[[ADD0]], %[[R]][%[[IV]]] : memref<?xf32>
// CHECK-NEXT: %[[IV1:.*]] = arith.addi %[[IV]], %[[C1]] : index
// CHECK-NEXT: %[[LR:.*]] = memref.load %[[A]][%[[IV1]]] : memref<?xf32>
// CHECK-NEXT: scf.yield %[[LR]] : f32
// CHECK-NEXT: }
// Epilogue:
// CHECK-NEXT: %[[ADD1:.*]] = arith.addf %[[L1]], %{{.*}} : f32
// CHECK-NEXT: memref.store %[[ADD1]], %[[R]][%[[C3]]] : memref<?xf32>
func.func @simple_pipeline(%A: memref<?xf32>, %result: memref<?xf32>) {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c4 = arith.constant 4 : index
  %cf = arith.constant 1.0 : f32
  scf.for %i0 = %c0 to %c4 step %c1 {
    %A_elem = memref.load %A[%i0] { __test_pipelining_stage__ = 0, __test_pipelining_op_order__ = 2 } : memref<?xf32>
    %A1_elem = arith.addf %A_elem, %cf { __test_pipelining_stage__ = 1, __test_pipelining_op_order__ = 0 } : f32
    memref.store %A1_elem, %result[%i0] { __test_pipelining_stage__ = 1, __test_pipelining_op_order__ = 1 } : memref<?xf32>
  } { __test_pipelining_loop__ }
  return
}

// -----

// Same two-stage shape but with a non-unit step (3) and a two-iteration
// stage distance (stages 0 and 2), so two loads are peeled into the
// prologue and two add/store pairs into the epilogue.
// CHECK-LABEL: simple_pipeline_step(
// CHECK-SAME: %[[A:.*]]: memref<?xf32>, %[[R:.*]]: memref<?xf32>) {
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
// CHECK-DAG: %[[C5:.*]] = arith.constant 5 : index
// CHECK-DAG: %[[C6:.*]] = arith.constant 6 : index
// CHECK-DAG: %[[C9:.*]] = arith.constant 9 : index
// Prologue:
// CHECK: %[[L0:.*]] = memref.load %[[A]][%[[C0]]] : memref<?xf32>
// CHECK: %[[L1:.*]] = memref.load %[[A]][%[[C3]]] : memref<?xf32>
// Kernel:
// CHECK-NEXT: %[[L2:.*]]:2 = scf.for %[[IV:.*]] = %[[C0]] to %[[C5]]
// CHECK-SAME: step %[[C3]] iter_args(%[[LARG0:.*]] = %[[L0]], %[[LARG1:.*]] = %[[L1]]) -> (f32, f32) {
// CHECK-NEXT: %[[ADD0:.*]] = arith.addf %[[LARG0]], %{{.*}} : f32
// CHECK-NEXT: memref.store %[[ADD0]], %[[R]][%[[IV]]] : memref<?xf32>
// CHECK-NEXT: %[[IV1:.*]] = arith.addi %[[IV]], %[[C6]] : index
// CHECK-NEXT: %[[LR:.*]] = memref.load %[[A]][%[[IV1]]] : memref<?xf32>
// CHECK-NEXT: scf.yield %[[LARG1]], %[[LR]] : f32, f32
// CHECK-NEXT: }
// Epilogue:
// CHECK-NEXT: %[[ADD1:.*]] = arith.addf %[[L2]]#0, %{{.*}} : f32
// CHECK-NEXT: memref.store %[[ADD1]], %[[R]][%[[C6]]] : memref<?xf32>
// CHECK-NEXT: %[[ADD2:.*]] = arith.addf %[[L2]]#1, %{{.*}} : f32
// CHECK-NEXT: memref.store %[[ADD2]], %[[R]][%[[C9]]] : memref<?xf32>
func.func @simple_pipeline_step(%A: memref<?xf32>, %result: memref<?xf32>) {
  %c0 = arith.constant 0 : index
  %c3 = arith.constant 3 : index
  %c11 = arith.constant 11 : index
  %cf = arith.constant 1.0 : f32
  scf.for %i0 = %c0 to %c11 step %c3 {
    %A_elem = memref.load %A[%i0] { __test_pipelining_stage__ = 0, __test_pipelining_op_order__ = 2 } : memref<?xf32>
    %A1_elem = arith.addf %A_elem, %cf { __test_pipelining_stage__ = 2, __test_pipelining_op_order__ = 0 } : f32
    memref.store %A1_elem, %result[%i0] { __test_pipelining_stage__ = 2, __test_pipelining_op_order__ = 1 } : memref<?xf32>
  } { __test_pipelining_loop__ }
  return
}

// -----

// Three-stage pipeline (load / add / store in stages 0, 1, 2): two values
// are in flight, carried through two iter_args. Also checked with the
// annotate option (iteration + part labels on every cloned op) and with
// epilogue peeling disabled.
// CHECK-LABEL: three_stage(
// CHECK-SAME: %[[A:.*]]: memref<?xf32>, %[[R:.*]]: memref<?xf32>) {
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
// Prologue:
// CHECK: %[[L0:.*]] = memref.load %[[A]][%[[C0]]] : memref<?xf32>
// CHECK-NEXT: %[[ADD0:.*]] = arith.addf %[[L0]], %{{.*}} : f32
// CHECK-NEXT: %[[L1:.*]] = memref.load %[[A]][%[[C1]]] : memref<?xf32>
// Kernel:
// CHECK-NEXT: %[[LR:.*]]:2 = scf.for %[[IV:.*]] = %[[C0]] to %[[C2]]
// CHECK-SAME: step %[[C1]] iter_args(%[[ADDARG:.*]] = %[[ADD0]],
// CHECK-SAME: %[[LARG:.*]] = %[[L1]]) -> (f32, f32) {
// CHECK-NEXT: memref.store %[[ADDARG]], %[[R]][%[[IV]]] : memref<?xf32>
// CHECK-NEXT: %[[ADD1:.*]] = arith.addf %[[LARG]], %{{.*}} : f32
// CHECK-NEXT: %[[IV2:.*]] = arith.addi %[[IV]], %[[C2]] : index
// CHECK-NEXT: %[[L3:.*]] = memref.load %[[A]][%[[IV2]]] : memref<?xf32>
// CHECK-NEXT: scf.yield %[[ADD1]], %[[L3]] : f32, f32
// CHECK-NEXT: }
// Epilogue:
// CHECK-NEXT: memref.store %[[LR]]#0, %[[R]][%[[C2]]] : memref<?xf32>
// CHECK-NEXT: %[[ADD2:.*]] = arith.addf %[[LR]]#1, %{{.*}} : f32
// CHECK-NEXT: memref.store %[[ADD2]], %[[R]][%[[C3]]] : memref<?xf32>

// Prologue:
// ANNOTATE: memref.load {{.*}} {__test_pipelining_iteration = 0 : i32, __test_pipelining_part = "prologue"}
// ANNOTATE: memref.load {{.*}} {__test_pipelining_iteration = 1 : i32, __test_pipelining_part = "prologue"}
// Kernel:
// ANNOTATE: scf.for
// ANNOTATE: memref.store {{.*}} {__test_pipelining_iteration = 0 : i32, __test_pipelining_part = "kernel"}
// ANNOTATE: arith.addf {{.*}} {__test_pipelining_iteration = 0 : i32, __test_pipelining_part = "kernel"}
// ANNOTATE: memref.load {{.*}} {__test_pipelining_iteration = 0 : i32, __test_pipelining_part = "kernel"}
// ANNOTATE: scf.yield
// ANNOTATE: }
// Epilogue:
// ANNOTATE: memref.store {{.*}} {__test_pipelining_iteration = 0 : i32, __test_pipelining_part = "epilogue"}
// ANNOTATE: arith.addf {{.*}} {__test_pipelining_iteration = 0 : i32, __test_pipelining_part = "epilogue"}
// ANNOTATE: memref.store {{.*}} {__test_pipelining_iteration = 1 : i32, __test_pipelining_part = "epilogue"}

// With epilogue peeling disabled the kernel runs the full trip count and the
// late-stage ops are guarded by scf.if predicates (cmpi on the IV) instead of
// being peeled into an epilogue.
// NOEPILOGUE-LABEL: three_stage(
// NOEPILOGUE-SAME: %[[A:.*]]: memref<?xf32>, %[[R:.*]]: memref<?xf32>) {
// NOEPILOGUE-DAG: %[[C0:.*]] = arith.constant 0 : index
// NOEPILOGUE-DAG: %[[C1:.*]] = arith.constant 1 : index
// NOEPILOGUE-DAG: %[[C2:.*]] = arith.constant 2 : index
// NOEPILOGUE-DAG: %[[C3:.*]] = arith.constant 3 : index
// NOEPILOGUE-DAG: %[[C4:.*]] = arith.constant 4 : index
// NOEPILOGUE-DAG: %[[CF:.*]] = arith.constant 0.000000e+00 : f32
// Prologue:
// NOEPILOGUE: %[[L0:.*]] = memref.load %[[A]][%[[C0]]] : memref<?xf32>
// NOEPILOGUE-NEXT: %[[ADD0:.*]] = arith.addf %[[L0]], %{{.*}} : f32
// NOEPILOGUE-NEXT: %[[L1:.*]] = memref.load %[[A]][%[[C1]]] : memref<?xf32>
// Kernel:
// NOEPILOGUE-NEXT: %[[LR:.*]]:2 = scf.for %[[IV:.*]] = %[[C0]] to %[[C4]]
// NOEPILOGUE-SAME: step %[[C1]] iter_args(%[[ADDARG:.*]] = %[[ADD0]],
// NOEPILOGUE-SAME: %[[LARG:.*]] = %[[L1]]) -> (f32, f32) {
// NOEPILOGUE-DAG: %[[S0:.*]] = arith.cmpi slt, %[[IV]], %[[C2]] : index
// NOEPILOGUE-DAG: %[[S1:.*]] = arith.cmpi slt, %[[IV]], %[[C3]] : index
// NOEPILOGUE-NEXT: memref.store %[[ADDARG]], %[[R]][%[[IV]]] : memref<?xf32>
// NOEPILOGUE-NEXT: %[[ADD1:.*]] = scf.if %[[S1]] -> (f32) {
// NOEPILOGUE-NEXT: %[[PADD:.*]] = arith.addf %[[LARG]], %{{.*}} : f32
// NOEPILOGUE-NEXT: scf.yield %[[PADD]] : f32
// NOEPILOGUE-NEXT: } else {
// NOEPILOGUE-NEXT: scf.yield %[[CF]] : f32
// NOEPILOGUE-NEXT: }
// NOEPILOGUE-NEXT: %[[IV2:.*]] = arith.addi %[[IV]], %[[C2]] : index
// NOEPILOGUE-NEXT: %[[L3:.*]] = scf.if %[[S0]] -> (f32) {
// NOEPILOGUE-NEXT: %[[PL:.*]] = memref.load %[[A]][%[[IV2]]] : memref<?xf32>
// NOEPILOGUE-NEXT: scf.yield %[[PL]] : f32
// NOEPILOGUE-NEXT: } else {
// NOEPILOGUE-NEXT: scf.yield %[[CF]] : f32
// NOEPILOGUE-NEXT: }
// NOEPILOGUE-NEXT: scf.yield %[[ADD1]], %[[L3]] : f32, f32
// NOEPILOGUE-NEXT: }
// No epilogue should be generated.
// NOEPILOGUE-NOT: memref.store
// NOEPILOGUE: return

func.func @three_stage(%A: memref<?xf32>, %result: memref<?xf32>) {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c4 = arith.constant 4 : index
  %cf = arith.constant 1.0 : f32
  scf.for %i0 = %c0 to %c4 step %c1 {
    %A_elem = memref.load %A[%i0] { __test_pipelining_stage__ = 0, __test_pipelining_op_order__ = 2 } : memref<?xf32>
    %A1_elem = arith.addf %A_elem, %cf { __test_pipelining_stage__ = 1, __test_pipelining_op_order__ = 1 } : f32
    memref.store %A1_elem, %result[%i0] { __test_pipelining_stage__ = 2, __test_pipelining_op_order__ = 0 } : memref<?xf32>
  } { __test_pipelining_loop__ }
  return
}

// -----
// The loaded value is consumed four stages later (stage 0 -> stage 4), so its
// long live range requires four iter_args that shift by one each iteration.
// CHECK-LABEL: long_liverange(
// CHECK-SAME: %[[A:.*]]: memref<?xf32>, %[[R:.*]]: memref<?xf32>) {
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index
// CHECK-DAG: %[[C6:.*]] = arith.constant 6 : index
// CHECK-DAG: %[[C7:.*]] = arith.constant 7 : index
// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index
// CHECK-DAG: %[[C9:.*]] = arith.constant 9 : index
// Prologue:
// CHECK: %[[L0:.*]] = memref.load %[[A]][%[[C0]]] : memref<?xf32>
// CHECK-NEXT: %[[L1:.*]] = memref.load %[[A]][%[[C1]]] : memref<?xf32>
// CHECK-NEXT: %[[L2:.*]] = memref.load %[[A]][%[[C2]]] : memref<?xf32>
// CHECK-NEXT: %[[L3:.*]] = memref.load %[[A]][%[[C3]]] : memref<?xf32>
// Kernel:
// CHECK-NEXT: %[[LR:.*]]:4 = scf.for %[[IV:.*]] = %[[C0]] to %[[C6]]
// CHECK-SAME: step %[[C1]] iter_args(%[[LA0:.*]] = %[[L0]],
// CHECK-SAME: %[[LA1:.*]] = %[[L1]], %[[LA2:.*]] = %[[L2]],
// CHECK-SAME: %[[LA3:.*]] = %[[L3]]) -> (f32, f32, f32, f32) {
// CHECK-NEXT: %[[ADD0:.*]] = arith.addf %[[LA0]], %{{.*}} : f32
// CHECK-NEXT: memref.store %[[ADD0]], %[[R]][%[[IV]]] : memref<?xf32>
// CHECK-NEXT: %[[IV4:.*]] = arith.addi %[[IV]], %[[C4]] : index
// CHECK-NEXT: %[[L4:.*]] = memref.load %[[A]][%[[IV4]]] : memref<?xf32>
// CHECK-NEXT: scf.yield %[[LA1]], %[[LA2]], %[[LA3]], %[[L4]] : f32, f32, f32, f32
// CHECK-NEXT: }
// Epilogue:
// CHECK-NEXT: %[[ADD1:.*]] = arith.addf %[[LR]]#0, %{{.*}} : f32
// CHECK-NEXT: memref.store %[[ADD1]], %[[R]][%[[C6]]] : memref<?xf32>
// CHECK-NEXT: %[[ADD2:.*]] = arith.addf %[[LR]]#1, %{{.*}} : f32
// CHECK-NEXT: memref.store %[[ADD2]], %[[R]][%[[C7]]] : memref<?xf32>
// CHECK-NEXT: %[[ADD3:.*]] = arith.addf %[[LR]]#2, %{{.*}} : f32
// CHECK-NEXT: memref.store %[[ADD3]], %[[R]][%[[C8]]] : memref<?xf32>
// CHECK-NEXT: %[[ADD4:.*]] = arith.addf %[[LR]]#3, %{{.*}} : f32
// CHECK-NEXT: memref.store %[[ADD4]], %[[R]][%[[C9]]] : memref<?xf32>
func.func @long_liverange(%A: memref<?xf32>, %result: memref<?xf32>) {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c10 = arith.constant 10 : index
  %cf = arith.constant 1.0 : f32
  scf.for %i0 = %c0 to %c10 step %c1 {
    %A_elem = memref.load %A[%i0] { __test_pipelining_stage__ = 0, __test_pipelining_op_order__ = 2 } : memref<?xf32>
    %A1_elem = arith.addf %A_elem, %cf { __test_pipelining_stage__ = 4, __test_pipelining_op_order__ = 0 } : f32
    memref.store %A1_elem, %result[%i0] { __test_pipelining_stage__ = 4, __test_pipelining_op_order__ = 1 } : memref<?xf32>
  } { __test_pipelining_loop__ }
  return
}

// -----

// A value (%A_elem) with uses in several later stages: the pass must carry
// each live intermediate (load, add, mul results) through its own iter_arg.
// CHECK-LABEL: multiple_uses(
// CHECK-SAME: %[[A:.*]]: memref<?xf32>, %[[R:.*]]: memref<?xf32>) {
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
// CHECK-DAG: %[[C7:.*]] = arith.constant 7 : index
// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index
// CHECK-DAG: %[[C9:.*]] = arith.constant 9 : index
// Prologue:
// CHECK: %[[L0:.*]] = memref.load %[[A]][%[[C0]]] : memref<?xf32>
// CHECK-NEXT: %[[ADD0:.*]] = arith.addf %[[L0]], %{{.*}} : f32
// CHECK-NEXT: %[[L1:.*]] = memref.load %[[A]][%[[C1]]] : memref<?xf32>
// CHECK-NEXT: %[[ADD1:.*]] = arith.addf %[[L1]], %{{.*}} : f32
// CHECK-NEXT: %[[MUL0:.*]] = arith.mulf %[[ADD0]], %[[L0]] : f32
// CHECK-NEXT: %[[L2:.*]] = memref.load %[[A]][%[[C2]]] : memref<?xf32>
// Kernel:
// CHECK-NEXT: %[[LR:.*]]:4 = scf.for %[[IV:.*]] = %[[C0]] to %[[C7]]
// CHECK-SAME: step %[[C1]] iter_args(%[[LA1:.*]] = %[[L1]],
// CHECK-SAME: %[[LA2:.*]] = %[[L2]], %[[ADDARG1:.*]] = %[[ADD1]],
// CHECK-SAME: %[[MULARG0:.*]] = %[[MUL0]]) -> (f32, f32, f32, f32) {
// CHECK-NEXT: %[[ADD2:.*]] = arith.addf %[[LA2]], %{{.*}} : f32
// CHECK-NEXT: %[[MUL1:.*]] = arith.mulf %[[ADDARG1]], %[[LA1]] : f32
// CHECK-NEXT: memref.store %[[MULARG0]], %[[R]][%[[IV]]] : memref<?xf32>
// CHECK-NEXT: %[[IV3:.*]] = arith.addi %[[IV]], %[[C3]] : index
// CHECK-NEXT: %[[L3:.*]] = memref.load %[[A]][%[[IV3]]] : memref<?xf32>
// CHECK-NEXT: scf.yield %[[LA2]], %[[L3]], %[[ADD2]], %[[MUL1]] : f32, f32, f32, f32
// CHECK-NEXT: }
// Epilogue:
// CHECK-NEXT: %[[ADD3:.*]] = arith.addf %[[LR]]#1, %{{.*}} : f32
// CHECK-NEXT: %[[MUL2:.*]] = arith.mulf %[[LR]]#2, %[[LR]]#0 : f32
// CHECK-NEXT: memref.store %[[LR]]#3, %[[R]][%[[C7]]] : memref<?xf32>
// CHECK-NEXT: %[[MUL3:.*]] = arith.mulf %[[ADD3]], %[[LR]]#1 : f32
// CHECK-NEXT: memref.store %[[MUL2]], %[[R]][%[[C8]]] : memref<?xf32>
// CHECK-NEXT: memref.store %[[MUL3]], %[[R]][%[[C9]]] : memref<?xf32>
func.func @multiple_uses(%A: memref<?xf32>, %result: memref<?xf32>) {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c10 = arith.constant 10 : index
  %cf = arith.constant 1.0 : f32
  scf.for %i0 = %c0 to %c10 step %c1 {
    %A_elem = memref.load %A[%i0] { __test_pipelining_stage__ = 0, __test_pipelining_op_order__ = 3 } : memref<?xf32>
    %A1_elem = arith.addf %A_elem, %cf { __test_pipelining_stage__ = 1, __test_pipelining_op_order__ = 0 } : f32
    %A2_elem = arith.mulf %A1_elem, %A_elem { __test_pipelining_stage__ = 2, __test_pipelining_op_order__ = 1 } : f32
    memref.store %A2_elem, %result[%i0] { __test_pipelining_stage__ = 3, __test_pipelining_op_order__ = 2 } : memref<?xf32>
  } { __test_pipelining_loop__ }
  return
}

// -----

// Loop with an existing loop-carried iter_arg (%arg0): the pipelined kernel
// keeps that accumulator as its first iter_arg alongside the pipelined load.
// CHECK-LABEL: loop_carried(
// CHECK-SAME: %[[A:.*]]: memref<?xf32>, %[[R:.*]]: memref<?xf32>) {
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
// CHECK-DAG: %[[CSTF:.*]] = arith.constant 1.000000e+00 : f32
// Prologue:
// CHECK: %[[L0:.*]] = memref.load %[[A]][%[[C0]]] : memref<?xf32>
// Kernel:
// CHECK-NEXT: %[[LR:.*]]:2 = scf.for %[[IV:.*]] = %[[C0]] to %[[C3]]
// CHECK-SAME: step %[[C1]] iter_args(%[[C:.*]] = %[[CSTF]],
// CHECK-SAME: %[[LARG:.*]] = %[[L0]]) -> (f32, f32) {
// CHECK-NEXT: %[[ADD0:.*]] = arith.addf %[[LARG]], %[[C]] : f32
// CHECK-NEXT: %[[IV1:.*]] = arith.addi %[[IV]], %[[C1]] : index
// CHECK-NEXT: %[[L1:.*]] = memref.load %[[A]][%[[IV1]]] : memref<?xf32>
// CHECK-NEXT: scf.yield %[[ADD0]], %[[L1]] : f32, f32
// CHECK-NEXT: }
// Epilogue:
// CHECK-NEXT: %[[ADD1:.*]] = arith.addf %[[LR]]#1, %[[LR]]#0 : f32
// CHECK-NEXT: memref.store %[[ADD1]], %[[R]][%[[C0]]] : memref<?xf32>
func.func @loop_carried(%A: memref<?xf32>, %result: memref<?xf32>) {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c4 = arith.constant 4 : index
  %cf = arith.constant 1.0 : f32
  %r = scf.for %i0 = %c0 to %c4 step %c1 iter_args(%arg0 = %cf) -> (f32) {
    %A_elem = memref.load %A[%i0] { __test_pipelining_stage__ = 0, __test_pipelining_op_order__ = 1 } : memref<?xf32>
    %A1_elem = arith.addf %A_elem, %arg0 { __test_pipelining_stage__ = 1, __test_pipelining_op_order__ = 0 } : f32
    scf.yield %A1_elem : f32
  } { __test_pipelining_loop__ }
  memref.store %r, %result[%c0] : memref<?xf32>
  return
}

// -----

// Backedge value produced (mulf, stage 2) and consumed (addf, stage 1) in
// different stages: the accumulator must be carried across the stage gap by
// an extra iter_arg, and the function result comes from the epilogue.
// CHECK-LABEL: backedge_different_stage
// CHECK-SAME: (%[[A:.*]]: memref<?xf32>) -> f32 {
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
// CHECK-DAG: %[[CSTF:.*]] = arith.constant 1.000000e+00 : f32
// Prologue:
// CHECK: %[[L0:.*]] = memref.load %[[A]][%[[C0]]] : memref<?xf32>
// CHECK-NEXT: %[[ADD0:.*]] = arith.addf %[[L0]], %[[CSTF]] : f32
// CHECK-NEXT: %[[L1:.*]] = memref.load %[[A]][%[[C1]]] : memref<?xf32>
// Kernel:
// CHECK-NEXT: %[[R:.*]]:3 = scf.for %[[IV:.*]] = %[[C0]] to %[[C2]]
// CHECK-SAME: step %[[C1]] iter_args(%[[C:.*]] = %[[CSTF]],
// CHECK-SAME: %[[ADDARG:.*]] = %[[ADD0]], %[[LARG:.*]] = %[[L1]]) -> (f32, f32, f32) {
// CHECK-NEXT: %[[ADD1:.*]] = arith.addf %[[LARG]], %[[ADDARG]] : f32
// CHECK-NEXT: %[[IV2:.*]] = arith.addi %[[IV]], %[[C2]] : index
// CHECK-NEXT: %[[L2:.*]] = memref.load %[[A]][%[[IV2]]] : memref<?xf32>
// CHECK-NEXT: scf.yield %[[ADDARG]], %[[ADD1]], %[[L2]] : f32, f32, f32
// CHECK-NEXT: }
// Epilogue:
// CHECK-NEXT: %[[ADD2:.*]] = arith.addf %[[R]]#2, %[[R]]#1 : f32
// CHECK-NEXT: return %[[ADD2]] : f32
func.func @backedge_different_stage(%A: memref<?xf32>) -> f32 {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c4 = arith.constant 4 : index
  %cf = arith.constant 1.0 : f32
  %r = scf.for %i0 = %c0 to %c4 step %c1 iter_args(%arg0 = %cf) -> (f32) {
    %A_elem = memref.load %A[%i0] { __test_pipelining_stage__ = 0, __test_pipelining_op_order__ = 2 } : memref<?xf32>
    %A1_elem = arith.addf %A_elem, %arg0 { __test_pipelining_stage__ = 1, __test_pipelining_op_order__ = 1 } : f32
    %A2_elem = arith.mulf %cf, %A1_elem { __test_pipelining_stage__ = 2, __test_pipelining_op_order__ = 0 } : f32
    scf.yield %A2_elem : f32
  } { __test_pipelining_loop__ }
  return %r : f32
}

// -----

// Backedge value produced and consumed within the same stage (both ops in
// stage 1): no extra carry is needed beyond the normal accumulator iter_arg.
// CHECK-LABEL: backedge_same_stage
// CHECK-SAME: (%[[A:.*]]: memref<?xf32>) -> f32 {
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
// CHECK-DAG: %[[CSTF:.*]] = arith.constant 1.000000e+00 : f32
// Prologue:
// CHECK: %[[L0:.*]] = memref.load %[[A]][%[[C0]]] : memref<?xf32>
// Kernel:
// CHECK-NEXT: %[[R:.*]]:2 = scf.for %[[IV:.*]] = %[[C0]] to %[[C3]]
// CHECK-SAME: step %[[C1]] iter_args(%[[C:.*]] = %[[CSTF]],
// CHECK-SAME: %[[LARG:.*]] = %[[L0]]) -> (f32, f32) {
// CHECK-NEXT: %[[ADD0:.*]] = arith.addf %[[LARG]], %[[C]] : f32
// CHECK-NEXT: %[[IV1:.*]] = arith.addi %[[IV]], %[[C1]] : index
// CHECK-NEXT: %[[L2:.*]] = memref.load %[[A]][%[[IV1]]] : memref<?xf32>
// CHECK-NEXT: scf.yield %[[ADD0]], %[[L2]] : f32, f32
// CHECK-NEXT: }
// Epilogue:
// CHECK-NEXT: %[[ADD1:.*]] = arith.addf %[[R]]#1, %[[R]]#0 : f32
// CHECK-NEXT: return %[[ADD1]] : f32
func.func @backedge_same_stage(%A: memref<?xf32>) -> f32 {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c4 = arith.constant 4 : index
  %cf = arith.constant 1.0 : f32
  %r = scf.for %i0 = %c0 to %c4 step %c1 iter_args(%arg0 = %cf) -> (f32) {
    %A_elem = memref.load %A[%i0] { __test_pipelining_stage__ = 0, __test_pipelining_op_order__ = 2 } : memref<?xf32>
    %A1_elem = arith.addf %A_elem, %arg0 { __test_pipelining_stage__ = 1, __test_pipelining_op_order__ = 0 } : f32
    %A2_elem = arith.mulf %cf, %A1_elem { __test_pipelining_stage__ = 1, __test_pipelining_op_order__ = 1 } : f32
    scf.yield %A2_elem : f32
  } { __test_pipelining_loop__ }
  return %r : f32
}