1 // Test host code gen 2 // RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 3 // RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s 4 // RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 5 // RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32 6 // RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s 7 // RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32 8 9 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 10 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s 11 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix 
CHECK-64 12 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 13 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s 14 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 15 // expected-no-diagnostics 16 #ifndef HEADER 17 #define HEADER 18 19 20 template <typename T> 21 T tmain() { 22 T *a, *b, *c; 23 int n = 10000; 24 int ch = 100; 25 26 // no schedule clauses 27 #pragma omp target 28 #pragma omp teams 29 #pragma omp distribute parallel for 30 for (int i = 0; i < n; ++i) { 31 a[i] = b[i] + c[i]; 32 } 33 34 // dist_schedule: static no chunk 35 #pragma omp target 36 #pragma omp teams 37 #pragma omp distribute parallel for dist_schedule(static) 38 for (int i = 0; i < n; ++i) { 39 a[i] = b[i] + c[i]; 40 } 41 42 // dist_schedule: static chunk 43 #pragma omp target 44 #pragma omp teams 45 #pragma omp distribute parallel for dist_schedule(static, ch) 46 for (int i = 0; i < n; ++i) { 47 a[i] = b[i] + c[i]; 48 } 49 50 // schedule: static no chunk 51 #pragma omp target 52 #pragma omp teams 53 #pragma omp distribute parallel for schedule(static) 54 for (int i = 0; i < n; ++i) { 55 a[i] = b[i] + c[i]; 56 } 57 58 // schedule: static chunk 59 #pragma omp target 60 #pragma omp teams 61 #pragma omp distribute parallel for schedule(static, ch) 62 for (int i = 0; i < n; ++i) { 63 a[i] = b[i] + c[i]; 64 } 65 66 // schedule: dynamic no chunk 67 #pragma omp target 68 #pragma omp teams 69 #pragma omp distribute parallel for schedule(dynamic) 70 for (int i = 0; i < n; ++i) { 71 a[i] = b[i] + c[i]; 72 } 73 74 // schedule: dynamic chunk 75 
#pragma omp target 76 #pragma omp teams 77 #pragma omp distribute parallel for schedule(dynamic, ch) 78 for (int i = 0; i < n; ++i) { 79 a[i] = b[i] + c[i]; 80 } 81 82 return T(); 83 } 84 85 int main() { 86 double *a, *b, *c; 87 int n = 10000; 88 int ch = 100; 89 90 #ifdef LAMBDA 91 // LAMBDA-LABEL: @main 92 // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]]( 93 [&]() { 94 // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( 95 96 // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( 97 // LAMBDA: call void [[OFFLOADING_FUN_1:@.+]]( 98 99 // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( 100 // LAMBDA: call void [[OFFLOADING_FUN_2:@.+]]( 101 102 // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( 103 // LAMBDA: call void [[OFFLOADING_FUN_3:@.+]]( 104 105 // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( 106 // LAMBDA: call void [[OFFLOADING_FUN_4:@.+]]( 107 108 // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( 109 // LAMBDA: call void [[OFFLOADING_FUN_5:@.+]]( 110 111 // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( 112 // LAMBDA: call void [[OFFLOADING_FUN_6:@.+]]( 113 114 // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( 115 // LAMBDA: call void [[OFFLOADING_FUN_7:@.+]]( 116 117 // no schedule clauses 118 #pragma omp target 119 #pragma omp teams 120 // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_1]]( 121 // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}}) 122 123 #pragma omp distribute parallel for 124 for (int i = 0; i < n; ++i) { 125 a[i] = b[i] + c[i]; 126 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_1]]( 127 // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca 128 // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca 129 // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca 130 // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca 131 132 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 133 134 // check EUB for distribute 135 // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], 136 // LAMBDA: 
[[NUM_IT_1:%.+]] = load{{.+}}, 137 // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] 138 // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] 139 // LAMBDA-DAG: [[EUB_TRUE]]: 140 // LAMBDA: [[NUM_IT_2:%.+]] = load{{.+}}, 141 // LAMBDA: br label %[[EUB_END:.+]] 142 // LAMBDA-DAG: [[EUB_FALSE]]: 143 // LAMBDA: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], 144 // LAMBDA: br label %[[EUB_END]] 145 // LAMBDA-DAG: [[EUB_END]]: 146 // LAMBDA-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] 147 // LAMBDA: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], 148 149 // initialize omp.iv 150 // LAMBDA: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], 151 // LAMBDA: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], 152 // LAMBDA: br label %[[OMP_JUMP_BACK:.+]] 153 154 // check exit condition 155 // LAMBDA: [[OMP_JUMP_BACK]]: 156 // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], 157 // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], 158 // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] 159 // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]] 160 161 // check that PrevLB and PrevUB are passed to the 'for' 162 // LAMBDA: [[DIST_BODY]]: 163 // LAMBDA-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], 164 // LAMBDA-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to 165 // LAMBDA-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], 166 // LAMBDA-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to 167 // check that distlb and distub are properly passed to fork_call 168 // LAMBDA-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) 169 // LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to 
{{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) 170 // LAMBDA: br label %[[DIST_INC:.+]] 171 172 // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch 173 // LAMBDA: [[DIST_INC]]: 174 // LAMBDA-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], 175 // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], 176 // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] 177 // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], 178 // LAMBDA: br label %[[OMP_JUMP_BACK]] 179 180 // LAMBDA-DAG: call void @__kmpc_for_static_fini( 181 // LAMBDA: ret 182 183 // implementation of 'parallel for' 184 // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_1]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 185 186 // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 187 // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 188 // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 189 190 // initialize lb and ub to PrevLB and PrevUB 191 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 192 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 193 // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 194 // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 195 // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 196 // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 197 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 198 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 199 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 200 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 201 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, 
{{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 202 203 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used 204 // In this case we use EUB 205 // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 206 // LAMBDA: [[PF_NUM_IT_1:%.+]] = load{{.+}}, 207 // LAMBDA-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] 208 // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 209 // LAMBDA: [[PF_EUB_TRUE]]: 210 // LAMBDA: [[PF_NUM_IT_2:%.+]] = load{{.+}}, 211 // LAMBDA: br label %[[PF_EUB_END:.+]] 212 // LAMBDA-DAG: [[PF_EUB_FALSE]]: 213 // LAMBDA: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], 214 // LAMBDA: br label %[[PF_EUB_END]] 215 // LAMBDA-DAG: [[PF_EUB_END]]: 216 // LAMBDA-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] 217 // LAMBDA: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], 218 219 // initialize omp.iv 220 // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 221 // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 222 // LAMBDA: br label %[[OMP_PF_JUMP_BACK:.+]] 223 224 // check exit condition 225 // LAMBDA: [[OMP_PF_JUMP_BACK]]: 226 // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], 227 // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], 228 // LAMBDA: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 229 // LAMBDA: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] 230 231 // check that PrevLB and PrevUB are passed to the 'for' 232 // LAMBDA: [[PF_BODY]]: 233 // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 234 // LAMBDA: br label {{.+}} 235 236 // check stride 1 for 'for' in 'distribute parallel for' 237 // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], 238 // LAMBDA: [[OMP_PF_IV_INC:%.+]] = 
add{{.+}} [[OMP_PF_IV_VAL_2]], 1 239 // LAMBDA: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], 240 // LAMBDA: br label %[[OMP_PF_JUMP_BACK]] 241 242 // LAMBDA-DAG: call void @__kmpc_for_static_fini( 243 // LAMBDA: ret 244 245 [&]() { 246 a[i] = b[i] + c[i]; 247 }(); 248 } 249 250 // dist_schedule: static no chunk (same as default - no dist_schedule) 251 #pragma omp target 252 #pragma omp teams 253 // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_2]]( 254 // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_2:@.+]] to {{.+}}) 255 256 #pragma omp distribute parallel for dist_schedule(static) 257 for (int i = 0; i < n; ++i) { 258 a[i] = b[i] + c[i]; 259 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_2]]( 260 // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca 261 // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca 262 // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca 263 // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca 264 265 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 266 267 // check EUB for distribute 268 // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], 269 // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}}, 270 // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] 271 // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] 272 // LAMBDA-DAG: [[EUB_TRUE]]: 273 // LAMBDA: [[NUM_IT_2:%.+]] = load{{.+}}, 274 // LAMBDA: br label %[[EUB_END:.+]] 275 // LAMBDA-DAG: [[EUB_FALSE]]: 276 // LAMBDA: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], 277 // LAMBDA: br label %[[EUB_END]] 278 // LAMBDA-DAG: [[EUB_END]]: 279 // LAMBDA-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] 280 // LAMBDA: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], 281 282 // initialize omp.iv 283 // LAMBDA: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], 284 // LAMBDA: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], 285 // 
LAMBDA: br label %[[OMP_JUMP_BACK:.+]] 286 287 // check exit condition 288 // LAMBDA: [[OMP_JUMP_BACK]]: 289 // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], 290 // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], 291 // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] 292 // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]] 293 294 // check that PrevLB and PrevUB are passed to the 'for' 295 // LAMBDA: [[DIST_BODY]]: 296 // LAMBDA-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], 297 // LAMBDA-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to 298 // LAMBDA-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], 299 // LAMBDA-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to 300 // check that distlb and distub are properly passed to fork_call 301 // LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) 302 // LAMBDA-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) 303 // LAMBDA: br label %[[DIST_INC:.+]] 304 305 // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch 306 // LAMBDA: [[DIST_INC]]: 307 // LAMBDA-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], 308 // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], 309 // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] 310 // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], 311 // LAMBDA: br label %[[OMP_JUMP_BACK]] 312 313 // LAMBDA-DAG: call void @__kmpc_for_static_fini( 314 // LAMBDA: ret 315 316 // implementation of 'parallel for' 317 // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_2]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} 
[[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 318 319 // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 320 // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 321 // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 322 323 // initialize lb and ub to PrevLB and PrevUB 324 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 325 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 326 // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 327 // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 328 // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 329 // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 330 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 331 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 332 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 333 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 334 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 335 336 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used 337 // In this case we use EUB 338 // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 339 // LAMBDA: [[PF_NUM_IT_1:%.+]] = load{{.+}}, 340 // LAMBDA-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] 341 // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 342 // LAMBDA: [[PF_EUB_TRUE]]: 343 // LAMBDA: [[PF_NUM_IT_2:%.+]] = load{{.+}}, 344 // LAMBDA: br label %[[PF_EUB_END:.+]] 345 // LAMBDA-DAG: [[PF_EUB_FALSE]]: 346 // LAMBDA: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], 347 // LAMBDA: br label %[[PF_EUB_END]] 348 // LAMBDA-DAG: [[PF_EUB_END]]: 349 // LAMBDA-DAG: 
[[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] 350 // LAMBDA: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], 351 352 // initialize omp.iv 353 // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 354 // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 355 // LAMBDA: br label %[[OMP_PF_JUMP_BACK:.+]] 356 357 // check exit condition 358 // LAMBDA: [[OMP_PF_JUMP_BACK]]: 359 // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], 360 // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], 361 // LAMBDA: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 362 // LAMBDA: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] 363 364 // check that PrevLB and PrevUB are passed to the 'for' 365 // LAMBDA: [[PF_BODY]]: 366 // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 367 // LAMBDA: br label {{.+}} 368 369 // check stride 1 for 'for' in 'distribute parallel for' 370 // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], 371 // LAMBDA: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 372 // LAMBDA: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], 373 // LAMBDA: br label %[[OMP_PF_JUMP_BACK]] 374 375 // LAMBDA-DAG: call void @__kmpc_for_static_fini( 376 // LAMBDA: ret 377 [&]() { 378 a[i] = b[i] + c[i]; 379 }(); 380 } 381 382 // dist_schedule: static chunk 383 #pragma omp target 384 #pragma omp teams 385 // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_3]]( 386 // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}}) 387 388 #pragma omp distribute parallel for dist_schedule(static, ch) 389 for (int i = 0; i < n; ++i) { 390 a[i] = b[i] + c[i]; 391 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_3]]( 392 // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca 393 // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca 394 // LAMBDA-DAG: 
[[OMP_UB:%.omp.comb.ub]] = alloca 395 // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca 396 397 // unlike the previous tests, in this one we have a outer and inner loop for 'distribute' 398 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91, 399 // LAMBDA: br label %[[DIST_OUTER_LOOP_HEADER:.+]] 400 401 // LAMBDA: [[DIST_OUTER_LOOP_HEADER]]: 402 // check EUB for distribute 403 // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], 404 // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}}, 405 // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] 406 // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] 407 // LAMBDA-DAG: [[EUB_TRUE]]: 408 // LAMBDA: [[NUM_IT_2:%.+]] = load{{.+}}, 409 // LAMBDA: br label %[[EUB_END:.+]] 410 // LAMBDA-DAG: [[EUB_FALSE]]: 411 // LAMBDA: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], 412 // LAMBDA: br label %[[EUB_END]] 413 // LAMBDA-DAG: [[EUB_END]]: 414 // LAMBDA-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] 415 // LAMBDA: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], 416 417 // initialize omp.iv 418 // LAMBDA: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], 419 // LAMBDA: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], 420 421 // check exit condition 422 // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], 423 // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], 424 // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] 425 // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]] 426 427 // LAMBDA: [[DIST_OUTER_LOOP_BODY]]: 428 // LAMBDA: br label %[[DIST_INNER_LOOP_HEADER:.+]] 429 430 // LAMBDA: [[DIST_INNER_LOOP_HEADER]]: 431 // LAMBDA-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]], 432 // LAMBDA-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]], 433 // LAMBDA: [[CMP_IV_UB_2:%.+]] = icmp sle 
{{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]] 434 // LAMBDA: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]] 435 436 // check that PrevLB and PrevUB are passed to the 'for' 437 // LAMBDA: [[DIST_INNER_LOOP_BODY]]: 438 // LAMBDA-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], 439 // LAMBDA-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} 440 // LAMBDA-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], 441 // LAMBDA-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} 442 // check that distlb and distub are properly passed to fork_call 443 // LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) 444 // LAMBDA-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) 445 // LAMBDA: br label %[[DIST_INNER_LOOP_INC:.+]] 446 447 // check DistInc 448 // LAMBDA: [[DIST_INNER_LOOP_INC]]: 449 // LAMBDA-DAG: [[OMP_IV_VAL_3:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], 450 // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], 451 // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]] 452 // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], 453 // LAMBDA: br label %[[DIST_INNER_LOOP_HEADER]] 454 455 // LAMBDA: [[DIST_INNER_LOOP_END]]: 456 // LAMBDA: br label %[[DIST_OUTER_LOOP_INC:.+]] 457 458 // LAMBDA: [[DIST_OUTER_LOOP_INC]]: 459 // check NextLB and NextUB 460 // LAMBDA-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]], 461 // LAMBDA-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], 462 // LAMBDA-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]] 463 // LAMBDA: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]], 464 // LAMBDA-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} 
[[OMP_UB]], 465 // LAMBDA-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], 466 // LAMBDA-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]] 467 // LAMBDA: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]], 468 // LAMBDA: br label %[[DIST_OUTER_LOOP_HEADER]] 469 470 // outer loop exit 471 // LAMBDA: [[DIST_OUTER_LOOP_END]]: 472 // LAMBDA-DAG: call void @__kmpc_for_static_fini( 473 // LAMBDA: ret 474 475 // skip implementation of 'parallel for': using default scheduling and was tested above 476 [&]() { 477 a[i] = b[i] + c[i]; 478 }(); 479 } 480 481 // schedule: static no chunk 482 #pragma omp target 483 #pragma omp teams 484 // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_4]]( 485 // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_4:@.+]] to {{.+}}) 486 487 #pragma omp distribute parallel for schedule(static) 488 for (int i = 0; i < n; ++i) { 489 a[i] = b[i] + c[i]; 490 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_4]]( 491 // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca 492 // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca 493 // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca 494 // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca 495 496 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 497 // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_4:@.+]] to {{.+}}, 498 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 499 // LAMBDA: ret 500 501 // 'parallel for' implementation is the same as the case without schedule clause (static no chunk is the default) 502 // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_4]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 503 504 // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 505 // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 506 // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 507 508 
// initialize lb and ub to PrevLB and PrevUB 509 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 510 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 511 // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 512 // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 513 // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 514 // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 515 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 516 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 517 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 518 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 519 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 520 521 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used 522 // In this case we use EUB 523 // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 524 // LAMBDA: [[PF_NUM_IT_1:%.+]] = load{{.+}}, 525 // LAMBDA-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] 526 // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 527 // LAMBDA: [[PF_EUB_TRUE]]: 528 // LAMBDA: [[PF_NUM_IT_2:%.+]] = load{{.+}}, 529 // LAMBDA: br label %[[PF_EUB_END:.+]] 530 // LAMBDA-DAG: [[PF_EUB_FALSE]]: 531 // LAMBDA: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], 532 // LAMBDA: br label %[[PF_EUB_END]] 533 // LAMBDA-DAG: [[PF_EUB_END]]: 534 // LAMBDA-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] 535 // LAMBDA: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], 536 537 // initialize omp.iv 538 // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] 
= load{{.+}}, {{.+}} [[OMP_PF_LB]], 539 // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 540 // LAMBDA: br label %[[OMP_PF_JUMP_BACK:.+]] 541 542 // check exit condition 543 // LAMBDA: [[OMP_PF_JUMP_BACK]]: 544 // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], 545 // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], 546 // LAMBDA: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 547 // LAMBDA: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] 548 549 // check that PrevLB and PrevUB are passed to the 'for' 550 // LAMBDA: [[PF_BODY]]: 551 // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 552 // LAMBDA: br label {{.+}} 553 554 // check stride 1 for 'for' in 'distribute parallel for' 555 // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], 556 // LAMBDA: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 557 // LAMBDA: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], 558 // LAMBDA: br label %[[OMP_PF_JUMP_BACK]] 559 560 // LAMBDA-DAG: call void @__kmpc_for_static_fini( 561 // LAMBDA: ret 562 563 [&]() { 564 a[i] = b[i] + c[i]; 565 }(); 566 } 567 568 // schedule: static chunk 569 #pragma omp target 570 #pragma omp teams 571 // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_5]]( 572 // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_5:@.+]] to {{.+}}) 573 574 #pragma omp distribute parallel for schedule(static, ch) 575 for (int i = 0; i < n; ++i) { 576 a[i] = b[i] + c[i]; 577 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_5]]( 578 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 579 // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_5:@.+]] to {{.+}}, 580 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 581 // LAMBDA: ret 582 583 // 'parallel for' implementation using outer and inner 
loops and PrevEUB 584 // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_5]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}}) 585 // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 586 // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 587 // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 588 // LAMBDA-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, 589 590 // initialize lb and ub to PrevLB and PrevUB 591 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 592 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 593 // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 594 // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 595 // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 596 // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 597 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 598 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 599 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 600 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 601 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 602 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] 603 604 // check PrevEUB (using PrevUB instead of NumIt as upper bound) 605 // LAMBDA: [[OMP_PF_OUTER_LOOP_HEADER]]: 606 // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 607 // LAMBDA-64-DAG: [[OMP_PF_UB_VAL_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_1]] to 608 // LAMBDA: [[PF_PREV_UB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 609 // LAMBDA-64-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_CONV]], [[PF_PREV_UB_VAL_1]] 610 // LAMBDA-32-DAG: 
[[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_PREV_UB_VAL_1]] 611 // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 612 // LAMBDA: [[PF_EUB_TRUE]]: 613 // LAMBDA: [[PF_PREV_UB_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 614 // LAMBDA: br label %[[PF_EUB_END:.+]] 615 // LAMBDA-DAG: [[PF_EUB_FALSE]]: 616 // LAMBDA: [[OMP_PF_UB_VAL_2:%.+]] = load{{.+}} [[OMP_PF_UB]], 617 // LAMBDA-64: [[OMP_PF_UB_VAL_2_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_2]] to 618 // LAMBDA: br label %[[PF_EUB_END]] 619 // LAMBDA-DAG: [[PF_EUB_END]]: 620 // LAMBDA-64-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2_CONV]], %[[PF_EUB_FALSE]] ] 621 // LAMBDA-32-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2]], %[[PF_EUB_FALSE]] ] 622 // LAMBDA-64-DAG: [[PF_EUB_RES_CONV:%.+]] = trunc{{.+}} [[PF_EUB_RES]] to 623 // LAMBDA-64: store{{.+}} [[PF_EUB_RES_CONV]],{{.+}} [[OMP_PF_UB]], 624 // LAMBDA-32: store{{.+}} [[PF_EUB_RES]], {{.+}} [[OMP_PF_UB]], 625 626 // initialize omp.iv (IV = LB) 627 // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 628 // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 629 630 // outer loop: while (IV < UB) { 631 // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 632 // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 633 // LAMBDA: [[PF_CMP_IV_UB_1:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 634 // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_1]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] 635 636 // LAMBDA: [[OMP_PF_OUTER_LOOP_BODY]]: 637 // LAMBDA: br label %[[OMP_PF_INNER_FOR_HEADER:.+]] 638 639 // LAMBDA: [[OMP_PF_INNER_FOR_HEADER]]: 640 // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 641 // LAMBDA-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* 
[[OMP_PF_UB]], 642 // LAMBDA: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] 643 // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] 644 645 // LAMBDA: [[OMP_PF_INNER_LOOP_BODY]]: 646 // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 647 // skip body branch 648 // LAMBDA: br{{.+}} 649 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_INC:.+]] 650 651 // IV = IV + 1 and inner loop latch 652 // LAMBDA: [[OMP_PF_INNER_LOOP_INC]]: 653 // LAMBDA-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], 654 // LAMBDA-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 655 // LAMBDA-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], 656 // LAMBDA: br label %[[OMP_PF_INNER_FOR_HEADER]] 657 658 // check NextLB and NextUB 659 // LAMBDA: [[OMP_PF_INNER_LOOP_END]]: 660 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] 661 662 // LAMBDA: [[OMP_PF_OUTER_LOOP_INC]]: 663 // LAMBDA-DAG: [[OMP_PF_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 664 // LAMBDA-DAG: [[OMP_PF_ST_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], 665 // LAMBDA-DAG: [[OMP_PF_LB_NEXT:%.+]] = add{{.+}} [[OMP_PF_LB_VAL_2]], [[OMP_PF_ST_VAL_1]] 666 // LAMBDA: store{{.+}} [[OMP_PF_LB_NEXT]], {{.+}}* [[OMP_PF_LB]], 667 // LAMBDA-DAG: [[OMP_PF_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], 668 // LAMBDA-DAG: [[OMP_PF_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], 669 // LAMBDA-DAG: [[OMP_PF_UB_NEXT:%.+]] = add{{.+}} [[OMP_PF_UB_VAL_5]], [[OMP_PF_ST_VAL_2]] 670 // LAMBDA: store{{.+}} [[OMP_PF_UB_NEXT]], {{.+}}* [[OMP_PF_UB]], 671 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER]] 672 673 // LAMBDA: [[OMP_PF_OUTER_LOOP_END]]: 674 // LAMBDA-DAG: call void @__kmpc_for_static_fini( 675 // LAMBDA: ret 676 [&]() { 677 a[i] = b[i] + c[i]; 678 }(); 679 } 680 681 // schedule: dynamic no chunk 682 #pragma omp target 683 #pragma omp teams 684 // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_6]]( 685 // 
LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_6:@.+]] to {{.+}}) 686 687 #pragma omp distribute parallel for schedule(dynamic) 688 for (int i = 0; i < n; ++i) { 689 a[i] = b[i] + c[i]; 690 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_6]]( 691 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 692 // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_6:@.+]] to {{.+}}, 693 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 694 // LAMBDA: ret 695 696 // 'parallel for' implementation using outer and inner loops and PrevEUB 697 // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_6]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 698 // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 699 // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 700 // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 701 // LAMBDA-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, 702 703 // initialize lb and ub to PrevLB and PrevUB 704 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 705 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 706 // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 707 // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 708 // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 709 // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 710 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 711 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 712 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 713 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 714 // LAMBDA-DAG: [[OMP_PF_LB_VAL:%.+]] = 
load{{.+}}, {{.+}} [[OMP_PF_LB]], 715 // LAMBDA-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], 716 // LAMBDA: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}}) 717 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] 718 719 // LAMBDA: [[OMP_PF_OUTER_LOOP_HEADER]]: 720 // LAMBDA: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]]) 721 // LAMBDA: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0 722 // LAMBDA: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] 723 724 // initialize omp.iv (IV = LB) 725 // LAMBDA: [[OMP_PF_OUTER_LOOP_BODY]]: 726 // LAMBDA-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 727 // LAMBDA-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 728 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]] 729 730 // LAMBDA: [[OMP_PF_INNER_LOOP_HEADER]]: 731 // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 732 // LAMBDA-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 733 // LAMBDA: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] 734 // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] 735 736 // LAMBDA: [[OMP_PF_INNER_LOOP_BODY]]: 737 // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 738 // skip body branch 739 // LAMBDA: br{{.+}} 740 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_INC:.+]] 741 742 // IV = IV + 1 and inner loop latch 743 // LAMBDA: [[OMP_PF_INNER_LOOP_INC]]: 744 // LAMBDA-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], 745 // LAMBDA-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 746 // LAMBDA-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], 747 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER]] 748 749 // 
check NextLB and NextUB 750 // LAMBDA: [[OMP_PF_INNER_LOOP_END]]: 751 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] 752 753 // LAMBDA: [[OMP_PF_OUTER_LOOP_INC]]: 754 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER]] 755 756 // LAMBDA: [[OMP_PF_OUTER_LOOP_END]]: 757 // LAMBDA: ret 758 [&]() { 759 a[i] = b[i] + c[i]; 760 }(); 761 } 762 763 // schedule: dynamic chunk 764 #pragma omp target 765 #pragma omp teams 766 // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_7]]( 767 // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_7:@.+]] to {{.+}}) 768 769 #pragma omp distribute parallel for schedule(dynamic, ch) 770 for (int i = 0; i < n; ++i) { 771 a[i] = b[i] + c[i]; 772 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_7]]( 773 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 774 // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_7:@.+]] to {{.+}}, 775 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 776 // LAMBDA: ret 777 778 // 'parallel for' implementation using outer and inner loops and PrevEUB 779 // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_7]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}}) 780 // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 781 // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 782 // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 783 // LAMBDA-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, 784 785 // initialize lb and ub to PrevLB and PrevUB 786 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 787 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 788 // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 789 // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 790 // LAMBDA-DAG: 
[[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 791 // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 792 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 793 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 794 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 795 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 796 // LAMBDA-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 797 // LAMBDA-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], 798 // LAMBDA: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}}) 799 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] 800 801 // LAMBDA: [[OMP_PF_OUTER_LOOP_HEADER]]: 802 // LAMBDA: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]]) 803 // LAMBDA: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0 804 // LAMBDA: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] 805 806 // initialize omp.iv (IV = LB) 807 // LAMBDA: [[OMP_PF_OUTER_LOOP_BODY]]: 808 // LAMBDA-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 809 // LAMBDA-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 810 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]] 811 812 // LAMBDA: [[OMP_PF_INNER_LOOP_HEADER]]: 813 // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 814 // LAMBDA-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 815 // LAMBDA: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] 816 // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] 817 818 // LAMBDA: [[OMP_PF_INNER_LOOP_BODY]]: 819 // LAMBDA-DAG: {{.+}} = 
load{{.+}}, {{.+}}* [[OMP_PF_IV]], 820 // skip body branch 821 // LAMBDA: br{{.+}} 822 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_INC:.+]] 823 824 // IV = IV + 1 and inner loop latch 825 // LAMBDA: [[OMP_PF_INNER_LOOP_INC]]: 826 // LAMBDA-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], 827 // LAMBDA-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 828 // LAMBDA-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], 829 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER]] 830 831 // check NextLB and NextUB 832 // LAMBDA: [[OMP_PF_INNER_LOOP_END]]: 833 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] 834 835 // LAMBDA: [[OMP_PF_OUTER_LOOP_INC]]: 836 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER]] 837 838 // LAMBDA: [[OMP_PF_OUTER_LOOP_END]]: 839 // LAMBDA: ret 840 [&]() { 841 a[i] = b[i] + c[i]; 842 }(); 843 } 844 }(); 845 return 0; 846 #else 847 // CHECK-LABEL: @main 848 849 // CHECK: call i{{[0-9]+}} @__tgt_target_teams( 850 // CHECK: call void [[OFFLOADING_FUN_1:@.+]]( 851 852 // CHECK: call i{{[0-9]+}} @__tgt_target_teams( 853 // CHECK: call void [[OFFLOADING_FUN_2:@.+]]( 854 855 // CHECK: call i{{[0-9]+}} @__tgt_target_teams( 856 // CHECK: call void [[OFFLOADING_FUN_3:@.+]]( 857 858 // CHECK: call i{{[0-9]+}} @__tgt_target_teams( 859 // CHECK: call void [[OFFLOADING_FUN_4:@.+]]( 860 861 // CHECK: call i{{[0-9]+}} @__tgt_target_teams( 862 // CHECK: call void [[OFFLOADING_FUN_5:@.+]]( 863 864 // CHECK: call i{{[0-9]+}} @__tgt_target_teams( 865 // CHECK: call void [[OFFLOADING_FUN_6:@.+]]( 866 867 // CHECK: call i{{[0-9]+}} @__tgt_target_teams( 868 // CHECK: call void [[OFFLOADING_FUN_7:@.+]]( 869 870 // CHECK: call{{.+}} [[TMAIN:@.+]]() 871 872 // no schedule clauses 873 #pragma omp target 874 #pragma omp teams 875 // CHECK: define internal void [[OFFLOADING_FUN_1]]( 876 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}}) 877 878 #pragma omp distribute parallel for 879 for (int i 
= 0; i < n; ++i) { 880 a[i] = b[i] + c[i]; 881 // CHECK: define{{.+}} void [[OMP_OUTLINED_1]]( 882 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca 883 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca 884 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca 885 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca 886 887 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 888 889 // check EUB for distribute 890 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], 891 // CHECK: [[NUM_IT_1:%.+]] = load{{.+}}, 892 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] 893 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] 894 // CHECK-DAG: [[EUB_TRUE]]: 895 // CHECK: [[NUM_IT_2:%.+]] = load{{.+}}, 896 // CHECK: br label %[[EUB_END:.+]] 897 // CHECK-DAG: [[EUB_FALSE]]: 898 // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], 899 // CHECK: br label %[[EUB_END]] 900 // CHECK-DAG: [[EUB_END]]: 901 // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] 902 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], 903 904 // initialize omp.iv 905 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], 906 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], 907 // CHECK: br label %[[OMP_JUMP_BACK:.+]] 908 909 // check exit condition 910 // CHECK: [[OMP_JUMP_BACK]]: 911 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], 912 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], 913 // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] 914 // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]] 915 916 // check that PrevLB and PrevUB are passed to the 'for' 917 // CHECK: [[DIST_BODY]]: 918 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], 919 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} 920 // CHECK-DAG: 
[[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], 921 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} 922 // check that distlb and distub are properly passed to fork_call 923 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) 924 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) 925 // CHECK: br label %[[DIST_INC:.+]] 926 927 // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch 928 // CHECK: [[DIST_INC]]: 929 // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], 930 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], 931 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] 932 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], 933 // CHECK: br label %[[OMP_JUMP_BACK]] 934 935 // CHECK-DAG: call void @__kmpc_for_static_fini( 936 // CHECK: ret 937 938 // implementation of 'parallel for' 939 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_1]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 940 941 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 942 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 943 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 944 945 // initialize lb and ub to PrevLB and PrevUB 946 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 947 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 948 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 949 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 950 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 
951 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 952 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 953 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 954 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 955 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 956 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 957 958 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used 959 // In this case we use EUB 960 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 961 // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}}, 962 // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] 963 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 964 // CHECK: [[PF_EUB_TRUE]]: 965 // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}}, 966 // CHECK: br label %[[PF_EUB_END:.+]] 967 // CHECK-DAG: [[PF_EUB_FALSE]]: 968 // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], 969 // CHECK: br label %[[PF_EUB_END]] 970 // CHECK-DAG: [[PF_EUB_END]]: 971 // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] 972 // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], 973 974 // initialize omp.iv 975 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 976 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 977 // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]] 978 979 // check exit condition 980 // CHECK: [[OMP_PF_JUMP_BACK]]: 981 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], 982 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], 983 // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 984 // CHECK: br {{.+}} 
[[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] 985 986 // check that PrevLB and PrevUB are passed to the 'for' 987 // CHECK: [[PF_BODY]]: 988 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 989 // CHECK: br label {{.+}} 990 991 // check stride 1 for 'for' in 'distribute parallel for' 992 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], 993 // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 994 // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], 995 // CHECK: br label %[[OMP_PF_JUMP_BACK]] 996 997 // CHECK-DAG: call void @__kmpc_for_static_fini( 998 // CHECK: ret 999 } 1000 1001 // dist_schedule: static no chunk 1002 #pragma omp target 1003 #pragma omp teams 1004 // CHECK: define{{.+}} void [[OFFLOADING_FUN_2]]( 1005 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_2:@.+]] to {{.+}}) 1006 1007 #pragma omp distribute parallel for dist_schedule(static) 1008 for (int i = 0; i < n; ++i) { 1009 a[i] = b[i] + c[i]; 1010 // CHECK: define{{.+}} void [[OMP_OUTLINED_2]]( 1011 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca 1012 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca 1013 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca 1014 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca 1015 1016 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 1017 1018 // check EUB for distribute 1019 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], 1020 // CHECK: [[NUM_IT_1:%.+]] = load{{.+}}, 1021 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] 1022 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] 1023 // CHECK-DAG: [[EUB_TRUE]]: 1024 // CHECK: [[NUM_IT_2:%.+]] = load{{.+}}, 1025 // CHECK: br label %[[EUB_END:.+]] 1026 // CHECK-DAG: [[EUB_FALSE]]: 1027 // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], 1028 // CHECK: br label %[[EUB_END]] 1029 // CHECK-DAG: [[EUB_END]]: 1030 
// CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] 1031 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], 1032 1033 // initialize omp.iv 1034 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], 1035 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], 1036 // CHECK: br label %[[OMP_JUMP_BACK:.+]] 1037 1038 // check exit condition 1039 // CHECK: [[OMP_JUMP_BACK]]: 1040 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], 1041 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], 1042 // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] 1043 // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]] 1044 1045 // check that PrevLB and PrevUB are passed to the 'for' 1046 // CHECK: [[DIST_BODY]]: 1047 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], 1048 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} 1049 // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], 1050 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} 1051 // check that distlb and distub are properly passed to fork_call 1052 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) 1053 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) 1054 // CHECK: br label %[[DIST_INC:.+]] 1055 1056 // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch 1057 // CHECK: [[DIST_INC]]: 1058 // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], 1059 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], 1060 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] 
1061 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], 1062 // CHECK: br label %[[OMP_JUMP_BACK]] 1063 1064 // CHECK-DAG: call void @__kmpc_for_static_fini( 1065 // CHECK: ret 1066 1067 // implementation of 'parallel for' 1068 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_2]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 1069 1070 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 1071 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 1072 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 1073 1074 // initialize lb and ub to PrevLB and PrevUB 1075 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 1076 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 1077 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 1078 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 1079 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 1080 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 1081 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 1082 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 1083 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 1084 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 1085 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 1086 1087 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used 1088 // In this case we use EUB 1089 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 1090 // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}}, 1091 // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] 1092 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label 
%[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 1093 // CHECK: [[PF_EUB_TRUE]]: 1094 // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}}, 1095 // CHECK: br label %[[PF_EUB_END:.+]] 1096 // CHECK-DAG: [[PF_EUB_FALSE]]: 1097 // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], 1098 // CHECK: br label %[[PF_EUB_END]] 1099 // CHECK-DAG: [[PF_EUB_END]]: 1100 // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] 1101 // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], 1102 1103 // initialize omp.iv 1104 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 1105 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 1106 // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]] 1107 1108 // check exit condition 1109 // CHECK: [[OMP_PF_JUMP_BACK]]: 1110 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], 1111 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], 1112 // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 1113 // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] 1114 1115 // check that PrevLB and PrevUB are passed to the 'for' 1116 // CHECK: [[PF_BODY]]: 1117 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1118 // CHECK: br label {{.+}} 1119 1120 // check stride 1 for 'for' in 'distribute parallel for' 1121 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], 1122 // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 1123 // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], 1124 // CHECK: br label %[[OMP_PF_JUMP_BACK]] 1125 1126 // CHECK-DAG: call void @__kmpc_for_static_fini( 1127 // CHECK: ret 1128 } 1129 1130 // dist_schedule: static chunk 1131 #pragma omp target 1132 #pragma omp teams 1133 // CHECK: define{{.+}} void [[OFFLOADING_FUN_3]]( 1134 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* 
[[OMP_OUTLINED_3:@.+]] to {{.+}}) 1135 1136 #pragma omp distribute parallel for dist_schedule(static, ch) 1137 for (int i = 0; i < n; ++i) { 1138 a[i] = b[i] + c[i]; 1139 // CHECK: define{{.+}} void [[OMP_OUTLINED_3]]( 1140 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca 1141 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca 1142 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca 1143 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca 1144 1145 // unlike the previous tests, in this one we have an outer and inner loop for 'distribute' 1146 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91, 1147 // CHECK: br label %[[DIST_OUTER_LOOP_HEADER:.+]] 1148 1149 // CHECK: [[DIST_OUTER_LOOP_HEADER]]: 1150 // check EUB for distribute 1151 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], 1152 // CHECK: [[NUM_IT_1:%.+]] = load{{.+}}, 1153 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] 1154 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] 1155 // CHECK-DAG: [[EUB_TRUE]]: 1156 // CHECK: [[NUM_IT_2:%.+]] = load{{.+}}, 1157 // CHECK: br label %[[EUB_END:.+]] 1158 // CHECK-DAG: [[EUB_FALSE]]: 1159 // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], 1160 // CHECK: br label %[[EUB_END]] 1161 // CHECK-DAG: [[EUB_END]]: 1162 // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] 1163 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], 1164 1165 // initialize omp.iv 1166 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], 1167 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], 1168 1169 // check exit condition 1170 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], 1171 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], 1172 // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] 1173 // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label
%[[DIST_OUTER_LOOP_END:.+]] 1174 1175 // CHECK: [[DIST_OUTER_LOOP_BODY]]: 1176 // CHECK: br label %[[DIST_INNER_LOOP_HEADER:.+]] 1177 1178 // CHECK: [[DIST_INNER_LOOP_HEADER]]: 1179 // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]], 1180 // CHECK-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]], 1181 // CHECK: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]] 1182 // CHECK: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]] 1183 1184 // check that PrevLB and PrevUB are passed to the 'for' 1185 // CHECK: [[DIST_INNER_LOOP_BODY]]: 1186 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], 1187 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} 1188 // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], 1189 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} 1190 // check that distlb and distub are properly passed to fork_call 1191 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) 1192 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) 1193 // CHECK: br label %[[DIST_INNER_LOOP_INC:.+]] 1194 1195 // check DistInc 1196 // CHECK: [[DIST_INNER_LOOP_INC]]: 1197 // CHECK-DAG: [[OMP_IV_VAL_3:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], 1198 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], 1199 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]] 1200 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], 1201 // CHECK: br label %[[DIST_INNER_LOOP_HEADER]] 1202 1203 // CHECK: [[DIST_INNER_LOOP_END]]: 1204 // CHECK: br label %[[DIST_OUTER_LOOP_INC:.+]] 1205 1206 // CHECK: [[DIST_OUTER_LOOP_INC]]: 1207 // check NextLB and 
NextUB 1208 // CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]], 1209 // CHECK-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], 1210 // CHECK-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]] 1211 // CHECK: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]], 1212 // CHECK-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]], 1213 // CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], 1214 // CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]] 1215 // CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]], 1216 // CHECK: br label %[[DIST_OUTER_LOOP_HEADER]] 1217 1218 // outer loop exit 1219 // CHECK: [[DIST_OUTER_LOOP_END]]: 1220 // CHECK-DAG: call void @__kmpc_for_static_fini( 1221 // CHECK: ret 1222 1223 // skip implementation of 'parallel for': using default scheduling and was tested above 1224 } 1225 1226 // schedule: static no chunk 1227 #pragma omp target 1228 #pragma omp teams 1229 // CHECK: define{{.+}} void [[OFFLOADING_FUN_4]]( 1230 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_4:@.+]] to {{.+}}) 1231 1232 #pragma omp distribute parallel for schedule(static) 1233 for (int i = 0; i < n; ++i) { 1234 a[i] = b[i] + c[i]; 1235 // CHECK: define{{.+}} void [[OMP_OUTLINED_4]]( 1236 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca 1237 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca 1238 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca 1239 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca 1240 1241 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 1242 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_4:@.+]] to {{.+}}, 1243 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 1244 // CHECK: ret 1245 1246 // 'parallel for' implementation is the same as the case without schedule clause (static no chunk is the default) 1247 // CHECK: define{{.+}}
void [[OMP_PARFOR_OUTLINED_4]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 1248 1249 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 1250 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 1251 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 1252 1253 // initialize lb and ub to PrevLB and PrevUB 1254 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 1255 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 1256 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 1257 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 1258 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 1259 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 1260 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 1261 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 1262 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 1263 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 1264 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 1265 1266 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used 1267 // In this case we use EUB 1268 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 1269 // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}}, 1270 // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] 1271 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 1272 // CHECK: [[PF_EUB_TRUE]]: 1273 // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}}, 1274 // CHECK: br label %[[PF_EUB_END:.+]] 1275 // CHECK-DAG: [[PF_EUB_FALSE]]: 1276 // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} 
[[OMP_PF_UB]], 1277 // CHECK: br label %[[PF_EUB_END]] 1278 // CHECK-DAG: [[PF_EUB_END]]: 1279 // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] 1280 // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], 1281 1282 // initialize omp.iv 1283 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 1284 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 1285 // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]] 1286 1287 // check exit condition 1288 // CHECK: [[OMP_PF_JUMP_BACK]]: 1289 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], 1290 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], 1291 // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 1292 // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] 1293 1294 // check that PrevLB and PrevUB are passed to the 'for' 1295 // CHECK: [[PF_BODY]]: 1296 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1297 // CHECK: br label {{.+}} 1298 1299 // check stride 1 for 'for' in 'distribute parallel for' 1300 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], 1301 // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 1302 // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], 1303 // CHECK: br label %[[OMP_PF_JUMP_BACK]] 1304 1305 // CHECK-DAG: call void @__kmpc_for_static_fini( 1306 // CHECK: ret 1307 } 1308 1309 // schedule: static chunk 1310 #pragma omp target 1311 #pragma omp teams 1312 // CHECK: define{{.+}} void [[OFFLOADING_FUN_5]]( 1313 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_5:@.+]] to {{.+}}) 1314 1315 #pragma omp distribute parallel for schedule(static, ch) 1316 for (int i = 0; i < n; ++i) { 1317 a[i] = b[i] + c[i]; 1318 // CHECK: define{{.+}} void [[OMP_OUTLINED_5]]( 1319 // CHECK: call void 
@__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 1320 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_5:@.+]] to {{.+}}, 1321 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 1322 // CHECK: ret 1323 1324 // 'parallel for' implementation using outer and inner loops and PrevEUB 1325 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_5]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}}) 1326 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 1327 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 1328 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 1329 // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, 1330 1331 // initialize lb and ub to PrevLB and PrevUB 1332 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 1333 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 1334 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 1335 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 1336 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 1337 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 1338 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 1339 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 1340 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 1341 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 1342 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 1343 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] 1344 1345 // check PrevEUB (using PrevUB instead of NumIt as upper bound) 1346 // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]: 1347 // CHECK-DAG: 
[[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 1348 // CHECK-64-DAG: [[OMP_PF_UB_VAL_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_1]] to 1349 // CHECK: [[PF_PREV_UB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 1350 // CHECK-64-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_CONV]], [[PF_PREV_UB_VAL_1]] 1351 // CHECK-32-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_PREV_UB_VAL_1]] 1352 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 1353 // CHECK: [[PF_EUB_TRUE]]: 1354 // CHECK: [[PF_PREV_UB_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 1355 // CHECK: br label %[[PF_EUB_END:.+]] 1356 // CHECK-DAG: [[PF_EUB_FALSE]]: 1357 // CHECK: [[OMP_PF_UB_VAL_2:%.+]] = load{{.+}} [[OMP_PF_UB]], 1358 // CHECK-64: [[OMP_PF_UB_VAL_2_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_2]] to 1359 // CHECK: br label %[[PF_EUB_END]] 1360 // CHECK-DAG: [[PF_EUB_END]]: 1361 // CHECK-64-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2_CONV]], %[[PF_EUB_FALSE]] ] 1362 // CHECK-32-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2]], %[[PF_EUB_FALSE]] ] 1363 // CHECK-64-DAG: [[PF_EUB_RES_CONV:%.+]] = trunc{{.+}} [[PF_EUB_RES]] to 1364 // CHECK-64: store{{.+}} [[PF_EUB_RES_CONV]],{{.+}} [[OMP_PF_UB]], 1365 // CHECK-32: store{{.+}} [[PF_EUB_RES]], {{.+}} [[OMP_PF_UB]], 1366 1367 // initialize omp.iv (IV = LB) 1368 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 1369 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 1370 1371 // outer loop: while (IV < UB) { 1372 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1373 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 1374 // CHECK: [[PF_CMP_IV_UB_1:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 1375 // CHECK: br{{.+}} [[PF_CMP_IV_UB_1]], label 
%[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] 1376 1377 // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]: 1378 // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER:.+]] 1379 1380 // CHECK: [[OMP_PF_INNER_FOR_HEADER]]: 1381 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1382 // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 1383 // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] 1384 // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] 1385 1386 // CHECK: [[OMP_PF_INNER_LOOP_BODY]]: 1387 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1388 // skip body branch 1389 // CHECK: br{{.+}} 1390 // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]] 1391 1392 // IV = IV + 1 and inner loop latch 1393 // CHECK: [[OMP_PF_INNER_LOOP_INC]]: 1394 // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], 1395 // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 1396 // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], 1397 // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER]] 1398 1399 // check NextLB and NextUB 1400 // CHECK: [[OMP_PF_INNER_LOOP_END]]: 1401 // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] 1402 1403 // CHECK: [[OMP_PF_OUTER_LOOP_INC]]: 1404 // CHECK-DAG: [[OMP_PF_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 1405 // CHECK-DAG: [[OMP_PF_ST_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], 1406 // CHECK-DAG: [[OMP_PF_LB_NEXT:%.+]] = add{{.+}} [[OMP_PF_LB_VAL_2]], [[OMP_PF_ST_VAL_1]] 1407 // CHECK: store{{.+}} [[OMP_PF_LB_NEXT]], {{.+}}* [[OMP_PF_LB]], 1408 // CHECK-DAG: [[OMP_PF_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], 1409 // CHECK-DAG: [[OMP_PF_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], 1410 // CHECK-DAG: [[OMP_PF_UB_NEXT:%.+]] = add{{.+}} [[OMP_PF_UB_VAL_5]], [[OMP_PF_ST_VAL_2]] 1411 // CHECK: store{{.+}} [[OMP_PF_UB_NEXT]], {{.+}}* [[OMP_PF_UB]], 
1412 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]] 1413 1414 // CHECK: [[OMP_PF_OUTER_LOOP_END]]: 1415 // CHECK-DAG: call void @__kmpc_for_static_fini( 1416 // CHECK: ret 1417 } 1418 1419 // schedule: dynamic no chunk 1420 #pragma omp target 1421 #pragma omp teams 1422 // CHECK: define{{.+}} void [[OFFLOADING_FUN_6]]( 1423 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_6:@.+]] to {{.+}}) 1424 1425 #pragma omp distribute parallel for schedule(dynamic) 1426 for (int i = 0; i < n; ++i) { 1427 a[i] = b[i] + c[i]; 1428 // CHECK: define{{.+}} void [[OMP_OUTLINED_6]]( 1429 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 1430 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_6:@.+]] to {{.+}}, 1431 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 1432 // CHECK: ret 1433 1434 // 'parallel for' implementation using outer and inner loops and PrevEUB 1435 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_6]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 1436 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 1437 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 1438 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 1439 // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, 1440 1441 // initialize lb and ub to PrevLB and PrevUB 1442 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 1443 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 1444 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 1445 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 1446 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 1447 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 1448 // 
CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 1449 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 1450 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 1451 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 1452 // CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 1453 // CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], 1454 // CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}}) 1455 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] 1456 1457 // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]: 1458 // CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]]) 1459 // CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0 1460 // CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] 1461 1462 // initialize omp.iv (IV = LB) 1463 // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]: 1464 // CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 1465 // CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 1466 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]] 1467 1468 // CHECK: [[OMP_PF_INNER_LOOP_HEADER]]: 1469 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1470 // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 1471 // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] 1472 // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] 1473 1474 // CHECK: [[OMP_PF_INNER_LOOP_BODY]]: 1475 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1476 // skip body branch 1477 // CHECK: br{{.+}} 1478 // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]] 1479 1480 // IV = IV + 1 
and inner loop latch 1481 // CHECK: [[OMP_PF_INNER_LOOP_INC]]: 1482 // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], 1483 // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 1484 // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], 1485 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER]] 1486 1487 // check NextLB and NextUB 1488 // CHECK: [[OMP_PF_INNER_LOOP_END]]: 1489 // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] 1490 1491 // CHECK: [[OMP_PF_OUTER_LOOP_INC]]: 1492 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]] 1493 1494 // CHECK: [[OMP_PF_OUTER_LOOP_END]]: 1495 // CHECK: ret 1496 } 1497 1498 // schedule: dynamic chunk 1499 #pragma omp target 1500 #pragma omp teams 1501 // CHECK: define{{.+}} void [[OFFLOADING_FUN_7]]( 1502 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_7:@.+]] to {{.+}}) 1503 1504 #pragma omp distribute parallel for schedule(dynamic, ch) 1505 for (int i = 0; i < n; ++i) { 1506 a[i] = b[i] + c[i]; 1507 // CHECK: define{{.+}} void [[OMP_OUTLINED_7]]( 1508 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 1509 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_7:@.+]] to {{.+}}, 1510 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 1511 // CHECK: ret 1512 1513 // 'parallel for' implementation using outer and inner loops and PrevEUB 1514 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_7]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}}) 1515 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 1516 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 1517 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 1518 // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, 1519 1520 // initialize lb and ub to PrevLB and PrevUB 1521 // CHECK-DAG: store{{.+}} 
[[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 1522 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 1523 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 1524 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 1525 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 1526 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 1527 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 1528 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 1529 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 1530 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 1531 // CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 1532 // CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], 1533 // CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}}) 1534 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] 1535 1536 // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]: 1537 // CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]]) 1538 // CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0 1539 // CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] 1540 1541 // initialize omp.iv (IV = LB) 1542 // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]: 1543 // CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 1544 // CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 1545 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]] 1546 1547 // CHECK: [[OMP_PF_INNER_LOOP_HEADER]]: 1548 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1549 // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = 
load{{.+}}, {{.+}}* [[OMP_PF_UB]], 1550 // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] 1551 // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] 1552 1553 // CHECK: [[OMP_PF_INNER_LOOP_BODY]]: 1554 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1555 // skip body branch 1556 // CHECK: br{{.+}} 1557 // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]] 1558 1559 // IV = IV + 1 and inner loop latch 1560 // CHECK: [[OMP_PF_INNER_LOOP_INC]]: 1561 // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], 1562 // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 1563 // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], 1564 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER]] 1565 1566 // check NextLB and NextUB 1567 // CHECK: [[OMP_PF_INNER_LOOP_END]]: 1568 // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] 1569 1570 // CHECK: [[OMP_PF_OUTER_LOOP_INC]]: 1571 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]] 1572 1573 // CHECK: [[OMP_PF_OUTER_LOOP_END]]: 1574 // CHECK: ret 1575 } 1576 1577 return tmain<int>(); 1578 #endif 1579 } 1580 1581 // check code 1582 // CHECK: define{{.+}} [[TMAIN]]() 1583 1584 // CHECK: call i{{[0-9]+}} @__tgt_target_teams( 1585 // CHECK: call void [[OFFLOADING_FUN_1:@.+]]( 1586 1587 // CHECK: call i{{[0-9]+}} @__tgt_target_teams( 1588 // CHECK: call void [[OFFLOADING_FUN_2:@.+]]( 1589 1590 // CHECK: call i{{[0-9]+}} @__tgt_target_teams( 1591 // CHECK: call void [[OFFLOADING_FUN_3:@.+]]( 1592 1593 // CHECK: call i{{[0-9]+}} @__tgt_target_teams( 1594 // CHECK: call void [[OFFLOADING_FUN_4:@.+]]( 1595 1596 // CHECK: call i{{[0-9]+}} @__tgt_target_teams( 1597 // CHECK: call void [[OFFLOADING_FUN_5:@.+]]( 1598 1599 // CHECK: call i{{[0-9]+}} @__tgt_target_teams( 1600 // CHECK: call void [[OFFLOADING_FUN_6:@.+]]( 1601 1602 // CHECK: call i{{[0-9]+}} @__tgt_target_teams( 1603 // CHECK: call void 
[[OFFLOADING_FUN_7:@.+]]( 1604 1605 // CHECK: define{{.+}} void [[OFFLOADING_FUN_1]]( 1606 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}}) 1607 1608 // CHECK: define{{.+}} void [[OMP_OUTLINED_1]]( 1609 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca 1610 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca 1611 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca 1612 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca 1613 1614 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 1615 1616 // check EUB for distribute 1617 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], 1618 // CHECK: [[NUM_IT_1:%.+]] = load{{.+}}, 1619 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] 1620 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] 1621 // CHECK-DAG: [[EUB_TRUE]]: 1622 // CHECK: [[NUM_IT_2:%.+]] = load{{.+}}, 1623 // CHECK: br label %[[EUB_END:.+]] 1624 // CHECK-DAG: [[EUB_FALSE]]: 1625 // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], 1626 // CHECK: br label %[[EUB_END]] 1627 // CHECK-DAG: [[EUB_END]]: 1628 // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] 1629 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], 1630 1631 // initialize omp.iv 1632 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], 1633 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], 1634 // CHECK: br label %[[OMP_JUMP_BACK:.+]] 1635 1636 // check exit condition 1637 // CHECK: [[OMP_JUMP_BACK]]: 1638 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], 1639 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], 1640 // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] 1641 // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]] 1642 1643 // check that PrevLB and PrevUB are passed to the 'for' 1644 // 
CHECK: [[DIST_BODY]]: 1645 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], 1646 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} 1647 // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], 1648 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} 1649 // check that distlb and distub are properly passed to fork_call 1650 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) 1651 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) 1652 // CHECK: br label %[[DIST_INC:.+]] 1653 1654 // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch 1655 // CHECK: [[DIST_INC]]: 1656 // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], 1657 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], 1658 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] 1659 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], 1660 // CHECK: br label %[[OMP_JUMP_BACK]] 1661 1662 // CHECK-DAG: call void @__kmpc_for_static_fini( 1663 // CHECK: ret 1664 1665 // implementation of 'parallel for' 1666 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_1]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 1667 1668 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 1669 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 1670 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 1671 1672 // initialize lb and ub to PrevLB and PrevUB 1673 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 1674 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 1675 // CHECK-DAG: 
[[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 1676 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 1677 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 1678 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 1679 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 1680 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 1681 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 1682 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 1683 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 1684 1685 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used 1686 // In this case we use EUB 1687 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 1688 // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}}, 1689 // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] 1690 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 1691 // CHECK: [[PF_EUB_TRUE]]: 1692 // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}}, 1693 // CHECK: br label %[[PF_EUB_END:.+]] 1694 // CHECK-DAG: [[PF_EUB_FALSE]]: 1695 // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], 1696 // CHECK: br label %[[PF_EUB_END]] 1697 // CHECK-DAG: [[PF_EUB_END]]: 1698 // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] 1699 // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], 1700 1701 // initialize omp.iv 1702 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 1703 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 1704 // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]] 1705 1706 // check exit condition 1707 // CHECK: 
[[OMP_PF_JUMP_BACK]]: 1708 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], 1709 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], 1710 // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 1711 // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] 1712 1713 // check that PrevLB and PrevUB are passed to the 'for' 1714 // CHECK: [[PF_BODY]]: 1715 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1716 // CHECK: br label {{.+}} 1717 1718 // check stride 1 for 'for' in 'distribute parallel for' 1719 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], 1720 // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 1721 // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], 1722 // CHECK: br label %[[OMP_PF_JUMP_BACK]] 1723 1724 // CHECK-DAG: call void @__kmpc_for_static_fini( 1725 // CHECK: ret 1726 1727 // CHECK: define{{.+}} void [[OFFLOADING_FUN_2]]( 1728 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_2:@.+]] to {{.+}}) 1729 1730 // CHECK: define{{.+}} void [[OMP_OUTLINED_2]]( 1731 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca 1732 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca 1733 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca 1734 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca 1735 1736 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 1737 1738 // check EUB for distribute 1739 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], 1740 // CHECK: [[NUM_IT_1:%.+]] = load{{.+}}, 1741 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] 1742 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] 1743 // CHECK-DAG: [[EUB_TRUE]]: 1744 // CHECK: [[NUM_IT_2:%.+]] = load{{.+}}, 1745 // CHECK: br label %[[EUB_END:.+]] 1746 // CHECK-DAG: [[EUB_FALSE]]: 1747 // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} 
[[OMP_UB]], 1748 // CHECK: br label %[[EUB_END]] 1749 // CHECK-DAG: [[EUB_END]]: 1750 // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] 1751 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], 1752 1753 // initialize omp.iv 1754 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], 1755 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], 1756 // CHECK: br label %[[OMP_JUMP_BACK:.+]] 1757 1758 // check exit condition 1759 // CHECK: [[OMP_JUMP_BACK]]: 1760 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], 1761 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], 1762 // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] 1763 // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]] 1764 1765 // check that PrevLB and PrevUB are passed to the 'for' 1766 // CHECK: [[DIST_BODY]]: 1767 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], 1768 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} 1769 // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], 1770 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} 1771 // check that distlb and distub are properly passed to fork_call 1772 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) 1773 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) 1774 // CHECK: br label %[[DIST_INC:.+]] 1775 1776 // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch 1777 // CHECK: [[DIST_INC]]: 1778 // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], 1779 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* 
[[OMP_ST]], 1780 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] 1781 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], 1782 // CHECK: br label %[[OMP_JUMP_BACK]] 1783 1784 // CHECK-DAG: call void @__kmpc_for_static_fini( 1785 // CHECK: ret 1786 1787 // implementation of 'parallel for' 1788 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_2]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 1789 1790 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 1791 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 1792 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 1793 1794 // initialize lb and ub to PrevLB and PrevUB 1795 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 1796 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 1797 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 1798 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 1799 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 1800 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 1801 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 1802 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 1803 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 1804 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 1805 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 1806 1807 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used 1808 // In this case we use EUB 1809 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 1810 // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}}, 1811 // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} 
[[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] 1812 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 1813 // CHECK: [[PF_EUB_TRUE]]: 1814 // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}}, 1815 // CHECK: br label %[[PF_EUB_END:.+]] 1816 // CHECK-DAG: [[PF_EUB_FALSE]]: 1817 // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], 1818 // CHECK: br label %[[PF_EUB_END]] 1819 // CHECK-DAG: [[PF_EUB_END]]: 1820 // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] 1821 // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], 1822 1823 // initialize omp.iv 1824 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 1825 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 1826 // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]] 1827 1828 // check exit condition 1829 // CHECK: [[OMP_PF_JUMP_BACK]]: 1830 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], 1831 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], 1832 // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 1833 // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] 1834 1835 // check that PrevLB and PrevUB are passed to the 'for' 1836 // CHECK: [[PF_BODY]]: 1837 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1838 // CHECK: br label {{.+}} 1839 1840 // check stride 1 for 'for' in 'distribute parallel for' 1841 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], 1842 // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 1843 // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], 1844 // CHECK: br label %[[OMP_PF_JUMP_BACK]] 1845 1846 // CHECK-DAG: call void @__kmpc_for_static_fini( 1847 // CHECK: ret 1848 1849 // CHECK: define{{.+}} void [[OFFLOADING_FUN_3]]( 1850 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* 
[[OMP_OUTLINED_3:@.+]] to {{.+}}) 1851 1852 // CHECK: define{{.+}} void [[OMP_OUTLINED_3]]( 1853 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca 1854 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca 1855 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca 1856 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca 1857 1858 // unlike the previous tests, in this one we have an outer and inner loop for 'distribute' 1859 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91, 1860 // CHECK: br label %[[DIST_OUTER_LOOP_HEADER:.+]] 1861 1862 // CHECK: [[DIST_OUTER_LOOP_HEADER]]: 1863 // check EUB for distribute 1864 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], 1865 // CHECK: [[NUM_IT_1:%.+]] = load{{.+}}, 1866 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] 1867 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] 1868 // CHECK-DAG: [[EUB_TRUE]]: 1869 // CHECK: [[NUM_IT_2:%.+]] = load{{.+}}, 1870 // CHECK: br label %[[EUB_END:.+]] 1871 // CHECK-DAG: [[EUB_FALSE]]: 1872 // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], 1873 // CHECK: br label %[[EUB_END]] 1874 // CHECK-DAG: [[EUB_END]]: 1875 // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] 1876 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], 1877 1878 // initialize omp.iv 1879 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], 1880 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], 1881 1882 // check exit condition 1883 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], 1884 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], 1885 // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] 1886 // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]] 1887 1888 // CHECK: [[DIST_OUTER_LOOP_BODY]]: 1889 // CHECK: br label %[[DIST_INNER_LOOP_HEADER:.+]] 1890 1891 
// CHECK: [[DIST_INNER_LOOP_HEADER]]: 1892 // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]], 1893 // CHECK-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]], 1894 // CHECK: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]] 1895 // CHECK: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]] 1896 1897 // check that PrevLB and PrevUB are passed to the 'for' 1898 // CHECK: [[DIST_INNER_LOOP_BODY]]: 1899 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], 1900 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} 1901 // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], 1902 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} 1903 // check that distlb and distub are properly passed to fork_call 1904 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) 1905 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) 1906 // CHECK: br label %[[DIST_INNER_LOOP_INC:.+]] 1907 1908 // check DistInc 1909 // CHECK: [[DIST_INNER_LOOP_INC]]: 1910 // CHECK-DAG: [[OMP_IV_VAL_3:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], 1911 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], 1912 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]] 1913 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], 1914 // CHECK: br label %[[DIST_INNER_LOOP_HEADER]] 1915 1916 // CHECK: [[DIST_INNER_LOOP_END]]: 1917 // CHECK: br label %[[DIST_OUTER_LOOP_INC:.+]] 1918 1919 // CHECK: [[DIST_OUTER_LOOP_INC]]: 1920 // check NextLB and NextUB 1921 // CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]], 1922 // CHECK-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} 
[[OMP_ST]], 1923 // CHECK-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]] 1924 // CHECK: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]], 1925 // CHECK-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]], 1926 // CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], 1927 // CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]] 1928 // CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]], 1929 // CHECK: br label %[[DIST_OUTER_LOOP_HEADER]] 1930 1931 // outer loop exit 1932 // CHECK: [[DIST_OUTER_LOOP_END]]: 1933 // CHECK-DAG: call void @__kmpc_for_static_fini( 1934 // CHECK: ret 1935 1936 // skip implementation of 'parallel for': using default scheduling and was tested above 1937 1938 // CHECK: define{{.+}} void [[OFFLOADING_FUN_4]]( 1939 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_4:@.+]] to {{.+}}) 1940 1941 // CHECK: define{{.+}} void [[OMP_OUTLINED_4]]( 1942 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca 1943 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca 1944 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca 1945 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca 1946 1947 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 1948 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_4:@.+]] to {{.+}}, 1949 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 1950 // CHECK: ret 1951 1952 // 'parallel for' implementation is the same as the case without schedule clause (static no chunk is the default) 1953 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_4]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 1954 1955 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 1956 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 1957 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 1958 1959 // 
initialize lb and ub to PrevLB and PrevUB 1960 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 1961 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 1962 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 1963 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 1964 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 1965 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 1966 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 1967 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 1968 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 1969 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 1970 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 1971 1972 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used 1973 // In this case we use EUB 1974 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 1975 // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}}, 1976 // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] 1977 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 1978 // CHECK: [[PF_EUB_TRUE]]: 1979 // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}}, 1980 // CHECK: br label %[[PF_EUB_END:.+]] 1981 // CHECK-DAG: [[PF_EUB_FALSE]]: 1982 // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], 1983 // CHECK: br label %[[PF_EUB_END]] 1984 // CHECK-DAG: [[PF_EUB_END]]: 1985 // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] 1986 // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], 1987 1988 // initialize omp.iv 1989 // CHECK: 
[[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 1990 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 1991 // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]] 1992 1993 // check exit condition 1994 // CHECK: [[OMP_PF_JUMP_BACK]]: 1995 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], 1996 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], 1997 // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 1998 // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] 1999 2000 // check that PrevLB and PrevUB are passed to the 'for' 2001 // CHECK: [[PF_BODY]]: 2002 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 2003 // CHECK: br label {{.+}} 2004 2005 // check stride 1 for 'for' in 'distribute parallel for' 2006 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], 2007 // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 2008 // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], 2009 // CHECK: br label %[[OMP_PF_JUMP_BACK]] 2010 2011 // CHECK-DAG: call void @__kmpc_for_static_fini( 2012 // CHECK: ret 2013 2014 // CHECK: define{{.+}} void [[OFFLOADING_FUN_5]]( 2015 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_5:@.+]] to {{.+}}) 2016 2017 // CHECK: define{{.+}} void [[OMP_OUTLINED_5]]( 2018 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 2019 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_5:@.+]] to {{.+}}, 2020 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 2021 // CHECK: ret 2022 2023 // 'parallel for' implementation using outer and inner loops and PrevEUB 2024 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_5]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}}) 2025 // 
CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 2026 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 2027 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 2028 // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, 2029 2030 // initialize lb and ub to PrevLB and PrevUB 2031 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 2032 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 2033 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 2034 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 2035 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 2036 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 2037 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 2038 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 2039 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 2040 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 2041 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 2042 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] 2043 2044 // check PrevEUB (using PrevUB instead of NumIt as upper bound) 2045 // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]: 2046 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 2047 // CHECK-64-DAG: [[OMP_PF_UB_VAL_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_1]] to 2048 // CHECK: [[PF_PREV_UB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 2049 // CHECK-64-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_CONV]], [[PF_PREV_UB_VAL_1]] 2050 // CHECK-32-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_PREV_UB_VAL_1]] 2051 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 2052 // CHECK: [[PF_EUB_TRUE]]: 2053 // 
CHECK: [[PF_PREV_UB_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 2054 // CHECK: br label %[[PF_EUB_END:.+]] 2055 // CHECK-DAG: [[PF_EUB_FALSE]]: 2056 // CHECK: [[OMP_PF_UB_VAL_2:%.+]] = load{{.+}} [[OMP_PF_UB]], 2057 // CHECK-64: [[OMP_PF_UB_VAL_2_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_2]] to 2058 // CHECK: br label %[[PF_EUB_END]] 2059 // CHECK-DAG: [[PF_EUB_END]]: 2060 // CHECK-64-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2_CONV]], %[[PF_EUB_FALSE]] ] 2061 // CHECK-32-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2]], %[[PF_EUB_FALSE]] ] 2062 // CHECK-64-DAG: [[PF_EUB_RES_CONV:%.+]] = trunc{{.+}} [[PF_EUB_RES]] to 2063 // CHECK-64: store{{.+}} [[PF_EUB_RES_CONV]],{{.+}} [[OMP_PF_UB]], 2064 // CHECK-32: store{{.+}} [[PF_EUB_RES]], {{.+}} [[OMP_PF_UB]], 2065 2066 // initialize omp.iv (IV = LB) 2067 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 2068 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 2069 2070 // outer loop: while (IV < UB) { 2071 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 2072 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 2073 // CHECK: [[PF_CMP_IV_UB_1:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 2074 // CHECK: br{{.+}} [[PF_CMP_IV_UB_1]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] 2075 2076 // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]: 2077 // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER:.+]] 2078 2079 // CHECK: [[OMP_PF_INNER_FOR_HEADER]]: 2080 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 2081 // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 2082 // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] 2083 // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label 
%[[OMP_PF_INNER_LOOP_END:.+]] 2084 2085 // CHECK: [[OMP_PF_INNER_LOOP_BODY]]: 2086 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 2087 // skip body branch 2088 // CHECK: br{{.+}} 2089 // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]] 2090 2091 // IV = IV + 1 and inner loop latch 2092 // CHECK: [[OMP_PF_INNER_LOOP_INC]]: 2093 // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], 2094 // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 2095 // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], 2096 // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER]] 2097 2098 // check NextLB and NextUB 2099 // CHECK: [[OMP_PF_INNER_LOOP_END]]: 2100 // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] 2101 2102 // CHECK: [[OMP_PF_OUTER_LOOP_INC]]: 2103 // CHECK-DAG: [[OMP_PF_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 2104 // CHECK-DAG: [[OMP_PF_ST_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], 2105 // CHECK-DAG: [[OMP_PF_LB_NEXT:%.+]] = add{{.+}} [[OMP_PF_LB_VAL_2]], [[OMP_PF_ST_VAL_1]] 2106 // CHECK: store{{.+}} [[OMP_PF_LB_NEXT]], {{.+}}* [[OMP_PF_LB]], 2107 // CHECK-DAG: [[OMP_PF_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], 2108 // CHECK-DAG: [[OMP_PF_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], 2109 // CHECK-DAG: [[OMP_PF_UB_NEXT:%.+]] = add{{.+}} [[OMP_PF_UB_VAL_5]], [[OMP_PF_ST_VAL_2]] 2110 // CHECK: store{{.+}} [[OMP_PF_UB_NEXT]], {{.+}}* [[OMP_PF_UB]], 2111 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]] 2112 2113 // CHECK: [[OMP_PF_OUTER_LOOP_END]]: 2114 // CHECK-DAG: call void @__kmpc_for_static_fini( 2115 // CHECK: ret 2116 2117 // CHECK: define{{.+}} void [[OFFLOADING_FUN_6]]( 2118 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_6:@.+]] to {{.+}}) 2119 2120 // CHECK: define{{.+}} void [[OMP_OUTLINED_6]]( 2121 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 2122 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, 
{{.+}}[[OMP_PARFOR_OUTLINED_6:@.+]] to {{.+}}, 2123 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 2124 // CHECK: ret 2125 2126 // 'parallel for' implementation using outer and inner loops and PrevEUB 2127 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_6]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 2128 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 2129 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 2130 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 2131 // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, 2132 2133 // initialize lb and ub to PrevLB and PrevUB 2134 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 2135 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 2136 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 2137 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 2138 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 2139 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 2140 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 2141 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 2142 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 2143 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 2144 // CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 2145 // CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], 2146 // CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}}) 2147 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] 2148 2149 // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]: 2150 // CHECK: [[IS_FIN:%.+]] = call{{.+}} 
@__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]]) 2151 // CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0 2152 // CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] 2153 2154 // initialize omp.iv (IV = LB) 2155 // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]: 2156 // CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 2157 // CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 2158 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]] 2159 2160 // CHECK: [[OMP_PF_INNER_LOOP_HEADER]]: 2161 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 2162 // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 2163 // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] 2164 // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] 2165 2166 // CHECK: [[OMP_PF_INNER_LOOP_BODY]]: 2167 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 2168 // skip body branch 2169 // CHECK: br{{.+}} 2170 // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]] 2171 2172 // IV = IV + 1 and inner loop latch 2173 // CHECK: [[OMP_PF_INNER_LOOP_INC]]: 2174 // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], 2175 // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 2176 // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], 2177 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER]] 2178 2179 // check NextLB and NextUB 2180 // CHECK: [[OMP_PF_INNER_LOOP_END]]: 2181 // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] 2182 2183 // CHECK: [[OMP_PF_OUTER_LOOP_INC]]: 2184 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]] 2185 2186 // CHECK: [[OMP_PF_OUTER_LOOP_END]]: 2187 // CHECK: ret 2188 2189 // CHECK: define{{.+}} void [[OFFLOADING_FUN_7]]( 2190 // CHECK: call {{.*}}void {{.+}} 
@__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_7:@.+]] to {{.+}}) 2191 2192 // CHECK: define{{.+}} void [[OMP_OUTLINED_7]]( 2193 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 2194 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_7:@.+]] to {{.+}}, 2195 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 2196 // CHECK: ret 2197 2198 // 'parallel for' implementation using outer and inner loops and PrevEUB 2199 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_7]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}}) 2200 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 2201 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 2202 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 2203 // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, 2204 2205 // initialize lb and ub to PrevLB and PrevUB 2206 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 2207 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 2208 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 2209 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 2210 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 2211 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 2212 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 2213 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 2214 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 2215 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 2216 // CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 2217 // CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], 2218 // CHECK: call void 
@__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}}) 2219 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] 2220 2221 // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]: 2222 // CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]]) 2223 // CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0 2224 // CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] 2225 2226 // initialize omp.iv (IV = LB) 2227 // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]: 2228 // CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 2229 // CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 2230 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]] 2231 2232 // CHECK: [[OMP_PF_INNER_LOOP_HEADER]]: 2233 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 2234 // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 2235 // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] 2236 // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] 2237 2238 // CHECK: [[OMP_PF_INNER_LOOP_BODY]]: 2239 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 2240 // skip body branch 2241 // CHECK: br{{.+}} 2242 // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]] 2243 2244 // IV = IV + 1 and inner loop latch 2245 // CHECK: [[OMP_PF_INNER_LOOP_INC]]: 2246 // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], 2247 // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 2248 // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], 2249 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER]] 2250 2251 // check NextLB and NextUB 2252 // CHECK: [[OMP_PF_INNER_LOOP_END]]: 2253 // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] 2254 
2255 // CHECK: [[OMP_PF_OUTER_LOOP_INC]]: 2256 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]] 2257 2258 // CHECK: [[OMP_PF_OUTER_LOOP_END]]: 2259 // CHECK: ret 2260 #endif 2261