// Test host code gen
// Verifies the LLVM IR emitted on the host for '#pragma omp distribute parallel for'
// nested in target/teams, across the dist_schedule/schedule clause combinations,
// for a template function (tmain) and for main (with and without -DLAMBDA).
// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
// RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
// RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32

// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
// expected-no-diagnostics
#ifndef HEADER
#define HEADER


// Template version of the test: one target/teams/distribute-parallel-for
// region per clause combination. The IR for the instantiations of this
// function is matched by the CHECK/CHECK-64/CHECK-32 prefixes later in
// the file (not by the LAMBDA prefixes).
template <typename T>
T tmain() {
  T *a, *b, *c;
  int n = 10000;
  int ch = 100;

  // no schedule clauses
#pragma omp target
#pragma omp teams
#pragma omp distribute parallel for
  for (int i = 0; i < n; ++i) {
#pragma omp cancel for
    a[i] = b[i] + c[i];
  }

  // dist_schedule: static no chunk
#pragma omp target
#pragma omp teams
#pragma omp distribute parallel for dist_schedule(static)
  for (int i = 0; i < n; ++i) {
    a[i] = b[i] + c[i];
  }

  // dist_schedule: static chunk
#pragma omp target
#pragma omp teams
#pragma omp distribute parallel for dist_schedule(static, ch)
  for (int i = 0; i < n; ++i) {
    a[i] = b[i] + c[i];
  }

  // schedule: static no chunk
#pragma omp target
#pragma omp teams
#pragma omp distribute parallel for schedule(static)
  for (int i = 0; i < n; ++i) {
    a[i] = b[i] + c[i];
  }

  // schedule: static chunk
#pragma omp target
#pragma omp teams
#pragma omp distribute parallel for schedule(static, ch)
  for (int i = 0; i < n; ++i) {
    a[i] = b[i] + c[i];
  }

  // schedule: dynamic no chunk
#pragma omp target
#pragma omp teams
#pragma omp distribute parallel for schedule(dynamic)
  for (int i = 0; i < n; ++i) {
    a[i] = b[i] + c[i];
  }

  // schedule: dynamic chunk
#pragma omp target
#pragma omp teams
#pragma omp distribute parallel for schedule(dynamic, ch)
  for (int i = 0; i < n; ++i) {
    a[i] = b[i] + c[i];
  }

  return T();
}

int main() {
  double *a, *b, *c;
  int n = 10000;
  int ch = 100;

#ifdef LAMBDA
  // LAMBDA-LABEL: @main
  // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]](
  [&]() {
  // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](

  // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
  // LAMBDA: call void [[OFFLOADING_FUN_1:@.+]](

  // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
  // LAMBDA: call void [[OFFLOADING_FUN_2:@.+]](

  // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
  // LAMBDA: call void [[OFFLOADING_FUN_3:@.+]](

  // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
  // LAMBDA: call void [[OFFLOADING_FUN_4:@.+]](

  // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
  // LAMBDA: call void [[OFFLOADING_FUN_5:@.+]](

  // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
  // LAMBDA: call void [[OFFLOADING_FUN_6:@.+]](

  // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
  // LAMBDA: call void [[OFFLOADING_FUN_7:@.+]](

  // no schedule clauses
#pragma omp target
#pragma omp teams
  // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_1]](
  // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}})

#pragma omp distribute parallel for
  for (int i = 0; i < n; ++i) {
    a[i] = b[i] + c[i];
    // LAMBDA: define{{.+}} void [[OMP_OUTLINED_1]](
    // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca
    // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
    // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
    // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca

    // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,

    // check EUB for distribute
    // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
137 // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}}, 138 // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] 139 // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] 140 // LAMBDA-DAG: [[EUB_TRUE]]: 141 // LAMBDA: [[NUM_IT_2:%.+]] = load{{.+}}, 142 // LAMBDA: br label %[[EUB_END:.+]] 143 // LAMBDA-DAG: [[EUB_FALSE]]: 144 // LAMBDA: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], 145 // LAMBDA: br label %[[EUB_END]] 146 // LAMBDA-DAG: [[EUB_END]]: 147 // LAMBDA-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] 148 // LAMBDA: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], 149 150 // initialize omp.iv 151 // LAMBDA: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], 152 // LAMBDA: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], 153 // LAMBDA: br label %[[OMP_JUMP_BACK:.+]] 154 155 // check exit condition 156 // LAMBDA: [[OMP_JUMP_BACK]]: 157 // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], 158 // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], 159 // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] 160 // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]] 161 162 // check that PrevLB and PrevUB are passed to the 'for' 163 // LAMBDA: [[DIST_BODY]]: 164 // LAMBDA-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], 165 // LAMBDA-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to 166 // LAMBDA-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], 167 // LAMBDA-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to 168 // check that distlb and distub are properly passed to fork_call 169 // LAMBDA-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) 170 // LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, 
{{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) 171 // LAMBDA: br label %[[DIST_INC:.+]] 172 173 // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch 174 // LAMBDA: [[DIST_INC]]: 175 // LAMBDA-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], 176 // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], 177 // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] 178 // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], 179 // LAMBDA: br label %[[OMP_JUMP_BACK]] 180 181 // LAMBDA-DAG: call void @__kmpc_for_static_fini( 182 // LAMBDA: ret 183 184 // implementation of 'parallel for' 185 // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_1]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 186 187 // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 188 // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 189 // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 190 191 // initialize lb and ub to PrevLB and PrevUB 192 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 193 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 194 // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 195 // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 196 // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 197 // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 198 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 199 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 200 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 201 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 202 // LAMBDA: call void 
@__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 203 204 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used 205 // In this case we use EUB 206 // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 207 // LAMBDA: [[PF_NUM_IT_1:%.+]] = load{{.+}}, 208 // LAMBDA-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] 209 // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 210 // LAMBDA: [[PF_EUB_TRUE]]: 211 // LAMBDA: [[PF_NUM_IT_2:%.+]] = load{{.+}}, 212 // LAMBDA: br label %[[PF_EUB_END:.+]] 213 // LAMBDA-DAG: [[PF_EUB_FALSE]]: 214 // LAMBDA: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], 215 // LAMBDA: br label %[[PF_EUB_END]] 216 // LAMBDA-DAG: [[PF_EUB_END]]: 217 // LAMBDA-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] 218 // LAMBDA: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], 219 220 // initialize omp.iv 221 // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 222 // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 223 // LAMBDA: br label %[[OMP_PF_JUMP_BACK:.+]] 224 225 // check exit condition 226 // LAMBDA: [[OMP_PF_JUMP_BACK]]: 227 // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], 228 // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], 229 // LAMBDA: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 230 // LAMBDA: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] 231 232 // check that PrevLB and PrevUB are passed to the 'for' 233 // LAMBDA: [[PF_BODY]]: 234 // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 235 // LAMBDA: br label {{.+}} 236 237 // check stride 1 for 'for' in 'distribute parallel for' 238 // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], 239 
// LAMBDA: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 240 // LAMBDA: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], 241 // LAMBDA: br label %[[OMP_PF_JUMP_BACK]] 242 243 // LAMBDA-DAG: call void @__kmpc_for_static_fini( 244 // LAMBDA: ret 245 246 [&]() { 247 a[i] = b[i] + c[i]; 248 }(); 249 } 250 251 // dist_schedule: static no chunk (same sa default - no dist_schedule) 252 #pragma omp target 253 #pragma omp teams 254 // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_2]]( 255 // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_2:@.+]] to {{.+}}) 256 257 #pragma omp distribute parallel for dist_schedule(static) 258 for (int i = 0; i < n; ++i) { 259 a[i] = b[i] + c[i]; 260 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_2]]( 261 // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca 262 // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca 263 // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca 264 // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca 265 266 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 267 268 // check EUB for distribute 269 // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], 270 // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}}, 271 // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] 272 // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] 273 // LAMBDA-DAG: [[EUB_TRUE]]: 274 // LAMBDA: [[NUM_IT_2:%.+]] = load{{.+}}, 275 // LAMBDA: br label %[[EUB_END:.+]] 276 // LAMBDA-DAG: [[EUB_FALSE]]: 277 // LAMBDA: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], 278 // LAMBDA: br label %[[EUB_END]] 279 // LAMBDA-DAG: [[EUB_END]]: 280 // LAMBDA-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] 281 // LAMBDA: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], 282 283 // initialize omp.iv 284 // LAMBDA: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], 285 // LAMBDA: store {{.+}} 
[[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], 286 // LAMBDA: br label %[[OMP_JUMP_BACK:.+]] 287 288 // check exit condition 289 // LAMBDA: [[OMP_JUMP_BACK]]: 290 // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], 291 // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], 292 // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] 293 // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]] 294 295 // check that PrevLB and PrevUB are passed to the 'for' 296 // LAMBDA: [[DIST_BODY]]: 297 // LAMBDA-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], 298 // LAMBDA-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to 299 // LAMBDA-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], 300 // LAMBDA-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to 301 // check that distlb and distub are properly passed to fork_call 302 // LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) 303 // LAMBDA-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) 304 // LAMBDA: br label %[[DIST_INC:.+]] 305 306 // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch 307 // LAMBDA: [[DIST_INC]]: 308 // LAMBDA-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], 309 // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], 310 // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] 311 // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], 312 // LAMBDA: br label %[[OMP_JUMP_BACK]] 313 314 // LAMBDA-DAG: call void @__kmpc_for_static_fini( 315 // LAMBDA: ret 316 317 // implementation of 'parallel for' 318 // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_2]]({{.+}}, {{.+}}, i{{[0-9]+}} 
[[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 319 320 // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 321 // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 322 // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 323 324 // initialize lb and ub to PrevLB and PrevUB 325 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 326 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 327 // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 328 // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 329 // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 330 // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 331 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 332 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 333 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 334 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 335 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 336 337 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used 338 // In this case we use EUB 339 // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 340 // LAMBDA: [[PF_NUM_IT_1:%.+]] = load{{.+}}, 341 // LAMBDA-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] 342 // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 343 // LAMBDA: [[PF_EUB_TRUE]]: 344 // LAMBDA: [[PF_NUM_IT_2:%.+]] = load{{.+}}, 345 // LAMBDA: br label %[[PF_EUB_END:.+]] 346 // LAMBDA-DAG: [[PF_EUB_FALSE]]: 347 // LAMBDA: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], 348 // LAMBDA: br label %[[PF_EUB_END]] 349 // LAMBDA-DAG: 
[[PF_EUB_END]]: 350 // LAMBDA-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] 351 // LAMBDA: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], 352 353 // initialize omp.iv 354 // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 355 // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 356 // LAMBDA: br label %[[OMP_PF_JUMP_BACK:.+]] 357 358 // check exit condition 359 // LAMBDA: [[OMP_PF_JUMP_BACK]]: 360 // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], 361 // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], 362 // LAMBDA: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 363 // LAMBDA: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] 364 365 // check that PrevLB and PrevUB are passed to the 'for' 366 // LAMBDA: [[PF_BODY]]: 367 // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 368 // LAMBDA: br label {{.+}} 369 370 // check stride 1 for 'for' in 'distribute parallel for' 371 // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], 372 // LAMBDA: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 373 // LAMBDA: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], 374 // LAMBDA: br label %[[OMP_PF_JUMP_BACK]] 375 376 // LAMBDA-DAG: call void @__kmpc_for_static_fini( 377 // LAMBDA: ret 378 [&]() { 379 a[i] = b[i] + c[i]; 380 }(); 381 } 382 383 // dist_schedule: static chunk 384 #pragma omp target 385 #pragma omp teams 386 // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_3]]( 387 // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}}) 388 389 #pragma omp distribute parallel for dist_schedule(static, ch) 390 for (int i = 0; i < n; ++i) { 391 a[i] = b[i] + c[i]; 392 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_3]]( 393 // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca 394 // LAMBDA-DAG: 
[[OMP_LB:%.omp.comb.lb]] = alloca 395 // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca 396 // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca 397 398 // unlike the previous tests, in this one we have a outer and inner loop for 'distribute' 399 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91, 400 // LAMBDA: br label %[[DIST_OUTER_LOOP_HEADER:.+]] 401 402 // LAMBDA: [[DIST_OUTER_LOOP_HEADER]]: 403 // check EUB for distribute 404 // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], 405 // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}}, 406 // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] 407 // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] 408 // LAMBDA-DAG: [[EUB_TRUE]]: 409 // LAMBDA: [[NUM_IT_2:%.+]] = load{{.+}}, 410 // LAMBDA: br label %[[EUB_END:.+]] 411 // LAMBDA-DAG: [[EUB_FALSE]]: 412 // LAMBDA: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], 413 // LAMBDA: br label %[[EUB_END]] 414 // LAMBDA-DAG: [[EUB_END]]: 415 // LAMBDA-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] 416 // LAMBDA: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], 417 418 // initialize omp.iv 419 // LAMBDA: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], 420 // LAMBDA: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], 421 422 // check exit condition 423 // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], 424 // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], 425 // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] 426 // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]] 427 428 // LAMBDA: [[DIST_OUTER_LOOP_BODY]]: 429 // LAMBDA: br label %[[DIST_INNER_LOOP_HEADER:.+]] 430 431 // LAMBDA: [[DIST_INNER_LOOP_HEADER]]: 432 // LAMBDA-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]], 433 // LAMBDA-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} 
[[OMP_UB]], 434 // LAMBDA: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]] 435 // LAMBDA: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]] 436 437 // check that PrevLB and PrevUB are passed to the 'for' 438 // LAMBDA: [[DIST_INNER_LOOP_BODY]]: 439 // LAMBDA-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], 440 // LAMBDA-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} 441 // LAMBDA-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], 442 // LAMBDA-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} 443 // check that distlb and distub are properly passed to fork_call 444 // LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) 445 // LAMBDA-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) 446 // LAMBDA: br label %[[DIST_INNER_LOOP_INC:.+]] 447 448 // check DistInc 449 // LAMBDA: [[DIST_INNER_LOOP_INC]]: 450 // LAMBDA-DAG: [[OMP_IV_VAL_3:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], 451 // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], 452 // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]] 453 // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], 454 // LAMBDA: br label %[[DIST_INNER_LOOP_HEADER]] 455 456 // LAMBDA: [[DIST_INNER_LOOP_END]]: 457 // LAMBDA: br label %[[DIST_OUTER_LOOP_INC:.+]] 458 459 // LAMBDA: [[DIST_OUTER_LOOP_INC]]: 460 // check NextLB and NextUB 461 // LAMBDA-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]], 462 // LAMBDA-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], 463 // LAMBDA-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]] 464 // LAMBDA: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]], 465 // 
LAMBDA-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]], 466 // LAMBDA-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], 467 // LAMBDA-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]] 468 // LAMBDA: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]], 469 // LAMBDA: br label %[[DIST_OUTER_LOOP_HEADER]] 470 471 // outer loop exit 472 // LAMBDA: [[DIST_OUTER_LOOP_END]]: 473 // LAMBDA-DAG: call void @__kmpc_for_static_fini( 474 // LAMBDA: ret 475 476 // skip implementation of 'parallel for': using default scheduling and was tested above 477 [&]() { 478 a[i] = b[i] + c[i]; 479 }(); 480 } 481 482 // schedule: static no chunk 483 #pragma omp target 484 #pragma omp teams 485 // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_4]]( 486 // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_4:@.+]] to {{.+}}) 487 488 #pragma omp distribute parallel for schedule(static) 489 for (int i = 0; i < n; ++i) { 490 a[i] = b[i] + c[i]; 491 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_4]]( 492 // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca 493 // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca 494 // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca 495 // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca 496 497 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 498 // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_4:@.+]] to {{.+}}, 499 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 500 // LAMBDA: ret 501 502 // 'parallel for' implementation is the same as the case without schedule clase (static no chunk is the default) 503 // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_4]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 504 505 // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 506 // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 507 // 
LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 508 509 // initialize lb and ub to PrevLB and PrevUB 510 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 511 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 512 // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 513 // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 514 // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 515 // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 516 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 517 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 518 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 519 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 520 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 521 522 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used 523 // In this case we use EUB 524 // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 525 // LAMBDA: [[PF_NUM_IT_1:%.+]] = load{{.+}}, 526 // LAMBDA-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] 527 // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 528 // LAMBDA: [[PF_EUB_TRUE]]: 529 // LAMBDA: [[PF_NUM_IT_2:%.+]] = load{{.+}}, 530 // LAMBDA: br label %[[PF_EUB_END:.+]] 531 // LAMBDA-DAG: [[PF_EUB_FALSE]]: 532 // LAMBDA: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], 533 // LAMBDA: br label %[[PF_EUB_END]] 534 // LAMBDA-DAG: [[PF_EUB_END]]: 535 // LAMBDA-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] 536 // LAMBDA: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], 537 538 
// initialize omp.iv 539 // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 540 // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 541 // LAMBDA: br label %[[OMP_PF_JUMP_BACK:.+]] 542 543 // check exit condition 544 // LAMBDA: [[OMP_PF_JUMP_BACK]]: 545 // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], 546 // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], 547 // LAMBDA: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 548 // LAMBDA: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] 549 550 // check that PrevLB and PrevUB are passed to the 'for' 551 // LAMBDA: [[PF_BODY]]: 552 // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 553 // LAMBDA: br label {{.+}} 554 555 // check stride 1 for 'for' in 'distribute parallel for' 556 // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], 557 // LAMBDA: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 558 // LAMBDA: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], 559 // LAMBDA: br label %[[OMP_PF_JUMP_BACK]] 560 561 // LAMBDA-DAG: call void @__kmpc_for_static_fini( 562 // LAMBDA: ret 563 564 [&]() { 565 a[i] = b[i] + c[i]; 566 }(); 567 } 568 569 // schedule: static chunk 570 #pragma omp target 571 #pragma omp teams 572 // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_5]]( 573 // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_5:@.+]] to {{.+}}) 574 575 #pragma omp distribute parallel for schedule(static, ch) 576 for (int i = 0; i < n; ++i) { 577 a[i] = b[i] + c[i]; 578 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_5]]( 579 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 580 // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_5:@.+]] to {{.+}}, 581 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 582 // LAMBDA: ret 583 584 
// 'parallel for' implementation using outer and inner loops and PrevEUB 585 // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_5]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}}) 586 // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 587 // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 588 // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 589 // LAMBDA-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, 590 591 // initialize lb and ub to PrevLB and PrevUB 592 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 593 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 594 // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 595 // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 596 // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 597 // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 598 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 599 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 600 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 601 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 602 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 603 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] 604 605 // check PrevEUB (using PrevUB instead of NumIt as upper bound) 606 // LAMBDA: [[OMP_PF_OUTER_LOOP_HEADER]]: 607 // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 608 // LAMBDA-64-DAG: [[OMP_PF_UB_VAL_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_1]] to 609 // LAMBDA: [[PF_PREV_UB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 610 // LAMBDA-64-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} 
[[OMP_PF_UB_VAL_CONV]], [[PF_PREV_UB_VAL_1]] 611 // LAMBDA-32-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_PREV_UB_VAL_1]] 612 // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 613 // LAMBDA: [[PF_EUB_TRUE]]: 614 // LAMBDA: [[PF_PREV_UB_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 615 // LAMBDA: br label %[[PF_EUB_END:.+]] 616 // LAMBDA-DAG: [[PF_EUB_FALSE]]: 617 // LAMBDA: [[OMP_PF_UB_VAL_2:%.+]] = load{{.+}} [[OMP_PF_UB]], 618 // LAMBDA-64: [[OMP_PF_UB_VAL_2_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_2]] to 619 // LAMBDA: br label %[[PF_EUB_END]] 620 // LAMBDA-DAG: [[PF_EUB_END]]: 621 // LAMBDA-64-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2_CONV]], %[[PF_EUB_FALSE]] ] 622 // LAMBDA-32-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2]], %[[PF_EUB_FALSE]] ] 623 // LAMBDA-64-DAG: [[PF_EUB_RES_CONV:%.+]] = trunc{{.+}} [[PF_EUB_RES]] to 624 // LAMBDA-64: store{{.+}} [[PF_EUB_RES_CONV]],{{.+}} [[OMP_PF_UB]], 625 // LAMBDA-32: store{{.+}} [[PF_EUB_RES]], {{.+}} [[OMP_PF_UB]], 626 627 // initialize omp.iv (IV = LB) 628 // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 629 // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 630 631 // outer loop: while (IV < UB) { 632 // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 633 // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 634 // LAMBDA: [[PF_CMP_IV_UB_1:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 635 // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_1]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] 636 637 // LAMBDA: [[OMP_PF_OUTER_LOOP_BODY]]: 638 // LAMBDA: br label %[[OMP_PF_INNER_FOR_HEADER:.+]] 639 640 // LAMBDA: [[OMP_PF_INNER_FOR_HEADER]]: 641 // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 642 // 
LAMBDA-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 643 // LAMBDA: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] 644 // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] 645 646 // LAMBDA: [[OMP_PF_INNER_LOOP_BODY]]: 647 // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 648 // skip body branch 649 // LAMBDA: br{{.+}} 650 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_INC:.+]] 651 652 // IV = IV + 1 and inner loop latch 653 // LAMBDA: [[OMP_PF_INNER_LOOP_INC]]: 654 // LAMBDA-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], 655 // LAMBDA-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 656 // LAMBDA-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], 657 // LAMBDA: br label %[[OMP_PF_INNER_FOR_HEADER]] 658 659 // check NextLB and NextUB 660 // LAMBDA: [[OMP_PF_INNER_LOOP_END]]: 661 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] 662 663 // LAMBDA: [[OMP_PF_OUTER_LOOP_INC]]: 664 // LAMBDA-DAG: [[OMP_PF_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 665 // LAMBDA-DAG: [[OMP_PF_ST_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], 666 // LAMBDA-DAG: [[OMP_PF_LB_NEXT:%.+]] = add{{.+}} [[OMP_PF_LB_VAL_2]], [[OMP_PF_ST_VAL_1]] 667 // LAMBDA: store{{.+}} [[OMP_PF_LB_NEXT]], {{.+}}* [[OMP_PF_LB]], 668 // LAMBDA-DAG: [[OMP_PF_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], 669 // LAMBDA-DAG: [[OMP_PF_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], 670 // LAMBDA-DAG: [[OMP_PF_UB_NEXT:%.+]] = add{{.+}} [[OMP_PF_UB_VAL_5]], [[OMP_PF_ST_VAL_2]] 671 // LAMBDA: store{{.+}} [[OMP_PF_UB_NEXT]], {{.+}}* [[OMP_PF_UB]], 672 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER]] 673 674 // LAMBDA: [[OMP_PF_OUTER_LOOP_END]]: 675 // LAMBDA-DAG: call void @__kmpc_for_static_fini( 676 // LAMBDA: ret 677 [&]() { 678 a[i] = b[i] + c[i]; 679 }(); 680 } 681 682 // schedule: dynamic no chunk 683 #pragma omp target 684 #pragma omp teams 685 // 
LAMBDA: define{{.+}} void [[OFFLOADING_FUN_6]]( 686 // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_6:@.+]] to {{.+}}) 687 688 #pragma omp distribute parallel for schedule(dynamic) 689 for (int i = 0; i < n; ++i) { 690 a[i] = b[i] + c[i]; 691 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_6]]( 692 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 693 // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_6:@.+]] to {{.+}}, 694 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 695 // LAMBDA: ret 696 697 // 'parallel for' implementation using outer and inner loops and PrevEUB 698 // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_6]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 699 // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 700 // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 701 // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 702 // LAMBDA-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, 703 704 // initialize lb and ub to PrevLB and PrevUB 705 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 706 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 707 // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 708 // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 709 // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 710 // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 711 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 712 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 713 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 714 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* 
[[OMP_PF_UB]], 715 // LAMBDA-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 716 // LAMBDA-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], 717 // LAMBDA: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}}) 718 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] 719 720 // LAMBDA: [[OMP_PF_OUTER_LOOP_HEADER]]: 721 // LAMBDA: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]]) 722 // LAMBDA: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0 723 // LAMBDA: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] 724 725 // initialize omp.iv (IV = LB) 726 // LAMBDA: [[OMP_PF_OUTER_LOOP_BODY]]: 727 // LAMBDA-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 728 // LAMBDA-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 729 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]] 730 731 // LAMBDA: [[OMP_PF_INNER_LOOP_HEADER]]: 732 // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 733 // LAMBDA-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 734 // LAMBDA: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] 735 // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] 736 737 // LAMBDA: [[OMP_PF_INNER_LOOP_BODY]]: 738 // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 739 // skip body branch 740 // LAMBDA: br{{.+}} 741 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_INC:.+]] 742 743 // IV = IV + 1 and inner loop latch 744 // LAMBDA: [[OMP_PF_INNER_LOOP_INC]]: 745 // LAMBDA-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], 746 // LAMBDA-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 747 // LAMBDA-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], 748 // 
LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER]] 749 750 // check NextLB and NextUB 751 // LAMBDA: [[OMP_PF_INNER_LOOP_END]]: 752 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] 753 754 // LAMBDA: [[OMP_PF_OUTER_LOOP_INC]]: 755 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER]] 756 757 // LAMBDA: [[OMP_PF_OUTER_LOOP_END]]: 758 // LAMBDA: ret 759 [&]() { 760 a[i] = b[i] + c[i]; 761 }(); 762 } 763 764 // schedule: dynamic chunk 765 #pragma omp target 766 #pragma omp teams 767 // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_7]]( 768 // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_7:@.+]] to {{.+}}) 769 770 #pragma omp distribute parallel for schedule(dynamic, ch) 771 for (int i = 0; i < n; ++i) { 772 a[i] = b[i] + c[i]; 773 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_7]]( 774 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 775 // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_7:@.+]] to {{.+}}, 776 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 777 // LAMBDA: ret 778 779 // 'parallel for' implementation using outer and inner loops and PrevEUB 780 // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_7]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}}) 781 // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 782 // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 783 // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 784 // LAMBDA-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, 785 786 // initialize lb and ub to PrevLB and PrevUB 787 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 788 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 789 // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 790 // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} 
[[OMP_PREV_LB_VAL]] to {{.+}} 791 // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 792 // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 793 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 794 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 795 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 796 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 797 // LAMBDA-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 798 // LAMBDA-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], 799 // LAMBDA: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}}) 800 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] 801 802 // LAMBDA: [[OMP_PF_OUTER_LOOP_HEADER]]: 803 // LAMBDA: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]]) 804 // LAMBDA: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0 805 // LAMBDA: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] 806 807 // initialize omp.iv (IV = LB) 808 // LAMBDA: [[OMP_PF_OUTER_LOOP_BODY]]: 809 // LAMBDA-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 810 // LAMBDA-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 811 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]] 812 813 // LAMBDA: [[OMP_PF_INNER_LOOP_HEADER]]: 814 // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 815 // LAMBDA-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 816 // LAMBDA: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] 817 // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] 818 819 // LAMBDA: 
[[OMP_PF_INNER_LOOP_BODY]]: 820 // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 821 // skip body branch 822 // LAMBDA: br{{.+}} 823 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_INC:.+]] 824 825 // IV = IV + 1 and inner loop latch 826 // LAMBDA: [[OMP_PF_INNER_LOOP_INC]]: 827 // LAMBDA-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], 828 // LAMBDA-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 829 // LAMBDA-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], 830 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER]] 831 832 // check NextLB and NextUB 833 // LAMBDA: [[OMP_PF_INNER_LOOP_END]]: 834 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] 835 836 // LAMBDA: [[OMP_PF_OUTER_LOOP_INC]]: 837 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER]] 838 839 // LAMBDA: [[OMP_PF_OUTER_LOOP_END]]: 840 // LAMBDA: ret 841 [&]() { 842 a[i] = b[i] + c[i]; 843 }(); 844 } 845 }(); 846 return 0; 847 #else 848 // CHECK-LABEL: @main 849 850 // CHECK: call i{{[0-9]+}} @__tgt_target_teams( 851 // CHECK: call void [[OFFLOADING_FUN_1:@.+]]( 852 853 // CHECK: call i{{[0-9]+}} @__tgt_target_teams( 854 // CHECK: call void [[OFFLOADING_FUN_2:@.+]]( 855 856 // CHECK: call i{{[0-9]+}} @__tgt_target_teams( 857 // CHECK: call void [[OFFLOADING_FUN_3:@.+]]( 858 859 // CHECK: call i{{[0-9]+}} @__tgt_target_teams( 860 // CHECK: call void [[OFFLOADING_FUN_4:@.+]]( 861 862 // CHECK: call i{{[0-9]+}} @__tgt_target_teams( 863 // CHECK: call void [[OFFLOADING_FUN_5:@.+]]( 864 865 // CHECK: call i{{[0-9]+}} @__tgt_target_teams( 866 // CHECK: call void [[OFFLOADING_FUN_6:@.+]]( 867 868 // CHECK: call i{{[0-9]+}} @__tgt_target_teams( 869 // CHECK: call void [[OFFLOADING_FUN_7:@.+]]( 870 871 // CHECK: call{{.+}} [[TMAIN:@.+]]() 872 873 // no schedule clauses 874 #pragma omp target 875 #pragma omp teams 876 // CHECK: define internal void [[OFFLOADING_FUN_1]]( 877 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}}) 
878 879 #pragma omp distribute parallel for 880 for (int i = 0; i < n; ++i) { 881 a[i] = b[i] + c[i]; 882 // CHECK: define{{.+}} void [[OMP_OUTLINED_1]]( 883 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca 884 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca 885 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca 886 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca 887 888 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 889 890 // check EUB for distribute 891 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], 892 // CHECK: [[NUM_IT_1:%.+]] = load{{.+}}, 893 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] 894 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] 895 // CHECK-DAG: [[EUB_TRUE]]: 896 // CHECK: [[NUM_IT_2:%.+]] = load{{.+}}, 897 // CHECK: br label %[[EUB_END:.+]] 898 // CHECK-DAG: [[EUB_FALSE]]: 899 // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], 900 // CHECK: br label %[[EUB_END]] 901 // CHECK-DAG: [[EUB_END]]: 902 // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] 903 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], 904 905 // initialize omp.iv 906 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], 907 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], 908 // CHECK: br label %[[OMP_JUMP_BACK:.+]] 909 910 // check exit condition 911 // CHECK: [[OMP_JUMP_BACK]]: 912 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], 913 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], 914 // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] 915 // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]] 916 917 // check that PrevLB and PrevUB are passed to the 'for' 918 // CHECK: [[DIST_BODY]]: 919 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], 920 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext 
{{.+}} [[OMP_PREV_LB]] to {{.+}} 921 // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], 922 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} 923 // check that distlb and distub are properly passed to fork_call 924 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) 925 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) 926 // CHECK: br label %[[DIST_INC:.+]] 927 928 // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch 929 // CHECK: [[DIST_INC]]: 930 // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], 931 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], 932 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] 933 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], 934 // CHECK: br label %[[OMP_JUMP_BACK]] 935 936 // CHECK-DAG: call void @__kmpc_for_static_fini( 937 // CHECK: ret 938 939 // implementation of 'parallel for' 940 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_1]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 941 942 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 943 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 944 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 945 946 // initialize lb and ub to PrevLB and PrevUB 947 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 948 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 949 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 950 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 951 // CHECK-DAG: 
[[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 952 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 953 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 954 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 955 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 956 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 957 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 958 959 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used 960 // In this case we use EUB 961 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 962 // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}}, 963 // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] 964 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 965 // CHECK: [[PF_EUB_TRUE]]: 966 // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}}, 967 // CHECK: br label %[[PF_EUB_END:.+]] 968 // CHECK-DAG: [[PF_EUB_FALSE]]: 969 // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], 970 // CHECK: br label %[[PF_EUB_END]] 971 // CHECK-DAG: [[PF_EUB_END]]: 972 // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] 973 // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], 974 975 // initialize omp.iv 976 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 977 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 978 // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]] 979 980 // check exit condition 981 // CHECK: [[OMP_PF_JUMP_BACK]]: 982 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], 983 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], 984 // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} 
[[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 985 // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] 986 987 // check that PrevLB and PrevUB are passed to the 'for' 988 // CHECK: [[PF_BODY]]: 989 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 990 // CHECK: br label {{.+}} 991 992 // check stride 1 for 'for' in 'distribute parallel for' 993 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], 994 // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 995 // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], 996 // CHECK: br label %[[OMP_PF_JUMP_BACK]] 997 998 // CHECK-DAG: call void @__kmpc_for_static_fini( 999 // CHECK: ret 1000 } 1001 1002 // dist_schedule: static no chunk 1003 #pragma omp target 1004 #pragma omp teams 1005 // CHECK: define{{.+}} void [[OFFLOADING_FUN_2]]( 1006 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_2:@.+]] to {{.+}}) 1007 1008 #pragma omp distribute parallel for dist_schedule(static) 1009 for (int i = 0; i < n; ++i) { 1010 a[i] = b[i] + c[i]; 1011 // CHECK: define{{.+}} void [[OMP_OUTLINED_2]]( 1012 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca 1013 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca 1014 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca 1015 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca 1016 1017 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 1018 1019 // check EUB for distribute 1020 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], 1021 // CHECK: [[NUM_IT_1:%.+]] = load{{.+}}, 1022 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] 1023 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] 1024 // CHECK-DAG: [[EUB_TRUE]]: 1025 // CHECK: [[NUM_IT_2:%.+]] = load{{.+}}, 1026 // CHECK: br label %[[EUB_END:.+]] 1027 // CHECK-DAG: [[EUB_FALSE]]: 1028 // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], 1029 // 
CHECK: br label %[[EUB_END]] 1030 // CHECK-DAG: [[EUB_END]]: 1031 // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] 1032 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], 1033 1034 // initialize omp.iv 1035 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], 1036 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], 1037 // CHECK: br label %[[OMP_JUMP_BACK:.+]] 1038 1039 // check exit condition 1040 // CHECK: [[OMP_JUMP_BACK]]: 1041 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], 1042 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], 1043 // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] 1044 // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]] 1045 1046 // check that PrevLB and PrevUB are passed to the 'for' 1047 // CHECK: [[DIST_BODY]]: 1048 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], 1049 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} 1050 // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], 1051 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} 1052 // check that distlb and distub are properly passed to fork_call 1053 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) 1054 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) 1055 // CHECK: br label %[[DIST_INC:.+]] 1056 1057 // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch 1058 // CHECK: [[DIST_INC]]: 1059 // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], 1060 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], 1061 // CHECK: 
[[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] 1062 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], 1063 // CHECK: br label %[[OMP_JUMP_BACK]] 1064 1065 // CHECK-DAG: call void @__kmpc_for_static_fini( 1066 // CHECK: ret 1067 1068 // implementation of 'parallel for' 1069 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_2]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 1070 1071 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 1072 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 1073 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 1074 1075 // initialize lb and ub to PrevLB and PrevUB 1076 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 1077 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 1078 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 1079 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 1080 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 1081 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 1082 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 1083 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 1084 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 1085 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 1086 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 1087 1088 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used 1089 // In this case we use EUB 1090 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 1091 // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}}, 1092 // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], 
[[PF_NUM_IT_1]] 1093 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 1094 // CHECK: [[PF_EUB_TRUE]]: 1095 // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}}, 1096 // CHECK: br label %[[PF_EUB_END:.+]] 1097 // CHECK-DAG: [[PF_EUB_FALSE]]: 1098 // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], 1099 // CHECK: br label %[[PF_EUB_END]] 1100 // CHECK-DAG: [[PF_EUB_END]]: 1101 // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] 1102 // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], 1103 1104 // initialize omp.iv 1105 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 1106 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 1107 // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]] 1108 1109 // check exit condition 1110 // CHECK: [[OMP_PF_JUMP_BACK]]: 1111 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], 1112 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], 1113 // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 1114 // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] 1115 1116 // check that PrevLB and PrevUB are passed to the 'for' 1117 // CHECK: [[PF_BODY]]: 1118 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1119 // CHECK: br label {{.+}} 1120 1121 // check stride 1 for 'for' in 'distribute parallel for' 1122 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], 1123 // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 1124 // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], 1125 // CHECK: br label %[[OMP_PF_JUMP_BACK]] 1126 1127 // CHECK-DAG: call void @__kmpc_for_static_fini( 1128 // CHECK: ret 1129 } 1130 1131 // dist_schedule: static chunk 1132 #pragma omp target 1133 #pragma omp teams 1134 // CHECK: define{{.+}} void [[OFFLOADING_FUN_3]]( 1135 // CHECK: 
call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}}) 1136 1137 #pragma omp distribute parallel for dist_schedule(static, ch) 1138 for (int i = 0; i < n; ++i) { 1139 a[i] = b[i] + c[i]; 1140 // CHECK: define{{.+}} void [[OMP_OUTLINED_3]]( 1141 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca 1142 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca 1143 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca 1144 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca 1145 1146 // unlike the previous tests, in this one we have an outer and inner loop for 'distribute' 1147 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91, 1148 // CHECK: br label %[[DIST_OUTER_LOOP_HEADER:.+]] 1149 1150 // CHECK: [[DIST_OUTER_LOOP_HEADER]]: 1151 // check EUB for distribute 1152 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], 1153 // CHECK: [[NUM_IT_1:%.+]] = load{{.+}}, 1154 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] 1155 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] 1156 // CHECK-DAG: [[EUB_TRUE]]: 1157 // CHECK: [[NUM_IT_2:%.+]] = load{{.+}}, 1158 // CHECK: br label %[[EUB_END:.+]] 1159 // CHECK-DAG: [[EUB_FALSE]]: 1160 // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], 1161 // CHECK: br label %[[EUB_END]] 1162 // CHECK-DAG: [[EUB_END]]: 1163 // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] 1164 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], 1165 1166 // initialize omp.iv 1167 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], 1168 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], 1169 1170 // check exit condition 1171 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], 1172 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], 1173 // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] 1174 // CHECK: br {{.+}} 
[[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]] 1175 1176 // CHECK: [[DIST_OUTER_LOOP_BODY]]: 1177 // CHECK: br label %[[DIST_INNER_LOOP_HEADER:.+]] 1178 1179 // CHECK: [[DIST_INNER_LOOP_HEADER]]: 1180 // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]], 1181 // CHECK-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]], 1182 // CHECK: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]] 1183 // CHECK: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]] 1184 1185 // check that PrevLB and PrevUB are passed to the 'for' 1186 // CHECK: [[DIST_INNER_LOOP_BODY]]: 1187 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], 1188 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} 1189 // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], 1190 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} 1191 // check that distlb and distub are properly passed to fork_call 1192 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) 1193 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) 1194 // CHECK: br label %[[DIST_INNER_LOOP_INC:.+]] 1195 1196 // check DistInc 1197 // CHECK: [[DIST_INNER_LOOP_INC]]: 1198 // CHECK-DAG: [[OMP_IV_VAL_3:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], 1199 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], 1200 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]] 1201 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], 1202 // CHECK: br label %[[DIST_INNER_LOOP_HEADER]] 1203 1204 // CHECK: [[DIST_INNER_LOOP_END]]: 1205 // CHECK: br label %[[DIST_OUTER_LOOP_INC:.+]] 1206 1207 // 
CHECK: [[DIST_OUTER_LOOP_INC]]: 1208 // check NextLB and NextUB 1209 // CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]], 1210 // CHECK-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], 1211 // CHECK-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]] 1212 // CHECK: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]], 1213 // CHECK-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]], 1214 // CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], 1215 // CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]] 1216 // CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]], 1217 // CHECK: br label %[[DIST_OUTER_LOOP_HEADER]] 1218 1219 // outer loop exit 1220 // CHECK: [[DIST_OUTER_LOOP_END]]: 1221 // CHECK-DAG: call void @__kmpc_for_static_fini( 1222 // CHECK: ret 1223 1224 // skip implementation of 'parallel for': using default scheduling and was tested above 1225 } 1226 1227 // schedule: static no chunk 1228 #pragma omp target 1229 #pragma omp teams 1230 // CHECK: define{{.+}} void [[OFFLOADING_FUN_4]]( 1231 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_4:@.+]] to {{.+}}) 1232 1233 #pragma omp distribute parallel for schedule(static) 1234 for (int i = 0; i < n; ++i) { 1235 a[i] = b[i] + c[i]; 1236 // CHECK: define{{.+}} void [[OMP_OUTLINED_4]]( 1237 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca 1238 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca 1239 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca 1240 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca 1241 1242 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 1243 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_4:@.+]] to {{.+}}, 1244 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 1245 // CHECK: ret 1246 1247 // 'parallel for' implementation is the same as the case without schedule clause 
(static no chunk is the default) 1248 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_4]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 1249 1250 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 1251 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 1252 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 1253 1254 // initialize lb and ub to PrevLB and PrevUB 1255 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 1256 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 1257 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 1258 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 1259 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 1260 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 1261 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 1262 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 1263 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 1264 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 1265 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 1266 1267 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used 1268 // In this case we use EUB 1269 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 1270 // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}}, 1271 // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] 1272 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 1273 // CHECK: [[PF_EUB_TRUE]]: 1274 // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}}, 1275 // CHECK: br label %[[PF_EUB_END:.+]] 1276 // CHECK-DAG: [[PF_EUB_FALSE]]: 
1277 // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], 1278 // CHECK: br label %[[PF_EUB_END]] 1279 // CHECK-DAG: [[PF_EUB_END]]: 1280 // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] 1281 // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], 1282 1283 // initialize omp.iv 1284 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 1285 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 1286 // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]] 1287 1288 // check exit condition 1289 // CHECK: [[OMP_PF_JUMP_BACK]]: 1290 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], 1291 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], 1292 // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 1293 // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] 1294 1295 // check that PrevLB and PrevUB are passed to the 'for' 1296 // CHECK: [[PF_BODY]]: 1297 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1298 // CHECK: br label {{.+}} 1299 1300 // check stride 1 for 'for' in 'distribute parallel for' 1301 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], 1302 // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 1303 // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], 1304 // CHECK: br label %[[OMP_PF_JUMP_BACK]] 1305 1306 // CHECK-DAG: call void @__kmpc_for_static_fini( 1307 // CHECK: ret 1308 } 1309 1310 // schedule: static chunk 1311 #pragma omp target 1312 #pragma omp teams 1313 // CHECK: define{{.+}} void [[OFFLOADING_FUN_5]]( 1314 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_5:@.+]] to {{.+}}) 1315 1316 #pragma omp distribute parallel for schedule(static, ch) 1317 for (int i = 0; i < n; ++i) { 1318 a[i] = b[i] + c[i]; 1319 // CHECK: define{{.+}} void [[OMP_OUTLINED_5]]( 
1320 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 1321 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_5:@.+]] to {{.+}}, 1322 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 1323 // CHECK: ret 1324 1325 // 'parallel for' implementation using outer and inner loops and PrevEUB 1326 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_5]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}}) 1327 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 1328 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 1329 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 1330 // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, 1331 1332 // initialize lb and ub to PrevLB and PrevUB 1333 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 1334 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 1335 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 1336 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 1337 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 1338 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 1339 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 1340 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 1341 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 1342 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 1343 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 1344 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] 1345 1346 // check PrevEUB (using PrevUB instead of NumIt as upper bound) 1347 // CHECK: 
[[OMP_PF_OUTER_LOOP_HEADER]]: 1348 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 1349 // CHECK-64-DAG: [[OMP_PF_UB_VAL_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_1]] to 1350 // CHECK: [[PF_PREV_UB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 1351 // CHECK-64-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_CONV]], [[PF_PREV_UB_VAL_1]] 1352 // CHECK-32-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_PREV_UB_VAL_1]] 1353 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 1354 // CHECK: [[PF_EUB_TRUE]]: 1355 // CHECK: [[PF_PREV_UB_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 1356 // CHECK: br label %[[PF_EUB_END:.+]] 1357 // CHECK-DAG: [[PF_EUB_FALSE]]: 1358 // CHECK: [[OMP_PF_UB_VAL_2:%.+]] = load{{.+}} [[OMP_PF_UB]], 1359 // CHECK-64: [[OMP_PF_UB_VAL_2_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_2]] to 1360 // CHECK: br label %[[PF_EUB_END]] 1361 // CHECK-DAG: [[PF_EUB_END]]: 1362 // CHECK-64-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2_CONV]], %[[PF_EUB_FALSE]] ] 1363 // CHECK-32-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2]], %[[PF_EUB_FALSE]] ] 1364 // CHECK-64-DAG: [[PF_EUB_RES_CONV:%.+]] = trunc{{.+}} [[PF_EUB_RES]] to 1365 // CHECK-64: store{{.+}} [[PF_EUB_RES_CONV]],{{.+}} [[OMP_PF_UB]], 1366 // CHECK-32: store{{.+}} [[PF_EUB_RES]], {{.+}} [[OMP_PF_UB]], 1367 1368 // initialize omp.iv (IV = LB) 1369 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 1370 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 1371 1372 // outer loop: while (IV < UB) { 1373 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1374 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 1375 // CHECK: [[PF_CMP_IV_UB_1:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 1376 // CHECK: br{{.+}} 
[[PF_CMP_IV_UB_1]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] 1377 1378 // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]: 1379 // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER:.+]] 1380 1381 // CHECK: [[OMP_PF_INNER_FOR_HEADER]]: 1382 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1383 // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 1384 // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] 1385 // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] 1386 1387 // CHECK: [[OMP_PF_INNER_LOOP_BODY]]: 1388 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1389 // skip body branch 1390 // CHECK: br{{.+}} 1391 // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]] 1392 1393 // IV = IV + 1 and inner loop latch 1394 // CHECK: [[OMP_PF_INNER_LOOP_INC]]: 1395 // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], 1396 // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 1397 // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], 1398 // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER]] 1399 1400 // check NextLB and NextUB 1401 // CHECK: [[OMP_PF_INNER_LOOP_END]]: 1402 // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] 1403 1404 // CHECK: [[OMP_PF_OUTER_LOOP_INC]]: 1405 // CHECK-DAG: [[OMP_PF_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 1406 // CHECK-DAG: [[OMP_PF_ST_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], 1407 // CHECK-DAG: [[OMP_PF_LB_NEXT:%.+]] = add{{.+}} [[OMP_PF_LB_VAL_2]], [[OMP_PF_ST_VAL_1]] 1408 // CHECK: store{{.+}} [[OMP_PF_LB_NEXT]], {{.+}}* [[OMP_PF_LB]], 1409 // CHECK-DAG: [[OMP_PF_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], 1410 // CHECK-DAG: [[OMP_PF_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], 1411 // CHECK-DAG: [[OMP_PF_UB_NEXT:%.+]] = add{{.+}} [[OMP_PF_UB_VAL_5]], [[OMP_PF_ST_VAL_2]] 1412 // CHECK: store{{.+}} [[OMP_PF_UB_NEXT]], 
{{.+}}* [[OMP_PF_UB]], 1413 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]] 1414 1415 // CHECK: [[OMP_PF_OUTER_LOOP_END]]: 1416 // CHECK-DAG: call void @__kmpc_for_static_fini( 1417 // CHECK: ret 1418 } 1419 1420 // schedule: dynamic no chunk 1421 #pragma omp target 1422 #pragma omp teams 1423 // CHECK: define{{.+}} void [[OFFLOADING_FUN_6]]( 1424 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_6:@.+]] to {{.+}}) 1425 1426 #pragma omp distribute parallel for schedule(dynamic) 1427 for (int i = 0; i < n; ++i) { 1428 a[i] = b[i] + c[i]; 1429 // CHECK: define{{.+}} void [[OMP_OUTLINED_6]]( 1430 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 1431 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_6:@.+]] to {{.+}}, 1432 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 1433 // CHECK: ret 1434 1435 // 'parallel for' implementation using outer and inner loops and PrevEUB 1436 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_6]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 1437 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 1438 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 1439 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 1440 // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, 1441 1442 // initialize lb and ub to PrevLB and PrevUB 1443 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 1444 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 1445 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 1446 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 1447 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 1448 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to 
{{.+}} 1449 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 1450 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 1451 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 1452 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 1453 // CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 1454 // CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], 1455 // CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}}) 1456 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] 1457 1458 // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]: 1459 // CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]]) 1460 // CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0 1461 // CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] 1462 1463 // initialize omp.iv (IV = LB) 1464 // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]: 1465 // CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 1466 // CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 1467 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]] 1468 1469 // CHECK: [[OMP_PF_INNER_LOOP_HEADER]]: 1470 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1471 // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 1472 // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] 1473 // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] 1474 1475 // CHECK: [[OMP_PF_INNER_LOOP_BODY]]: 1476 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1477 // skip body branch 1478 // CHECK: br{{.+}} 1479 // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]] 1480 1481 
// IV = IV + 1 and inner loop latch 1482 // CHECK: [[OMP_PF_INNER_LOOP_INC]]: 1483 // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], 1484 // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 1485 // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], 1486 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER]] 1487 1488 // check NextLB and NextUB 1489 // CHECK: [[OMP_PF_INNER_LOOP_END]]: 1490 // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] 1491 1492 // CHECK: [[OMP_PF_OUTER_LOOP_INC]]: 1493 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]] 1494 1495 // CHECK: [[OMP_PF_OUTER_LOOP_END]]: 1496 // CHECK: ret 1497 } 1498 1499 // schedule: dynamic chunk 1500 #pragma omp target 1501 #pragma omp teams 1502 // CHECK: define{{.+}} void [[OFFLOADING_FUN_7]]( 1503 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_7:@.+]] to {{.+}}) 1504 1505 #pragma omp distribute parallel for schedule(dynamic, ch) 1506 for (int i = 0; i < n; ++i) { 1507 a[i] = b[i] + c[i]; 1508 // CHECK: define{{.+}} void [[OMP_OUTLINED_7]]( 1509 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 1510 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_7:@.+]] to {{.+}}, 1511 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 1512 // CHECK: ret 1513 1514 // 'parallel for' implementation using outer and inner loops and PrevEUB 1515 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_7]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}}) 1516 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 1517 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 1518 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 1519 // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, 1520 1521 // initialize lb and ub to PrevLB and PrevUB 1522 // CHECK-DAG: store{{.+}} 
[[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 1523 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 1524 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 1525 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 1526 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 1527 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 1528 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 1529 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 1530 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 1531 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 1532 // CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 1533 // CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], 1534 // CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}}) 1535 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] 1536 1537 // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]: 1538 // CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]]) 1539 // CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0 1540 // CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] 1541 1542 // initialize omp.iv (IV = LB) 1543 // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]: 1544 // CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 1545 // CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 1546 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]] 1547 1548 // CHECK: [[OMP_PF_INNER_LOOP_HEADER]]: 1549 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1550 // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = 
load{{.+}}, {{.+}}* [[OMP_PF_UB]], 1551 // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] 1552 // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] 1553 1554 // CHECK: [[OMP_PF_INNER_LOOP_BODY]]: 1555 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1556 // skip body branch 1557 // CHECK: br{{.+}} 1558 // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]] 1559 1560 // IV = IV + 1 and inner loop latch 1561 // CHECK: [[OMP_PF_INNER_LOOP_INC]]: 1562 // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], 1563 // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 1564 // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], 1565 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER]] 1566 1567 // check NextLB and NextUB 1568 // CHECK: [[OMP_PF_INNER_LOOP_END]]: 1569 // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] 1570 1571 // CHECK: [[OMP_PF_OUTER_LOOP_INC]]: 1572 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]] 1573 1574 // CHECK: [[OMP_PF_OUTER_LOOP_END]]: 1575 // CHECK: ret 1576 } 1577 1578 return tmain<int>(); 1579 #endif 1580 } 1581 1582 // check code 1583 // CHECK: define{{.+}} [[TMAIN]]() 1584 1585 // CHECK: call i{{[0-9]+}} @__tgt_target_teams( 1586 // CHECK: call void [[OFFLOADING_FUN_1:@.+]]( 1587 1588 // CHECK: call i{{[0-9]+}} @__tgt_target_teams( 1589 // CHECK: call void [[OFFLOADING_FUN_2:@.+]]( 1590 1591 // CHECK: call i{{[0-9]+}} @__tgt_target_teams( 1592 // CHECK: call void [[OFFLOADING_FUN_3:@.+]]( 1593 1594 // CHECK: call i{{[0-9]+}} @__tgt_target_teams( 1595 // CHECK: call void [[OFFLOADING_FUN_4:@.+]]( 1596 1597 // CHECK: call i{{[0-9]+}} @__tgt_target_teams( 1598 // CHECK: call void [[OFFLOADING_FUN_5:@.+]]( 1599 1600 // CHECK: call i{{[0-9]+}} @__tgt_target_teams( 1601 // CHECK: call void [[OFFLOADING_FUN_6:@.+]]( 1602 1603 // CHECK: call i{{[0-9]+}} @__tgt_target_teams( 1604 // CHECK: call void 
[[OFFLOADING_FUN_7:@.+]]( 1605 1606 // CHECK: define{{.+}} void [[OFFLOADING_FUN_1]]( 1607 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}}) 1608 1609 // CHECK: define{{.+}} void [[OMP_OUTLINED_1]]( 1610 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca 1611 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca 1612 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca 1613 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca 1614 1615 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 1616 1617 // check EUB for distribute 1618 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], 1619 // CHECK: [[NUM_IT_1:%.+]] = load{{.+}}, 1620 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] 1621 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] 1622 // CHECK-DAG: [[EUB_TRUE]]: 1623 // CHECK: [[NUM_IT_2:%.+]] = load{{.+}}, 1624 // CHECK: br label %[[EUB_END:.+]] 1625 // CHECK-DAG: [[EUB_FALSE]]: 1626 // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], 1627 // CHECK: br label %[[EUB_END]] 1628 // CHECK-DAG: [[EUB_END]]: 1629 // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] 1630 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], 1631 1632 // initialize omp.iv 1633 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], 1634 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], 1635 // CHECK: br label %[[OMP_JUMP_BACK:.+]] 1636 1637 // check exit condition 1638 // CHECK: [[OMP_JUMP_BACK]]: 1639 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], 1640 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], 1641 // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] 1642 // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]] 1643 1644 // check that PrevLB and PrevUB are passed to the 'for' 1645 // 
CHECK: [[DIST_BODY]]: 1646 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], 1647 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} 1648 // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], 1649 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} 1650 // check that distlb and distub are properly passed to fork_call 1651 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) 1652 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) 1653 // CHECK: br label %[[DIST_INC:.+]] 1654 1655 // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch 1656 // CHECK: [[DIST_INC]]: 1657 // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], 1658 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], 1659 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] 1660 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], 1661 // CHECK: br label %[[OMP_JUMP_BACK]] 1662 1663 // CHECK-DAG: call void @__kmpc_for_static_fini( 1664 // CHECK: ret 1665 1666 // implementation of 'parallel for' 1667 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_1]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 1668 1669 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 1670 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 1671 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 1672 1673 // initialize lb and ub to PrevLB and PrevUB 1674 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 1675 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 1676 // CHECK-DAG: 
[[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 1677 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 1678 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 1679 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 1680 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 1681 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 1682 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 1683 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 1684 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 1685 1686 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used 1687 // In this case we use EUB 1688 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 1689 // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}}, 1690 // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] 1691 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 1692 // CHECK: [[PF_EUB_TRUE]]: 1693 // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}}, 1694 // CHECK: br label %[[PF_EUB_END:.+]] 1695 // CHECK-DAG: [[PF_EUB_FALSE]]: 1696 // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], 1697 // CHECK: br label %[[PF_EUB_END]] 1698 // CHECK-DAG: [[PF_EUB_END]]: 1699 // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] 1700 // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], 1701 1702 // initialize omp.iv 1703 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 1704 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 1705 // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]] 1706 1707 // check exit condition 1708 // CHECK: 
[[OMP_PF_JUMP_BACK]]: 1709 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], 1710 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], 1711 // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 1712 // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] 1713 1714 // check that PrevLB and PrevUB are passed to the 'for' 1715 // CHECK: [[PF_BODY]]: 1716 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1717 // CHECK: br label {{.+}} 1718 1719 // check stride 1 for 'for' in 'distribute parallel for' 1720 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], 1721 // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 1722 // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], 1723 // CHECK: br label %[[OMP_PF_JUMP_BACK]] 1724 1725 // CHECK-DAG: call void @__kmpc_for_static_fini( 1726 // CHECK: ret 1727 1728 // CHECK: define{{.+}} void [[OFFLOADING_FUN_2]]( 1729 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_2:@.+]] to {{.+}}) 1730 1731 // CHECK: define{{.+}} void [[OMP_OUTLINED_2]]( 1732 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca 1733 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca 1734 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca 1735 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca 1736 1737 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 1738 1739 // check EUB for distribute 1740 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], 1741 // CHECK: [[NUM_IT_1:%.+]] = load{{.+}}, 1742 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] 1743 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] 1744 // CHECK-DAG: [[EUB_TRUE]]: 1745 // CHECK: [[NUM_IT_2:%.+]] = load{{.+}}, 1746 // CHECK: br label %[[EUB_END:.+]] 1747 // CHECK-DAG: [[EUB_FALSE]]: 1748 // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} 
[[OMP_UB]], 1749 // CHECK: br label %[[EUB_END]] 1750 // CHECK-DAG: [[EUB_END]]: 1751 // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] 1752 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], 1753 1754 // initialize omp.iv 1755 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], 1756 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], 1757 // CHECK: br label %[[OMP_JUMP_BACK:.+]] 1758 1759 // check exit condition 1760 // CHECK: [[OMP_JUMP_BACK]]: 1761 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], 1762 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], 1763 // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] 1764 // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]] 1765 1766 // check that PrevLB and PrevUB are passed to the 'for' 1767 // CHECK: [[DIST_BODY]]: 1768 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], 1769 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} 1770 // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], 1771 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} 1772 // check that distlb and distub are properly passed to fork_call 1773 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) 1774 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) 1775 // CHECK: br label %[[DIST_INC:.+]] 1776 1777 // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch 1778 // CHECK: [[DIST_INC]]: 1779 // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], 1780 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* 
[[OMP_ST]], 1781 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] 1782 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], 1783 // CHECK: br label %[[OMP_JUMP_BACK]] 1784 1785 // CHECK-DAG: call void @__kmpc_for_static_fini( 1786 // CHECK: ret 1787 1788 // implementation of 'parallel for' 1789 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_2]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 1790 1791 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 1792 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 1793 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 1794 1795 // initialize lb and ub to PrevLB and PrevUB 1796 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 1797 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 1798 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 1799 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 1800 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 1801 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 1802 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 1803 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 1804 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 1805 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 1806 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 1807 1808 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used 1809 // In this case we use EUB 1810 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 1811 // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}}, 1812 // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} 
[[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] 1813 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 1814 // CHECK: [[PF_EUB_TRUE]]: 1815 // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}}, 1816 // CHECK: br label %[[PF_EUB_END:.+]] 1817 // CHECK-DAG: [[PF_EUB_FALSE]]: 1818 // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], 1819 // CHECK: br label %[[PF_EUB_END]] 1820 // CHECK-DAG: [[PF_EUB_END]]: 1821 // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] 1822 // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], 1823 1824 // initialize omp.iv 1825 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 1826 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 1827 // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]] 1828 1829 // check exit condition 1830 // CHECK: [[OMP_PF_JUMP_BACK]]: 1831 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], 1832 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], 1833 // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 1834 // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] 1835 1836 // check that PrevLB and PrevUB are passed to the 'for' 1837 // CHECK: [[PF_BODY]]: 1838 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1839 // CHECK: br label {{.+}} 1840 1841 // check stride 1 for 'for' in 'distribute parallel for' 1842 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], 1843 // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 1844 // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], 1845 // CHECK: br label %[[OMP_PF_JUMP_BACK]] 1846 1847 // CHECK-DAG: call void @__kmpc_for_static_fini( 1848 // CHECK: ret 1849 1850 // CHECK: define{{.+}} void [[OFFLOADING_FUN_3]]( 1851 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* 
[[OMP_OUTLINED_3:@.+]] to {{.+}}) 1852 1853 // CHECK: define{{.+}} void [[OMP_OUTLINED_3]]( 1854 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca 1855 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca 1856 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca 1857 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca 1858 1859 // unlike the previous tests, in this one we have a outer and inner loop for 'distribute' 1860 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91, 1861 // CHECK: br label %[[DIST_OUTER_LOOP_HEADER:.+]] 1862 1863 // CHECK: [[DIST_OUTER_LOOP_HEADER]]: 1864 // check EUB for distribute 1865 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], 1866 // CHECK: [[NUM_IT_1:%.+]] = load{{.+}}, 1867 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] 1868 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] 1869 // CHECK-DAG: [[EUB_TRUE]]: 1870 // CHECK: [[NUM_IT_2:%.+]] = load{{.+}}, 1871 // CHECK: br label %[[EUB_END:.+]] 1872 // CHECK-DAG: [[EUB_FALSE]]: 1873 // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], 1874 // CHECK: br label %[[EUB_END]] 1875 // CHECK-DAG: [[EUB_END]]: 1876 // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] 1877 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], 1878 1879 // initialize omp.iv 1880 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], 1881 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], 1882 1883 // check exit condition 1884 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], 1885 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], 1886 // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] 1887 // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]] 1888 1889 // CHECK: [[DIST_OUTER_LOOP_BODY]]: 1890 // CHECK: br label %[[DIST_INNER_LOOP_HEADER:.+]] 1891 1892 
// CHECK: [[DIST_INNER_LOOP_HEADER]]: 1893 // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]], 1894 // CHECK-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]], 1895 // CHECK: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]] 1896 // CHECK: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]] 1897 1898 // check that PrevLB and PrevUB are passed to the 'for' 1899 // CHECK: [[DIST_INNER_LOOP_BODY]]: 1900 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], 1901 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} 1902 // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], 1903 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} 1904 // check that distlb and distub are properly passed to fork_call 1905 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) 1906 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) 1907 // CHECK: br label %[[DIST_INNER_LOOP_INC:.+]] 1908 1909 // check DistInc 1910 // CHECK: [[DIST_INNER_LOOP_INC]]: 1911 // CHECK-DAG: [[OMP_IV_VAL_3:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], 1912 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], 1913 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]] 1914 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], 1915 // CHECK: br label %[[DIST_INNER_LOOP_HEADER]] 1916 1917 // CHECK: [[DIST_INNER_LOOP_END]]: 1918 // CHECK: br label %[[DIST_OUTER_LOOP_INC:.+]] 1919 1920 // CHECK: [[DIST_OUTER_LOOP_INC]]: 1921 // check NextLB and NextUB 1922 // CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]], 1923 // CHECK-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} 
[[OMP_ST]], 1924 // CHECK-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]] 1925 // CHECK: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]], 1926 // CHECK-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]], 1927 // CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], 1928 // CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]] 1929 // CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]], 1930 // CHECK: br label %[[DIST_OUTER_LOOP_HEADER]] 1931 1932 // outer loop exit 1933 // CHECK: [[DIST_OUTER_LOOP_END]]: 1934 // CHECK-DAG: call void @__kmpc_for_static_fini( 1935 // CHECK: ret 1936 1937 // skip implementation of 'parallel for': using default scheduling and was tested above 1938 1939 // CHECK: define{{.+}} void [[OFFLOADING_FUN_4]]( 1940 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_4:@.+]] to {{.+}}) 1941 1942 // CHECK: define{{.+}} void [[OMP_OUTLINED_4]]( 1943 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca 1944 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca 1945 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca 1946 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca 1947 1948 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 1949 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_4:@.+]] to {{.+}}, 1950 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 1951 // CHECK: ret 1952 1953 // 'parallel for' implementation is the same as the case without schedule clase (static no chunk is the default) 1954 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_4]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 1955 1956 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 1957 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 1958 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 1959 1960 // 
initialize lb and ub to PrevLB and PrevUB 1961 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 1962 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 1963 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 1964 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 1965 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 1966 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 1967 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 1968 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 1969 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 1970 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 1971 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 1972 1973 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used 1974 // In this case we use EUB 1975 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 1976 // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}}, 1977 // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] 1978 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 1979 // CHECK: [[PF_EUB_TRUE]]: 1980 // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}}, 1981 // CHECK: br label %[[PF_EUB_END:.+]] 1982 // CHECK-DAG: [[PF_EUB_FALSE]]: 1983 // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], 1984 // CHECK: br label %[[PF_EUB_END]] 1985 // CHECK-DAG: [[PF_EUB_END]]: 1986 // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] 1987 // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], 1988 1989 // initialize omp.iv 1990 // CHECK: 
[[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 1991 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 1992 // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]] 1993 1994 // check exit condition 1995 // CHECK: [[OMP_PF_JUMP_BACK]]: 1996 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], 1997 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], 1998 // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 1999 // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] 2000 2001 // check that PrevLB and PrevUB are passed to the 'for' 2002 // CHECK: [[PF_BODY]]: 2003 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 2004 // CHECK: br label {{.+}} 2005 2006 // check stride 1 for 'for' in 'distribute parallel for' 2007 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], 2008 // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 2009 // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], 2010 // CHECK: br label %[[OMP_PF_JUMP_BACK]] 2011 2012 // CHECK-DAG: call void @__kmpc_for_static_fini( 2013 // CHECK: ret 2014 2015 // CHECK: define{{.+}} void [[OFFLOADING_FUN_5]]( 2016 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_5:@.+]] to {{.+}}) 2017 2018 // CHECK: define{{.+}} void [[OMP_OUTLINED_5]]( 2019 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 2020 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_5:@.+]] to {{.+}}, 2021 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 2022 // CHECK: ret 2023 2024 // 'parallel for' implementation using outer and inner loops and PrevEUB 2025 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_5]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}}) 2026 // 
CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 2027 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 2028 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 2029 // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, 2030 2031 // initialize lb and ub to PrevLB and PrevUB 2032 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 2033 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 2034 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 2035 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 2036 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 2037 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 2038 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 2039 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 2040 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 2041 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 2042 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 2043 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] 2044 2045 // check PrevEUB (using PrevUB instead of NumIt as upper bound) 2046 // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]: 2047 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 2048 // CHECK-64-DAG: [[OMP_PF_UB_VAL_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_1]] to 2049 // CHECK: [[PF_PREV_UB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 2050 // CHECK-64-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_CONV]], [[PF_PREV_UB_VAL_1]] 2051 // CHECK-32-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_PREV_UB_VAL_1]] 2052 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 2053 // CHECK: [[PF_EUB_TRUE]]: 2054 // 
CHECK: [[PF_PREV_UB_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 2055 // CHECK: br label %[[PF_EUB_END:.+]] 2056 // CHECK-DAG: [[PF_EUB_FALSE]]: 2057 // CHECK: [[OMP_PF_UB_VAL_2:%.+]] = load{{.+}} [[OMP_PF_UB]], 2058 // CHECK-64: [[OMP_PF_UB_VAL_2_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_2]] to 2059 // CHECK: br label %[[PF_EUB_END]] 2060 // CHECK-DAG: [[PF_EUB_END]]: 2061 // CHECK-64-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2_CONV]], %[[PF_EUB_FALSE]] ] 2062 // CHECK-32-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2]], %[[PF_EUB_FALSE]] ] 2063 // CHECK-64-DAG: [[PF_EUB_RES_CONV:%.+]] = trunc{{.+}} [[PF_EUB_RES]] to 2064 // CHECK-64: store{{.+}} [[PF_EUB_RES_CONV]],{{.+}} [[OMP_PF_UB]], 2065 // CHECK-32: store{{.+}} [[PF_EUB_RES]], {{.+}} [[OMP_PF_UB]], 2066 2067 // initialize omp.iv (IV = LB) 2068 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 2069 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 2070 2071 // outer loop: while (IV < UB) { 2072 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 2073 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 2074 // CHECK: [[PF_CMP_IV_UB_1:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 2075 // CHECK: br{{.+}} [[PF_CMP_IV_UB_1]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] 2076 2077 // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]: 2078 // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER:.+]] 2079 2080 // CHECK: [[OMP_PF_INNER_FOR_HEADER]]: 2081 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 2082 // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 2083 // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] 2084 // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label 
%[[OMP_PF_INNER_LOOP_END:.+]] 2085 2086 // CHECK: [[OMP_PF_INNER_LOOP_BODY]]: 2087 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 2088 // skip body branch 2089 // CHECK: br{{.+}} 2090 // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]] 2091 2092 // IV = IV + 1 and inner loop latch 2093 // CHECK: [[OMP_PF_INNER_LOOP_INC]]: 2094 // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], 2095 // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 2096 // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], 2097 // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER]] 2098 2099 // check NextLB and NextUB 2100 // CHECK: [[OMP_PF_INNER_LOOP_END]]: 2101 // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] 2102 2103 // CHECK: [[OMP_PF_OUTER_LOOP_INC]]: 2104 // CHECK-DAG: [[OMP_PF_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 2105 // CHECK-DAG: [[OMP_PF_ST_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], 2106 // CHECK-DAG: [[OMP_PF_LB_NEXT:%.+]] = add{{.+}} [[OMP_PF_LB_VAL_2]], [[OMP_PF_ST_VAL_1]] 2107 // CHECK: store{{.+}} [[OMP_PF_LB_NEXT]], {{.+}}* [[OMP_PF_LB]], 2108 // CHECK-DAG: [[OMP_PF_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], 2109 // CHECK-DAG: [[OMP_PF_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], 2110 // CHECK-DAG: [[OMP_PF_UB_NEXT:%.+]] = add{{.+}} [[OMP_PF_UB_VAL_5]], [[OMP_PF_ST_VAL_2]] 2111 // CHECK: store{{.+}} [[OMP_PF_UB_NEXT]], {{.+}}* [[OMP_PF_UB]], 2112 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]] 2113 2114 // CHECK: [[OMP_PF_OUTER_LOOP_END]]: 2115 // CHECK-DAG: call void @__kmpc_for_static_fini( 2116 // CHECK: ret 2117 2118 // CHECK: define{{.+}} void [[OFFLOADING_FUN_6]]( 2119 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_6:@.+]] to {{.+}}) 2120 2121 // CHECK: define{{.+}} void [[OMP_OUTLINED_6]]( 2122 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 2123 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, 
{{.+}}[[OMP_PARFOR_OUTLINED_6:@.+]] to {{.+}}, 2124 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 2125 // CHECK: ret 2126 2127 // 'parallel for' implementation using outer and inner loops and PrevEUB 2128 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_6]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 2129 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 2130 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 2131 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 2132 // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, 2133 2134 // initialize lb and ub to PrevLB and PrevUB 2135 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 2136 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 2137 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 2138 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 2139 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 2140 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 2141 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 2142 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 2143 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 2144 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 2145 // CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 2146 // CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], 2147 // CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}}) 2148 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] 2149 2150 // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]: 2151 // CHECK: [[IS_FIN:%.+]] = call{{.+}} 
@__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]]) 2152 // CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0 2153 // CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] 2154 2155 // initialize omp.iv (IV = LB) 2156 // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]: 2157 // CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 2158 // CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 2159 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]] 2160 2161 // CHECK: [[OMP_PF_INNER_LOOP_HEADER]]: 2162 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 2163 // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 2164 // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] 2165 // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] 2166 2167 // CHECK: [[OMP_PF_INNER_LOOP_BODY]]: 2168 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 2169 // skip body branch 2170 // CHECK: br{{.+}} 2171 // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]] 2172 2173 // IV = IV + 1 and inner loop latch 2174 // CHECK: [[OMP_PF_INNER_LOOP_INC]]: 2175 // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], 2176 // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 2177 // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], 2178 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER]] 2179 2180 // check NextLB and NextUB 2181 // CHECK: [[OMP_PF_INNER_LOOP_END]]: 2182 // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] 2183 2184 // CHECK: [[OMP_PF_OUTER_LOOP_INC]]: 2185 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]] 2186 2187 // CHECK: [[OMP_PF_OUTER_LOOP_END]]: 2188 // CHECK: ret 2189 2190 // CHECK: define{{.+}} void [[OFFLOADING_FUN_7]]( 2191 // CHECK: call {{.*}}void {{.+}} 
@__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_7:@.+]] to {{.+}}) 2192 2193 // CHECK: define{{.+}} void [[OMP_OUTLINED_7]]( 2194 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 2195 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_7:@.+]] to {{.+}}, 2196 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 2197 // CHECK: ret 2198 2199 // 'parallel for' implementation using outer and inner loops and PrevEUB 2200 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_7]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}}) 2201 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 2202 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 2203 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 2204 // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, 2205 2206 // initialize lb and ub to PrevLB and PrevUB 2207 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 2208 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 2209 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 2210 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 2211 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 2212 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 2213 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 2214 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 2215 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 2216 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 2217 // CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 2218 // CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], 2219 // CHECK: call void 
@__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}}) 2220 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] 2221 2222 // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]: 2223 // CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]]) 2224 // CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0 2225 // CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] 2226 2227 // initialize omp.iv (IV = LB) 2228 // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]: 2229 // CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 2230 // CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 2231 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]] 2232 2233 // CHECK: [[OMP_PF_INNER_LOOP_HEADER]]: 2234 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 2235 // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 2236 // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] 2237 // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] 2238 2239 // CHECK: [[OMP_PF_INNER_LOOP_BODY]]: 2240 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 2241 // skip body branch 2242 // CHECK: br{{.+}} 2243 // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]] 2244 2245 // IV = IV + 1 and inner loop latch 2246 // CHECK: [[OMP_PF_INNER_LOOP_INC]]: 2247 // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], 2248 // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 2249 // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], 2250 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER]] 2251 2252 // check NextLB and NextUB 2253 // CHECK: [[OMP_PF_INNER_LOOP_END]]: 2254 // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] 2255 
2256 // CHECK: [[OMP_PF_OUTER_LOOP_INC]]: 2257 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]] 2258 2259 // CHECK: [[OMP_PF_OUTER_LOOP_END]]: 2260 // CHECK: ret 2261 #endif 2262