// Test host code gen
// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
// RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
// RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32

// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}

// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32

// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
// expected-no-diagnostics
#ifndef HEADER
#define HEADER


template <typename T>
T tmain() {
  T *a, *b, *c;
  int n = 10000;
  int ch = 100;

// no schedule clauses
#pragma omp target
#pragma omp teams
#pragma omp distribute parallel for
  for (int i = 0; i < n; ++i) {
#pragma omp cancel for
    a[i] = b[i] + c[i];
  }

// dist_schedule: static no chunk
#pragma omp target
#pragma omp teams
#pragma omp distribute parallel for dist_schedule(static)
  for (int i = 0; i < n; ++i) {
    a[i] = b[i] + c[i];
  }

// dist_schedule: static chunk
#pragma omp target
#pragma omp teams
#pragma omp distribute parallel for dist_schedule(static, ch)
  for (int i = 0; i < n; ++i) {
    a[i] = b[i] + c[i];
  }

// schedule: static no chunk
#pragma omp target
#pragma omp teams
#pragma omp distribute parallel for schedule(static)
  for (int i = 0; i < n; ++i) {
    a[i] = b[i] + c[i];
  }

// schedule: static chunk
#pragma omp target
#pragma omp teams
#pragma omp distribute parallel for schedule(static, ch)
  for (int i = 0; i < n; ++i) {
    a[i] = b[i] + c[i];
  }

// schedule: dynamic no chunk
#pragma omp target
#pragma omp teams
#pragma omp distribute parallel for schedule(dynamic)
  for (int i = 0; i < n; ++i) {
    a[i] = b[i] + c[i];
  }

// schedule: dynamic chunk
#pragma omp target
#pragma omp teams
#pragma omp distribute parallel for schedule(dynamic, ch)
  for (int i = 0; i < n; ++i) {
    a[i] = b[i] + c[i];
  }

  return T();
}

int main() {
  double *a, *b, *c;
  int n = 10000;
  int ch = 100;

#ifdef LAMBDA
// LAMBDA-LABEL: @main
// LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]](
  [&]() {
// LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](

// LAMBDA: call i{{[0-9]+}}
@__tgt_target_teams_mapper( 114 // LAMBDA: call void [[OFFLOADING_FUN_1:@.+]]( 115 116 // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper( 117 // LAMBDA: call void [[OFFLOADING_FUN_2:@.+]]( 118 119 // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper( 120 // LAMBDA: call void [[OFFLOADING_FUN_3:@.+]]( 121 122 // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper( 123 // LAMBDA: call void [[OFFLOADING_FUN_4:@.+]]( 124 125 // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper( 126 // LAMBDA: call void [[OFFLOADING_FUN_5:@.+]]( 127 128 // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper( 129 // LAMBDA: call void [[OFFLOADING_FUN_6:@.+]]( 130 131 // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper( 132 // LAMBDA: call void [[OFFLOADING_FUN_7:@.+]]( 133 134 // no schedule clauses 135 #pragma omp target 136 #pragma omp teams 137 // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_1]]( 138 // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}}) 139 140 #pragma omp distribute parallel for 141 for (int i = 0; i < n; ++i) { 142 a[i] = b[i] + c[i]; 143 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_1]]( 144 // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca 145 // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca 146 // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca 147 // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca 148 149 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 150 151 // check EUB for distribute 152 // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], 153 // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}}, 154 // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] 155 // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] 156 // LAMBDA-DAG: [[EUB_TRUE]]: 157 // LAMBDA: [[NUM_IT_2:%.+]] = load{{.+}}, 158 // LAMBDA: br label %[[EUB_END:.+]] 159 // LAMBDA-DAG: [[EUB_FALSE]]: 160 // LAMBDA: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], 161 // LAMBDA: br label %[[EUB_END]] 162 // LAMBDA-DAG: [[EUB_END]]: 163 // LAMBDA-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] 164 // LAMBDA: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], 165 166 // initialize omp.iv 167 // LAMBDA: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], 168 // LAMBDA: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], 169 // LAMBDA: br label %[[OMP_JUMP_BACK:.+]] 170 171 // check exit condition 172 // LAMBDA: [[OMP_JUMP_BACK]]: 173 // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], 174 // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], 175 // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] 176 // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]] 177 178 // check that PrevLB and PrevUB are passed to the 'for' 179 // LAMBDA: [[DIST_BODY]]: 180 // LAMBDA-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], 181 // LAMBDA-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to 182 // LAMBDA-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], 183 // LAMBDA-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to 184 // check that distlb and distub are properly passed to fork_call 185 // LAMBDA-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) 186 // LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, 
{{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) 187 // LAMBDA: br label %[[DIST_INC:.+]] 188 189 // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch 190 // LAMBDA: [[DIST_INC]]: 191 // LAMBDA-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], 192 // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], 193 // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] 194 // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], 195 // LAMBDA: br label %[[OMP_JUMP_BACK]] 196 197 // LAMBDA-DAG: call void @__kmpc_for_static_fini( 198 // LAMBDA: ret 199 200 // implementation of 'parallel for' 201 // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_1]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 202 203 // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 204 // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 205 // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 206 207 // initialize lb and ub to PrevLB and PrevUB 208 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 209 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 210 // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 211 // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 212 // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 213 // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 214 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 215 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 216 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 217 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 218 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 219 220 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used 221 // In this case we use EUB 222 // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 223 // LAMBDA: [[PF_NUM_IT_1:%.+]] = load{{.+}}, 224 // LAMBDA-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] 225 // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 226 // LAMBDA: [[PF_EUB_TRUE]]: 227 // LAMBDA: [[PF_NUM_IT_2:%.+]] = load{{.+}}, 228 // LAMBDA: br label %[[PF_EUB_END:.+]] 229 // LAMBDA-DAG: [[PF_EUB_FALSE]]: 230 // LAMBDA: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], 231 // LAMBDA: br label %[[PF_EUB_END]] 232 // LAMBDA-DAG: [[PF_EUB_END]]: 233 // LAMBDA-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] 234 // LAMBDA: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], 235 236 // initialize omp.iv 237 // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 238 // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 239 // LAMBDA: br label %[[OMP_PF_JUMP_BACK:.+]] 240 241 // check exit condition 242 // LAMBDA: [[OMP_PF_JUMP_BACK]]: 243 // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], 244 // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], 245 // LAMBDA: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], 
[[OMP_PF_UB_VAL_3]]
// LAMBDA: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]

// check that PrevLB and PrevUB are passed to the 'for'
// LAMBDA: [[PF_BODY]]:
// LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
// LAMBDA: br label {{.+}}

// check stride 1 for 'for' in 'distribute parallel for'
// LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
// LAMBDA: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
// LAMBDA: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
// LAMBDA: br label %[[OMP_PF_JUMP_BACK]]

// LAMBDA-DAG: call void @__kmpc_for_static_fini(
// LAMBDA: ret

    [&]() {
      a[i] = b[i] + c[i];
    }();
  }

// dist_schedule: static no chunk (same as default - no dist_schedule)
#pragma omp target
#pragma omp teams
// LAMBDA: define{{.+}} void [[OFFLOADING_FUN_2]](
// LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_2:@.+]] to {{.+}})

#pragma omp distribute parallel for dist_schedule(static)
  for (int i = 0; i < n; ++i) {
    a[i] = b[i] + c[i];
// LAMBDA: define{{.+}} void [[OMP_OUTLINED_2]](
// LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca
// LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
// LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
// LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca

// LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,

// check EUB for distribute
// LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
// LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}},
// LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]]
// LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
// LAMBDA-DAG: [[EUB_TRUE]]:
// LAMBDA: [[NUM_IT_2:%.+]] = load{{.+}},
// LAMBDA: br label %[[EUB_END:.+]]
// LAMBDA-DAG: [[EUB_FALSE]]:
// LAMBDA: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
// LAMBDA: br label %[[EUB_END]]
// LAMBDA-DAG: [[EUB_END]]:
// LAMBDA-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
// LAMBDA: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],

// initialize omp.iv
// LAMBDA: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
// LAMBDA: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
// LAMBDA: br label %[[OMP_JUMP_BACK:.+]]

// check exit condition
// LAMBDA: [[OMP_JUMP_BACK]]:
// LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
// LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
// LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
// LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]]

// check that PrevLB and PrevUB are passed to the 'for'
// LAMBDA: [[DIST_BODY]]:
// LAMBDA-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
// LAMBDA-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to
// LAMBDA-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
// LAMBDA-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to
// check that distlb and distub are properly passed to fork_call
// LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}})
// LAMBDA-32:
call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) 320 // LAMBDA: br label %[[DIST_INC:.+]] 321 322 // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch 323 // LAMBDA: [[DIST_INC]]: 324 // LAMBDA-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], 325 // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], 326 // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] 327 // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], 328 // LAMBDA: br label %[[OMP_JUMP_BACK]] 329 330 // LAMBDA-DAG: call void @__kmpc_for_static_fini( 331 // LAMBDA: ret 332 333 // implementation of 'parallel for' 334 // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_2]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 335 336 // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 337 // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 338 // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 339 340 // initialize lb and ub to PrevLB and PrevUB 341 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 342 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 343 // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 344 // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 345 // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 346 // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 347 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 348 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 349 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 350 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 351 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 352 353 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used 354 // In this case we use EUB 355 // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 356 // LAMBDA: [[PF_NUM_IT_1:%.+]] = load{{.+}}, 357 // LAMBDA-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] 358 // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 359 // LAMBDA: [[PF_EUB_TRUE]]: 360 // LAMBDA: [[PF_NUM_IT_2:%.+]] = load{{.+}}, 361 // LAMBDA: br label %[[PF_EUB_END:.+]] 362 // LAMBDA-DAG: [[PF_EUB_FALSE]]: 363 // LAMBDA: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], 364 // LAMBDA: br label %[[PF_EUB_END]] 365 // LAMBDA-DAG: [[PF_EUB_END]]: 366 // LAMBDA-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] 367 // LAMBDA: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], 368 369 // initialize omp.iv 370 // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 371 // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 372 // LAMBDA: br label %[[OMP_PF_JUMP_BACK:.+]] 373 374 // check exit condition 375 // LAMBDA: [[OMP_PF_JUMP_BACK]]: 376 // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], 377 // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], 378 // LAMBDA: [[PF_CMP_IV_UB:%.+]] = 
icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 379 // LAMBDA: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] 380 381 // check that PrevLB and PrevUB are passed to the 'for' 382 // LAMBDA: [[PF_BODY]]: 383 // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 384 // LAMBDA: br label {{.+}} 385 386 // check stride 1 for 'for' in 'distribute parallel for' 387 // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], 388 // LAMBDA: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 389 // LAMBDA: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], 390 // LAMBDA: br label %[[OMP_PF_JUMP_BACK]] 391 392 // LAMBDA-DAG: call void @__kmpc_for_static_fini( 393 // LAMBDA: ret 394 [&]() { 395 a[i] = b[i] + c[i]; 396 }(); 397 } 398 399 // dist_schedule: static chunk 400 #pragma omp target 401 #pragma omp teams 402 // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_3]]( 403 // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}}) 404 405 #pragma omp distribute parallel for dist_schedule(static, ch) 406 for (int i = 0; i < n; ++i) { 407 a[i] = b[i] + c[i]; 408 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_3]]( 409 // LAMBDA: alloca 410 // LAMBDA: alloca 411 // LAMBDA: alloca 412 // LAMBDA: alloca 413 // LAMBDA: alloca 414 // LAMBDA: alloca 415 // LAMBDA: alloca 416 // LAMBDA: [[OMP_IV:%.+]] = alloca 417 // LAMBDA: alloca 418 // LAMBDA: alloca 419 // LAMBDA: alloca 420 // LAMBDA: alloca 421 // LAMBDA: [[OMP_LB:%.+]] = alloca 422 // LAMBDA: [[OMP_UB:%.+]] = alloca 423 // LAMBDA: [[OMP_ST:%.+]] = alloca 424 425 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91, 426 427 // check EUB for distribute 428 // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], 429 // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}} 430 // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] 431 // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] 432 // LAMBDA-DAG: [[EUB_TRUE]]: 433 // LAMBDA: [[NUM_IT_2:%.+]] = load{{.+}}, 434 // LAMBDA: br label %[[EUB_END:.+]] 435 // LAMBDA-DAG: [[EUB_FALSE]]: 436 // LAMBDA: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], 437 // LAMBDA: br label %[[EUB_END]] 438 // LAMBDA-DAG: [[EUB_END]]: 439 // LAMBDA-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] 440 // LAMBDA: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], 441 442 // initialize omp.iv 443 // LAMBDA: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], 444 // LAMBDA: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], 445 446 // check exit condition 447 // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], 448 // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} 449 // LAMBDA-DAG: [[OMP_UB_VAL_3_PLUS_ONE:%.+]] = add {{.+}} [[OMP_UB_VAL_3]], 1 450 // LAMBDA: [[CMP_IV_UB:%.+]] = icmp slt {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3_PLUS_ONE]] 451 // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]] 452 453 // check that PrevLB and PrevUB are passed to the 'for' 454 // LAMBDA: [[DIST_INNER_LOOP_BODY]]: 455 // LAMBDA-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], 456 // LAMBDA-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} 457 // LAMBDA-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], 458 // LAMBDA-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} 459 // check that distlb and distub are 
properly passed to fork_call 460 // LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) 461 // LAMBDA-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) 462 // LAMBDA: br label %[[DIST_INNER_LOOP_INC:.+]] 463 464 // check DistInc 465 // LAMBDA: [[DIST_INNER_LOOP_INC]]: 466 // LAMBDA-DAG: [[OMP_IV_VAL_3:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], 467 // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], 468 // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]] 469 // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], 470 // LAMBDA-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]], 471 // LAMBDA-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], 472 // LAMBDA-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]] 473 // LAMBDA: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]], 474 // LAMBDA-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]], 475 // LAMBDA-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], 476 // LAMBDA-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]] 477 // LAMBDA: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]], 478 479 // Update UB 480 // LAMBDA-DAG: [[OMP_UB_VAL_6:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]], 481 // LAMBDA: [[OMP_EXPR_VAL:%.+]] = load{{.+}}, {{.+}} 482 // LAMBDA-DAG: [[CMP_UB_NUM_IT_1:%.+]] = icmp sgt {{.+}}[[OMP_UB_VAL_6]], [[OMP_EXPR_VAL]] 483 // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT_1]], label %[[EUB_TRUE_1:.+]], label %[[EUB_FALSE_1:.+]] 484 // LAMBDA-DAG: [[EUB_TRUE_1]]: 485 // LAMBDA: [[NUM_IT_3:%.+]] = load{{.+}} 486 // LAMBDA: br label %[[EUB_END_1:.+]] 487 // LAMBDA-DAG: [[EUB_FALSE_1]]: 488 // LAMBDA: [[OMP_UB_VAL3:%.+]] = load{{.+}} [[OMP_UB]], 489 // LAMBDA: br label %[[EUB_END_1]] 490 // LAMBDA-DAG: [[EUB_END_1]]: 491 // LAMBDA-DAG: [[EUB_RES_1:%.+]] = phi{{.+}} [ [[NUM_IT_3]], %[[EUB_TRUE_1]] ], [ [[OMP_UB_VAL3]], %[[EUB_FALSE_1]] ] 492 // LAMBDA: store{{.+}} [[EUB_RES_1]], {{.+}}* [[OMP_UB]], 493 494 // Store LB in IV 495 // LAMBDA-DAG: [[OMP_LB_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]], 496 // LAMBDA: store{{.+}} [[OMP_LB_VAL_3]], {{.+}}* [[OMP_IV]], 497 498 // LAMBDA: [[DIST_INNER_LOOP_END]]: 499 // LAMBDA: br label %[[LOOP_EXIT:.+]] 500 501 // loop exit 502 // LAMBDA: [[LOOP_EXIT]]: 503 // LAMBDA-DAG: call void @__kmpc_for_static_fini( 504 // LAMBDA: ret 505 506 // skip implementation of 'parallel for': using default scheduling and was tested above 507 [&]() { 508 a[i] = b[i] + c[i]; 509 }(); 510 } 511 512 // schedule: static no chunk 513 #pragma omp target 514 #pragma omp teams 515 // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_4]]( 516 // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_4:@.+]] to {{.+}}) 517 518 #pragma omp distribute parallel for schedule(static) 519 for (int i = 0; i < n; ++i) { 520 a[i] = b[i] + c[i]; 521 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_4]]( 522 // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca 523 // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca 524 // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca 525 // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca 526 527 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 528 // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_4:@.+]] to {{.+}}, 529 // 
skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
// LAMBDA: ret

// 'parallel for' implementation is the same as the case without a schedule clause (static no chunk is the default)
// LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_4]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})

// LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
// LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
// LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},

// initialize lb and ub to PrevLB and PrevUB
// LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
// LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
// LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
// LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
// LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
// LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
// LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
// LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
// LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
// LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
// LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})

// PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
// In this case we use EUB
// LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
// LAMBDA: [[PF_NUM_IT_1:%.+]] = load{{.+}},
// LAMBDA-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
// LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
// LAMBDA: [[PF_EUB_TRUE]]:
// LAMBDA: [[PF_NUM_IT_2:%.+]] = load{{.+}},
// LAMBDA: br label %[[PF_EUB_END:.+]]
// LAMBDA-DAG: [[PF_EUB_FALSE]]:
// LAMBDA: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
// LAMBDA: br label %[[PF_EUB_END]]
// LAMBDA-DAG: [[PF_EUB_END]]:
// LAMBDA-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
// LAMBDA: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]],

// initialize omp.iv
// LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
// LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
// LAMBDA: br label %[[OMP_PF_JUMP_BACK:.+]]

// check exit condition
// LAMBDA: [[OMP_PF_JUMP_BACK]]:
// LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
// LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
// LAMBDA: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
// LAMBDA: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]

// check that PrevLB and PrevUB are passed to the 'for'
// LAMBDA: [[PF_BODY]]:
// LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
// LAMBDA: br label {{.+}}

// check stride 1 for 'for' in 'distribute parallel for'
// LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
// LAMBDA: [[OMP_PF_IV_INC:%.+]] = add{{.+}}
[[OMP_PF_IV_VAL_2]], 1 588 // LAMBDA: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], 589 // LAMBDA: br label %[[OMP_PF_JUMP_BACK]] 590 591 // LAMBDA-DAG: call void @__kmpc_for_static_fini( 592 // LAMBDA: ret 593 594 [&]() { 595 a[i] = b[i] + c[i]; 596 }(); 597 } 598 599 // schedule: static chunk 600 #pragma omp target 601 #pragma omp teams 602 // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_5]]( 603 // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_5:@.+]] to {{.+}}) 604 605 #pragma omp distribute parallel for schedule(static, ch) 606 for (int i = 0; i < n; ++i) { 607 a[i] = b[i] + c[i]; 608 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_5]]( 609 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 610 // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_5:@.+]] to {{.+}}, 611 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 612 // LAMBDA: ret 613 614 // 'parallel for' implementation using outer and inner loops and PrevEUB 615 // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_5]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}}) 616 // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 617 // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 618 // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 619 // LAMBDA-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, 620 621 // initialize lb and ub to PrevLB and PrevUB 622 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 623 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 624 // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 625 // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 626 // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 627 // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 628 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 629 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 630 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 631 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 632 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 633 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] 634 635 // check PrevEUB (using PrevUB instead of NumIt as upper bound) 636 // LAMBDA: [[OMP_PF_OUTER_LOOP_HEADER]]: 637 // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 638 // LAMBDA-64-DAG: [[OMP_PF_UB_VAL_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_1]] to 639 // LAMBDA: [[PF_PREV_UB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 640 // LAMBDA-64-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_CONV]], [[PF_PREV_UB_VAL_1]] 641 // LAMBDA-32-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_PREV_UB_VAL_1]] 642 // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 643 // LAMBDA: [[PF_EUB_TRUE]]: 644 // LAMBDA: [[PF_PREV_UB_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 645 // LAMBDA: br label %[[PF_EUB_END:.+]] 646 // LAMBDA-DAG: [[PF_EUB_FALSE]]: 647 // LAMBDA: [[OMP_PF_UB_VAL_2:%.+]] = load{{.+}} [[OMP_PF_UB]], 648 // LAMBDA-64: [[OMP_PF_UB_VAL_2_CONV:%.+]] = sext{{.+}} 
[[OMP_PF_UB_VAL_2]] to 649 // LAMBDA: br label %[[PF_EUB_END]] 650 // LAMBDA-DAG: [[PF_EUB_END]]: 651 // LAMBDA-64-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2_CONV]], %[[PF_EUB_FALSE]] ] 652 // LAMBDA-32-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2]], %[[PF_EUB_FALSE]] ] 653 // LAMBDA-64-DAG: [[PF_EUB_RES_CONV:%.+]] = trunc{{.+}} [[PF_EUB_RES]] to 654 // LAMBDA-64: store{{.+}} [[PF_EUB_RES_CONV]],{{.+}} [[OMP_PF_UB]], 655 // LAMBDA-32: store{{.+}} [[PF_EUB_RES]], {{.+}} [[OMP_PF_UB]], 656 657 // initialize omp.iv (IV = LB) 658 // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 659 // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 660 661 // outer loop: while (IV < UB) { 662 // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 663 // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 664 // LAMBDA: [[PF_CMP_IV_UB_1:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 665 // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_1]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] 666 667 // LAMBDA: [[OMP_PF_OUTER_LOOP_BODY]]: 668 // LAMBDA: br label %[[OMP_PF_INNER_FOR_HEADER:.+]] 669 670 // LAMBDA: [[OMP_PF_INNER_FOR_HEADER]]: 671 // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 672 // LAMBDA-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 673 // LAMBDA: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] 674 // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] 675 676 // LAMBDA: [[OMP_PF_INNER_LOOP_BODY]]: 677 // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 678 // skip body branch 679 // LAMBDA: br{{.+}} 680 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_INC:.+]] 681 682 // IV = IV + 1 and inner loop latch 683 // LAMBDA: [[OMP_PF_INNER_LOOP_INC]]: 684 // LAMBDA-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], 685 // LAMBDA-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 686 // LAMBDA-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], 687 // LAMBDA: br label %[[OMP_PF_INNER_FOR_HEADER]] 688 689 // check NextLB and NextUB 690 // LAMBDA: [[OMP_PF_INNER_LOOP_END]]: 691 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] 692 693 // LAMBDA: [[OMP_PF_OUTER_LOOP_INC]]: 694 // LAMBDA-DAG: [[OMP_PF_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 695 // LAMBDA-DAG: [[OMP_PF_ST_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], 696 // LAMBDA-DAG: [[OMP_PF_LB_NEXT:%.+]] = add{{.+}} [[OMP_PF_LB_VAL_2]], [[OMP_PF_ST_VAL_1]] 697 // LAMBDA: store{{.+}} [[OMP_PF_LB_NEXT]], {{.+}}* [[OMP_PF_LB]], 698 // LAMBDA-DAG: [[OMP_PF_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], 699 // LAMBDA-DAG: [[OMP_PF_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], 700 // LAMBDA-DAG: [[OMP_PF_UB_NEXT:%.+]] = add{{.+}} [[OMP_PF_UB_VAL_5]], [[OMP_PF_ST_VAL_2]] 701 // LAMBDA: store{{.+}} [[OMP_PF_UB_NEXT]], {{.+}}* [[OMP_PF_UB]], 702 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER]] 703 704 // LAMBDA: [[OMP_PF_OUTER_LOOP_END]]: 705 // LAMBDA-DAG: call void @__kmpc_for_static_fini( 706 // LAMBDA: ret 707 [&]() { 708 a[i] = b[i] + c[i]; 709 }(); 710 } 711 712 // schedule: dynamic no chunk 713 #pragma omp target 714 #pragma omp teams 715 // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_6]]( 716 // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, 
{{.+}}* [[OMP_OUTLINED_6:@.+]] to {{.+}}) 717 718 #pragma omp distribute parallel for schedule(dynamic) 719 for (int i = 0; i < n; ++i) { 720 a[i] = b[i] + c[i]; 721 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_6]]( 722 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 723 // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_6:@.+]] to {{.+}}, 724 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 725 // LAMBDA: ret 726 727 // 'parallel for' implementation using outer and inner loops and PrevEUB 728 // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_6]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 729 // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 730 // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 731 // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 732 // LAMBDA-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, 733 734 // initialize lb and ub to PrevLB and PrevUB 735 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 736 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 737 // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 738 // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 739 // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 740 // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 741 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 742 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 743 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 744 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 745 // LAMBDA-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 746 // LAMBDA-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], 747 // LAMBDA: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}}) 748 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] 749 750 // LAMBDA: [[OMP_PF_OUTER_LOOP_HEADER]]: 751 // LAMBDA: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]]) 752 // LAMBDA: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0 753 // LAMBDA: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] 754 755 // initialize omp.iv (IV = LB) 756 // LAMBDA: [[OMP_PF_OUTER_LOOP_BODY]]: 757 // LAMBDA-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 758 // LAMBDA-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 759 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]] 760 761 // LAMBDA: [[OMP_PF_INNER_LOOP_HEADER]]: 762 // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 763 // LAMBDA-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 764 // LAMBDA: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] 765 // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] 766 767 // LAMBDA: [[OMP_PF_INNER_LOOP_BODY]]: 768 // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 769 // skip body branch 770 // LAMBDA: br{{.+}} 771 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_INC:.+]] 
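// A rough sketch (not a check line) of the control flow verified here for
// schedule(dynamic); lb/ub/st/iv stand for the %.omp.lb/%.omp.ub/%.omp.stride/%.omp.iv
// allocas checked above:
//
//   __kmpc_dispatch_init_4(..., /*schedule*/ 35, lb, ub, ...);
//   while (__kmpc_dispatch_next_4(..., &lb, &ub, &st) != 0) { // outer dispatch loop
//     for (iv = lb; iv <= ub; ++iv)                           // inner loop, unit stride
//       <loop body>;
//   }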
772 773 // IV = IV + 1 and inner loop latch 774 // LAMBDA: [[OMP_PF_INNER_LOOP_INC]]: 775 // LAMBDA-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], 776 // LAMBDA-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 777 // LAMBDA-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], 778 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER]] 779 780 // check NextLB and NextUB 781 // LAMBDA: [[OMP_PF_INNER_LOOP_END]]: 782 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] 783 784 // LAMBDA: [[OMP_PF_OUTER_LOOP_INC]]: 785 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER]] 786 787 // LAMBDA: [[OMP_PF_OUTER_LOOP_END]]: 788 // LAMBDA: ret 789 [&]() { 790 a[i] = b[i] + c[i]; 791 }(); 792 } 793 794 // schedule: dynamic chunk 795 #pragma omp target 796 #pragma omp teams 797 // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_7]]( 798 // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_7:@.+]] to {{.+}}) 799 800 #pragma omp distribute parallel for schedule(dynamic, ch) 801 for (int i = 0; i < n; ++i) { 802 a[i] = b[i] + c[i]; 803 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_7]]( 804 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 805 // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_7:@.+]] to {{.+}}, 806 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 807 // LAMBDA: ret 808 809 // 'parallel for' implementation using outer and inner loops and PrevEUB 810 // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_7]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}}) 811 // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 812 // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 813 // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 814 // LAMBDA-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, 815 816 // initialize lb and ub to PrevLB and PrevUB 817 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 818 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 819 // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 820 // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 821 // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 822 // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 823 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 824 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 825 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 826 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 827 // LAMBDA-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 828 // LAMBDA-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], 829 // LAMBDA: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}}) 830 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] 831 832 // LAMBDA: [[OMP_PF_OUTER_LOOP_HEADER]]: 833 // LAMBDA: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]]) 834 // LAMBDA: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0 835 // LAMBDA: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label 
%[[OMP_PF_OUTER_LOOP_END:.+]] 836 837 // initialize omp.iv (IV = LB) 838 // LAMBDA: [[OMP_PF_OUTER_LOOP_BODY]]: 839 // LAMBDA-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 840 // LAMBDA-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 841 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]] 842 843 // LAMBDA: [[OMP_PF_INNER_LOOP_HEADER]]: 844 // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 845 // LAMBDA-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 846 // LAMBDA: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] 847 // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] 848 849 // LAMBDA: [[OMP_PF_INNER_LOOP_BODY]]: 850 // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 851 // skip body branch 852 // LAMBDA: br{{.+}} 853 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_INC:.+]] 854 855 // IV = IV + 1 and inner loop latch 856 // LAMBDA: [[OMP_PF_INNER_LOOP_INC]]: 857 // LAMBDA-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], 858 // LAMBDA-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 859 // LAMBDA-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], 860 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER]] 861 862 // check NextLB and NextUB 863 // LAMBDA: [[OMP_PF_INNER_LOOP_END]]: 864 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] 865 866 // LAMBDA: [[OMP_PF_OUTER_LOOP_INC]]: 867 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER]] 868 869 // LAMBDA: [[OMP_PF_OUTER_LOOP_END]]: 870 // LAMBDA: ret 871 [&]() { 872 a[i] = b[i] + c[i]; 873 }(); 874 } 875 }(); 876 return 0; 877 #else 878 // CHECK-LABEL: @main 879 880 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper( 881 // CHECK: call void [[OFFLOADING_FUN_1:@.+]]( 882 883 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper( 884 // CHECK: call void [[OFFLOADING_FUN_2:@.+]]( 885 886 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper( 887 // CHECK: call void [[OFFLOADING_FUN_3:@.+]]( 888 889 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper( 890 // CHECK: call void [[OFFLOADING_FUN_4:@.+]]( 891 892 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper( 893 // CHECK: call void [[OFFLOADING_FUN_5:@.+]]( 894 895 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper( 896 // CHECK: call void [[OFFLOADING_FUN_6:@.+]]( 897 898 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper( 899 // CHECK: call void [[OFFLOADING_FUN_7:@.+]]( 900 901 // CHECK: call{{.+}} [[TMAIN:@.+]]() 902 903 // no schedule clauses 904 #pragma omp target 905 #pragma omp teams 906 // CHECK: define internal void [[OFFLOADING_FUN_1]]( 907 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}}) 908 909 #pragma omp distribute parallel for 910 for (int i = 0; i < n; ++i) { 911 a[i] = b[i] + c[i]; 912 // CHECK: define{{.+}} void [[OMP_OUTLINED_1]]( 913 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca 914 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca 915 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca 916 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca 917 918 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 919 920 // check EUB for distribute 921 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], 922 // CHECK: [[NUM_IT_1:%.+]] = load{{.+}}, 923 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] 924 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label 
%[[EUB_FALSE:.+]] 925 // CHECK-DAG: [[EUB_TRUE]]: 926 // CHECK: [[NUM_IT_2:%.+]] = load{{.+}}, 927 // CHECK: br label %[[EUB_END:.+]] 928 // CHECK-DAG: [[EUB_FALSE]]: 929 // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], 930 // CHECK: br label %[[EUB_END]] 931 // CHECK-DAG: [[EUB_END]]: 932 // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] 933 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], 934 935 // initialize omp.iv 936 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], 937 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], 938 // CHECK: br label %[[OMP_JUMP_BACK:.+]] 939 940 // check exit condition 941 // CHECK: [[OMP_JUMP_BACK]]: 942 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], 943 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], 944 // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] 945 // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]] 946 947 // check that PrevLB and PrevUB are passed to the 'for' 948 // CHECK: [[DIST_BODY]]: 949 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], 950 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} 951 // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], 952 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} 953 // check that distlb and distub are properly passed to fork_call 954 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) 955 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) 956 // CHECK: br label %[[DIST_INC:.+]] 957 958 // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch 959 // CHECK: [[DIST_INC]]: 960 // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], 961 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], 962 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] 963 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], 964 // CHECK: br label %[[OMP_JUMP_BACK]] 965 966 // CHECK-DAG: call void @__kmpc_for_static_fini( 967 // CHECK: ret 968 969 // implementation of 'parallel for' 970 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_1]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 971 972 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 973 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 974 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 975 976 // initialize lb and ub to PrevLB and PrevUB 977 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 978 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 979 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 980 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 981 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 982 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 983 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 984 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 985 
// CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 986 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 987 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 988 989 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used 990 // In this case we use EUB 991 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 992 // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}}, 993 // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] 994 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 995 // CHECK: [[PF_EUB_TRUE]]: 996 // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}}, 997 // CHECK: br label %[[PF_EUB_END:.+]] 998 // CHECK-DAG: [[PF_EUB_FALSE]]: 999 // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], 1000 // CHECK: br label %[[PF_EUB_END]] 1001 // CHECK-DAG: [[PF_EUB_END]]: 1002 // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] 1003 // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], 1004 1005 // initialize omp.iv 1006 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 1007 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 1008 // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]] 1009 1010 // check exit condition 1011 // CHECK: [[OMP_PF_JUMP_BACK]]: 1012 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], 1013 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], 1014 // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 1015 // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] 1016 1017 // check that PrevLB and PrevUB are passed to the 'for' 1018 // CHECK: [[PF_BODY]]: 1019 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1020 // CHECK: br label {{.+}} 1021 1022 // check stride 1 for 'for' in 'distribute parallel for' 1023 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], 1024 // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 1025 // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], 1026 // CHECK: br label %[[OMP_PF_JUMP_BACK]] 1027 1028 // CHECK-DAG: call void @__kmpc_for_static_fini( 1029 // CHECK: ret 1030 } 1031 1032 // dist_schedule: static no chunk 1033 #pragma omp target 1034 #pragma omp teams 1035 // CHECK: define{{.+}} void [[OFFLOADING_FUN_2]]( 1036 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_2:@.+]] to {{.+}}) 1037 1038 #pragma omp distribute parallel for dist_schedule(static) 1039 for (int i = 0; i < n; ++i) { 1040 a[i] = b[i] + c[i]; 1041 // CHECK: define{{.+}} void [[OMP_OUTLINED_2]]( 1042 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca 1043 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca 1044 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca 1045 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca 1046 1047 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 1048 1049 // check EUB for distribute 1050 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], 1051 // CHECK: [[NUM_IT_1:%.+]] = load{{.+}}, 1052 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] 1053 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] 1054 // CHECK-DAG: [[EUB_TRUE]]: 1055 // CHECK: 
[[NUM_IT_2:%.+]] = load{{.+}}, 1056 // CHECK: br label %[[EUB_END:.+]] 1057 // CHECK-DAG: [[EUB_FALSE]]: 1058 // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], 1059 // CHECK: br label %[[EUB_END]] 1060 // CHECK-DAG: [[EUB_END]]: 1061 // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] 1062 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], 1063 1064 // initialize omp.iv 1065 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], 1066 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], 1067 // CHECK: br label %[[OMP_JUMP_BACK:.+]] 1068 1069 // check exit condition 1070 // CHECK: [[OMP_JUMP_BACK]]: 1071 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], 1072 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], 1073 // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] 1074 // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]] 1075 1076 // check that PrevLB and PrevUB are passed to the 'for' 1077 // CHECK: [[DIST_BODY]]: 1078 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], 1079 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} 1080 // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], 1081 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} 1082 // check that distlb and distub are properly passed to fork_call 1083 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) 1084 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) 1085 // CHECK: br label %[[DIST_INC:.+]] 1086 1087 // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch 1088 // CHECK: [[DIST_INC]]: 1089 // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], 1090 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], 1091 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] 1092 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], 1093 // CHECK: br label %[[OMP_JUMP_BACK]] 1094 1095 // CHECK-DAG: call void @__kmpc_for_static_fini( 1096 // CHECK: ret 1097 1098 // implementation of 'parallel for' 1099 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_2]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 1100 1101 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 1102 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 1103 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 1104 1105 // initialize lb and ub to PrevLB and PrevUB 1106 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 1107 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 1108 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 1109 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 1110 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 1111 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 1112 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 1113 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 1114 // 
CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 1115 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 1116 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 1117 1118 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used 1119 // In this case we use EUB 1120 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 1121 // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}}, 1122 // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] 1123 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 1124 // CHECK: [[PF_EUB_TRUE]]: 1125 // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}}, 1126 // CHECK: br label %[[PF_EUB_END:.+]] 1127 // CHECK-DAG: [[PF_EUB_FALSE]]: 1128 // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], 1129 // CHECK: br label %[[PF_EUB_END]] 1130 // CHECK-DAG: [[PF_EUB_END]]: 1131 // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] 1132 // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], 1133 1134 // initialize omp.iv 1135 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 1136 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 1137 // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]] 1138 1139 // check exit condition 1140 // CHECK: [[OMP_PF_JUMP_BACK]]: 1141 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], 1142 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], 1143 // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 1144 // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] 1145 1146 // check that PrevLB and PrevUB are passed to the 'for' 1147 // CHECK: [[PF_BODY]]: 1148 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1149 // CHECK: br label {{.+}} 1150 1151 // check stride 1 for 'for' in 'distribute parallel for' 1152 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], 1153 // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 1154 // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], 1155 // CHECK: br label %[[OMP_PF_JUMP_BACK]] 1156 1157 // CHECK-DAG: call void @__kmpc_for_static_fini( 1158 // CHECK: ret 1159 } 1160 1161 // dist_schedule: static chunk 1162 #pragma omp target 1163 #pragma omp teams 1164 // CHECK: define{{.+}} void [[OFFLOADING_FUN_3]]( 1165 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}}) 1166 1167 #pragma omp distribute parallel for dist_schedule(static, ch) 1168 for (int i = 0; i < n; ++i) { 1169 a[i] = b[i] + c[i]; 1170 // CHECK: define{{.+}} void [[OMP_OUTLINED_3]]( 1171 // CHECK: alloca 1172 // CHECK: alloca 1173 // CHECK: alloca 1174 // CHECK: alloca 1175 // CHECK: alloca 1176 // CHECK: alloca 1177 // CHECK: alloca 1178 // CHECK: [[OMP_IV:%.+]] = alloca 1179 // CHECK: alloca 1180 // CHECK: alloca 1181 // CHECK: alloca 1182 // CHECK: alloca 1183 // CHECK: [[OMP_LB:%.+]] = alloca 1184 // CHECK: [[OMP_UB:%.+]] = alloca 1185 // CHECK: [[OMP_ST:%.+]] = alloca 1186 1187 // unlike the previous tests, in this one we have a outer and inner loop for 'distribute' 1188 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91, 1189 1190 // check EUB for distribute 1191 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = 
load{{.+}} [[OMP_UB]], 1192 // CHECK: [[NUM_IT_1:%.+]] = load{{.+}} 1193 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] 1194 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] 1195 // CHECK-DAG: [[EUB_TRUE]]: 1196 // CHECK: [[NUM_IT_2:%.+]] = load{{.+}}, 1197 // CHECK: br label %[[EUB_END:.+]] 1198 // CHECK-DAG: [[EUB_FALSE]]: 1199 // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], 1200 // CHECK: br label %[[EUB_END]] 1201 // CHECK-DAG: [[EUB_END]]: 1202 // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] 1203 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], 1204 1205 // initialize omp.iv 1206 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], 1207 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], 1208 1209 // check exit condition 1210 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], 1211 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} 1212 // CHECK-DAG: [[OMP_UB_VAL_3_PLUS_ONE:%.+]] = add {{.+}} [[OMP_UB_VAL_3]], 1 1213 // CHECK: [[CMP_IV_UB:%.+]] = icmp slt {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3_PLUS_ONE]] 1214 // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]] 1215 1216 // check that PrevLB and PrevUB are passed to the 'for' 1217 // CHECK: [[DIST_INNER_LOOP_BODY]]: 1218 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], 1219 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} 1220 // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], 1221 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} 1222 // check that distlb and distub are properly passed to fork_call 1223 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) 1224 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) 1225 // CHECK: br label %[[DIST_INNER_LOOP_INC:.+]] 1226 1227 // check DistInc 1228 // CHECK: [[DIST_INNER_LOOP_INC]]: 1229 // CHECK-DAG: [[OMP_IV_VAL_3:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], 1230 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], 1231 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]] 1232 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], 1233 // CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]], 1234 // CHECK-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], 1235 // CHECK-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]] 1236 // CHECK: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]], 1237 // CHECK-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]], 1238 // CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], 1239 // CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]] 1240 // CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]], 1241 1242 // Update UB 1243 // CHECK-DAG: [[OMP_UB_VAL_6:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]], 1244 // CHECK: [[OMP_EXPR_VAL:%.+]] = load{{.+}}, {{.+}} 1245 // CHECK-DAG: [[CMP_UB_NUM_IT_1:%.+]] = icmp sgt {{.+}}[[OMP_UB_VAL_6]], [[OMP_EXPR_VAL]] 1246 // CHECK: br {{.+}} [[CMP_UB_NUM_IT_1]], label %[[EUB_TRUE_1:.+]], label %[[EUB_FALSE_1:.+]] 1247 // CHECK-DAG: 
[[EUB_TRUE_1]]: 1248 // CHECK: [[NUM_IT_3:%.+]] = load{{.+}} 1249 // CHECK: br label %[[EUB_END_1:.+]] 1250 // CHECK-DAG: [[EUB_FALSE_1]]: 1251 // CHECK: [[OMP_UB_VAL3:%.+]] = load{{.+}} [[OMP_UB]], 1252 // CHECK: br label %[[EUB_END_1]] 1253 // CHECK-DAG: [[EUB_END_1]]: 1254 // CHECK-DAG: [[EUB_RES_1:%.+]] = phi{{.+}} [ [[NUM_IT_3]], %[[EUB_TRUE_1]] ], [ [[OMP_UB_VAL3]], %[[EUB_FALSE_1]] ] 1255 // CHECK: store{{.+}} [[EUB_RES_1]], {{.+}}* [[OMP_UB]], 1256 1257 // Store LB in IV 1258 // CHECK-DAG: [[OMP_LB_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]], 1259 // CHECK: store{{.+}} [[OMP_LB_VAL_3]], {{.+}}* [[OMP_IV]], 1260 1261 // CHECK: [[DIST_INNER_LOOP_END]]: 1262 // CHECK: br label %[[LOOP_EXIT:.+]] 1263 1264 // loop exit 1265 // CHECK: [[LOOP_EXIT]]: 1266 // CHECK-DAG: call void @__kmpc_for_static_fini( 1267 // CHECK: ret 1268 1269 // skip implementation of 'parallel for': using default scheduling and was tested above 1270 } 1271 1272 // schedule: static no chunk 1273 #pragma omp target 1274 #pragma omp teams 1275 // CHECK: define{{.+}} void [[OFFLOADING_FUN_4]]( 1276 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_4:@.+]] to {{.+}}) 1277 1278 #pragma omp distribute parallel for schedule(static) 1279 for (int i = 0; i < n; ++i) { 1280 a[i] = b[i] + c[i]; 1281 // CHECK: define{{.+}} void [[OMP_OUTLINED_4]]( 1282 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca 1283 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca 1284 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca 1285 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca 1286 1287 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 1288 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_4:@.+]] to {{.+}}, 1289 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 1290 // CHECK: ret 1291 1292 // 'parallel for' implementation is the same as the case without schedule clase (static no chunk is the default) 1293 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_4]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 1294 1295 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 1296 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 1297 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 1298 1299 // initialize lb and ub to PrevLB and PrevUB 1300 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 1301 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 1302 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 1303 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 1304 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 1305 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 1306 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 1307 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 1308 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 1309 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 1310 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 1311 1312 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used 1313 // In this case we use EUB 1314 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = 
load{{.+}} [[OMP_PF_UB]], 1315 // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}}, 1316 // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] 1317 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 1318 // CHECK: [[PF_EUB_TRUE]]: 1319 // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}}, 1320 // CHECK: br label %[[PF_EUB_END:.+]] 1321 // CHECK-DAG: [[PF_EUB_FALSE]]: 1322 // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], 1323 // CHECK: br label %[[PF_EUB_END]] 1324 // CHECK-DAG: [[PF_EUB_END]]: 1325 // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] 1326 // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], 1327 1328 // initialize omp.iv 1329 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 1330 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 1331 // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]] 1332 1333 // check exit condition 1334 // CHECK: [[OMP_PF_JUMP_BACK]]: 1335 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], 1336 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], 1337 // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 1338 // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] 1339 1340 // check that PrevLB and PrevUB are passed to the 'for' 1341 // CHECK: [[PF_BODY]]: 1342 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1343 // CHECK: br label {{.+}} 1344 1345 // check stride 1 for 'for' in 'distribute parallel for' 1346 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], 1347 // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 1348 // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], 1349 // CHECK: br label %[[OMP_PF_JUMP_BACK]] 1350 1351 // CHECK-DAG: call void @__kmpc_for_static_fini( 1352 // CHECK: ret 1353 } 1354 1355 // schedule: static chunk 1356 #pragma omp target 1357 #pragma omp teams 1358 // CHECK: define{{.+}} void [[OFFLOADING_FUN_5]]( 1359 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_5:@.+]] to {{.+}}) 1360 1361 #pragma omp distribute parallel for schedule(static, ch) 1362 for (int i = 0; i < n; ++i) { 1363 a[i] = b[i] + c[i]; 1364 // CHECK: define{{.+}} void [[OMP_OUTLINED_5]]( 1365 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 1366 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_5:@.+]] to {{.+}}, 1367 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 1368 // CHECK: ret 1369 1370 // 'parallel for' implementation using outer and inner loops and PrevEUB 1371 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_5]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}}) 1372 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 1373 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 1374 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 1375 // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, 1376 1377 // initialize lb and ub to PrevLB and PrevUB 1378 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 1379 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 1380 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 1381 // 
CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 1382 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 1383 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 1384 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 1385 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 1386 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 1387 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 1388 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 1389 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] 1390 1391 // check PrevEUB (using PrevUB instead of NumIt as upper bound) 1392 // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]: 1393 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 1394 // CHECK-64-DAG: [[OMP_PF_UB_VAL_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_1]] to 1395 // CHECK: [[PF_PREV_UB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 1396 // CHECK-64-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_CONV]], [[PF_PREV_UB_VAL_1]] 1397 // CHECK-32-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_PREV_UB_VAL_1]] 1398 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 1399 // CHECK: [[PF_EUB_TRUE]]: 1400 // CHECK: [[PF_PREV_UB_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 1401 // CHECK: br label %[[PF_EUB_END:.+]] 1402 // CHECK-DAG: [[PF_EUB_FALSE]]: 1403 // CHECK: [[OMP_PF_UB_VAL_2:%.+]] = load{{.+}} [[OMP_PF_UB]], 1404 // CHECK-64: [[OMP_PF_UB_VAL_2_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_2]] to 1405 // CHECK: br label %[[PF_EUB_END]] 1406 // CHECK-DAG: [[PF_EUB_END]]: 1407 // CHECK-64-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2_CONV]], %[[PF_EUB_FALSE]] ] 1408 // CHECK-32-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2]], %[[PF_EUB_FALSE]] ] 1409 // CHECK-64-DAG: [[PF_EUB_RES_CONV:%.+]] = trunc{{.+}} [[PF_EUB_RES]] to 1410 // CHECK-64: store{{.+}} [[PF_EUB_RES_CONV]],{{.+}} [[OMP_PF_UB]], 1411 // CHECK-32: store{{.+}} [[PF_EUB_RES]], {{.+}} [[OMP_PF_UB]], 1412 1413 // initialize omp.iv (IV = LB) 1414 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 1415 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 1416 1417 // outer loop: while (IV < UB) { 1418 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1419 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 1420 // CHECK: [[PF_CMP_IV_UB_1:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 1421 // CHECK: br{{.+}} [[PF_CMP_IV_UB_1]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] 1422 1423 // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]: 1424 // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER:.+]] 1425 1426 // CHECK: [[OMP_PF_INNER_FOR_HEADER]]: 1427 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1428 // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 1429 // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] 1430 // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] 1431 1432 // CHECK: [[OMP_PF_INNER_LOOP_BODY]]: 1433 // CHECK-DAG: {{.+}} = load{{.+}}, 
{{.+}}* [[OMP_PF_IV]], 1434 // skip body branch 1435 // CHECK: br{{.+}} 1436 // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]] 1437 1438 // IV = IV + 1 and inner loop latch 1439 // CHECK: [[OMP_PF_INNER_LOOP_INC]]: 1440 // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], 1441 // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 1442 // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], 1443 // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER]] 1444 1445 // check NextLB and NextUB 1446 // CHECK: [[OMP_PF_INNER_LOOP_END]]: 1447 // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] 1448 1449 // CHECK: [[OMP_PF_OUTER_LOOP_INC]]: 1450 // CHECK-DAG: [[OMP_PF_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 1451 // CHECK-DAG: [[OMP_PF_ST_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], 1452 // CHECK-DAG: [[OMP_PF_LB_NEXT:%.+]] = add{{.+}} [[OMP_PF_LB_VAL_2]], [[OMP_PF_ST_VAL_1]] 1453 // CHECK: store{{.+}} [[OMP_PF_LB_NEXT]], {{.+}}* [[OMP_PF_LB]], 1454 // CHECK-DAG: [[OMP_PF_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], 1455 // CHECK-DAG: [[OMP_PF_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], 1456 // CHECK-DAG: [[OMP_PF_UB_NEXT:%.+]] = add{{.+}} [[OMP_PF_UB_VAL_5]], [[OMP_PF_ST_VAL_2]] 1457 // CHECK: store{{.+}} [[OMP_PF_UB_NEXT]], {{.+}}* [[OMP_PF_UB]], 1458 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]] 1459 1460 // CHECK: [[OMP_PF_OUTER_LOOP_END]]: 1461 // CHECK-DAG: call void @__kmpc_for_static_fini( 1462 // CHECK: ret 1463 } 1464 1465 // schedule: dynamic no chunk 1466 #pragma omp target 1467 #pragma omp teams 1468 // CHECK: define{{.+}} void [[OFFLOADING_FUN_6]]( 1469 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_6:@.+]] to {{.+}}) 1470 1471 #pragma omp distribute parallel for schedule(dynamic) 1472 for (int i = 0; i < n; ++i) { 1473 a[i] = b[i] + c[i]; 1474 // CHECK: define{{.+}} void [[OMP_OUTLINED_6]]( 1475 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 1476 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_6:@.+]] to {{.+}}, 1477 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 1478 // CHECK: ret 1479 1480 // 'parallel for' implementation using outer and inner loops and PrevEUB 1481 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_6]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 1482 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 1483 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 1484 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 1485 // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, 1486 1487 // initialize lb and ub to PrevLB and PrevUB 1488 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 1489 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 1490 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 1491 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 1492 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 1493 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 1494 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 1495 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 1496 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 1497 // CHECK-32-DAG: 
store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 1498 // CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 1499 // CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], 1500 // CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}}) 1501 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] 1502 1503 // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]: 1504 // CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]]) 1505 // CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0 1506 // CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] 1507 1508 // initialize omp.iv (IV = LB) 1509 // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]: 1510 // CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 1511 // CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 1512 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]] 1513 1514 // CHECK: [[OMP_PF_INNER_LOOP_HEADER]]: 1515 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1516 // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 1517 // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] 1518 // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] 1519 1520 // CHECK: [[OMP_PF_INNER_LOOP_BODY]]: 1521 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1522 // skip body branch 1523 // CHECK: br{{.+}} 1524 // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]] 1525 1526 // IV = IV + 1 and inner loop latch 1527 // CHECK: [[OMP_PF_INNER_LOOP_INC]]: 1528 // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], 1529 // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 1530 // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], 1531 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER]] 1532 1533 // check NextLB and NextUB 1534 // CHECK: [[OMP_PF_INNER_LOOP_END]]: 1535 // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] 1536 1537 // CHECK: [[OMP_PF_OUTER_LOOP_INC]]: 1538 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]] 1539 1540 // CHECK: [[OMP_PF_OUTER_LOOP_END]]: 1541 // CHECK: ret 1542 } 1543 1544 // schedule: dynamic chunk 1545 #pragma omp target 1546 #pragma omp teams 1547 // CHECK: define{{.+}} void [[OFFLOADING_FUN_7]]( 1548 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_7:@.+]] to {{.+}}) 1549 1550 #pragma omp distribute parallel for schedule(dynamic, ch) 1551 for (int i = 0; i < n; ++i) { 1552 a[i] = b[i] + c[i]; 1553 // CHECK: define{{.+}} void [[OMP_OUTLINED_7]]( 1554 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 1555 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_7:@.+]] to {{.+}}, 1556 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 1557 // CHECK: ret 1558 1559 // 'parallel for' implementation using outer and inner loops and PrevEUB 1560 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_7]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}}) 1561 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 1562 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 1563 // CHECK-DAG: 
[[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 1564 // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, 1565 1566 // initialize lb and ub to PrevLB and PrevUB 1567 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 1568 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 1569 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 1570 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 1571 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 1572 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 1573 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 1574 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 1575 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 1576 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 1577 // CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 1578 // CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], 1579 // CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}}) 1580 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] 1581 1582 // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]: 1583 // CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]]) 1584 // CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0 1585 // CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] 1586 1587 // initialize omp.iv (IV = LB) 1588 // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]: 1589 // CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 1590 // CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 1591 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]] 1592 1593 // CHECK: [[OMP_PF_INNER_LOOP_HEADER]]: 1594 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1595 // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 1596 // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] 1597 // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] 1598 1599 // CHECK: [[OMP_PF_INNER_LOOP_BODY]]: 1600 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1601 // skip body branch 1602 // CHECK: br{{.+}} 1603 // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]] 1604 1605 // IV = IV + 1 and inner loop latch 1606 // CHECK: [[OMP_PF_INNER_LOOP_INC]]: 1607 // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], 1608 // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 1609 // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], 1610 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER]] 1611 1612 // check NextLB and NextUB 1613 // CHECK: [[OMP_PF_INNER_LOOP_END]]: 1614 // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] 1615 1616 // CHECK: [[OMP_PF_OUTER_LOOP_INC]]: 1617 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]] 1618 1619 // CHECK: [[OMP_PF_OUTER_LOOP_END]]: 1620 // CHECK: ret 1621 } 1622 1623 return tmain<int>(); 1624 #endif 1625 } 1626 1627 // check code 1628 // CHECK: define{{.+}} [[TMAIN]]() 1629 1630 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper( 1631 // CHECK: call void [[OFFLOADING_FUN_1:@.+]]( 1632 
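// Note: the seven mapper/offload pairs verified in this block correspond, in
// order, to the seven 'distribute parallel for' variants in tmain<int>().
// As a rough host-side sketch of that shared pattern (an illustration only,
// not verified output; the argument lists below are placeholders rather than
// the real runtime signatures):
//
//   int rc = __tgt_target_teams_mapper(/* loc, device id, entry, maps... */);
//   if (rc != 0)               // offload failed or offloading is disabled
//     OFFLOADING_FUN_N(...);   // host fallback; forks teams on OMP_OUTLINED_N
//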
1633 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper( 1634 // CHECK: call void [[OFFLOADING_FUN_2:@.+]]( 1635 1636 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper( 1637 // CHECK: call void [[OFFLOADING_FUN_3:@.+]]( 1638 1639 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper( 1640 // CHECK: call void [[OFFLOADING_FUN_4:@.+]]( 1641 1642 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper( 1643 // CHECK: call void [[OFFLOADING_FUN_5:@.+]]( 1644 1645 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper( 1646 // CHECK: call void [[OFFLOADING_FUN_6:@.+]]( 1647 1648 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper( 1649 // CHECK: call void [[OFFLOADING_FUN_7:@.+]]( 1650 1651 // CHECK: define{{.+}} void [[OFFLOADING_FUN_1]]( 1652 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}}) 1653 1654 // CHECK: define{{.+}} void [[OMP_OUTLINED_1]]( 1655 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca 1656 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca 1657 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca 1658 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca 1659 1660 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 1661 1662 // check EUB for distribute 1663 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], 1664 // CHECK: [[NUM_IT_1:%.+]] = load{{.+}}, 1665 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] 1666 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] 1667 // CHECK-DAG: [[EUB_TRUE]]: 1668 // CHECK: [[NUM_IT_2:%.+]] = load{{.+}}, 1669 // CHECK: br label %[[EUB_END:.+]] 1670 // CHECK-DAG: [[EUB_FALSE]]: 1671 // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], 1672 // CHECK: br label %[[EUB_END]] 1673 // CHECK-DAG: [[EUB_END]]: 1674 // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] 1675 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], 1676 1677 // initialize omp.iv 1678 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], 1679 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], 1680 // CHECK: br label %[[OMP_JUMP_BACK:.+]] 1681 1682 // check exit condition 1683 // CHECK: [[OMP_JUMP_BACK]]: 1684 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], 1685 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], 1686 // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] 1687 // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]] 1688 1689 // check that PrevLB and PrevUB are passed to the 'for' 1690 // CHECK: [[DIST_BODY]]: 1691 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], 1692 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} 1693 // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], 1694 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} 1695 // check that distlb and distub are properly passed to fork_call 1696 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) 1697 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) 1698 // CHECK: br label %[[DIST_INC:.+]] 1699 1700 // increment by stride (distInc - 'parallel for' executes the whole chunk) 
and latch 1701 // CHECK: [[DIST_INC]]: 1702 // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], 1703 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], 1704 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] 1705 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], 1706 // CHECK: br label %[[OMP_JUMP_BACK]] 1707 1708 // CHECK-DAG: call void @__kmpc_for_static_fini( 1709 // CHECK: ret 1710 1711 // implementation of 'parallel for' 1712 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_1]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 1713 1714 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 1715 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 1716 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 1717 1718 // initialize lb and ub to PrevLB and PrevUB 1719 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 1720 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 1721 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 1722 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 1723 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 1724 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 1725 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 1726 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 1727 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 1728 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 1729 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 1730 1731 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used 1732 // In this case we use EUB 1733 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 1734 // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}}, 1735 // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] 1736 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 1737 // CHECK: [[PF_EUB_TRUE]]: 1738 // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}}, 1739 // CHECK: br label %[[PF_EUB_END:.+]] 1740 // CHECK-DAG: [[PF_EUB_FALSE]]: 1741 // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], 1742 // CHECK: br label %[[PF_EUB_END]] 1743 // CHECK-DAG: [[PF_EUB_END]]: 1744 // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] 1745 // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], 1746 1747 // initialize omp.iv 1748 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 1749 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 1750 // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]] 1751 1752 // check exit condition 1753 // CHECK: [[OMP_PF_JUMP_BACK]]: 1754 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], 1755 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], 1756 // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 1757 // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] 1758 1759 // check that PrevLB and PrevUB are passed to the 'for' 1760 // CHECK: [[PF_BODY]]: 1761 // CHECK-DAG: {{.+}} = 
load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1762 // CHECK: br label {{.+}} 1763 1764 // check stride 1 for 'for' in 'distribute parallel for' 1765 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], 1766 // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 1767 // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], 1768 // CHECK: br label %[[OMP_PF_JUMP_BACK]] 1769 1770 // CHECK-DAG: call void @__kmpc_for_static_fini( 1771 // CHECK: ret 1772 1773 // CHECK: define{{.+}} void [[OFFLOADING_FUN_2]]( 1774 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_2:@.+]] to {{.+}}) 1775 1776 // CHECK: define{{.+}} void [[OMP_OUTLINED_2]]( 1777 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca 1778 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca 1779 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca 1780 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca 1781 1782 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 1783 1784 // check EUB for distribute 1785 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], 1786 // CHECK: [[NUM_IT_1:%.+]] = load{{.+}}, 1787 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] 1788 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] 1789 // CHECK-DAG: [[EUB_TRUE]]: 1790 // CHECK: [[NUM_IT_2:%.+]] = load{{.+}}, 1791 // CHECK: br label %[[EUB_END:.+]] 1792 // CHECK-DAG: [[EUB_FALSE]]: 1793 // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], 1794 // CHECK: br label %[[EUB_END]] 1795 // CHECK-DAG: [[EUB_END]]: 1796 // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] 1797 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], 1798 1799 // initialize omp.iv 1800 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], 1801 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], 1802 // CHECK: br label %[[OMP_JUMP_BACK:.+]] 1803 1804 // check exit condition 1805 // CHECK: [[OMP_JUMP_BACK]]: 1806 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], 1807 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], 1808 // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] 1809 // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]] 1810 1811 // check that PrevLB and PrevUB are passed to the 'for' 1812 // CHECK: [[DIST_BODY]]: 1813 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], 1814 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} 1815 // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], 1816 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} 1817 // check that distlb and distub are properly passed to fork_call 1818 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) 1819 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) 1820 // CHECK: br label %[[DIST_INC:.+]] 1821 1822 // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch 1823 // CHECK: [[DIST_INC]]: 1824 // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], 1825 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], 1826 // CHECK: 
[[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] 1827 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], 1828 // CHECK: br label %[[OMP_JUMP_BACK]] 1829 1830 // CHECK-DAG: call void @__kmpc_for_static_fini( 1831 // CHECK: ret 1832 1833 // implementation of 'parallel for' 1834 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_2]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 1835 1836 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 1837 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 1838 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 1839 1840 // initialize lb and ub to PrevLB and PrevUB 1841 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 1842 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 1843 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 1844 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 1845 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 1846 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 1847 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 1848 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 1849 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 1850 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 1851 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 1852 1853 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used 1854 // In this case we use EUB 1855 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 1856 // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}}, 1857 // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] 1858 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 1859 // CHECK: [[PF_EUB_TRUE]]: 1860 // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}}, 1861 // CHECK: br label %[[PF_EUB_END:.+]] 1862 // CHECK-DAG: [[PF_EUB_FALSE]]: 1863 // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], 1864 // CHECK: br label %[[PF_EUB_END]] 1865 // CHECK-DAG: [[PF_EUB_END]]: 1866 // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] 1867 // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], 1868 1869 // initialize omp.iv 1870 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 1871 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 1872 // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]] 1873 1874 // check exit condition 1875 // CHECK: [[OMP_PF_JUMP_BACK]]: 1876 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], 1877 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], 1878 // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 1879 // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] 1880 1881 // check that PrevLB and PrevUB are passed to the 'for' 1882 // CHECK: [[PF_BODY]]: 1883 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1884 // CHECK: br label {{.+}} 1885 1886 // check stride 1 for 'for' in 'distribute parallel for' 1887 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* 
[[OMP_PF_IV]], 1888 // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 1889 // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], 1890 // CHECK: br label %[[OMP_PF_JUMP_BACK]] 1891 1892 // CHECK-DAG: call void @__kmpc_for_static_fini( 1893 // CHECK: ret 1894 1895 // CHECK: define{{.+}} void [[OFFLOADING_FUN_3]]( 1896 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}}) 1897 1898 // CHECK: define{{.+}} void [[OMP_OUTLINED_3]]( 1899 // CHECK: alloca 1900 // CHECK: alloca 1901 // CHECK: alloca 1902 // CHECK: alloca 1903 // CHECK: alloca 1904 // CHECK: alloca 1905 // CHECK: alloca 1906 // CHECK: [[OMP_IV:%.+]] = alloca 1907 // CHECK: alloca 1908 // CHECK: alloca 1909 // CHECK: alloca 1910 // CHECK: alloca 1911 // CHECK: [[OMP_LB:%.+]] = alloca 1912 // CHECK: [[OMP_UB:%.+]] = alloca 1913 // CHECK: [[OMP_ST:%.+]] = alloca 1914 1915 // unlike the previous tests, in this one we have a outer and inner loop for 'distribute' 1916 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91, 1917 1918 // check EUB for distribute 1919 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], 1920 // CHECK: [[NUM_IT_1:%.+]] = load{{.+}} 1921 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] 1922 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] 1923 // CHECK-DAG: [[EUB_TRUE]]: 1924 // CHECK: [[NUM_IT_2:%.+]] = load{{.+}}, 1925 // CHECK: br label %[[EUB_END:.+]] 1926 // CHECK-DAG: [[EUB_FALSE]]: 1927 // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], 1928 // CHECK: br label %[[EUB_END]] 1929 // CHECK-DAG: [[EUB_END]]: 1930 // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] 1931 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], 1932 1933 // initialize omp.iv 1934 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], 1935 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], 1936 1937 // check exit condition 1938 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], 1939 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} 1940 // CHECK-DAG: [[OMP_UB_VAL_3_PLUS_ONE:%.+]] = add {{.+}} [[OMP_UB_VAL_3]], 1 1941 // CHECK: [[CMP_IV_UB:%.+]] = icmp slt {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3_PLUS_ONE]] 1942 // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]] 1943 1944 // check that PrevLB and PrevUB are passed to the 'for' 1945 // CHECK: [[DIST_INNER_LOOP_BODY]]: 1946 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], 1947 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} 1948 // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], 1949 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} 1950 // check that distlb and distub are properly passed to fork_call 1951 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) 1952 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) 1953 // CHECK: br label %[[DIST_INNER_LOOP_INC:.+]] 1954 1955 // check DistInc 1956 // CHECK: [[DIST_INNER_LOOP_INC]]: 1957 // CHECK-DAG: [[OMP_IV_VAL_3:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], 1958 // CHECK-DAG: 
[[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], 1959 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]] 1960 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], 1961 // CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]], 1962 // CHECK-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], 1963 // CHECK-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]] 1964 // CHECK: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]], 1965 // CHECK-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]], 1966 // CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], 1967 // CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]] 1968 // CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]], 1969 1970 // Update UB 1971 // CHECK-DAG: [[OMP_UB_VAL_6:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]], 1972 // CHECK: [[OMP_EXPR_VAL:%.+]] = load{{.+}}, {{.+}} 1973 // CHECK-DAG: [[CMP_UB_NUM_IT_1:%.+]] = icmp sgt {{.+}}[[OMP_UB_VAL_6]], [[OMP_EXPR_VAL]] 1974 // CHECK: br {{.+}} [[CMP_UB_NUM_IT_1]], label %[[EUB_TRUE_1:.+]], label %[[EUB_FALSE_1:.+]] 1975 // CHECK-DAG: [[EUB_TRUE_1]]: 1976 // CHECK: [[NUM_IT_3:%.+]] = load{{.+}} 1977 // CHECK: br label %[[EUB_END_1:.+]] 1978 // CHECK-DAG: [[EUB_FALSE_1]]: 1979 // CHECK: [[OMP_UB_VAL3:%.+]] = load{{.+}} [[OMP_UB]], 1980 // CHECK: br label %[[EUB_END_1]] 1981 // CHECK-DAG: [[EUB_END_1]]: 1982 // CHECK-DAG: [[EUB_RES_1:%.+]] = phi{{.+}} [ [[NUM_IT_3]], %[[EUB_TRUE_1]] ], [ [[OMP_UB_VAL3]], %[[EUB_FALSE_1]] ] 1983 // CHECK: store{{.+}} [[EUB_RES_1]], {{.+}}* [[OMP_UB]], 1984 1985 // Store LB in IV 1986 // CHECK-DAG: [[OMP_LB_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]], 1987 // CHECK: store{{.+}} [[OMP_LB_VAL_3]], {{.+}}* [[OMP_IV]], 1988 1989 // CHECK: [[DIST_INNER_LOOP_END]]: 1990 // CHECK: br label %[[LOOP_EXIT:.+]] 1991 1992 // loop exit 1993 // CHECK: [[LOOP_EXIT]]: 1994 // CHECK-DAG: call void @__kmpc_for_static_fini( 1995 // CHECK: ret 1996 1997 // skip implementation of 'parallel for': using default scheduling and was tested above 1998 1999 // CHECK: define{{.+}} void [[OFFLOADING_FUN_4]]( 2000 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_4:@.+]] to {{.+}}) 2001 2002 // CHECK: define{{.+}} void [[OMP_OUTLINED_4]]( 2003 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca 2004 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca 2005 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca 2006 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca 2007 2008 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 2009 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_4:@.+]] to {{.+}}, 2010 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 2011 // CHECK: ret 2012 2013 // 'parallel for' implementation is the same as the case without schedule clase (static no chunk is the default) 2014 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_4]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 2015 2016 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 2017 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 2018 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 2019 2020 // initialize lb and ub to PrevLB and PrevUB 2021 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 2022 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 2023 // 
CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 2024 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 2025 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 2026 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 2027 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 2028 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 2029 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 2030 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 2031 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 2032 2033 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used 2034 // In this case we use EUB 2035 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 2036 // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}}, 2037 // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] 2038 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 2039 // CHECK: [[PF_EUB_TRUE]]: 2040 // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}}, 2041 // CHECK: br label %[[PF_EUB_END:.+]] 2042 // CHECK-DAG: [[PF_EUB_FALSE]]: 2043 // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], 2044 // CHECK: br label %[[PF_EUB_END]] 2045 // CHECK-DAG: [[PF_EUB_END]]: 2046 // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] 2047 // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], 2048 2049 // initialize omp.iv 2050 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 2051 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 2052 // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]] 2053 2054 // check exit condition 2055 // CHECK: [[OMP_PF_JUMP_BACK]]: 2056 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], 2057 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], 2058 // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 2059 // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] 2060 2061 // check that PrevLB and PrevUB are passed to the 'for' 2062 // CHECK: [[PF_BODY]]: 2063 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 2064 // CHECK: br label {{.+}} 2065 2066 // check stride 1 for 'for' in 'distribute parallel for' 2067 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], 2068 // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 2069 // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], 2070 // CHECK: br label %[[OMP_PF_JUMP_BACK]] 2071 2072 // CHECK-DAG: call void @__kmpc_for_static_fini( 2073 // CHECK: ret 2074 2075 // CHECK: define{{.+}} void [[OFFLOADING_FUN_5]]( 2076 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_5:@.+]] to {{.+}}) 2077 2078 // CHECK: define{{.+}} void [[OMP_OUTLINED_5]]( 2079 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 2080 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_5:@.+]] to {{.+}}, 2081 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 2082 // CHECK: ret 2083 2084 // 'parallel for' implementation using outer and inner loops 
and PrevEUB 2085 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_5]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}}) 2086 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 2087 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 2088 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 2089 // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, 2090 2091 // initialize lb and ub to PrevLB and PrevUB 2092 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 2093 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 2094 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 2095 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 2096 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 2097 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 2098 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 2099 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 2100 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 2101 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 2102 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 2103 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] 2104 2105 // check PrevEUB (using PrevUB instead of NumIt as upper bound) 2106 // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]: 2107 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 2108 // CHECK-64-DAG: [[OMP_PF_UB_VAL_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_1]] to 2109 // CHECK: [[PF_PREV_UB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 2110 // CHECK-64-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_CONV]], [[PF_PREV_UB_VAL_1]] 2111 // CHECK-32-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_PREV_UB_VAL_1]] 2112 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 2113 // CHECK: [[PF_EUB_TRUE]]: 2114 // CHECK: [[PF_PREV_UB_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 2115 // CHECK: br label %[[PF_EUB_END:.+]] 2116 // CHECK-DAG: [[PF_EUB_FALSE]]: 2117 // CHECK: [[OMP_PF_UB_VAL_2:%.+]] = load{{.+}} [[OMP_PF_UB]], 2118 // CHECK-64: [[OMP_PF_UB_VAL_2_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_2]] to 2119 // CHECK: br label %[[PF_EUB_END]] 2120 // CHECK-DAG: [[PF_EUB_END]]: 2121 // CHECK-64-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2_CONV]], %[[PF_EUB_FALSE]] ] 2122 // CHECK-32-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2]], %[[PF_EUB_FALSE]] ] 2123 // CHECK-64-DAG: [[PF_EUB_RES_CONV:%.+]] = trunc{{.+}} [[PF_EUB_RES]] to 2124 // CHECK-64: store{{.+}} [[PF_EUB_RES_CONV]],{{.+}} [[OMP_PF_UB]], 2125 // CHECK-32: store{{.+}} [[PF_EUB_RES]], {{.+}} [[OMP_PF_UB]], 2126 2127 // initialize omp.iv (IV = LB) 2128 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 2129 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 2130 2131 // outer loop: while (IV < UB) { 2132 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 2133 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 2134 // CHECK: [[PF_CMP_IV_UB_1:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_1]], 
[[OMP_PF_UB_VAL_3]] 2135 // CHECK: br{{.+}} [[PF_CMP_IV_UB_1]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] 2136 2137 // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]: 2138 // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER:.+]] 2139 2140 // CHECK: [[OMP_PF_INNER_FOR_HEADER]]: 2141 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 2142 // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 2143 // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] 2144 // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] 2145 2146 // CHECK: [[OMP_PF_INNER_LOOP_BODY]]: 2147 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 2148 // skip body branch 2149 // CHECK: br{{.+}} 2150 // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]] 2151 2152 // IV = IV + 1 and inner loop latch 2153 // CHECK: [[OMP_PF_INNER_LOOP_INC]]: 2154 // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], 2155 // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 2156 // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], 2157 // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER]] 2158 2159 // check NextLB and NextUB 2160 // CHECK: [[OMP_PF_INNER_LOOP_END]]: 2161 // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] 2162 2163 // CHECK: [[OMP_PF_OUTER_LOOP_INC]]: 2164 // CHECK-DAG: [[OMP_PF_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 2165 // CHECK-DAG: [[OMP_PF_ST_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], 2166 // CHECK-DAG: [[OMP_PF_LB_NEXT:%.+]] = add{{.+}} [[OMP_PF_LB_VAL_2]], [[OMP_PF_ST_VAL_1]] 2167 // CHECK: store{{.+}} [[OMP_PF_LB_NEXT]], {{.+}}* [[OMP_PF_LB]], 2168 // CHECK-DAG: [[OMP_PF_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], 2169 // CHECK-DAG: [[OMP_PF_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], 2170 // CHECK-DAG: [[OMP_PF_UB_NEXT:%.+]] = add{{.+}} [[OMP_PF_UB_VAL_5]], [[OMP_PF_ST_VAL_2]] 2171 // CHECK: store{{.+}} [[OMP_PF_UB_NEXT]], {{.+}}* [[OMP_PF_UB]], 2172 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]] 2173 2174 // CHECK: [[OMP_PF_OUTER_LOOP_END]]: 2175 // CHECK-DAG: call void @__kmpc_for_static_fini( 2176 // CHECK: ret 2177 2178 // CHECK: define{{.+}} void [[OFFLOADING_FUN_6]]( 2179 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_6:@.+]] to {{.+}}) 2180 2181 // CHECK: define{{.+}} void [[OMP_OUTLINED_6]]( 2182 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 2183 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_6:@.+]] to {{.+}}, 2184 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 2185 // CHECK: ret 2186 2187 // 'parallel for' implementation using outer and inner loops and PrevEUB 2188 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_6]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 2189 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 2190 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 2191 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 2192 // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, 2193 2194 // initialize lb and ub to PrevLB and PrevUB 2195 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 2196 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 2197 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, 
// CHECK: define{{.+}} void [[OFFLOADING_FUN_7]](
// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_7:@.+]] to {{.+}})

// CHECK: define{{.+}} void [[OMP_OUTLINED_7]](
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
// CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_7:@.+]] to {{.+}},
// skip rest of implementation of 'distribute' as it is tested above for the default dist_schedule case
// CHECK: ret

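// For reference, OMP_OUTLINED_6 and OMP_OUTLINED_7 above implement the enclosing
// 'distribute' loop: __kmpc_for_static_init_4 with schedule id 92 (distribute
// static) computes this team's [PrevLB, PrevUB] chunk, which is then passed on
// to the 'parallel for' outlined function. A rough sketch, with argument lists
// elided and illustrative names:
//
//   __kmpc_for_static_init_4(..., /*schedule=*/92, ..., &prev_lb, &prev_ub, ...);
//   __kmpc_fork_call(..., omp_parfor_outlined, ..., prev_lb, prev_ub, ...);
//   __kmpc_for_static_fini(...);
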
// 'parallel for' implementation using outer and inner loops and PrevEUB
// CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_7]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}})
// CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
// CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
// CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
// CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},

// initialize lb and ub to PrevLB and PrevUB
// CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
// CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
// CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
// CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
// CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
// CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
// CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
// CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
// CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
// CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
// CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
// CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
// CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}})
// CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]

// CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]:
// CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]])
// CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0
// CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]

// initialize omp.iv (IV = LB)
// CHECK: [[OMP_PF_OUTER_LOOP_BODY]]:
// CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
// CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
// CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]]

// CHECK: [[OMP_PF_INNER_LOOP_HEADER]]:
// CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
// CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
// CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
// CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]

// CHECK: [[OMP_PF_INNER_LOOP_BODY]]:
// CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
// skip body branch
// CHECK: br{{.+}}
// CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]]

// IV = IV + 1 and inner loop latch
// CHECK: [[OMP_PF_INNER_LOOP_INC]]:
// CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
// CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
// CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_PF_IV]],
// CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER]]

// check exit from the inner loop; LB and UB for the next chunk come from __kmpc_dispatch_next_4
// CHECK: [[OMP_PF_INNER_LOOP_END]]:
// CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]

// CHECK: [[OMP_PF_OUTER_LOOP_INC]]:
// CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]]

// CHECK: [[OMP_PF_OUTER_LOOP_END]]:
// CHECK: ret
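
// Illustration only (the real directives live earlier in this file and are not
// restated here): a combined construct of roughly the following shape produces
// a distribute loop plus a dispatch-based 'parallel for' of the kind checked
// for OMP_PARFOR_OUTLINED_6/7 above; 'n', 'a' and 'chunk' are hypothetical names:
//
//   #pragma omp target
//   #pragma omp teams
//   #pragma omp distribute parallel for schedule(dynamic, chunk)
//   for (int i = 0; i < n; ++i)
//     a[i] += i;
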
#endif