1 // Test host code gen 2 // RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 3 // RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s 4 // RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 5 // RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32 6 // RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s 7 // RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32 8 9 // RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s 10 // RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s 11 // RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s 12 // RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s 13 // RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s 14 // RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s 15 // SIMD-ONLY0-NOT: {{__kmpc|__tgt}} 16 17 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 18 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s 19 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 20 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 21 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s 22 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 23 24 // RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s 25 // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s 26 // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s 27 // RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s 28 // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s 29 // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s 30 // SIMD-ONLY1-NOT: {{__kmpc|__tgt}} 31 // expected-no-diagnostics 32 #ifndef HEADER 33 #define HEADER 34 35 36 template <typename T> 37 T tmain() { 38 T *a, *b, *c; 39 int n = 10000; 40 int ch = 100; 41 42 // no schedule clauses 43 #pragma omp target 44 #pragma omp teams 45 #pragma omp distribute parallel for simd 46 for (int i = 0; i < n; ++i) { 47 a[i] = b[i] + c[i]; 48 } 49 50 // dist_schedule: static no chunk 51 #pragma omp target 52 #pragma omp teams 53 #pragma omp distribute parallel for simd dist_schedule(static) 54 for (int i = 0; i < n; ++i) { 55 a[i] = b[i] + c[i]; 56 } 57 58 // dist_schedule: static chunk 59 #pragma omp target 60 #pragma omp teams 61 #pragma omp distribute parallel for simd dist_schedule(static, ch) 62 for (int i = 0; i < n; ++i) { 63 a[i] = b[i] + c[i]; 64 } 65 66 // schedule: static no chunk 67 #pragma omp target 68 #pragma omp teams 69 #pragma omp distribute parallel for simd schedule(static) 70 for (int i = 0; i < n; ++i) { 71 a[i] = b[i] + c[i]; 72 } 73 74 // schedule: static chunk 75 #pragma omp target 76 #pragma omp teams 77 #pragma omp distribute parallel for simd schedule(static, ch) 78 for (int i = 0; i < n; ++i) { 79 a[i] = b[i] + c[i]; 80 } 81 82 // schedule: dynamic no chunk 83 #pragma omp target 84 #pragma omp teams 85 #pragma omp distribute parallel for simd schedule(dynamic) 86 for (int i = 0; i < n; ++i) { 87 a[i] = b[i] + c[i]; 88 } 89 90 // schedule: dynamic chunk 91 #pragma omp target 92 #pragma omp teams 93 #pragma omp distribute parallel for simd schedule(dynamic, ch) 94 for (int i = 0; i < n; ++i) { 95 a[i] = b[i] + c[i]; 96 } 97 98 return T(); 99 } 100 101 int main() { 102 double *a, *b, *c; 103 int n = 10000; 104 int ch = 100; 105 106 #ifdef LAMBDA 107 // LAMBDA-LABEL: @main 108 // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]]( 109 [&]() { 110 // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( 111 112 // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper( 113 // LAMBDA: call void [[OFFLOADING_FUN_1:@.+]]( 114 115 // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper( 116 // LAMBDA: call void [[OFFLOADING_FUN_2:@.+]]( 117 118 // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper( 119 // LAMBDA: call void [[OFFLOADING_FUN_3:@.+]]( 120 121 // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper( 122 // LAMBDA: call void [[OFFLOADING_FUN_4:@.+]]( 123 124 // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper( 125 // LAMBDA: call void [[OFFLOADING_FUN_5:@.+]]( 126 127 // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper( 128 // LAMBDA: call void [[OFFLOADING_FUN_6:@.+]]( 129 130 // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams_mapper( 131 // LAMBDA: call void [[OFFLOADING_FUN_7:@.+]]( 132 133 // no schedule clauses 134 #pragma omp target 135 #pragma omp teams 136 // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_1]]( 137 // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}}) 138 139 #pragma omp distribute parallel for simd 140 for (int i = 0; i < n; ++i) { 141 a[i] = b[i] + c[i]; 142 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_1]]( 143 // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca 144 // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca 145 // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca 146 // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca 147 148 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 149 150 // check EUB for distribute 151 // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], 152 // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}}, 153 // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] 154 // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] 155 // LAMBDA-DAG: [[EUB_TRUE]]: 156 // LAMBDA: [[NUM_IT_2:%.+]] = load{{.+}}, 157 // LAMBDA: br label %[[EUB_END:.+]] 158 // LAMBDA-DAG: [[EUB_FALSE]]: 159 // LAMBDA: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], 160 // LAMBDA: br label %[[EUB_END]] 161 // LAMBDA-DAG: [[EUB_END]]: 162 // LAMBDA-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] 163 // LAMBDA: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], 164 165 // initialize omp.iv 166 // LAMBDA: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], 167 // LAMBDA: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], 168 // LAMBDA: br label %[[OMP_JUMP_BACK:.+]] 169 170 // check exit condition 171 // LAMBDA: [[OMP_JUMP_BACK]]: 172 // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], 173 // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], 174 // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] 175 // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]] 176 177 // check that PrevLB and PrevUB are passed to the 'for' 178 // LAMBDA: [[DIST_BODY]]: 179 // LAMBDA-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], 180 // LAMBDA-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to 181 // LAMBDA-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], 182 // LAMBDA-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to 183 // check that distlb and distub are properly passed to fork_call 184 // LAMBDA-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) 185 // LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) 186 // LAMBDA: br label %[[DIST_INC:.+]] 187 188 // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch 189 // LAMBDA: [[DIST_INC]]: 190 // LAMBDA-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], 191 // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], 192 // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] 193 // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], 194 // LAMBDA: br label %[[OMP_JUMP_BACK]] 195 196 // LAMBDA-DAG: call void @__kmpc_for_static_fini( 197 // LAMBDA: ret 198 199 // implementation of 'parallel for' 200 // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_1]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 201 202 // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 203 // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 204 // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 205 206 // initialize lb and ub to PrevLB and PrevUB 207 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 208 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 209 // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 210 // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 211 // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 212 // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 213 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 214 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 215 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 216 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 217 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 218 219 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used 220 // In this case we use EUB 221 // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 222 // LAMBDA: [[PF_NUM_IT_1:%.+]] = load{{.+}}, 223 // LAMBDA-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] 224 // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 225 // LAMBDA: [[PF_EUB_TRUE]]: 226 // LAMBDA: [[PF_NUM_IT_2:%.+]] = load{{.+}}, 227 // LAMBDA: br label %[[PF_EUB_END:.+]] 228 // LAMBDA-DAG: [[PF_EUB_FALSE]]: 229 // LAMBDA: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], 230 // LAMBDA: br label %[[PF_EUB_END]] 231 // LAMBDA-DAG: [[PF_EUB_END]]: 232 // LAMBDA-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] 233 // LAMBDA: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], 234 235 // initialize omp.iv 236 // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 237 // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 238 // LAMBDA: br label %[[OMP_PF_JUMP_BACK:.+]] 239 240 // check exit condition 241 // LAMBDA: [[OMP_PF_JUMP_BACK]]: 242 // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], 243 // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], 244 // LAMBDA: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 245 // LAMBDA: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] 246 247 // check that PrevLB and PrevUB are passed to the 'for' 248 // LAMBDA: [[PF_BODY]]: 249 // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 250 // LAMBDA: br label {{.+}} 251 252 // check stride 1 for 'for' in 'distribute parallel for simd' 253 // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], 254 // LAMBDA: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 255 // LAMBDA: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], 256 // LAMBDA: br label %[[OMP_PF_JUMP_BACK]] 257 258 // LAMBDA-DAG: call void @__kmpc_for_static_fini( 259 // LAMBDA: ret 260 261 [&]() { 262 a[i] = b[i] + c[i]; 263 }(); 264 } 265 266 // dist_schedule: static no chunk (same sa default - no dist_schedule) 267 #pragma omp target 268 #pragma omp teams 269 // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_2]]( 270 // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_2:@.+]] to {{.+}}) 271 272 #pragma omp distribute parallel for simd dist_schedule(static) 273 for (int i = 0; i < n; ++i) { 274 a[i] = b[i] + c[i]; 275 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_2]]( 276 // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca 277 // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca 278 // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca 279 // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca 280 281 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 282 283 // check EUB for distribute 284 // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], 285 // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}}, 286 // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] 287 // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] 288 // LAMBDA-DAG: [[EUB_TRUE]]: 289 // LAMBDA: [[NUM_IT_2:%.+]] = load{{.+}}, 290 // LAMBDA: br label %[[EUB_END:.+]] 291 // LAMBDA-DAG: [[EUB_FALSE]]: 292 // LAMBDA: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], 293 // LAMBDA: br label %[[EUB_END]] 294 // LAMBDA-DAG: [[EUB_END]]: 295 // LAMBDA-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] 296 // LAMBDA: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], 297 298 // initialize omp.iv 299 // LAMBDA: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], 300 // LAMBDA: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], 301 // LAMBDA: br label %[[OMP_JUMP_BACK:.+]] 302 303 // check exit condition 304 // LAMBDA: [[OMP_JUMP_BACK]]: 305 // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], 306 // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], 307 // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] 308 // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]] 309 310 // check that PrevLB and PrevUB are passed to the 'for' 311 // LAMBDA: [[DIST_BODY]]: 312 // LAMBDA-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], 313 // LAMBDA-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to 314 // LAMBDA-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], 315 // LAMBDA-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to 316 // check that distlb and distub are properly passed to fork_call 317 // LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) 318 // LAMBDA-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) 319 // LAMBDA: br label %[[DIST_INC:.+]] 320 321 // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch 322 // LAMBDA: [[DIST_INC]]: 323 // LAMBDA-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], 324 // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], 325 // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] 326 // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], 327 // LAMBDA: br label %[[OMP_JUMP_BACK]] 328 329 // LAMBDA-DAG: call void @__kmpc_for_static_fini( 330 // LAMBDA: ret 331 332 // implementation of 'parallel for' 333 // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_2]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 334 335 // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 336 // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 337 // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 338 339 // initialize lb and ub to PrevLB and PrevUB 340 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 341 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 342 // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 343 // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 344 // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 345 // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 346 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 347 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 348 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 349 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 350 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 351 352 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used 353 // In this case we use EUB 354 // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 355 // LAMBDA: [[PF_NUM_IT_1:%.+]] = load{{.+}}, 356 // LAMBDA-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] 357 // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 358 // LAMBDA: [[PF_EUB_TRUE]]: 359 // LAMBDA: [[PF_NUM_IT_2:%.+]] = load{{.+}}, 360 // LAMBDA: br label %[[PF_EUB_END:.+]] 361 // LAMBDA-DAG: [[PF_EUB_FALSE]]: 362 // LAMBDA: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], 363 // LAMBDA: br label %[[PF_EUB_END]] 364 // LAMBDA-DAG: [[PF_EUB_END]]: 365 // LAMBDA-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] 366 // LAMBDA: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], 367 368 // initialize omp.iv 369 // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 370 // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 371 // LAMBDA: br label %[[OMP_PF_JUMP_BACK:.+]] 372 373 // check exit condition 374 // LAMBDA: [[OMP_PF_JUMP_BACK]]: 375 // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], 376 // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], 377 // LAMBDA: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 378 // LAMBDA: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] 379 380 // check that PrevLB and PrevUB are passed to the 'for' 381 // LAMBDA: [[PF_BODY]]: 382 // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 383 // LAMBDA: br label {{.+}} 384 385 // check stride 1 for 'for' in 'distribute parallel for simd' 386 // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], 387 // LAMBDA: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 388 // LAMBDA: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], 389 // LAMBDA: br label %[[OMP_PF_JUMP_BACK]] 390 391 // LAMBDA-DAG: call void @__kmpc_for_static_fini( 392 // LAMBDA: ret 393 [&]() { 394 a[i] = b[i] + c[i]; 395 }(); 396 } 397 398 // dist_schedule: static chunk 399 #pragma omp target 400 #pragma omp teams 401 // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_3]]( 402 // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}}) 403 404 #pragma omp distribute parallel for simd dist_schedule(static, ch) 405 for (int i = 0; i < n; ++i) { 406 a[i] = b[i] + c[i]; 407 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_3]]( 408 // LAMBDA: alloca 409 // LAMBDA: alloca 410 // LAMBDA: alloca 411 // LAMBDA: alloca 412 // LAMBDA: alloca 413 // LAMBDA: alloca 414 // LAMBDA: alloca 415 // LAMBDA: [[OMP_IV:%.+]] = alloca 416 // LAMBDA: alloca 417 // LAMBDA: alloca 418 // LAMBDA: alloca 419 // LAMBDA: alloca 420 // LAMBDA: [[OMP_LB:%.+]] = alloca 421 // LAMBDA: [[OMP_UB:%.+]] = alloca 422 // LAMBDA: [[OMP_ST:%.+]] = alloca 423 424 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91, 425 426 // check EUB for distribute 427 // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], 428 // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}} 429 // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] 430 // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] 431 // LAMBDA-DAG: [[EUB_TRUE]]: 432 // LAMBDA: [[NUM_IT_2:%.+]] = load{{.+}}, 433 // LAMBDA: br label %[[EUB_END:.+]] 434 // LAMBDA-DAG: [[EUB_FALSE]]: 435 // LAMBDA: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], 436 // LAMBDA: br label %[[EUB_END]] 437 // LAMBDA-DAG: [[EUB_END]]: 438 // LAMBDA-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] 439 // LAMBDA: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], 440 441 // initialize omp.iv 442 // LAMBDA: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], 443 // LAMBDA: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], 444 445 // check exit condition 446 // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], 447 // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} 448 // LAMBDA-DAG: [[OMP_UB_VAL_3_PLUS_ONE:%.+]] = add {{.+}} [[OMP_UB_VAL_3]], 1 449 // LAMBDA: [[CMP_IV_UB:%.+]] = icmp slt {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3_PLUS_ONE]] 450 // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]] 451 452 // check that PrevLB and PrevUB are passed to the 'for' 453 // LAMBDA: [[DIST_INNER_LOOP_BODY]]: 454 // LAMBDA-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], 455 // LAMBDA-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} 456 // LAMBDA-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], 457 // LAMBDA-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} 458 // check that distlb and distub are properly passed to fork_call 459 // LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) 460 // LAMBDA-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) 461 // LAMBDA: br label %[[DIST_INNER_LOOP_INC:.+]] 462 463 // check DistInc 464 // LAMBDA: [[DIST_INNER_LOOP_INC]]: 465 // LAMBDA-DAG: [[OMP_IV_VAL_3:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], 466 // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], 467 // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]] 468 // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], 469 // LAMBDA-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]], 470 // LAMBDA-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], 471 // LAMBDA-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]] 472 // LAMBDA: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]], 473 // LAMBDA-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]], 474 // LAMBDA-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], 475 // LAMBDA-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]] 476 // LAMBDA: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]], 477 478 // Update UB 479 // LAMBDA-DAG: [[OMP_UB_VAL_6:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]], 480 // LAMBDA: [[OMP_EXPR_VAL:%.+]] = load{{.+}}, {{.+}} 481 // LAMBDA-DAG: [[CMP_UB_NUM_IT_1:%.+]] = icmp sgt {{.+}}[[OMP_UB_VAL_6]], [[OMP_EXPR_VAL]] 482 // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT_1]], label %[[EUB_TRUE_1:.+]], label %[[EUB_FALSE_1:.+]] 483 // LAMBDA-DAG: [[EUB_TRUE_1]]: 484 // LAMBDA: [[NUM_IT_3:%.+]] = load{{.+}} 485 // LAMBDA: br label %[[EUB_END_1:.+]] 486 // LAMBDA-DAG: [[EUB_FALSE_1]]: 487 // LAMBDA: [[OMP_UB_VAL3:%.+]] = load{{.+}} [[OMP_UB]], 488 // LAMBDA: br label %[[EUB_END_1]] 489 // LAMBDA-DAG: [[EUB_END_1]]: 490 // LAMBDA-DAG: [[EUB_RES_1:%.+]] = phi{{.+}} [ [[NUM_IT_3]], %[[EUB_TRUE_1]] ], [ [[OMP_UB_VAL3]], %[[EUB_FALSE_1]] ] 491 // LAMBDA: store{{.+}} [[EUB_RES_1]], {{.+}}* [[OMP_UB]], 492 493 // Store LB in IV 494 // LAMBDA-DAG: [[OMP_LB_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]], 495 // LAMBDA: store{{.+}} [[OMP_LB_VAL_3]], {{.+}}* [[OMP_IV]], 496 497 // LAMBDA: [[DIST_INNER_LOOP_END]]: 498 // LAMBDA: br label %[[LOOP_EXIT:.+]] 499 500 // loop exit 501 // LAMBDA: [[LOOP_EXIT]]: 502 // LAMBDA-DAG: call void @__kmpc_for_static_fini( 503 // LAMBDA: ret 504 505 // skip implementation of 'parallel for': using default scheduling and was tested above 506 [&]() { 507 a[i] = b[i] + c[i]; 508 }(); 509 } 510 511 // schedule: static no chunk 512 #pragma omp target 513 #pragma omp teams 514 // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_4]]( 515 // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_4:@.+]] to {{.+}}) 516 517 #pragma omp distribute parallel for simd schedule(static) 518 for (int i = 0; i < n; ++i) { 519 a[i] = b[i] + c[i]; 520 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_4]]( 521 // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca 522 // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca 523 // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca 524 // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca 525 526 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 527 // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_4:@.+]] to {{.+}}, 528 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 529 // LAMBDA: ret 530 531 // 'parallel for' implementation is the same as the case without schedule clase (static no chunk is the default) 532 // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_4]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 533 534 // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 535 // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 536 // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 537 538 // initialize lb and ub to PrevLB and PrevUB 539 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 540 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 541 // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 542 // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 543 // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 544 // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 545 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 546 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 547 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 548 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 549 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 550 551 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used 552 // In this case we use EUB 553 // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 554 // LAMBDA: [[PF_NUM_IT_1:%.+]] = load{{.+}}, 555 // LAMBDA-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] 556 // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 557 // LAMBDA: [[PF_EUB_TRUE]]: 558 // LAMBDA: [[PF_NUM_IT_2:%.+]] = load{{.+}}, 559 // LAMBDA: br label %[[PF_EUB_END:.+]] 560 // LAMBDA-DAG: [[PF_EUB_FALSE]]: 561 // LAMBDA: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], 562 // LAMBDA: br label %[[PF_EUB_END]] 563 // LAMBDA-DAG: [[PF_EUB_END]]: 564 // LAMBDA-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] 565 // LAMBDA: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], 566 567 // initialize omp.iv 568 // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 569 // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 570 // LAMBDA: br label %[[OMP_PF_JUMP_BACK:.+]] 571 572 // check exit condition 573 // LAMBDA: [[OMP_PF_JUMP_BACK]]: 574 // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], 575 // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], 576 // LAMBDA: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 577 // LAMBDA: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] 578 579 // check that PrevLB and PrevUB are passed to the 'for' 580 // LAMBDA: [[PF_BODY]]: 581 // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 582 // LAMBDA: br label {{.+}} 583 584 // check stride 1 for 'for' in 'distribute parallel for simd' 585 // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], 586 // LAMBDA: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 587 // LAMBDA: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], 588 // LAMBDA: br label %[[OMP_PF_JUMP_BACK]] 589 590 // LAMBDA-DAG: call void @__kmpc_for_static_fini( 591 // LAMBDA: ret 592 593 [&]() { 594 a[i] = b[i] + c[i]; 595 }(); 596 } 597 598 // schedule: static chunk 599 #pragma omp target 600 #pragma omp teams 601 // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_5]]( 602 // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_5:@.+]] to {{.+}}) 603 604 #pragma omp distribute parallel for simd schedule(static, ch) 605 for (int i = 0; i < n; ++i) { 606 a[i] = b[i] + c[i]; 607 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_5]]( 608 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 609 // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_5:@.+]] to {{.+}}, 610 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 611 // LAMBDA: ret 612 613 // 'parallel for' implementation using outer and inner loops and PrevEUB 614 // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_5]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}}) 615 // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 616 // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 617 // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 618 // LAMBDA-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, 619 620 // initialize lb and ub to PrevLB and PrevUB 621 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 622 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 623 // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 624 // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 625 // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 626 // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 627 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 628 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 629 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 630 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 631 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 632 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] 633 634 // check PrevEUB (using PrevUB instead of NumIt as upper bound) 635 // LAMBDA: [[OMP_PF_OUTER_LOOP_HEADER]]: 636 // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 637 // LAMBDA-64-DAG: [[OMP_PF_UB_VAL_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_1]] to 638 // LAMBDA: [[PF_PREV_UB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 639 // LAMBDA-64-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_CONV]], [[PF_PREV_UB_VAL_1]] 640 // LAMBDA-32-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_PREV_UB_VAL_1]] 641 // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 642 // LAMBDA: [[PF_EUB_TRUE]]: 643 // LAMBDA: [[PF_PREV_UB_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 644 // LAMBDA: br label %[[PF_EUB_END:.+]] 645 // LAMBDA-DAG: [[PF_EUB_FALSE]]: 646 // LAMBDA: [[OMP_PF_UB_VAL_2:%.+]] = load{{.+}} [[OMP_PF_UB]], 647 // LAMBDA-64: [[OMP_PF_UB_VAL_2_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_2]] to 648 // LAMBDA: br label %[[PF_EUB_END]] 649 // LAMBDA-DAG: [[PF_EUB_END]]: 650 // LAMBDA-64-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2_CONV]], %[[PF_EUB_FALSE]] ] 651 // LAMBDA-32-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2]], %[[PF_EUB_FALSE]] ] 652 // LAMBDA-64-DAG: [[PF_EUB_RES_CONV:%.+]] = trunc{{.+}} [[PF_EUB_RES]] to 653 // LAMBDA-64: store{{.+}} [[PF_EUB_RES_CONV]],{{.+}} [[OMP_PF_UB]], 654 // LAMBDA-32: store{{.+}} [[PF_EUB_RES]], {{.+}} [[OMP_PF_UB]], 655 656 // initialize omp.iv (IV = LB) 657 // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 658 // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 659 660 // outer loop: while (IV < UB) { 661 // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 662 // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 663 // LAMBDA: [[PF_CMP_IV_UB_1:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 664 // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_1]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] 665 666 // LAMBDA: [[OMP_PF_OUTER_LOOP_BODY]]: 667 // LAMBDA: br label %[[OMP_PF_INNER_FOR_HEADER:.+]] 668 669 // LAMBDA: [[OMP_PF_INNER_FOR_HEADER]]: 670 // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 671 // LAMBDA-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 672 // LAMBDA: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] 673 // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] 674 675 // LAMBDA: [[OMP_PF_INNER_LOOP_BODY]]: 676 // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 677 // skip body branch 678 // LAMBDA: br{{.+}} 679 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_INC:.+]] 680 681 // IV = IV + 1 and inner loop latch 682 // LAMBDA: [[OMP_PF_INNER_LOOP_INC]]: 683 // LAMBDA-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], 684 // LAMBDA-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 685 // LAMBDA-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], 686 // LAMBDA: br label %[[OMP_PF_INNER_FOR_HEADER]] 687 688 // check NextLB and NextUB 689 // LAMBDA: [[OMP_PF_INNER_LOOP_END]]: 690 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] 691 692 // LAMBDA: [[OMP_PF_OUTER_LOOP_INC]]: 693 // LAMBDA-DAG: [[OMP_PF_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 694 // LAMBDA-DAG: [[OMP_PF_ST_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], 695 // LAMBDA-DAG: [[OMP_PF_LB_NEXT:%.+]] = add{{.+}} [[OMP_PF_LB_VAL_2]], [[OMP_PF_ST_VAL_1]] 696 // LAMBDA: store{{.+}} [[OMP_PF_LB_NEXT]], {{.+}}* [[OMP_PF_LB]], 697 // LAMBDA-DAG: [[OMP_PF_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], 698 // LAMBDA-DAG: [[OMP_PF_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], 699 // LAMBDA-DAG: [[OMP_PF_UB_NEXT:%.+]] = add{{.+}} [[OMP_PF_UB_VAL_5]], [[OMP_PF_ST_VAL_2]] 700 // LAMBDA: store{{.+}} [[OMP_PF_UB_NEXT]], {{.+}}* [[OMP_PF_UB]], 701 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER]] 702 703 // LAMBDA: [[OMP_PF_OUTER_LOOP_END]]: 704 // LAMBDA-DAG: call void @__kmpc_for_static_fini( 705 // LAMBDA: ret 706 [&]() { 707 a[i] = b[i] + c[i]; 708 }(); 709 } 710 711 // schedule: dynamic no chunk 712 #pragma omp target 713 #pragma omp teams 714 // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_6]]( 715 // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_6:@.+]] to {{.+}}) 716 717 #pragma omp distribute parallel for simd schedule(dynamic) 718 for (int i = 0; i < n; ++i) { 719 a[i] = b[i] + c[i]; 720 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_6]]( 721 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 722 // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_6:@.+]] to {{.+}}, 723 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 724 // LAMBDA: ret 725 726 // 'parallel for' implementation using outer and inner loops and PrevEUB 727 // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_6]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 728 // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 729 // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 730 // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 731 // LAMBDA-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, 732 733 // initialize lb and ub to PrevLB and PrevUB 734 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 735 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 736 // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 737 // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 738 // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 739 // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 740 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 741 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 742 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 743 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 744 // LAMBDA-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 745 // LAMBDA-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], 746 // LAMBDA: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}}) 747 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] 748 749 // LAMBDA: [[OMP_PF_OUTER_LOOP_HEADER]]: 750 // LAMBDA: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]]) 751 // LAMBDA: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0 752 // LAMBDA: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] 753 754 // initialize omp.iv (IV = LB) 755 // LAMBDA: [[OMP_PF_OUTER_LOOP_BODY]]: 756 // LAMBDA-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 757 // LAMBDA-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 758 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]] 759 760 // LAMBDA: [[OMP_PF_INNER_LOOP_HEADER]]: 761 // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 762 // LAMBDA-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 763 // LAMBDA: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] 764 // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] 765 766 // LAMBDA: [[OMP_PF_INNER_LOOP_BODY]]: 767 // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 768 // skip body branch 769 // LAMBDA: br{{.+}} 770 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_INC:.+]] 771 772 // IV = IV + 1 and inner loop latch 773 // LAMBDA: [[OMP_PF_INNER_LOOP_INC]]: 774 // LAMBDA-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], 775 // LAMBDA-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 776 // LAMBDA-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], 777 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER]] 778 779 // check NextLB and NextUB 780 // LAMBDA: [[OMP_PF_INNER_LOOP_END]]: 781 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] 782 783 // LAMBDA: [[OMP_PF_OUTER_LOOP_INC]]: 784 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER]] 785 786 // LAMBDA: [[OMP_PF_OUTER_LOOP_END]]: 787 // LAMBDA: ret 788 [&]() { 789 a[i] = b[i] + c[i]; 790 }(); 791 } 792 793 // schedule: dynamic chunk 794 #pragma omp target 795 #pragma omp teams 796 // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_7]]( 797 // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_7:@.+]] to {{.+}}) 798 799 #pragma omp distribute parallel for simd schedule(dynamic, ch) 800 for (int i = 0; i < n; ++i) { 801 a[i] = b[i] + c[i]; 802 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_7]]( 803 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 804 // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_7:@.+]] to {{.+}}, 805 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 806 // LAMBDA: ret 807 808 // 'parallel for' implementation using outer and inner loops and PrevEUB 809 // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_7]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}}) 810 // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 811 // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 812 // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 813 // LAMBDA-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, 814 815 // initialize lb and ub to PrevLB and PrevUB 816 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 817 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 818 // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 819 // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 820 // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 821 // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 822 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 823 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 824 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 825 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 826 // LAMBDA-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 827 // LAMBDA-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], 828 // LAMBDA: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}}) 829 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] 830 831 // LAMBDA: [[OMP_PF_OUTER_LOOP_HEADER]]: 832 // LAMBDA: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]]) 833 // LAMBDA: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0 834 // LAMBDA: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] 835 836 // initialize omp.iv (IV = LB) 837 // LAMBDA: [[OMP_PF_OUTER_LOOP_BODY]]: 838 // LAMBDA-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 839 // LAMBDA-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 840 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]] 841 842 // LAMBDA: [[OMP_PF_INNER_LOOP_HEADER]]: 843 // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 844 // LAMBDA-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 845 // LAMBDA: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] 846 // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] 847 848 // LAMBDA: [[OMP_PF_INNER_LOOP_BODY]]: 849 // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 850 // skip body branch 851 // LAMBDA: br{{.+}} 852 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_INC:.+]] 853 854 // IV = IV + 1 and inner loop latch 855 // LAMBDA: [[OMP_PF_INNER_LOOP_INC]]: 856 // LAMBDA-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], 857 // LAMBDA-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 858 // LAMBDA-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], 859 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER]] 860 861 // check NextLB and NextUB 862 // LAMBDA: [[OMP_PF_INNER_LOOP_END]]: 863 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] 864 865 // LAMBDA: [[OMP_PF_OUTER_LOOP_INC]]: 866 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER]] 867 868 // LAMBDA: [[OMP_PF_OUTER_LOOP_END]]: 869 // LAMBDA: ret 870 [&]() { 871 a[i] = b[i] + c[i]; 872 }(); 873 } 874 }(); 875 return 0; 876 #else 877 // CHECK-LABEL: @main 878 879 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper( 880 // CHECK: call void [[OFFLOADING_FUN_1:@.+]]( 881 882 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper( 883 // CHECK: call void [[OFFLOADING_FUN_2:@.+]]( 884 885 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper( 886 // CHECK: call void [[OFFLOADING_FUN_3:@.+]]( 887 888 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper( 889 // CHECK: call void [[OFFLOADING_FUN_4:@.+]]( 890 891 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper( 892 // CHECK: call void [[OFFLOADING_FUN_5:@.+]]( 893 894 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper( 895 // CHECK: call void [[OFFLOADING_FUN_6:@.+]]( 896 897 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper( 898 // CHECK: call void [[OFFLOADING_FUN_7:@.+]]( 899 900 // CHECK: call{{.+}} [[TMAIN:@.+]]() 901 902 // no schedule clauses 903 #pragma omp target 904 #pragma omp teams 905 // CHECK: define internal void [[OFFLOADING_FUN_1]]( 906 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}}) 907 908 #pragma omp distribute parallel for simd 909 for (int i = 0; i < n; ++i) { 910 a[i] = b[i] + c[i]; 911 // CHECK: define{{.+}} void [[OMP_OUTLINED_1]]( 912 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca 913 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca 914 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca 915 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca 916 917 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 918 919 // check EUB for distribute 920 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], 921 // CHECK: [[NUM_IT_1:%.+]] = load{{.+}}, 922 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] 923 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] 924 // CHECK-DAG: [[EUB_TRUE]]: 925 // CHECK: [[NUM_IT_2:%.+]] = load{{.+}}, 926 // CHECK: br label %[[EUB_END:.+]] 927 // CHECK-DAG: [[EUB_FALSE]]: 928 // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], 929 // CHECK: br label %[[EUB_END]] 930 // CHECK-DAG: [[EUB_END]]: 931 // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] 932 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], 933 934 // initialize omp.iv 935 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], 936 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], 937 // CHECK: br label %[[OMP_JUMP_BACK:.+]] 938 939 // check exit condition 940 // CHECK: [[OMP_JUMP_BACK]]: 941 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], 942 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], 943 // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] 944 // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]] 945 946 // check that PrevLB and PrevUB are passed to the 'for' 947 // CHECK: [[DIST_BODY]]: 948 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], 949 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} 950 // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], 951 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} 952 // check that distlb and distub are properly passed to fork_call 953 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) 954 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) 955 // CHECK: br label %[[DIST_INC:.+]] 956 957 // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch 958 // CHECK: [[DIST_INC]]: 959 // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], 960 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], 961 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] 962 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], 963 // CHECK: br label %[[OMP_JUMP_BACK]] 964 965 // CHECK-DAG: call void @__kmpc_for_static_fini( 966 // CHECK: ret 967 968 // implementation of 'parallel for' 969 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_1]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 970 971 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 972 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 973 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 974 975 // initialize lb and ub to PrevLB and PrevUB 976 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 977 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 978 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 979 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 980 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 981 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 982 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 983 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 984 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 985 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 986 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 987 988 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used 989 // In this case we use EUB 990 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 991 // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}}, 992 // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] 993 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 994 // CHECK: [[PF_EUB_TRUE]]: 995 // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}}, 996 // CHECK: br label %[[PF_EUB_END:.+]] 997 // CHECK-DAG: [[PF_EUB_FALSE]]: 998 // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], 999 // CHECK: br label %[[PF_EUB_END]] 1000 // CHECK-DAG: [[PF_EUB_END]]: 1001 // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] 1002 // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], 1003 1004 // initialize omp.iv 1005 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 1006 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 1007 // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]] 1008 1009 // check exit condition 1010 // CHECK: [[OMP_PF_JUMP_BACK]]: 1011 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], 1012 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], 1013 // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 1014 // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] 1015 1016 // check that PrevLB and PrevUB are passed to the 'for' 1017 // CHECK: [[PF_BODY]]: 1018 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1019 // CHECK: br label {{.+}} 1020 1021 // check stride 1 for 'for' in 'distribute parallel for simd' 1022 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], 1023 // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 1024 // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], 1025 // CHECK: br label %[[OMP_PF_JUMP_BACK]] 1026 1027 // CHECK-DAG: call void @__kmpc_for_static_fini( 1028 // CHECK: ret 1029 } 1030 1031 // dist_schedule: static no chunk 1032 #pragma omp target 1033 #pragma omp teams 1034 // CHECK: define{{.+}} void [[OFFLOADING_FUN_2]]( 1035 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_2:@.+]] to {{.+}}) 1036 1037 #pragma omp distribute parallel for simd dist_schedule(static) 1038 for (int i = 0; i < n; ++i) { 1039 a[i] = b[i] + c[i]; 1040 // CHECK: define{{.+}} void [[OMP_OUTLINED_2]]( 1041 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca 1042 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca 1043 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca 1044 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca 1045 1046 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 1047 1048 // check EUB for distribute 1049 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], 1050 // CHECK: [[NUM_IT_1:%.+]] = load{{.+}}, 1051 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] 1052 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] 1053 // CHECK-DAG: [[EUB_TRUE]]: 1054 // CHECK: [[NUM_IT_2:%.+]] = load{{.+}}, 1055 // CHECK: br label %[[EUB_END:.+]] 1056 // CHECK-DAG: [[EUB_FALSE]]: 1057 // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], 1058 // CHECK: br label %[[EUB_END]] 1059 // CHECK-DAG: [[EUB_END]]: 1060 // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] 1061 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], 1062 1063 // initialize omp.iv 1064 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], 1065 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], 1066 // CHECK: br label %[[OMP_JUMP_BACK:.+]] 1067 1068 // check exit condition 1069 // CHECK: [[OMP_JUMP_BACK]]: 1070 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], 1071 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], 1072 // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] 1073 // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]] 1074 1075 // check that PrevLB and PrevUB are passed to the 'for' 1076 // CHECK: [[DIST_BODY]]: 1077 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], 1078 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} 1079 // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], 1080 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} 1081 // check that distlb and distub are properly passed to fork_call 1082 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) 1083 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) 1084 // CHECK: br label %[[DIST_INC:.+]] 1085 1086 // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch 1087 // CHECK: [[DIST_INC]]: 1088 // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], 1089 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], 1090 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] 1091 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], 1092 // CHECK: br label %[[OMP_JUMP_BACK]] 1093 1094 // CHECK-DAG: call void @__kmpc_for_static_fini( 1095 // CHECK: ret 1096 1097 // implementation of 'parallel for' 1098 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_2]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 1099 1100 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 1101 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 1102 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 1103 1104 // initialize lb and ub to PrevLB and PrevUB 1105 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 1106 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 1107 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 1108 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 1109 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 1110 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 1111 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 1112 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 1113 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 1114 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 1115 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 1116 1117 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used 1118 // In this case we use EUB 1119 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 1120 // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}}, 1121 // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] 1122 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 1123 // CHECK: [[PF_EUB_TRUE]]: 1124 // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}}, 1125 // CHECK: br label %[[PF_EUB_END:.+]] 1126 // CHECK-DAG: [[PF_EUB_FALSE]]: 1127 // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], 1128 // CHECK: br label %[[PF_EUB_END]] 1129 // CHECK-DAG: [[PF_EUB_END]]: 1130 // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] 1131 // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], 1132 1133 // initialize omp.iv 1134 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 1135 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 1136 // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]] 1137 1138 // check exit condition 1139 // CHECK: [[OMP_PF_JUMP_BACK]]: 1140 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], 1141 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], 1142 // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 1143 // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] 1144 1145 // check that PrevLB and PrevUB are passed to the 'for' 1146 // CHECK: [[PF_BODY]]: 1147 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1148 // CHECK: br label {{.+}} 1149 1150 // check stride 1 for 'for' in 'distribute parallel for simd' 1151 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], 1152 // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 1153 // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], 1154 // CHECK: br label %[[OMP_PF_JUMP_BACK]] 1155 1156 // CHECK-DAG: call void @__kmpc_for_static_fini( 1157 // CHECK: ret 1158 } 1159 1160 // dist_schedule: static chunk 1161 #pragma omp target 1162 #pragma omp teams 1163 // CHECK: define{{.+}} void [[OFFLOADING_FUN_3]]( 1164 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}}) 1165 1166 #pragma omp distribute parallel for simd dist_schedule(static, ch) 1167 for (int i = 0; i < n; ++i) { 1168 a[i] = b[i] + c[i]; 1169 // CHECK: define{{.+}} void [[OMP_OUTLINED_3]]( 1170 // CHECK: alloca 1171 // CHECK: alloca 1172 // CHECK: alloca 1173 // CHECK: alloca 1174 // CHECK: alloca 1175 // CHECK: alloca 1176 // CHECK: alloca 1177 // CHECK: [[OMP_IV:%.+]] = alloca 1178 // CHECK: alloca 1179 // CHECK: alloca 1180 // CHECK: alloca 1181 // CHECK: alloca 1182 // CHECK: [[OMP_LB:%.+]] = alloca 1183 // CHECK: [[OMP_UB:%.+]] = alloca 1184 // CHECK: [[OMP_ST:%.+]] = alloca 1185 1186 // unlike the previous tests, in this one we have a outer and inner loop for 'distribute' 1187 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91, 1188 1189 // check EUB for distribute 1190 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], 1191 // CHECK: [[NUM_IT_1:%.+]] = load{{.+}} 1192 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] 1193 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] 1194 // CHECK-DAG: [[EUB_TRUE]]: 1195 // CHECK: [[NUM_IT_2:%.+]] = load{{.+}}, 1196 // CHECK: br label %[[EUB_END:.+]] 1197 // CHECK-DAG: [[EUB_FALSE]]: 1198 // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], 1199 // CHECK: br label %[[EUB_END]] 1200 // CHECK-DAG: [[EUB_END]]: 1201 // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] 1202 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], 1203 1204 // initialize omp.iv 1205 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], 1206 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], 1207 1208 // check exit condition 1209 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], 1210 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} 1211 // CHECK-DAG: [[OMP_UB_VAL_3_PLUS_ONE:%.+]] = add {{.+}} [[OMP_UB_VAL_3]], 1 1212 // CHECK: [[CMP_IV_UB:%.+]] = icmp slt {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3_PLUS_ONE]] 1213 // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]] 1214 1215 // check that PrevLB and PrevUB are passed to the 'for' 1216 // CHECK: [[DIST_INNER_LOOP_BODY]]: 1217 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], 1218 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} 1219 // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], 1220 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} 1221 // check that distlb and distub are properly passed to fork_call 1222 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) 1223 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) 1224 // CHECK: br label %[[DIST_INNER_LOOP_INC:.+]] 1225 1226 // check DistInc 1227 // CHECK: [[DIST_INNER_LOOP_INC]]: 1228 // CHECK-DAG: [[OMP_IV_VAL_3:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], 1229 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], 1230 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]] 1231 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], 1232 // CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]], 1233 // CHECK-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], 1234 // CHECK-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]] 1235 // CHECK: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]], 1236 // CHECK-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]], 1237 // CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], 1238 // CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]] 1239 // CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]], 1240 1241 // Update UB 1242 // CHECK-DAG: [[OMP_UB_VAL_6:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]], 1243 // CHECK: [[OMP_EXPR_VAL:%.+]] = load{{.+}}, {{.+}} 1244 // CHECK-DAG: [[CMP_UB_NUM_IT_1:%.+]] = icmp sgt {{.+}}[[OMP_UB_VAL_6]], [[OMP_EXPR_VAL]] 1245 // CHECK: br {{.+}} [[CMP_UB_NUM_IT_1]], label %[[EUB_TRUE_1:.+]], label %[[EUB_FALSE_1:.+]] 1246 // CHECK-DAG: [[EUB_TRUE_1]]: 1247 // CHECK: [[NUM_IT_3:%.+]] = load{{.+}} 1248 // CHECK: br label %[[EUB_END_1:.+]] 1249 // CHECK-DAG: [[EUB_FALSE_1]]: 1250 // CHECK: [[OMP_UB_VAL3:%.+]] = load{{.+}} [[OMP_UB]], 1251 // CHECK: br label %[[EUB_END_1]] 1252 // CHECK-DAG: [[EUB_END_1]]: 1253 // CHECK-DAG: [[EUB_RES_1:%.+]] = phi{{.+}} [ [[NUM_IT_3]], %[[EUB_TRUE_1]] ], [ [[OMP_UB_VAL3]], %[[EUB_FALSE_1]] ] 1254 // CHECK: store{{.+}} [[EUB_RES_1]], {{.+}}* [[OMP_UB]], 1255 1256 // Store LB in IV 1257 // CHECK-DAG: [[OMP_LB_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]], 1258 // CHECK: store{{.+}} [[OMP_LB_VAL_3]], {{.+}}* [[OMP_IV]], 1259 1260 // CHECK: [[DIST_INNER_LOOP_END]]: 1261 // CHECK: br label %[[LOOP_EXIT:.+]] 1262 1263 // loop exit 1264 // CHECK: [[LOOP_EXIT]]: 1265 // CHECK-DAG: call void @__kmpc_for_static_fini( 1266 // CHECK: ret 1267 1268 // skip implementation of 'parallel for': using default scheduling and was tested above 1269 } 1270 1271 // schedule: static no chunk 1272 #pragma omp target 1273 #pragma omp teams 1274 // CHECK: define{{.+}} void [[OFFLOADING_FUN_4]]( 1275 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_4:@.+]] to {{.+}}) 1276 1277 #pragma omp distribute parallel for simd schedule(static) 1278 for (int i = 0; i < n; ++i) { 1279 a[i] = b[i] + c[i]; 1280 // CHECK: define{{.+}} void [[OMP_OUTLINED_4]]( 1281 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca 1282 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca 1283 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca 1284 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca 1285 1286 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 1287 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_4:@.+]] to {{.+}}, 1288 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 1289 // CHECK: ret 1290 1291 // 'parallel for' implementation is the same as the case without schedule clase (static no chunk is the default) 1292 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_4]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 1293 1294 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 1295 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 1296 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 1297 1298 // initialize lb and ub to PrevLB and PrevUB 1299 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 1300 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 1301 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 1302 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 1303 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 1304 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 1305 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 1306 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 1307 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 1308 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 1309 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 1310 1311 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used 1312 // In this case we use EUB 1313 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 1314 // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}}, 1315 // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] 1316 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 1317 // CHECK: [[PF_EUB_TRUE]]: 1318 // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}}, 1319 // CHECK: br label %[[PF_EUB_END:.+]] 1320 // CHECK-DAG: [[PF_EUB_FALSE]]: 1321 // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], 1322 // CHECK: br label %[[PF_EUB_END]] 1323 // CHECK-DAG: [[PF_EUB_END]]: 1324 // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] 1325 // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], 1326 1327 // initialize omp.iv 1328 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 1329 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 1330 // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]] 1331 1332 // check exit condition 1333 // CHECK: [[OMP_PF_JUMP_BACK]]: 1334 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], 1335 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], 1336 // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 1337 // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] 1338 1339 // check that PrevLB and PrevUB are passed to the 'for' 1340 // CHECK: [[PF_BODY]]: 1341 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1342 // CHECK: br label {{.+}} 1343 1344 // check stride 1 for 'for' in 'distribute parallel for simd' 1345 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], 1346 // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 1347 // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], 1348 // CHECK: br label %[[OMP_PF_JUMP_BACK]] 1349 1350 // CHECK-DAG: call void @__kmpc_for_static_fini( 1351 // CHECK: ret 1352 } 1353 1354 // schedule: static chunk 1355 #pragma omp target 1356 #pragma omp teams 1357 // CHECK: define{{.+}} void [[OFFLOADING_FUN_5]]( 1358 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_5:@.+]] to {{.+}}) 1359 1360 #pragma omp distribute parallel for simd schedule(static, ch) 1361 for (int i = 0; i < n; ++i) { 1362 a[i] = b[i] + c[i]; 1363 // CHECK: define{{.+}} void [[OMP_OUTLINED_5]]( 1364 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 1365 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_5:@.+]] to {{.+}}, 1366 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 1367 // CHECK: ret 1368 1369 // 'parallel for' implementation using outer and inner loops and PrevEUB 1370 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_5]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}}) 1371 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 1372 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 1373 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 1374 // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, 1375 1376 // initialize lb and ub to PrevLB and PrevUB 1377 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 1378 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 1379 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 1380 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 1381 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 1382 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 1383 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 1384 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 1385 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 1386 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 1387 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 1388 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] 1389 1390 // check PrevEUB (using PrevUB instead of NumIt as upper bound) 1391 // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]: 1392 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 1393 // CHECK-64-DAG: [[OMP_PF_UB_VAL_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_1]] to 1394 // CHECK: [[PF_PREV_UB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 1395 // CHECK-64-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_CONV]], [[PF_PREV_UB_VAL_1]] 1396 // CHECK-32-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_PREV_UB_VAL_1]] 1397 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 1398 // CHECK: [[PF_EUB_TRUE]]: 1399 // CHECK: [[PF_PREV_UB_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 1400 // CHECK: br label %[[PF_EUB_END:.+]] 1401 // CHECK-DAG: [[PF_EUB_FALSE]]: 1402 // CHECK: [[OMP_PF_UB_VAL_2:%.+]] = load{{.+}} [[OMP_PF_UB]], 1403 // CHECK-64: [[OMP_PF_UB_VAL_2_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_2]] to 1404 // CHECK: br label %[[PF_EUB_END]] 1405 // CHECK-DAG: [[PF_EUB_END]]: 1406 // CHECK-64-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2_CONV]], %[[PF_EUB_FALSE]] ] 1407 // CHECK-32-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2]], %[[PF_EUB_FALSE]] ] 1408 // CHECK-64-DAG: [[PF_EUB_RES_CONV:%.+]] = trunc{{.+}} [[PF_EUB_RES]] to 1409 // CHECK-64: store{{.+}} [[PF_EUB_RES_CONV]],{{.+}} [[OMP_PF_UB]], 1410 // CHECK-32: store{{.+}} [[PF_EUB_RES]], {{.+}} [[OMP_PF_UB]], 1411 1412 // initialize omp.iv (IV = LB) 1413 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 1414 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 1415 1416 // outer loop: while (IV < UB) { 1417 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1418 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 1419 // CHECK: [[PF_CMP_IV_UB_1:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 1420 // CHECK: br{{.+}} [[PF_CMP_IV_UB_1]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] 1421 1422 // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]: 1423 // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER:.+]] 1424 1425 // CHECK: [[OMP_PF_INNER_FOR_HEADER]]: 1426 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1427 // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 1428 // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] 1429 // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] 1430 1431 // CHECK: [[OMP_PF_INNER_LOOP_BODY]]: 1432 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1433 // skip body branch 1434 // CHECK: br{{.+}} 1435 // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]] 1436 1437 // IV = IV + 1 and inner loop latch 1438 // CHECK: [[OMP_PF_INNER_LOOP_INC]]: 1439 // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], 1440 // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 1441 // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], 1442 // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER]] 1443 1444 // check NextLB and NextUB 1445 // CHECK: [[OMP_PF_INNER_LOOP_END]]: 1446 // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] 1447 1448 // CHECK: [[OMP_PF_OUTER_LOOP_INC]]: 1449 // CHECK-DAG: [[OMP_PF_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 1450 // CHECK-DAG: [[OMP_PF_ST_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], 1451 // CHECK-DAG: [[OMP_PF_LB_NEXT:%.+]] = add{{.+}} [[OMP_PF_LB_VAL_2]], [[OMP_PF_ST_VAL_1]] 1452 // CHECK: store{{.+}} [[OMP_PF_LB_NEXT]], {{.+}}* [[OMP_PF_LB]], 1453 // CHECK-DAG: [[OMP_PF_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], 1454 // CHECK-DAG: [[OMP_PF_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], 1455 // CHECK-DAG: [[OMP_PF_UB_NEXT:%.+]] = add{{.+}} [[OMP_PF_UB_VAL_5]], [[OMP_PF_ST_VAL_2]] 1456 // CHECK: store{{.+}} [[OMP_PF_UB_NEXT]], {{.+}}* [[OMP_PF_UB]], 1457 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]] 1458 1459 // CHECK: [[OMP_PF_OUTER_LOOP_END]]: 1460 // CHECK-DAG: call void @__kmpc_for_static_fini( 1461 // CHECK: ret 1462 } 1463 1464 // schedule: dynamic no chunk 1465 #pragma omp target 1466 #pragma omp teams 1467 // CHECK: define{{.+}} void [[OFFLOADING_FUN_6]]( 1468 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_6:@.+]] to {{.+}}) 1469 1470 #pragma omp distribute parallel for simd schedule(dynamic) 1471 for (int i = 0; i < n; ++i) { 1472 a[i] = b[i] + c[i]; 1473 // CHECK: define{{.+}} void [[OMP_OUTLINED_6]]( 1474 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 1475 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_6:@.+]] to {{.+}}, 1476 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 1477 // CHECK: ret 1478 1479 // 'parallel for' implementation using outer and inner loops and PrevEUB 1480 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_6]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 1481 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 1482 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 1483 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 1484 // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, 1485 1486 // initialize lb and ub to PrevLB and PrevUB 1487 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 1488 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 1489 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 1490 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 1491 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 1492 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 1493 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 1494 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 1495 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 1496 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 1497 // CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 1498 // CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], 1499 // CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}}) 1500 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] 1501 1502 // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]: 1503 // CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]]) 1504 // CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0 1505 // CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] 1506 1507 // initialize omp.iv (IV = LB) 1508 // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]: 1509 // CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 1510 // CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 1511 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]] 1512 1513 // CHECK: [[OMP_PF_INNER_LOOP_HEADER]]: 1514 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1515 // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 1516 // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] 1517 // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] 1518 1519 // CHECK: [[OMP_PF_INNER_LOOP_BODY]]: 1520 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1521 // skip body branch 1522 // CHECK: br{{.+}} 1523 // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]] 1524 1525 // IV = IV + 1 and inner loop latch 1526 // CHECK: [[OMP_PF_INNER_LOOP_INC]]: 1527 // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], 1528 // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 1529 // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], 1530 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER]] 1531 1532 // check NextLB and NextUB 1533 // CHECK: [[OMP_PF_INNER_LOOP_END]]: 1534 // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] 1535 1536 // CHECK: [[OMP_PF_OUTER_LOOP_INC]]: 1537 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]] 1538 1539 // CHECK: [[OMP_PF_OUTER_LOOP_END]]: 1540 // CHECK: ret 1541 } 1542 1543 // schedule: dynamic chunk 1544 #pragma omp target 1545 #pragma omp teams 1546 // CHECK: define{{.+}} void [[OFFLOADING_FUN_7]]( 1547 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_7:@.+]] to {{.+}}) 1548 1549 #pragma omp distribute parallel for simd schedule(dynamic, ch) 1550 for (int i = 0; i < n; ++i) { 1551 a[i] = b[i] + c[i]; 1552 // CHECK: define{{.+}} void [[OMP_OUTLINED_7]]( 1553 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 1554 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_7:@.+]] to {{.+}}, 1555 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 1556 // CHECK: ret 1557 1558 // 'parallel for' implementation using outer and inner loops and PrevEUB 1559 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_7]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}}) 1560 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 1561 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 1562 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 1563 // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, 1564 1565 // initialize lb and ub to PrevLB and PrevUB 1566 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 1567 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 1568 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 1569 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 1570 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 1571 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 1572 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 1573 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 1574 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 1575 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 1576 // CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 1577 // CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], 1578 // CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}}) 1579 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] 1580 1581 // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]: 1582 // CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]]) 1583 // CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0 1584 // CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] 1585 1586 // initialize omp.iv (IV = LB) 1587 // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]: 1588 // CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 1589 // CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 1590 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]] 1591 1592 // CHECK: [[OMP_PF_INNER_LOOP_HEADER]]: 1593 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1594 // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 1595 // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] 1596 // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] 1597 1598 // CHECK: [[OMP_PF_INNER_LOOP_BODY]]: 1599 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1600 // skip body branch 1601 // CHECK: br{{.+}} 1602 // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]] 1603 1604 // IV = IV + 1 and inner loop latch 1605 // CHECK: [[OMP_PF_INNER_LOOP_INC]]: 1606 // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], 1607 // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 1608 // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], 1609 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER]] 1610 1611 // check NextLB and NextUB 1612 // CHECK: [[OMP_PF_INNER_LOOP_END]]: 1613 // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] 1614 1615 // CHECK: [[OMP_PF_OUTER_LOOP_INC]]: 1616 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]] 1617 1618 // CHECK: [[OMP_PF_OUTER_LOOP_END]]: 1619 // CHECK: ret 1620 } 1621 1622 return tmain<int>(); 1623 #endif 1624 } 1625 1626 // check code 1627 // CHECK: define{{.+}} [[TMAIN]]() 1628 1629 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper( 1630 // CHECK: call void [[OFFLOADING_FUN_1:@.+]]( 1631 1632 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper( 1633 // CHECK: call void [[OFFLOADING_FUN_2:@.+]]( 1634 1635 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper( 1636 // CHECK: call void [[OFFLOADING_FUN_3:@.+]]( 1637 1638 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper( 1639 // CHECK: call void [[OFFLOADING_FUN_4:@.+]]( 1640 1641 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper( 1642 // CHECK: call void [[OFFLOADING_FUN_5:@.+]]( 1643 1644 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper( 1645 // CHECK: call void [[OFFLOADING_FUN_6:@.+]]( 1646 1647 // CHECK: call i{{[0-9]+}} @__tgt_target_teams_mapper( 1648 // CHECK: call void [[OFFLOADING_FUN_7:@.+]]( 1649 1650 // CHECK: define{{.+}} void [[OFFLOADING_FUN_1]]( 1651 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}}) 1652 1653 // CHECK: define{{.+}} void [[OMP_OUTLINED_1]]( 1654 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca 1655 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca 1656 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca 1657 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca 1658 1659 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 1660 1661 // check EUB for distribute 1662 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], 1663 // CHECK: [[NUM_IT_1:%.+]] = load{{.+}}, 1664 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] 1665 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] 1666 // CHECK-DAG: [[EUB_TRUE]]: 1667 // CHECK: [[NUM_IT_2:%.+]] = load{{.+}}, 1668 // CHECK: br label %[[EUB_END:.+]] 1669 // CHECK-DAG: [[EUB_FALSE]]: 1670 // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], 1671 // CHECK: br label %[[EUB_END]] 1672 // CHECK-DAG: [[EUB_END]]: 1673 // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] 1674 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], 1675 1676 // initialize omp.iv 1677 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], 1678 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], 1679 // CHECK: br label %[[OMP_JUMP_BACK:.+]] 1680 1681 // check exit condition 1682 // CHECK: [[OMP_JUMP_BACK]]: 1683 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], 1684 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], 1685 // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] 1686 // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]] 1687 1688 // check that PrevLB and PrevUB are passed to the 'for' 1689 // CHECK: [[DIST_BODY]]: 1690 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], 1691 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} 1692 // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], 1693 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} 1694 // check that distlb and distub are properly passed to fork_call 1695 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) 1696 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) 1697 // CHECK: br label %[[DIST_INC:.+]] 1698 1699 // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch 1700 // CHECK: [[DIST_INC]]: 1701 // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], 1702 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], 1703 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] 1704 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], 1705 // CHECK: br label %[[OMP_JUMP_BACK]] 1706 1707 // CHECK-DAG: call void @__kmpc_for_static_fini( 1708 // CHECK: ret 1709 1710 // implementation of 'parallel for' 1711 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_1]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 1712 1713 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 1714 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 1715 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 1716 1717 // initialize lb and ub to PrevLB and PrevUB 1718 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 1719 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 1720 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 1721 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 1722 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 1723 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 1724 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 1725 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 1726 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 1727 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 1728 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 1729 1730 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used 1731 // In this case we use EUB 1732 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 1733 // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}}, 1734 // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] 1735 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 1736 // CHECK: [[PF_EUB_TRUE]]: 1737 // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}}, 1738 // CHECK: br label %[[PF_EUB_END:.+]] 1739 // CHECK-DAG: [[PF_EUB_FALSE]]: 1740 // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], 1741 // CHECK: br label %[[PF_EUB_END]] 1742 // CHECK-DAG: [[PF_EUB_END]]: 1743 // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] 1744 // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], 1745 1746 // initialize omp.iv 1747 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 1748 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 1749 // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]] 1750 1751 // check exit condition 1752 // CHECK: [[OMP_PF_JUMP_BACK]]: 1753 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], 1754 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], 1755 // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 1756 // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] 1757 1758 // check that PrevLB and PrevUB are passed to the 'for' 1759 // CHECK: [[PF_BODY]]: 1760 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1761 // CHECK: br label {{.+}} 1762 1763 // check stride 1 for 'for' in 'distribute parallel for simd' 1764 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], 1765 // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 1766 // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], 1767 // CHECK: br label %[[OMP_PF_JUMP_BACK]] 1768 1769 // CHECK-DAG: call void @__kmpc_for_static_fini( 1770 // CHECK: ret 1771 1772 // CHECK: define{{.+}} void [[OFFLOADING_FUN_2]]( 1773 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_2:@.+]] to {{.+}}) 1774 1775 // CHECK: define{{.+}} void [[OMP_OUTLINED_2]]( 1776 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca 1777 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca 1778 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca 1779 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca 1780 1781 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 1782 1783 // check EUB for distribute 1784 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], 1785 // CHECK: [[NUM_IT_1:%.+]] = load{{.+}}, 1786 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] 1787 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] 1788 // CHECK-DAG: [[EUB_TRUE]]: 1789 // CHECK: [[NUM_IT_2:%.+]] = load{{.+}}, 1790 // CHECK: br label %[[EUB_END:.+]] 1791 // CHECK-DAG: [[EUB_FALSE]]: 1792 // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], 1793 // CHECK: br label %[[EUB_END]] 1794 // CHECK-DAG: [[EUB_END]]: 1795 // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] 1796 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], 1797 1798 // initialize omp.iv 1799 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], 1800 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], 1801 // CHECK: br label %[[OMP_JUMP_BACK:.+]] 1802 1803 // check exit condition 1804 // CHECK: [[OMP_JUMP_BACK]]: 1805 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], 1806 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], 1807 // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] 1808 // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]] 1809 1810 // check that PrevLB and PrevUB are passed to the 'for' 1811 // CHECK: [[DIST_BODY]]: 1812 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], 1813 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} 1814 // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], 1815 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} 1816 // check that distlb and distub are properly passed to fork_call 1817 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) 1818 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) 1819 // CHECK: br label %[[DIST_INC:.+]] 1820 1821 // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch 1822 // CHECK: [[DIST_INC]]: 1823 // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], 1824 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], 1825 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] 1826 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], 1827 // CHECK: br label %[[OMP_JUMP_BACK]] 1828 1829 // CHECK-DAG: call void @__kmpc_for_static_fini( 1830 // CHECK: ret 1831 1832 // implementation of 'parallel for' 1833 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_2]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 1834 1835 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 1836 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 1837 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 1838 1839 // initialize lb and ub to PrevLB and PrevUB 1840 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 1841 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 1842 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 1843 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 1844 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 1845 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 1846 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 1847 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 1848 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 1849 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 1850 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 1851 1852 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used 1853 // In this case we use EUB 1854 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 1855 // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}}, 1856 // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] 1857 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 1858 // CHECK: [[PF_EUB_TRUE]]: 1859 // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}}, 1860 // CHECK: br label %[[PF_EUB_END:.+]] 1861 // CHECK-DAG: [[PF_EUB_FALSE]]: 1862 // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], 1863 // CHECK: br label %[[PF_EUB_END]] 1864 // CHECK-DAG: [[PF_EUB_END]]: 1865 // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] 1866 // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], 1867 1868 // initialize omp.iv 1869 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 1870 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 1871 // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]] 1872 1873 // check exit condition 1874 // CHECK: [[OMP_PF_JUMP_BACK]]: 1875 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], 1876 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], 1877 // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 1878 // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] 1879 1880 // check that PrevLB and PrevUB are passed to the 'for' 1881 // CHECK: [[PF_BODY]]: 1882 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1883 // CHECK: br label {{.+}} 1884 1885 // check stride 1 for 'for' in 'distribute parallel for simd' 1886 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], 1887 // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 1888 // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], 1889 // CHECK: br label %[[OMP_PF_JUMP_BACK]] 1890 1891 // CHECK-DAG: call void @__kmpc_for_static_fini( 1892 // CHECK: ret 1893 1894 // CHECK: define{{.+}} void [[OFFLOADING_FUN_3]]( 1895 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}}) 1896 1897 // CHECK: define{{.+}} void [[OMP_OUTLINED_3]]( 1898 // CHECK: alloca 1899 // CHECK: alloca 1900 // CHECK: alloca 1901 // CHECK: alloca 1902 // CHECK: alloca 1903 // CHECK: alloca 1904 // CHECK: alloca 1905 // CHECK: [[OMP_IV:%.+]] = alloca 1906 // CHECK: alloca 1907 // CHECK: alloca 1908 // CHECK: alloca 1909 // CHECK: alloca 1910 // CHECK: [[OMP_LB:%.+]] = alloca 1911 // CHECK: [[OMP_UB:%.+]] = alloca 1912 // CHECK: [[OMP_ST:%.+]] = alloca 1913 1914 // unlike the previous tests, in this one we have a outer and inner loop for 'distribute' 1915 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91, 1916 1917 // check EUB for distribute 1918 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], 1919 // CHECK: [[NUM_IT_1:%.+]] = load{{.+}} 1920 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] 1921 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] 1922 // CHECK-DAG: [[EUB_TRUE]]: 1923 // CHECK: [[NUM_IT_2:%.+]] = load{{.+}}, 1924 // CHECK: br label %[[EUB_END:.+]] 1925 // CHECK-DAG: [[EUB_FALSE]]: 1926 // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], 1927 // CHECK: br label %[[EUB_END]] 1928 // CHECK-DAG: [[EUB_END]]: 1929 // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] 1930 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], 1931 1932 // initialize omp.iv 1933 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], 1934 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], 1935 1936 // check exit condition 1937 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], 1938 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} 1939 // CHECK-DAG: [[OMP_UB_VAL_3_PLUS_ONE:%.+]] = add {{.+}} [[OMP_UB_VAL_3]], 1 1940 // CHECK: [[CMP_IV_UB:%.+]] = icmp slt {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3_PLUS_ONE]] 1941 // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]] 1942 1943 // check that PrevLB and PrevUB are passed to the 'for' 1944 // CHECK: [[DIST_INNER_LOOP_BODY]]: 1945 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], 1946 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} 1947 // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], 1948 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} 1949 // check that distlb and distub are properly passed to fork_call 1950 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) 1951 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) 1952 // CHECK: br label %[[DIST_INNER_LOOP_INC:.+]] 1953 1954 // check DistInc 1955 // CHECK: [[DIST_INNER_LOOP_INC]]: 1956 // CHECK-DAG: [[OMP_IV_VAL_3:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], 1957 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], 1958 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]] 1959 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], 1960 // CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]], 1961 // CHECK-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], 1962 // CHECK-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]] 1963 // CHECK: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]], 1964 // CHECK-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]], 1965 // CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], 1966 // CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]] 1967 // CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]], 1968 1969 // Update UB 1970 // CHECK-DAG: [[OMP_UB_VAL_6:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]], 1971 // CHECK: [[OMP_EXPR_VAL:%.+]] = load{{.+}}, {{.+}} 1972 // CHECK-DAG: [[CMP_UB_NUM_IT_1:%.+]] = icmp sgt {{.+}}[[OMP_UB_VAL_6]], [[OMP_EXPR_VAL]] 1973 // CHECK: br {{.+}} [[CMP_UB_NUM_IT_1]], label %[[EUB_TRUE_1:.+]], label %[[EUB_FALSE_1:.+]] 1974 // CHECK-DAG: [[EUB_TRUE_1]]: 1975 // CHECK: [[NUM_IT_3:%.+]] = load{{.+}} 1976 // CHECK: br label %[[EUB_END_1:.+]] 1977 // CHECK-DAG: [[EUB_FALSE_1]]: 1978 // CHECK: [[OMP_UB_VAL3:%.+]] = load{{.+}} [[OMP_UB]], 1979 // CHECK: br label %[[EUB_END_1]] 1980 // CHECK-DAG: [[EUB_END_1]]: 1981 // CHECK-DAG: [[EUB_RES_1:%.+]] = phi{{.+}} [ [[NUM_IT_3]], %[[EUB_TRUE_1]] ], [ [[OMP_UB_VAL3]], %[[EUB_FALSE_1]] ] 1982 // CHECK: store{{.+}} [[EUB_RES_1]], {{.+}}* [[OMP_UB]], 1983 1984 // Store LB in IV 1985 // CHECK-DAG: [[OMP_LB_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]], 1986 // CHECK: store{{.+}} [[OMP_LB_VAL_3]], {{.+}}* [[OMP_IV]], 1987 1988 // CHECK: [[DIST_INNER_LOOP_END]]: 1989 // CHECK: br label %[[LOOP_EXIT:.+]] 1990 1991 // loop exit 1992 // CHECK: [[LOOP_EXIT]]: 1993 // CHECK-DAG: call void @__kmpc_for_static_fini( 1994 // CHECK: ret 1995 1996 // skip implementation of 'parallel for': using default scheduling and was tested above 1997 1998 // CHECK: define{{.+}} void [[OFFLOADING_FUN_4]]( 1999 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_4:@.+]] to {{.+}}) 2000 2001 // CHECK: define{{.+}} void [[OMP_OUTLINED_4]]( 2002 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca 2003 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca 2004 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca 2005 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca 2006 2007 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 2008 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_4:@.+]] to {{.+}}, 2009 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 2010 // CHECK: ret 2011 2012 // 'parallel for' implementation is the same as the case without schedule clase (static no chunk is the default) 2013 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_4]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 2014 2015 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 2016 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 2017 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 2018 2019 // initialize lb and ub to PrevLB and PrevUB 2020 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 2021 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 2022 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 2023 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 2024 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 2025 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 2026 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 2027 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 2028 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 2029 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 2030 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 2031 2032 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used 2033 // In this case we use EUB 2034 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 2035 // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}}, 2036 // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] 2037 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 2038 // CHECK: [[PF_EUB_TRUE]]: 2039 // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}}, 2040 // CHECK: br label %[[PF_EUB_END:.+]] 2041 // CHECK-DAG: [[PF_EUB_FALSE]]: 2042 // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], 2043 // CHECK: br label %[[PF_EUB_END]] 2044 // CHECK-DAG: [[PF_EUB_END]]: 2045 // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] 2046 // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], 2047 2048 // initialize omp.iv 2049 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 2050 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 2051 // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]] 2052 2053 // check exit condition 2054 // CHECK: [[OMP_PF_JUMP_BACK]]: 2055 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], 2056 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], 2057 // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 2058 // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] 2059 2060 // check that PrevLB and PrevUB are passed to the 'for' 2061 // CHECK: [[PF_BODY]]: 2062 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 2063 // CHECK: br label {{.+}} 2064 2065 // check stride 1 for 'for' in 'distribute parallel for simd' 2066 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], 2067 // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 2068 // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], 2069 // CHECK: br label %[[OMP_PF_JUMP_BACK]] 2070 2071 // CHECK-DAG: call void @__kmpc_for_static_fini( 2072 // CHECK: ret 2073 2074 // CHECK: define{{.+}} void [[OFFLOADING_FUN_5]]( 2075 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_5:@.+]] to {{.+}}) 2076 2077 // CHECK: define{{.+}} void [[OMP_OUTLINED_5]]( 2078 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 2079 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_5:@.+]] to {{.+}}, 2080 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 2081 // CHECK: ret 2082 2083 // 'parallel for' implementation using outer and inner loops and PrevEUB 2084 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_5]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}}) 2085 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 2086 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 2087 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 2088 // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, 2089 2090 // initialize lb and ub to PrevLB and PrevUB 2091 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 2092 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 2093 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 2094 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 2095 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 2096 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 2097 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 2098 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 2099 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 2100 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 2101 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 2102 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] 2103 2104 // check PrevEUB (using PrevUB instead of NumIt as upper bound) 2105 // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]: 2106 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 2107 // CHECK-64-DAG: [[OMP_PF_UB_VAL_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_1]] to 2108 // CHECK: [[PF_PREV_UB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 2109 // CHECK-64-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_CONV]], [[PF_PREV_UB_VAL_1]] 2110 // CHECK-32-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_PREV_UB_VAL_1]] 2111 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 2112 // CHECK: [[PF_EUB_TRUE]]: 2113 // CHECK: [[PF_PREV_UB_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 2114 // CHECK: br label %[[PF_EUB_END:.+]] 2115 // CHECK-DAG: [[PF_EUB_FALSE]]: 2116 // CHECK: [[OMP_PF_UB_VAL_2:%.+]] = load{{.+}} [[OMP_PF_UB]], 2117 // CHECK-64: [[OMP_PF_UB_VAL_2_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_2]] to 2118 // CHECK: br label %[[PF_EUB_END]] 2119 // CHECK-DAG: [[PF_EUB_END]]: 2120 // CHECK-64-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2_CONV]], %[[PF_EUB_FALSE]] ] 2121 // CHECK-32-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2]], %[[PF_EUB_FALSE]] ] 2122 // CHECK-64-DAG: [[PF_EUB_RES_CONV:%.+]] = trunc{{.+}} [[PF_EUB_RES]] to 2123 // CHECK-64: store{{.+}} [[PF_EUB_RES_CONV]],{{.+}} [[OMP_PF_UB]], 2124 // CHECK-32: store{{.+}} [[PF_EUB_RES]], {{.+}} [[OMP_PF_UB]], 2125 2126 // initialize omp.iv (IV = LB) 2127 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 2128 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 2129 2130 // outer loop: while (IV < UB) { 2131 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 2132 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 2133 // CHECK: [[PF_CMP_IV_UB_1:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 2134 // CHECK: br{{.+}} [[PF_CMP_IV_UB_1]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] 2135 2136 // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]: 2137 // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER:.+]] 2138 2139 // CHECK: [[OMP_PF_INNER_FOR_HEADER]]: 2140 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 2141 // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 2142 // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] 2143 // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] 2144 2145 // CHECK: [[OMP_PF_INNER_LOOP_BODY]]: 2146 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 2147 // skip body branch 2148 // CHECK: br{{.+}} 2149 // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]] 2150 2151 // IV = IV + 1 and inner loop latch 2152 // CHECK: [[OMP_PF_INNER_LOOP_INC]]: 2153 // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], 2154 // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 2155 // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], 2156 // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER]] 2157 2158 // check NextLB and NextUB 2159 // CHECK: [[OMP_PF_INNER_LOOP_END]]: 2160 // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] 2161 2162 // CHECK: [[OMP_PF_OUTER_LOOP_INC]]: 2163 // CHECK-DAG: [[OMP_PF_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 2164 // CHECK-DAG: [[OMP_PF_ST_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], 2165 // CHECK-DAG: [[OMP_PF_LB_NEXT:%.+]] = add{{.+}} [[OMP_PF_LB_VAL_2]], [[OMP_PF_ST_VAL_1]] 2166 // CHECK: store{{.+}} [[OMP_PF_LB_NEXT]], {{.+}}* [[OMP_PF_LB]], 2167 // CHECK-DAG: [[OMP_PF_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], 2168 // CHECK-DAG: [[OMP_PF_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], 2169 // CHECK-DAG: [[OMP_PF_UB_NEXT:%.+]] = add{{.+}} [[OMP_PF_UB_VAL_5]], [[OMP_PF_ST_VAL_2]] 2170 // CHECK: store{{.+}} [[OMP_PF_UB_NEXT]], {{.+}}* [[OMP_PF_UB]], 2171 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]] 2172 2173 // CHECK: [[OMP_PF_OUTER_LOOP_END]]: 2174 // CHECK-DAG: call void @__kmpc_for_static_fini( 2175 // CHECK: ret 2176 2177 // CHECK: define{{.+}} void [[OFFLOADING_FUN_6]]( 2178 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_6:@.+]] to {{.+}}) 2179 2180 // CHECK: define{{.+}} void [[OMP_OUTLINED_6]]( 2181 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 2182 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_6:@.+]] to {{.+}}, 2183 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 2184 // CHECK: ret 2185 2186 // 'parallel for' implementation using outer and inner loops and PrevEUB 2187 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_6]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 2188 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 2189 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 2190 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 2191 // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, 2192 2193 // initialize lb and ub to PrevLB and PrevUB 2194 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 2195 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 2196 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 2197 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 2198 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 2199 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 2200 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 2201 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 2202 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 2203 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 2204 // CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 2205 // CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], 2206 // CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}}) 2207 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] 2208 2209 // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]: 2210 // CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]]) 2211 // CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0 2212 // CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] 2213 2214 // initialize omp.iv (IV = LB) 2215 // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]: 2216 // CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 2217 // CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 2218 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]] 2219 2220 // CHECK: [[OMP_PF_INNER_LOOP_HEADER]]: 2221 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 2222 // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 2223 // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] 2224 // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] 2225 2226 // CHECK: [[OMP_PF_INNER_LOOP_BODY]]: 2227 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 2228 // skip body branch 2229 // CHECK: br{{.+}} 2230 // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]] 2231 2232 // IV = IV + 1 and inner loop latch 2233 // CHECK: [[OMP_PF_INNER_LOOP_INC]]: 2234 // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], 2235 // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 2236 // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], 2237 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER]] 2238 2239 // check NextLB and NextUB 2240 // CHECK: [[OMP_PF_INNER_LOOP_END]]: 2241 // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] 2242 2243 // CHECK: [[OMP_PF_OUTER_LOOP_INC]]: 2244 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]] 2245 2246 // CHECK: [[OMP_PF_OUTER_LOOP_END]]: 2247 // CHECK: ret 2248 2249 // CHECK: define{{.+}} void [[OFFLOADING_FUN_7]]( 2250 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_7:@.+]] to {{.+}}) 2251 2252 // CHECK: define{{.+}} void [[OMP_OUTLINED_7]]( 2253 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 2254 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_7:@.+]] to {{.+}}, 2255 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 2256 // CHECK: ret 2257 2258 // 'parallel for' implementation using outer and inner loops and PrevEUB 2259 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_7]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}}) 2260 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 2261 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 2262 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 2263 // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, 2264 2265 // initialize lb and ub to PrevLB and PrevUB 2266 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 2267 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 2268 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 2269 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 2270 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 2271 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 2272 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 2273 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 2274 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 2275 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 2276 // CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 2277 // CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], 2278 // CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}}) 2279 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] 2280 2281 // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]: 2282 // CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]]) 2283 // CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0 2284 // CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] 2285 2286 // initialize omp.iv (IV = LB) 2287 // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]: 2288 // CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 2289 // CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 2290 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]] 2291 2292 // CHECK: [[OMP_PF_INNER_LOOP_HEADER]]: 2293 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 2294 // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 2295 // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] 2296 // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] 2297 2298 // CHECK: [[OMP_PF_INNER_LOOP_BODY]]: 2299 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 2300 // skip body branch 2301 // CHECK: br{{.+}} 2302 // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]] 2303 2304 // IV = IV + 1 and inner loop latch 2305 // CHECK: [[OMP_PF_INNER_LOOP_INC]]: 2306 // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], 2307 // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 2308 // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], 2309 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER]] 2310 2311 // check NextLB and NextUB 2312 // CHECK: [[OMP_PF_INNER_LOOP_END]]: 2313 // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] 2314 2315 // CHECK: [[OMP_PF_OUTER_LOOP_INC]]: 2316 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]] 2317 2318 // CHECK: [[OMP_PF_OUTER_LOOP_END]]: 2319 // CHECK: ret 2320 2321 // CHECK: !{!"llvm.loop.vectorize.enable", i1 true} 2322 #endif 2323