1 // Test host code gen 2 // RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 3 // RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s 4 // RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 5 // RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32 6 // RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s 7 // RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32 8 9 // RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s 10 // RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s 11 // RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s 12 // RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s 13 // RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s 14 // RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s 15 // SIMD-ONLY0-NOT: {{__kmpc|__tgt}} 16 17 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 18 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s 19 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 20 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 21 // 
RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s 22 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 23 24 // RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s 25 // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s 26 // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s 27 // RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s 28 // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s 29 // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s 30 // SIMD-ONLY1-NOT: {{__kmpc|__tgt}} 31 // expected-no-diagnostics 32 #ifndef HEADER 33 #define HEADER 34 35 36 template <typename T> 37 T tmain() { 38 T *a, *b, *c; 39 int n = 10000; 40 int ch = 100; 41 42 // no schedule clauses 43 #pragma omp target 44 #pragma omp teams 45 #pragma omp distribute parallel for simd 46 for (int i = 0; i < n; ++i) { 47 a[i] = b[i] + c[i]; 48 } 49 50 // dist_schedule: static no chunk 51 #pragma omp target 52 #pragma omp teams 53 #pragma omp distribute parallel for simd dist_schedule(static) 54 for (int i = 0; i < n; ++i) { 55 a[i] = b[i] + c[i]; 56 } 57 58 // dist_schedule: static chunk 59 #pragma omp target 60 #pragma omp teams 61 #pragma omp distribute parallel for simd dist_schedule(static, ch) 62 for (int i = 0; i < n; ++i) { 63 a[i] = b[i] + c[i]; 64 } 65 66 // schedule: static no chunk 67 #pragma omp target 68 #pragma omp teams 69 #pragma omp distribute parallel for simd schedule(static) 70 for (int i = 0; i < n; ++i) { 71 a[i] = b[i] + c[i]; 72 } 73 74 // schedule: static chunk 75 #pragma omp target 76 #pragma omp teams 77 #pragma omp distribute parallel for simd schedule(static, ch) 78 for (int i = 0; i < n; ++i) { 79 a[i] = b[i] + c[i]; 80 } 81 82 // schedule: dynamic no chunk 83 #pragma omp target 84 #pragma omp teams 85 #pragma omp distribute parallel for simd schedule(dynamic) 86 for (int i = 0; i < n; ++i) { 87 a[i] = b[i] + c[i]; 88 } 89 90 // schedule: dynamic chunk 91 #pragma omp target 92 #pragma omp teams 93 #pragma omp distribute parallel for simd schedule(dynamic, ch) 94 for (int i = 0; i < n; ++i) { 95 a[i] = b[i] + c[i]; 96 } 97 98 return T(); 99 } 100 101 int main() { 102 double *a, *b, *c; 103 int n = 10000; 104 int ch = 100; 105 106 #ifdef LAMBDA 107 // LAMBDA-LABEL: @main 108 // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]]( 109 [&]() { 110 // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( 111 112 // LAMBDA: call i{{[0-9]+}} 
@__tgt_target_teams( 113 // LAMBDA: call void [[OFFLOADING_FUN_1:@.+]]( 114 115 // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( 116 // LAMBDA: call void [[OFFLOADING_FUN_2:@.+]]( 117 118 // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( 119 // LAMBDA: call void [[OFFLOADING_FUN_3:@.+]]( 120 121 // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( 122 // LAMBDA: call void [[OFFLOADING_FUN_4:@.+]]( 123 124 // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( 125 // LAMBDA: call void [[OFFLOADING_FUN_5:@.+]]( 126 127 // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( 128 // LAMBDA: call void [[OFFLOADING_FUN_6:@.+]]( 129 130 // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( 131 // LAMBDA: call void [[OFFLOADING_FUN_7:@.+]]( 132 133 // no schedule clauses 134 #pragma omp target 135 #pragma omp teams 136 // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_1]]( 137 // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}}) 138 139 #pragma omp distribute parallel for simd 140 for (int i = 0; i < n; ++i) { 141 a[i] = b[i] + c[i]; 142 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_1]]( 143 // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca 144 // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca 145 // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca 146 // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca 147 148 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 149 150 // check EUB for distribute 151 // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], 152 // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}}, 153 // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] 154 // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] 155 // LAMBDA-DAG: [[EUB_TRUE]]: 156 // LAMBDA: [[NUM_IT_2:%.+]] = load{{.+}}, 157 // LAMBDA: br label %[[EUB_END:.+]] 158 // LAMBDA-DAG: [[EUB_FALSE]]: 159 // LAMBDA: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], 160 // LAMBDA: br label %[[EUB_END]] 161 // LAMBDA-DAG: [[EUB_END]]: 162 // LAMBDA-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] 163 // LAMBDA: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], 164 165 // initialize omp.iv 166 // LAMBDA: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], 167 // LAMBDA: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], 168 // LAMBDA: br label %[[OMP_JUMP_BACK:.+]] 169 170 // check exit condition 171 // LAMBDA: [[OMP_JUMP_BACK]]: 172 // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], 173 // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], 174 // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] 175 // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]] 176 177 // check that PrevLB and PrevUB are passed to the 'for' 178 // LAMBDA: [[DIST_BODY]]: 179 // LAMBDA-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], 180 // LAMBDA-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to 181 // LAMBDA-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], 182 // LAMBDA-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to 183 // check that distlb and distub are properly passed to fork_call 184 // LAMBDA-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) 185 // LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} 
[[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) 186 // LAMBDA: br label %[[DIST_INC:.+]] 187 188 // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch 189 // LAMBDA: [[DIST_INC]]: 190 // LAMBDA-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], 191 // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], 192 // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] 193 // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], 194 // LAMBDA: br label %[[OMP_JUMP_BACK]] 195 196 // LAMBDA-DAG: call void @__kmpc_for_static_fini( 197 // LAMBDA: ret 198 199 // implementation of 'parallel for' 200 // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_1]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 201 202 // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 203 // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 204 // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 205 206 // initialize lb and ub to PrevLB and PrevUB 207 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 208 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 209 // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 210 // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 211 // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 212 // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 213 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 214 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 215 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 216 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 217 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 218 219 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used 220 // In this case we use EUB 221 // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 222 // LAMBDA: [[PF_NUM_IT_1:%.+]] = load{{.+}}, 223 // LAMBDA-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] 224 // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 225 // LAMBDA: [[PF_EUB_TRUE]]: 226 // LAMBDA: [[PF_NUM_IT_2:%.+]] = load{{.+}}, 227 // LAMBDA: br label %[[PF_EUB_END:.+]] 228 // LAMBDA-DAG: [[PF_EUB_FALSE]]: 229 // LAMBDA: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], 230 // LAMBDA: br label %[[PF_EUB_END]] 231 // LAMBDA-DAG: [[PF_EUB_END]]: 232 // LAMBDA-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] 233 // LAMBDA: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], 234 235 // initialize omp.iv 236 // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 237 // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 238 // LAMBDA: br label %[[OMP_PF_JUMP_BACK:.+]] 239 240 // check exit condition 241 // LAMBDA: [[OMP_PF_JUMP_BACK]]: 242 // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], 243 // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], 244 // LAMBDA: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 245 // LAMBDA: br {{.+}} 
[[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] 246 247 // check that PrevLB and PrevUB are passed to the 'for' 248 // LAMBDA: [[PF_BODY]]: 249 // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 250 // LAMBDA: br label {{.+}} 251 252 // check stride 1 for 'for' in 'distribute parallel for simd' 253 // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], 254 // LAMBDA: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 255 // LAMBDA: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], 256 // LAMBDA: br label %[[OMP_PF_JUMP_BACK]] 257 258 // LAMBDA-DAG: call void @__kmpc_for_static_fini( 259 // LAMBDA: ret 260 261 [&]() { 262 a[i] = b[i] + c[i]; 263 }(); 264 } 265 266 // dist_schedule: static no chunk (same sa default - no dist_schedule) 267 #pragma omp target 268 #pragma omp teams 269 // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_2]]( 270 // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_2:@.+]] to {{.+}}) 271 272 #pragma omp distribute parallel for simd dist_schedule(static) 273 for (int i = 0; i < n; ++i) { 274 a[i] = b[i] + c[i]; 275 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_2]]( 276 // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca 277 // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca 278 // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca 279 // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca 280 281 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 282 283 // check EUB for distribute 284 // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], 285 // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}}, 286 // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] 287 // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] 288 // LAMBDA-DAG: [[EUB_TRUE]]: 289 // LAMBDA: [[NUM_IT_2:%.+]] = load{{.+}}, 290 // LAMBDA: br label %[[EUB_END:.+]] 291 // LAMBDA-DAG: [[EUB_FALSE]]: 292 // LAMBDA: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], 293 // LAMBDA: br label %[[EUB_END]] 294 // LAMBDA-DAG: [[EUB_END]]: 295 // LAMBDA-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] 296 // LAMBDA: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], 297 298 // initialize omp.iv 299 // LAMBDA: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], 300 // LAMBDA: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], 301 // LAMBDA: br label %[[OMP_JUMP_BACK:.+]] 302 303 // check exit condition 304 // LAMBDA: [[OMP_JUMP_BACK]]: 305 // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], 306 // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], 307 // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] 308 // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]] 309 310 // check that PrevLB and PrevUB are passed to the 'for' 311 // LAMBDA: [[DIST_BODY]]: 312 // LAMBDA-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], 313 // LAMBDA-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to 314 // LAMBDA-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], 315 // LAMBDA-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to 316 // check that distlb and distub are properly passed to fork_call 317 // LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) 318 // LAMBDA-32: call{{.+}} @__kmpc_fork_call({{.+}}, 
{{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) 319 // LAMBDA: br label %[[DIST_INC:.+]] 320 321 // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch 322 // LAMBDA: [[DIST_INC]]: 323 // LAMBDA-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], 324 // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], 325 // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] 326 // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], 327 // LAMBDA: br label %[[OMP_JUMP_BACK]] 328 329 // LAMBDA-DAG: call void @__kmpc_for_static_fini( 330 // LAMBDA: ret 331 332 // implementation of 'parallel for' 333 // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_2]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 334 335 // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 336 // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 337 // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 338 339 // initialize lb and ub to PrevLB and PrevUB 340 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 341 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 342 // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 343 // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 344 // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 345 // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 346 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 347 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 348 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 349 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 350 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 351 352 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used 353 // In this case we use EUB 354 // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 355 // LAMBDA: [[PF_NUM_IT_1:%.+]] = load{{.+}}, 356 // LAMBDA-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] 357 // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 358 // LAMBDA: [[PF_EUB_TRUE]]: 359 // LAMBDA: [[PF_NUM_IT_2:%.+]] = load{{.+}}, 360 // LAMBDA: br label %[[PF_EUB_END:.+]] 361 // LAMBDA-DAG: [[PF_EUB_FALSE]]: 362 // LAMBDA: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], 363 // LAMBDA: br label %[[PF_EUB_END]] 364 // LAMBDA-DAG: [[PF_EUB_END]]: 365 // LAMBDA-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] 366 // LAMBDA: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], 367 368 // initialize omp.iv 369 // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 370 // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 371 // LAMBDA: br label %[[OMP_PF_JUMP_BACK:.+]] 372 373 // check exit condition 374 // LAMBDA: [[OMP_PF_JUMP_BACK]]: 375 // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], 376 // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], 377 // LAMBDA: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], 
[[OMP_PF_UB_VAL_3]] 378 // LAMBDA: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] 379 380 // check that PrevLB and PrevUB are passed to the 'for' 381 // LAMBDA: [[PF_BODY]]: 382 // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 383 // LAMBDA: br label {{.+}} 384 385 // check stride 1 for 'for' in 'distribute parallel for simd' 386 // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], 387 // LAMBDA: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 388 // LAMBDA: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], 389 // LAMBDA: br label %[[OMP_PF_JUMP_BACK]] 390 391 // LAMBDA-DAG: call void @__kmpc_for_static_fini( 392 // LAMBDA: ret 393 [&]() { 394 a[i] = b[i] + c[i]; 395 }(); 396 } 397 398 // dist_schedule: static chunk 399 #pragma omp target 400 #pragma omp teams 401 // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_3]]( 402 // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}}) 403 404 #pragma omp distribute parallel for simd dist_schedule(static, ch) 405 for (int i = 0; i < n; ++i) { 406 a[i] = b[i] + c[i]; 407 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_3]]( 408 // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca 409 // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca 410 // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca 411 // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca 412 413 // unlike the previous tests, in this one we have a outer and inner loop for 'distribute' 414 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91, 415 // LAMBDA: br label %[[DIST_OUTER_LOOP_HEADER:.+]] 416 417 // LAMBDA: [[DIST_OUTER_LOOP_HEADER]]: 418 // check EUB for distribute 419 // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], 420 // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}}, 421 // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] 422 // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] 423 // LAMBDA-DAG: [[EUB_TRUE]]: 424 // LAMBDA: [[NUM_IT_2:%.+]] = load{{.+}}, 425 // LAMBDA: br label %[[EUB_END:.+]] 426 // LAMBDA-DAG: [[EUB_FALSE]]: 427 // LAMBDA: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], 428 // LAMBDA: br label %[[EUB_END]] 429 // LAMBDA-DAG: [[EUB_END]]: 430 // LAMBDA-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] 431 // LAMBDA: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], 432 433 // initialize omp.iv 434 // LAMBDA: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], 435 // LAMBDA: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], 436 437 // check exit condition 438 // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], 439 // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], 440 // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] 441 // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]] 442 443 // LAMBDA: [[DIST_OUTER_LOOP_BODY]]: 444 // LAMBDA: br label %[[DIST_INNER_LOOP_HEADER:.+]] 445 446 // LAMBDA: [[DIST_INNER_LOOP_HEADER]]: 447 // LAMBDA-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]], 448 // LAMBDA-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]], 449 // LAMBDA: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]] 450 // LAMBDA: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]] 451 452 // check that PrevLB and PrevUB are passed to the 'for' 453 // LAMBDA: 
[[DIST_INNER_LOOP_BODY]]: 454 // LAMBDA-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], 455 // LAMBDA-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} 456 // LAMBDA-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], 457 // LAMBDA-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} 458 // check that distlb and distub are properly passed to fork_call 459 // LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) 460 // LAMBDA-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) 461 // LAMBDA: br label %[[DIST_INNER_LOOP_INC:.+]] 462 463 // check DistInc 464 // LAMBDA: [[DIST_INNER_LOOP_INC]]: 465 // LAMBDA-DAG: [[OMP_IV_VAL_3:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], 466 // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], 467 // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]] 468 // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], 469 // LAMBDA: br label %[[DIST_INNER_LOOP_HEADER]] 470 471 // LAMBDA: [[DIST_INNER_LOOP_END]]: 472 // LAMBDA: br label %[[DIST_OUTER_LOOP_INC:.+]] 473 474 // LAMBDA: [[DIST_OUTER_LOOP_INC]]: 475 // check NextLB and NextUB 476 // LAMBDA-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]], 477 // LAMBDA-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], 478 // LAMBDA-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]] 479 // LAMBDA: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]], 480 // LAMBDA-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]], 481 // LAMBDA-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], 482 // LAMBDA-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]] 483 // LAMBDA: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]], 484 // LAMBDA: br label %[[DIST_OUTER_LOOP_HEADER]] 485 486 // outer loop exit 487 // LAMBDA: [[DIST_OUTER_LOOP_END]]: 488 // LAMBDA-DAG: call void @__kmpc_for_static_fini( 489 // LAMBDA: ret 490 491 // skip implementation of 'parallel for': using default scheduling and was tested above 492 [&]() { 493 a[i] = b[i] + c[i]; 494 }(); 495 } 496 497 // schedule: static no chunk 498 #pragma omp target 499 #pragma omp teams 500 // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_4]]( 501 // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_4:@.+]] to {{.+}}) 502 503 #pragma omp distribute parallel for simd schedule(static) 504 for (int i = 0; i < n; ++i) { 505 a[i] = b[i] + c[i]; 506 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_4]]( 507 // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca 508 // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca 509 // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca 510 // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca 511 512 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 513 // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_4:@.+]] to {{.+}}, 514 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 515 // LAMBDA: ret 516 517 // 'parallel for' implementation is the same as the case without schedule clase (static no chunk is the default) 518 // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_4]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], 
{{.+}}, {{.+}}, {{.+}}, {{.+}}) 519 520 // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 521 // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 522 // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 523 524 // initialize lb and ub to PrevLB and PrevUB 525 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 526 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 527 // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 528 // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 529 // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 530 // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 531 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 532 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 533 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 534 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 535 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 536 537 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used 538 // In this case we use EUB 539 // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 540 // LAMBDA: [[PF_NUM_IT_1:%.+]] = load{{.+}}, 541 // LAMBDA-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] 542 // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 543 // LAMBDA: [[PF_EUB_TRUE]]: 544 // LAMBDA: [[PF_NUM_IT_2:%.+]] = load{{.+}}, 545 // LAMBDA: br label %[[PF_EUB_END:.+]] 546 // LAMBDA-DAG: [[PF_EUB_FALSE]]: 547 // LAMBDA: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], 548 // LAMBDA: br label %[[PF_EUB_END]] 549 // LAMBDA-DAG: [[PF_EUB_END]]: 550 // LAMBDA-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] 551 // LAMBDA: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], 552 553 // initialize omp.iv 554 // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 555 // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 556 // LAMBDA: br label %[[OMP_PF_JUMP_BACK:.+]] 557 558 // check exit condition 559 // LAMBDA: [[OMP_PF_JUMP_BACK]]: 560 // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], 561 // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], 562 // LAMBDA: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 563 // LAMBDA: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] 564 565 // check that PrevLB and PrevUB are passed to the 'for' 566 // LAMBDA: [[PF_BODY]]: 567 // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 568 // LAMBDA: br label {{.+}} 569 570 // check stride 1 for 'for' in 'distribute parallel for simd' 571 // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], 572 // LAMBDA: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 573 // LAMBDA: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], 574 // LAMBDA: br label %[[OMP_PF_JUMP_BACK]] 575 576 // LAMBDA-DAG: call void @__kmpc_for_static_fini( 577 // LAMBDA: ret 578 579 [&]() { 580 a[i] = b[i] + c[i]; 581 }(); 582 } 583 584 // schedule: static chunk 585 #pragma omp target 586 #pragma omp teams 587 // LAMBDA: define{{.+}} void 
[[OFFLOADING_FUN_5]]( 588 // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_5:@.+]] to {{.+}}) 589 590 #pragma omp distribute parallel for simd schedule(static, ch) 591 for (int i = 0; i < n; ++i) { 592 a[i] = b[i] + c[i]; 593 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_5]]( 594 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 595 // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_5:@.+]] to {{.+}}, 596 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 597 // LAMBDA: ret 598 599 // 'parallel for' implementation using outer and inner loops and PrevEUB 600 // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_5]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}}) 601 // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 602 // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 603 // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 604 // LAMBDA-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, 605 606 // initialize lb and ub to PrevLB and PrevUB 607 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 608 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 609 // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 610 // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 611 // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 612 // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 613 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 614 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 615 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 616 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 617 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 618 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] 619 620 // check PrevEUB (using PrevUB instead of NumIt as upper bound) 621 // LAMBDA: [[OMP_PF_OUTER_LOOP_HEADER]]: 622 // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 623 // LAMBDA-64-DAG: [[OMP_PF_UB_VAL_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_1]] to 624 // LAMBDA: [[PF_PREV_UB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 625 // LAMBDA-64-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_CONV]], [[PF_PREV_UB_VAL_1]] 626 // LAMBDA-32-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_PREV_UB_VAL_1]] 627 // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 628 // LAMBDA: [[PF_EUB_TRUE]]: 629 // LAMBDA: [[PF_PREV_UB_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 630 // LAMBDA: br label %[[PF_EUB_END:.+]] 631 // LAMBDA-DAG: [[PF_EUB_FALSE]]: 632 // LAMBDA: [[OMP_PF_UB_VAL_2:%.+]] = load{{.+}} [[OMP_PF_UB]], 633 // LAMBDA-64: [[OMP_PF_UB_VAL_2_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_2]] to 634 // LAMBDA: br label %[[PF_EUB_END]] 635 // LAMBDA-DAG: [[PF_EUB_END]]: 636 // LAMBDA-64-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2_CONV]], %[[PF_EUB_FALSE]] ] 637 // LAMBDA-32-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2]], %[[PF_EUB_FALSE]] ] 
638 // LAMBDA-64-DAG: [[PF_EUB_RES_CONV:%.+]] = trunc{{.+}} [[PF_EUB_RES]] to 639 // LAMBDA-64: store{{.+}} [[PF_EUB_RES_CONV]],{{.+}} [[OMP_PF_UB]], 640 // LAMBDA-32: store{{.+}} [[PF_EUB_RES]], {{.+}} [[OMP_PF_UB]], 641 642 // initialize omp.iv (IV = LB) 643 // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 644 // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 645 646 // outer loop: while (IV < UB) { 647 // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 648 // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 649 // LAMBDA: [[PF_CMP_IV_UB_1:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 650 // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_1]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] 651 652 // LAMBDA: [[OMP_PF_OUTER_LOOP_BODY]]: 653 // LAMBDA: br label %[[OMP_PF_INNER_FOR_HEADER:.+]] 654 655 // LAMBDA: [[OMP_PF_INNER_FOR_HEADER]]: 656 // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 657 // LAMBDA-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 658 // LAMBDA: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] 659 // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] 660 661 // LAMBDA: [[OMP_PF_INNER_LOOP_BODY]]: 662 // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 663 // skip body branch 664 // LAMBDA: br{{.+}} 665 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_INC:.+]] 666 667 // IV = IV + 1 and inner loop latch 668 // LAMBDA: [[OMP_PF_INNER_LOOP_INC]]: 669 // LAMBDA-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], 670 // LAMBDA-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 671 // LAMBDA-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], 672 // LAMBDA: br label %[[OMP_PF_INNER_FOR_HEADER]] 673 674 // check NextLB and NextUB 675 // LAMBDA: [[OMP_PF_INNER_LOOP_END]]: 676 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] 677 678 // LAMBDA: [[OMP_PF_OUTER_LOOP_INC]]: 679 // LAMBDA-DAG: [[OMP_PF_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 680 // LAMBDA-DAG: [[OMP_PF_ST_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], 681 // LAMBDA-DAG: [[OMP_PF_LB_NEXT:%.+]] = add{{.+}} [[OMP_PF_LB_VAL_2]], [[OMP_PF_ST_VAL_1]] 682 // LAMBDA: store{{.+}} [[OMP_PF_LB_NEXT]], {{.+}}* [[OMP_PF_LB]], 683 // LAMBDA-DAG: [[OMP_PF_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], 684 // LAMBDA-DAG: [[OMP_PF_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], 685 // LAMBDA-DAG: [[OMP_PF_UB_NEXT:%.+]] = add{{.+}} [[OMP_PF_UB_VAL_5]], [[OMP_PF_ST_VAL_2]] 686 // LAMBDA: store{{.+}} [[OMP_PF_UB_NEXT]], {{.+}}* [[OMP_PF_UB]], 687 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER]] 688 689 // LAMBDA: [[OMP_PF_OUTER_LOOP_END]]: 690 // LAMBDA-DAG: call void @__kmpc_for_static_fini( 691 // LAMBDA: ret 692 [&]() { 693 a[i] = b[i] + c[i]; 694 }(); 695 } 696 697 // schedule: dynamic no chunk 698 #pragma omp target 699 #pragma omp teams 700 // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_6]]( 701 // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_6:@.+]] to {{.+}}) 702 703 #pragma omp distribute parallel for simd schedule(dynamic) 704 for (int i = 0; i < n; ++i) { 705 a[i] = b[i] + c[i]; 706 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_6]]( 707 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 708 // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, 
{{.+}}[[OMP_PARFOR_OUTLINED_6:@.+]] to {{.+}}, 709 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 710 // LAMBDA: ret 711 712 // 'parallel for' implementation using outer and inner loops and PrevEUB 713 // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_6]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 714 // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 715 // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 716 // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 717 // LAMBDA-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, 718 719 // initialize lb and ub to PrevLB and PrevUB 720 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 721 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 722 // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 723 // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 724 // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 725 // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 726 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 727 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 728 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 729 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 730 // LAMBDA-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 731 // LAMBDA-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], 732 // LAMBDA: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}}) 733 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] 734 735 // LAMBDA: [[OMP_PF_OUTER_LOOP_HEADER]]: 736 // LAMBDA: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]]) 737 // LAMBDA: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0 738 // LAMBDA: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] 739 740 // initialize omp.iv (IV = LB) 741 // LAMBDA: [[OMP_PF_OUTER_LOOP_BODY]]: 742 // LAMBDA-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 743 // LAMBDA-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 744 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]] 745 746 // LAMBDA: [[OMP_PF_INNER_LOOP_HEADER]]: 747 // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 748 // LAMBDA-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 749 // LAMBDA: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] 750 // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] 751 752 // LAMBDA: [[OMP_PF_INNER_LOOP_BODY]]: 753 // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 754 // skip body branch 755 // LAMBDA: br{{.+}} 756 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_INC:.+]] 757 758 // IV = IV + 1 and inner loop latch 759 // LAMBDA: [[OMP_PF_INNER_LOOP_INC]]: 760 // LAMBDA-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], 761 // LAMBDA-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 762 // LAMBDA-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], 763 // LAMBDA: br label 
%[[OMP_PF_INNER_LOOP_HEADER]] 764 765 // check NextLB and NextUB 766 // LAMBDA: [[OMP_PF_INNER_LOOP_END]]: 767 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] 768 769 // LAMBDA: [[OMP_PF_OUTER_LOOP_INC]]: 770 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER]] 771 772 // LAMBDA: [[OMP_PF_OUTER_LOOP_END]]: 773 // LAMBDA: ret 774 [&]() { 775 a[i] = b[i] + c[i]; 776 }(); 777 } 778 779 // schedule: dynamic chunk 780 #pragma omp target 781 #pragma omp teams 782 // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_7]]( 783 // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_7:@.+]] to {{.+}}) 784 785 #pragma omp distribute parallel for simd schedule(dynamic, ch) 786 for (int i = 0; i < n; ++i) { 787 a[i] = b[i] + c[i]; 788 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_7]]( 789 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 790 // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_7:@.+]] to {{.+}}, 791 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 792 // LAMBDA: ret 793 794 // 'parallel for' implementation using outer and inner loops and PrevEUB 795 // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_7]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}}) 796 // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 797 // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 798 // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 799 // LAMBDA-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, 800 801 // initialize lb and ub to PrevLB and PrevUB 802 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 803 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 804 // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 805 // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 806 // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 807 // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 808 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 809 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 810 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 811 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 812 // LAMBDA-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 813 // LAMBDA-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], 814 // LAMBDA: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}}) 815 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] 816 817 // LAMBDA: [[OMP_PF_OUTER_LOOP_HEADER]]: 818 // LAMBDA: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]]) 819 // LAMBDA: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0 820 // LAMBDA: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] 821 822 // initialize omp.iv (IV = LB) 823 // LAMBDA: [[OMP_PF_OUTER_LOOP_BODY]]: 824 // LAMBDA-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 825 // LAMBDA-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 826 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]] 827 828 // LAMBDA: 
[[OMP_PF_INNER_LOOP_HEADER]]: 829 // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 830 // LAMBDA-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 831 // LAMBDA: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] 832 // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] 833 834 // LAMBDA: [[OMP_PF_INNER_LOOP_BODY]]: 835 // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 836 // skip body branch 837 // LAMBDA: br{{.+}} 838 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_INC:.+]] 839 840 // IV = IV + 1 and inner loop latch 841 // LAMBDA: [[OMP_PF_INNER_LOOP_INC]]: 842 // LAMBDA-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], 843 // LAMBDA-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 844 // LAMBDA-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], 845 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER]] 846 847 // check NextLB and NextUB 848 // LAMBDA: [[OMP_PF_INNER_LOOP_END]]: 849 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] 850 851 // LAMBDA: [[OMP_PF_OUTER_LOOP_INC]]: 852 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER]] 853 854 // LAMBDA: [[OMP_PF_OUTER_LOOP_END]]: 855 // LAMBDA: ret 856 [&]() { 857 a[i] = b[i] + c[i]; 858 }(); 859 } 860 }(); 861 return 0; 862 #else 863 // CHECK-LABEL: @main 864 865 // CHECK: call i{{[0-9]+}} @__tgt_target_teams( 866 // CHECK: call void [[OFFLOADING_FUN_1:@.+]]( 867 868 // CHECK: call i{{[0-9]+}} @__tgt_target_teams( 869 // CHECK: call void [[OFFLOADING_FUN_2:@.+]]( 870 871 // CHECK: call i{{[0-9]+}} @__tgt_target_teams( 872 // CHECK: call void [[OFFLOADING_FUN_3:@.+]]( 873 874 // CHECK: call i{{[0-9]+}} @__tgt_target_teams( 875 // CHECK: call void [[OFFLOADING_FUN_4:@.+]]( 876 877 // CHECK: call i{{[0-9]+}} @__tgt_target_teams( 878 // CHECK: call void [[OFFLOADING_FUN_5:@.+]]( 879 880 // CHECK: call i{{[0-9]+}} @__tgt_target_teams( 881 // CHECK: call void [[OFFLOADING_FUN_6:@.+]]( 882 883 // CHECK: call i{{[0-9]+}} @__tgt_target_teams( 884 // CHECK: call void [[OFFLOADING_FUN_7:@.+]]( 885 886 // CHECK: call{{.+}} [[TMAIN:@.+]]() 887 888 // no schedule clauses 889 #pragma omp target 890 #pragma omp teams 891 // CHECK: define internal void [[OFFLOADING_FUN_1]]( 892 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}}) 893 894 #pragma omp distribute parallel for simd 895 for (int i = 0; i < n; ++i) { 896 a[i] = b[i] + c[i]; 897 // CHECK: define{{.+}} void [[OMP_OUTLINED_1]]( 898 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca 899 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca 900 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca 901 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca 902 903 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 904 905 // check EUB for distribute 906 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], 907 // CHECK: [[NUM_IT_1:%.+]] = load{{.+}}, 908 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] 909 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] 910 // CHECK-DAG: [[EUB_TRUE]]: 911 // CHECK: [[NUM_IT_2:%.+]] = load{{.+}}, 912 // CHECK: br label %[[EUB_END:.+]] 913 // CHECK-DAG: [[EUB_FALSE]]: 914 // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], 915 // CHECK: br label %[[EUB_END]] 916 // CHECK-DAG: [[EUB_END]]: 917 // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], 
%[[EUB_FALSE]] ] 918 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], 919 920 // initialize omp.iv 921 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], 922 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], 923 // CHECK: br label %[[OMP_JUMP_BACK:.+]] 924 925 // check exit condition 926 // CHECK: [[OMP_JUMP_BACK]]: 927 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], 928 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], 929 // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] 930 // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]] 931 932 // check that PrevLB and PrevUB are passed to the 'for' 933 // CHECK: [[DIST_BODY]]: 934 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], 935 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} 936 // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], 937 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} 938 // check that distlb and distub are properly passed to fork_call 939 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) 940 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) 941 // CHECK: br label %[[DIST_INC:.+]] 942 943 // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch 944 // CHECK: [[DIST_INC]]: 945 // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], 946 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], 947 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] 948 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], 949 // CHECK: br label %[[OMP_JUMP_BACK]] 950 951 // CHECK-DAG: call void @__kmpc_for_static_fini( 952 // CHECK: ret 953 954 // implementation of 'parallel for' 955 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_1]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 956 957 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 958 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 959 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 960 961 // initialize lb and ub to PrevLB and PrevUB 962 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 963 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 964 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 965 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 966 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 967 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 968 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 969 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 970 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 971 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 972 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 973 974 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used 975 // In this 
case we use EUB 976 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 977 // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}}, 978 // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] 979 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 980 // CHECK: [[PF_EUB_TRUE]]: 981 // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}}, 982 // CHECK: br label %[[PF_EUB_END:.+]] 983 // CHECK-DAG: [[PF_EUB_FALSE]]: 984 // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], 985 // CHECK: br label %[[PF_EUB_END]] 986 // CHECK-DAG: [[PF_EUB_END]]: 987 // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] 988 // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], 989 990 // initialize omp.iv 991 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 992 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 993 // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]] 994 995 // check exit condition 996 // CHECK: [[OMP_PF_JUMP_BACK]]: 997 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], 998 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], 999 // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 1000 // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] 1001 1002 // check that PrevLB and PrevUB are passed to the 'for' 1003 // CHECK: [[PF_BODY]]: 1004 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1005 // CHECK: br label {{.+}} 1006 1007 // check stride 1 for 'for' in 'distribute parallel for simd' 1008 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], 1009 // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 1010 // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], 1011 // CHECK: br label %[[OMP_PF_JUMP_BACK]] 1012 1013 // CHECK-DAG: call void @__kmpc_for_static_fini( 1014 // CHECK: ret 1015 } 1016 1017 // dist_schedule: static no chunk 1018 #pragma omp target 1019 #pragma omp teams 1020 // CHECK: define{{.+}} void [[OFFLOADING_FUN_2]]( 1021 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_2:@.+]] to {{.+}}) 1022 1023 #pragma omp distribute parallel for simd dist_schedule(static) 1024 for (int i = 0; i < n; ++i) { 1025 a[i] = b[i] + c[i]; 1026 // CHECK: define{{.+}} void [[OMP_OUTLINED_2]]( 1027 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca 1028 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca 1029 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca 1030 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca 1031 1032 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 1033 1034 // check EUB for distribute 1035 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], 1036 // CHECK: [[NUM_IT_1:%.+]] = load{{.+}}, 1037 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] 1038 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] 1039 // CHECK-DAG: [[EUB_TRUE]]: 1040 // CHECK: [[NUM_IT_2:%.+]] = load{{.+}}, 1041 // CHECK: br label %[[EUB_END:.+]] 1042 // CHECK-DAG: [[EUB_FALSE]]: 1043 // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], 1044 // CHECK: br label %[[EUB_END]] 1045 // CHECK-DAG: [[EUB_END]]: 1046 // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] 1047 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* 
[[OMP_UB]], 1048 1049 // initialize omp.iv 1050 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], 1051 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], 1052 // CHECK: br label %[[OMP_JUMP_BACK:.+]] 1053 1054 // check exit condition 1055 // CHECK: [[OMP_JUMP_BACK]]: 1056 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], 1057 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], 1058 // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] 1059 // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]] 1060 1061 // check that PrevLB and PrevUB are passed to the 'for' 1062 // CHECK: [[DIST_BODY]]: 1063 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], 1064 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} 1065 // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], 1066 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} 1067 // check that distlb and distub are properly passed to fork_call 1068 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) 1069 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) 1070 // CHECK: br label %[[DIST_INC:.+]] 1071 1072 // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch 1073 // CHECK: [[DIST_INC]]: 1074 // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], 1075 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], 1076 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] 1077 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], 1078 // CHECK: br label %[[OMP_JUMP_BACK]] 1079 1080 // CHECK-DAG: call void @__kmpc_for_static_fini( 1081 // CHECK: ret 1082 1083 // implementation of 'parallel for' 1084 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_2]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 1085 1086 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 1087 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 1088 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 1089 1090 // initialize lb and ub to PrevLB and PrevUB 1091 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 1092 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 1093 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 1094 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 1095 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 1096 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 1097 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 1098 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 1099 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 1100 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 1101 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 1102 1103 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used 1104 // In this case 
we use EUB 1105 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 1106 // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}}, 1107 // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] 1108 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 1109 // CHECK: [[PF_EUB_TRUE]]: 1110 // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}}, 1111 // CHECK: br label %[[PF_EUB_END:.+]] 1112 // CHECK-DAG: [[PF_EUB_FALSE]]: 1113 // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], 1114 // CHECK: br label %[[PF_EUB_END]] 1115 // CHECK-DAG: [[PF_EUB_END]]: 1116 // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] 1117 // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], 1118 1119 // initialize omp.iv 1120 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 1121 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 1122 // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]] 1123 1124 // check exit condition 1125 // CHECK: [[OMP_PF_JUMP_BACK]]: 1126 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], 1127 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], 1128 // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 1129 // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] 1130 1131 // check that PrevLB and PrevUB are passed to the 'for' 1132 // CHECK: [[PF_BODY]]: 1133 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1134 // CHECK: br label {{.+}} 1135 1136 // check stride 1 for 'for' in 'distribute parallel for simd' 1137 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], 1138 // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 1139 // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], 1140 // CHECK: br label %[[OMP_PF_JUMP_BACK]] 1141 1142 // CHECK-DAG: call void @__kmpc_for_static_fini( 1143 // CHECK: ret 1144 } 1145 1146 // dist_schedule: static chunk 1147 #pragma omp target 1148 #pragma omp teams 1149 // CHECK: define{{.+}} void [[OFFLOADING_FUN_3]]( 1150 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}}) 1151 1152 #pragma omp distribute parallel for simd dist_schedule(static, ch) 1153 for (int i = 0; i < n; ++i) { 1154 a[i] = b[i] + c[i]; 1155 // CHECK: define{{.+}} void [[OMP_OUTLINED_3]]( 1156 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca 1157 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca 1158 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca 1159 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca 1160 1161 // unlike the previous tests, in this one we have a outer and inner loop for 'distribute' 1162 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91, 1163 // CHECK: br label %[[DIST_OUTER_LOOP_HEADER:.+]] 1164 1165 // CHECK: [[DIST_OUTER_LOOP_HEADER]]: 1166 // check EUB for distribute 1167 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], 1168 // CHECK: [[NUM_IT_1:%.+]] = load{{.+}}, 1169 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] 1170 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] 1171 // CHECK-DAG: [[EUB_TRUE]]: 1172 // CHECK: [[NUM_IT_2:%.+]] = load{{.+}}, 1173 // CHECK: br label %[[EUB_END:.+]] 1174 // CHECK-DAG: [[EUB_FALSE]]: 1175 // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], 1176 // CHECK: br label 
%[[EUB_END]] 1177 // CHECK-DAG: [[EUB_END]]: 1178 // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] 1179 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], 1180 1181 // initialize omp.iv 1182 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], 1183 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], 1184 1185 // check exit condition 1186 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], 1187 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], 1188 // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] 1189 // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]] 1190 1191 // CHECK: [[DIST_OUTER_LOOP_BODY]]: 1192 // CHECK: br label %[[DIST_INNER_LOOP_HEADER:.+]] 1193 1194 // CHECK: [[DIST_INNER_LOOP_HEADER]]: 1195 // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]], 1196 // CHECK-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]], 1197 // CHECK: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]] 1198 // CHECK: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]] 1199 1200 // check that PrevLB and PrevUB are passed to the 'for' 1201 // CHECK: [[DIST_INNER_LOOP_BODY]]: 1202 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], 1203 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} 1204 // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], 1205 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} 1206 // check that distlb and distub are properly passed to fork_call 1207 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) 1208 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) 1209 // CHECK: br label %[[DIST_INNER_LOOP_INC:.+]] 1210 1211 // check DistInc 1212 // CHECK: [[DIST_INNER_LOOP_INC]]: 1213 // CHECK-DAG: [[OMP_IV_VAL_3:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], 1214 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], 1215 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]] 1216 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], 1217 // CHECK: br label %[[DIST_INNER_LOOP_HEADER]] 1218 1219 // CHECK: [[DIST_INNER_LOOP_END]]: 1220 // CHECK: br label %[[DIST_OUTER_LOOP_INC:.+]] 1221 1222 // CHECK: [[DIST_OUTER_LOOP_INC]]: 1223 // check NextLB and NextUB 1224 // CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]], 1225 // CHECK-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], 1226 // CHECK-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]] 1227 // CHECK: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]], 1228 // CHECK-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]], 1229 // CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], 1230 // CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]] 1231 // CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]], 1232 // CHECK: br label %[[DIST_OUTER_LOOP_HEADER]] 1233 1234 // outer loop exit 1235 // CHECK: [[DIST_OUTER_LOOP_END]]: 1236 // CHECK-DAG: call void @__kmpc_for_static_fini( 1237 // CHECK: ret 1238 1239 // skip implementation of 
'parallel for': using default scheduling and was tested above 1240 } 1241 1242 // schedule: static no chunk 1243 #pragma omp target 1244 #pragma omp teams 1245 // CHECK: define{{.+}} void [[OFFLOADING_FUN_4]]( 1246 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_4:@.+]] to {{.+}}) 1247 1248 #pragma omp distribute parallel for simd schedule(static) 1249 for (int i = 0; i < n; ++i) { 1250 a[i] = b[i] + c[i]; 1251 // CHECK: define{{.+}} void [[OMP_OUTLINED_4]]( 1252 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca 1253 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca 1254 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca 1255 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca 1256 1257 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 1258 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_4:@.+]] to {{.+}}, 1259 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 1260 // CHECK: ret 1261 1262 // 'parallel for' implementation is the same as the case without schedule clase (static no chunk is the default) 1263 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_4]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 1264 1265 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 1266 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 1267 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 1268 1269 // initialize lb and ub to PrevLB and PrevUB 1270 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 1271 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 1272 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 1273 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 1274 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 1275 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 1276 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 1277 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 1278 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 1279 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 1280 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 1281 1282 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used 1283 // In this case we use EUB 1284 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 1285 // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}}, 1286 // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] 1287 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 1288 // CHECK: [[PF_EUB_TRUE]]: 1289 // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}}, 1290 // CHECK: br label %[[PF_EUB_END:.+]] 1291 // CHECK-DAG: [[PF_EUB_FALSE]]: 1292 // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], 1293 // CHECK: br label %[[PF_EUB_END]] 1294 // CHECK-DAG: [[PF_EUB_END]]: 1295 // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] 1296 // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], 1297 1298 // initialize omp.iv 1299 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 
1300 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 1301 // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]] 1302 1303 // check exit condition 1304 // CHECK: [[OMP_PF_JUMP_BACK]]: 1305 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], 1306 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], 1307 // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 1308 // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] 1309 1310 // check that PrevLB and PrevUB are passed to the 'for' 1311 // CHECK: [[PF_BODY]]: 1312 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1313 // CHECK: br label {{.+}} 1314 1315 // check stride 1 for 'for' in 'distribute parallel for simd' 1316 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], 1317 // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 1318 // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], 1319 // CHECK: br label %[[OMP_PF_JUMP_BACK]] 1320 1321 // CHECK-DAG: call void @__kmpc_for_static_fini( 1322 // CHECK: ret 1323 } 1324 1325 // schedule: static chunk 1326 #pragma omp target 1327 #pragma omp teams 1328 // CHECK: define{{.+}} void [[OFFLOADING_FUN_5]]( 1329 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_5:@.+]] to {{.+}}) 1330 1331 #pragma omp distribute parallel for simd schedule(static, ch) 1332 for (int i = 0; i < n; ++i) { 1333 a[i] = b[i] + c[i]; 1334 // CHECK: define{{.+}} void [[OMP_OUTLINED_5]]( 1335 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 1336 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_5:@.+]] to {{.+}}, 1337 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 1338 // CHECK: ret 1339 1340 // 'parallel for' implementation using outer and inner loops and PrevEUB 1341 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_5]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}}) 1342 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 1343 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 1344 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 1345 // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, 1346 1347 // initialize lb and ub to PrevLB and PrevUB 1348 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 1349 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 1350 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 1351 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 1352 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 1353 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 1354 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 1355 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 1356 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 1357 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 1358 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 1359 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] 1360 1361 // check PrevEUB (using PrevUB instead of NumIt as upper bound) 1362 // CHECK: 
[[OMP_PF_OUTER_LOOP_HEADER]]: 1363 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 1364 // CHECK-64-DAG: [[OMP_PF_UB_VAL_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_1]] to 1365 // CHECK: [[PF_PREV_UB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 1366 // CHECK-64-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_CONV]], [[PF_PREV_UB_VAL_1]] 1367 // CHECK-32-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_PREV_UB_VAL_1]] 1368 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 1369 // CHECK: [[PF_EUB_TRUE]]: 1370 // CHECK: [[PF_PREV_UB_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 1371 // CHECK: br label %[[PF_EUB_END:.+]] 1372 // CHECK-DAG: [[PF_EUB_FALSE]]: 1373 // CHECK: [[OMP_PF_UB_VAL_2:%.+]] = load{{.+}} [[OMP_PF_UB]], 1374 // CHECK-64: [[OMP_PF_UB_VAL_2_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_2]] to 1375 // CHECK: br label %[[PF_EUB_END]] 1376 // CHECK-DAG: [[PF_EUB_END]]: 1377 // CHECK-64-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2_CONV]], %[[PF_EUB_FALSE]] ] 1378 // CHECK-32-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2]], %[[PF_EUB_FALSE]] ] 1379 // CHECK-64-DAG: [[PF_EUB_RES_CONV:%.+]] = trunc{{.+}} [[PF_EUB_RES]] to 1380 // CHECK-64: store{{.+}} [[PF_EUB_RES_CONV]],{{.+}} [[OMP_PF_UB]], 1381 // CHECK-32: store{{.+}} [[PF_EUB_RES]], {{.+}} [[OMP_PF_UB]], 1382 1383 // initialize omp.iv (IV = LB) 1384 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 1385 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 1386 1387 // outer loop: while (IV < UB) { 1388 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1389 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 1390 // CHECK: [[PF_CMP_IV_UB_1:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 1391 // CHECK: br{{.+}} [[PF_CMP_IV_UB_1]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] 1392 1393 // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]: 1394 // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER:.+]] 1395 1396 // CHECK: [[OMP_PF_INNER_FOR_HEADER]]: 1397 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1398 // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 1399 // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] 1400 // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] 1401 1402 // CHECK: [[OMP_PF_INNER_LOOP_BODY]]: 1403 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1404 // skip body branch 1405 // CHECK: br{{.+}} 1406 // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]] 1407 1408 // IV = IV + 1 and inner loop latch 1409 // CHECK: [[OMP_PF_INNER_LOOP_INC]]: 1410 // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], 1411 // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 1412 // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], 1413 // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER]] 1414 1415 // check NextLB and NextUB 1416 // CHECK: [[OMP_PF_INNER_LOOP_END]]: 1417 // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] 1418 1419 // CHECK: [[OMP_PF_OUTER_LOOP_INC]]: 1420 // CHECK-DAG: [[OMP_PF_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 1421 // CHECK-DAG: [[OMP_PF_ST_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], 1422 // CHECK-DAG: 
[[OMP_PF_LB_NEXT:%.+]] = add{{.+}} [[OMP_PF_LB_VAL_2]], [[OMP_PF_ST_VAL_1]] 1423 // CHECK: store{{.+}} [[OMP_PF_LB_NEXT]], {{.+}}* [[OMP_PF_LB]], 1424 // CHECK-DAG: [[OMP_PF_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], 1425 // CHECK-DAG: [[OMP_PF_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], 1426 // CHECK-DAG: [[OMP_PF_UB_NEXT:%.+]] = add{{.+}} [[OMP_PF_UB_VAL_5]], [[OMP_PF_ST_VAL_2]] 1427 // CHECK: store{{.+}} [[OMP_PF_UB_NEXT]], {{.+}}* [[OMP_PF_UB]], 1428 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]] 1429 1430 // CHECK: [[OMP_PF_OUTER_LOOP_END]]: 1431 // CHECK-DAG: call void @__kmpc_for_static_fini( 1432 // CHECK: ret 1433 } 1434 1435 // schedule: dynamic no chunk 1436 #pragma omp target 1437 #pragma omp teams 1438 // CHECK: define{{.+}} void [[OFFLOADING_FUN_6]]( 1439 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_6:@.+]] to {{.+}}) 1440 1441 #pragma omp distribute parallel for simd schedule(dynamic) 1442 for (int i = 0; i < n; ++i) { 1443 a[i] = b[i] + c[i]; 1444 // CHECK: define{{.+}} void [[OMP_OUTLINED_6]]( 1445 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 1446 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_6:@.+]] to {{.+}}, 1447 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 1448 // CHECK: ret 1449 1450 // 'parallel for' implementation using outer and inner loops and PrevEUB 1451 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_6]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 1452 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 1453 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 1454 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 1455 // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, 1456 1457 // initialize lb and ub to PrevLB and PrevUB 1458 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 1459 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 1460 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 1461 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 1462 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 1463 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 1464 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 1465 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 1466 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 1467 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 1468 // CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 1469 // CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], 1470 // CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}}) 1471 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] 1472 1473 // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]: 1474 // CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]]) 1475 // CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0 1476 // CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] 1477 1478 // initialize omp.iv (IV = LB) 1479 
// CHECK: [[OMP_PF_OUTER_LOOP_BODY]]: 1480 // CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 1481 // CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 1482 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]] 1483 1484 // CHECK: [[OMP_PF_INNER_LOOP_HEADER]]: 1485 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1486 // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 1487 // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] 1488 // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] 1489 1490 // CHECK: [[OMP_PF_INNER_LOOP_BODY]]: 1491 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1492 // skip body branch 1493 // CHECK: br{{.+}} 1494 // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]] 1495 1496 // IV = IV + 1 and inner loop latch 1497 // CHECK: [[OMP_PF_INNER_LOOP_INC]]: 1498 // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], 1499 // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 1500 // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], 1501 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER]] 1502 1503 // check NextLB and NextUB 1504 // CHECK: [[OMP_PF_INNER_LOOP_END]]: 1505 // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] 1506 1507 // CHECK: [[OMP_PF_OUTER_LOOP_INC]]: 1508 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]] 1509 1510 // CHECK: [[OMP_PF_OUTER_LOOP_END]]: 1511 // CHECK: ret 1512 } 1513 1514 // schedule: dynamic chunk 1515 #pragma omp target 1516 #pragma omp teams 1517 // CHECK: define{{.+}} void [[OFFLOADING_FUN_7]]( 1518 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_7:@.+]] to {{.+}}) 1519 1520 #pragma omp distribute parallel for simd schedule(dynamic, ch) 1521 for (int i = 0; i < n; ++i) { 1522 a[i] = b[i] + c[i]; 1523 // CHECK: define{{.+}} void [[OMP_OUTLINED_7]]( 1524 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 1525 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_7:@.+]] to {{.+}}, 1526 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 1527 // CHECK: ret 1528 1529 // 'parallel for' implementation using outer and inner loops and PrevEUB 1530 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_7]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}}) 1531 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 1532 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 1533 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 1534 // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, 1535 1536 // initialize lb and ub to PrevLB and PrevUB 1537 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 1538 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 1539 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 1540 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 1541 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 1542 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 1543 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 1544 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 1545 // CHECK-32-DAG: 
store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 1546 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 1547 // CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 1548 // CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], 1549 // CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}}) 1550 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] 1551 1552 // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]: 1553 // CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]]) 1554 // CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0 1555 // CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] 1556 1557 // initialize omp.iv (IV = LB) 1558 // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]: 1559 // CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 1560 // CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 1561 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]] 1562 1563 // CHECK: [[OMP_PF_INNER_LOOP_HEADER]]: 1564 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1565 // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 1566 // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] 1567 // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] 1568 1569 // CHECK: [[OMP_PF_INNER_LOOP_BODY]]: 1570 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1571 // skip body branch 1572 // CHECK: br{{.+}} 1573 // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]] 1574 1575 // IV = IV + 1 and inner loop latch 1576 // CHECK: [[OMP_PF_INNER_LOOP_INC]]: 1577 // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], 1578 // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 1579 // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], 1580 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER]] 1581 1582 // check NextLB and NextUB 1583 // CHECK: [[OMP_PF_INNER_LOOP_END]]: 1584 // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] 1585 1586 // CHECK: [[OMP_PF_OUTER_LOOP_INC]]: 1587 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]] 1588 1589 // CHECK: [[OMP_PF_OUTER_LOOP_END]]: 1590 // CHECK: ret 1591 } 1592 1593 return tmain<int>(); 1594 #endif 1595 } 1596 1597 // check code 1598 // CHECK: define{{.+}} [[TMAIN]]() 1599 1600 // CHECK: call i{{[0-9]+}} @__tgt_target_teams( 1601 // CHECK: call void [[OFFLOADING_FUN_1:@.+]]( 1602 1603 // CHECK: call i{{[0-9]+}} @__tgt_target_teams( 1604 // CHECK: call void [[OFFLOADING_FUN_2:@.+]]( 1605 1606 // CHECK: call i{{[0-9]+}} @__tgt_target_teams( 1607 // CHECK: call void [[OFFLOADING_FUN_3:@.+]]( 1608 1609 // CHECK: call i{{[0-9]+}} @__tgt_target_teams( 1610 // CHECK: call void [[OFFLOADING_FUN_4:@.+]]( 1611 1612 // CHECK: call i{{[0-9]+}} @__tgt_target_teams( 1613 // CHECK: call void [[OFFLOADING_FUN_5:@.+]]( 1614 1615 // CHECK: call i{{[0-9]+}} @__tgt_target_teams( 1616 // CHECK: call void [[OFFLOADING_FUN_6:@.+]]( 1617 1618 // CHECK: call i{{[0-9]+}} @__tgt_target_teams( 1619 // CHECK: call void [[OFFLOADING_FUN_7:@.+]]( 1620 1621 // CHECK: define{{.+}} void [[OFFLOADING_FUN_1]]( 1622 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}}) 1623 1624 // CHECK: 
define{{.+}} void [[OMP_OUTLINED_1]]( 1625 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca 1626 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca 1627 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca 1628 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca 1629 1630 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 1631 1632 // check EUB for distribute 1633 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], 1634 // CHECK: [[NUM_IT_1:%.+]] = load{{.+}}, 1635 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] 1636 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] 1637 // CHECK-DAG: [[EUB_TRUE]]: 1638 // CHECK: [[NUM_IT_2:%.+]] = load{{.+}}, 1639 // CHECK: br label %[[EUB_END:.+]] 1640 // CHECK-DAG: [[EUB_FALSE]]: 1641 // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], 1642 // CHECK: br label %[[EUB_END]] 1643 // CHECK-DAG: [[EUB_END]]: 1644 // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] 1645 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], 1646 1647 // initialize omp.iv 1648 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], 1649 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], 1650 // CHECK: br label %[[OMP_JUMP_BACK:.+]] 1651 1652 // check exit condition 1653 // CHECK: [[OMP_JUMP_BACK]]: 1654 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], 1655 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], 1656 // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] 1657 // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]] 1658 1659 // check that PrevLB and PrevUB are passed to the 'for' 1660 // CHECK: [[DIST_BODY]]: 1661 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], 1662 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} 1663 // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], 1664 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} 1665 // check that distlb and distub are properly passed to fork_call 1666 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) 1667 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) 1668 // CHECK: br label %[[DIST_INC:.+]] 1669 1670 // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch 1671 // CHECK: [[DIST_INC]]: 1672 // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], 1673 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], 1674 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] 1675 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], 1676 // CHECK: br label %[[OMP_JUMP_BACK]] 1677 1678 // CHECK-DAG: call void @__kmpc_for_static_fini( 1679 // CHECK: ret 1680 1681 // implementation of 'parallel for' 1682 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_1]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 1683 1684 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 1685 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 1686 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 1687 1688 // initialize lb 
and ub to PrevLB and PrevUB 1689 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 1690 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 1691 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 1692 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 1693 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 1694 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 1695 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 1696 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 1697 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 1698 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 1699 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 1700 1701 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used 1702 // In this case we use EUB 1703 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 1704 // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}}, 1705 // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] 1706 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 1707 // CHECK: [[PF_EUB_TRUE]]: 1708 // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}}, 1709 // CHECK: br label %[[PF_EUB_END:.+]] 1710 // CHECK-DAG: [[PF_EUB_FALSE]]: 1711 // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], 1712 // CHECK: br label %[[PF_EUB_END]] 1713 // CHECK-DAG: [[PF_EUB_END]]: 1714 // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] 1715 // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], 1716 1717 // initialize omp.iv 1718 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 1719 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 1720 // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]] 1721 1722 // check exit condition 1723 // CHECK: [[OMP_PF_JUMP_BACK]]: 1724 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], 1725 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], 1726 // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 1727 // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] 1728 1729 // check that PrevLB and PrevUB are passed to the 'for' 1730 // CHECK: [[PF_BODY]]: 1731 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1732 // CHECK: br label {{.+}} 1733 1734 // check stride 1 for 'for' in 'distribute parallel for simd' 1735 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], 1736 // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 1737 // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], 1738 // CHECK: br label %[[OMP_PF_JUMP_BACK]] 1739 1740 // CHECK-DAG: call void @__kmpc_for_static_fini( 1741 // CHECK: ret 1742 1743 // CHECK: define{{.+}} void [[OFFLOADING_FUN_2]]( 1744 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_2:@.+]] to {{.+}}) 1745 1746 // CHECK: define{{.+}} void [[OMP_OUTLINED_2]]( 1747 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca 1748 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca 1749 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca 1750 // CHECK-DAG: 
[[OMP_ST:%.omp.stride]] = alloca 1751 1752 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 1753 1754 // check EUB for distribute 1755 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], 1756 // CHECK: [[NUM_IT_1:%.+]] = load{{.+}}, 1757 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] 1758 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] 1759 // CHECK-DAG: [[EUB_TRUE]]: 1760 // CHECK: [[NUM_IT_2:%.+]] = load{{.+}}, 1761 // CHECK: br label %[[EUB_END:.+]] 1762 // CHECK-DAG: [[EUB_FALSE]]: 1763 // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], 1764 // CHECK: br label %[[EUB_END]] 1765 // CHECK-DAG: [[EUB_END]]: 1766 // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] 1767 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], 1768 1769 // initialize omp.iv 1770 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], 1771 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], 1772 // CHECK: br label %[[OMP_JUMP_BACK:.+]] 1773 1774 // check exit condition 1775 // CHECK: [[OMP_JUMP_BACK]]: 1776 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], 1777 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], 1778 // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] 1779 // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]] 1780 1781 // check that PrevLB and PrevUB are passed to the 'for' 1782 // CHECK: [[DIST_BODY]]: 1783 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], 1784 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} 1785 // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], 1786 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} 1787 // check that distlb and distub are properly passed to fork_call 1788 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) 1789 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) 1790 // CHECK: br label %[[DIST_INC:.+]] 1791 1792 // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch 1793 // CHECK: [[DIST_INC]]: 1794 // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], 1795 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], 1796 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] 1797 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], 1798 // CHECK: br label %[[OMP_JUMP_BACK]] 1799 1800 // CHECK-DAG: call void @__kmpc_for_static_fini( 1801 // CHECK: ret 1802 1803 // implementation of 'parallel for' 1804 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_2]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 1805 1806 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 1807 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 1808 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 1809 1810 // initialize lb and ub to PrevLB and PrevUB 1811 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 1812 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 1813 // CHECK-DAG: 
[[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 1814 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 1815 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 1816 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 1817 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 1818 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 1819 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 1820 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 1821 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 1822 1823 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used 1824 // In this case we use EUB 1825 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 1826 // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}}, 1827 // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] 1828 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 1829 // CHECK: [[PF_EUB_TRUE]]: 1830 // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}}, 1831 // CHECK: br label %[[PF_EUB_END:.+]] 1832 // CHECK-DAG: [[PF_EUB_FALSE]]: 1833 // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], 1834 // CHECK: br label %[[PF_EUB_END]] 1835 // CHECK-DAG: [[PF_EUB_END]]: 1836 // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] 1837 // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], 1838 1839 // initialize omp.iv 1840 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 1841 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 1842 // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]] 1843 1844 // check exit condition 1845 // CHECK: [[OMP_PF_JUMP_BACK]]: 1846 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], 1847 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], 1848 // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 1849 // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] 1850 1851 // check that PrevLB and PrevUB are passed to the 'for' 1852 // CHECK: [[PF_BODY]]: 1853 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1854 // CHECK: br label {{.+}} 1855 1856 // check stride 1 for 'for' in 'distribute parallel for simd' 1857 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], 1858 // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 1859 // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], 1860 // CHECK: br label %[[OMP_PF_JUMP_BACK]] 1861 1862 // CHECK-DAG: call void @__kmpc_for_static_fini( 1863 // CHECK: ret 1864 1865 // CHECK: define{{.+}} void [[OFFLOADING_FUN_3]]( 1866 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}}) 1867 1868 // CHECK: define{{.+}} void [[OMP_OUTLINED_3]]( 1869 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca 1870 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca 1871 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca 1872 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca 1873 1874 // unlike the previous tests, in this one we have a outer and inner loop for 'distribute' 1875 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91, 
1876 // CHECK: br label %[[DIST_OUTER_LOOP_HEADER:.+]] 1877 1878 // CHECK: [[DIST_OUTER_LOOP_HEADER]]: 1879 // check EUB for distribute 1880 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], 1881 // CHECK: [[NUM_IT_1:%.+]] = load{{.+}}, 1882 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] 1883 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] 1884 // CHECK-DAG: [[EUB_TRUE]]: 1885 // CHECK: [[NUM_IT_2:%.+]] = load{{.+}}, 1886 // CHECK: br label %[[EUB_END:.+]] 1887 // CHECK-DAG: [[EUB_FALSE]]: 1888 // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], 1889 // CHECK: br label %[[EUB_END]] 1890 // CHECK-DAG: [[EUB_END]]: 1891 // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] 1892 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], 1893 1894 // initialize omp.iv 1895 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], 1896 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], 1897 1898 // check exit condition 1899 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], 1900 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], 1901 // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] 1902 // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]] 1903 1904 // CHECK: [[DIST_OUTER_LOOP_BODY]]: 1905 // CHECK: br label %[[DIST_INNER_LOOP_HEADER:.+]] 1906 1907 // CHECK: [[DIST_INNER_LOOP_HEADER]]: 1908 // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]], 1909 // CHECK-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]], 1910 // CHECK: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]] 1911 // CHECK: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]] 1912 1913 // check that PrevLB and PrevUB are passed to the 'for' 1914 // CHECK: [[DIST_INNER_LOOP_BODY]]: 1915 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], 1916 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} 1917 // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], 1918 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} 1919 // check that distlb and distub are properly passed to fork_call 1920 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) 1921 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) 1922 // CHECK: br label %[[DIST_INNER_LOOP_INC:.+]] 1923 1924 // check DistInc 1925 // CHECK: [[DIST_INNER_LOOP_INC]]: 1926 // CHECK-DAG: [[OMP_IV_VAL_3:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], 1927 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], 1928 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]] 1929 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], 1930 // CHECK: br label %[[DIST_INNER_LOOP_HEADER]] 1931 1932 // CHECK: [[DIST_INNER_LOOP_END]]: 1933 // CHECK: br label %[[DIST_OUTER_LOOP_INC:.+]] 1934 1935 // CHECK: [[DIST_OUTER_LOOP_INC]]: 1936 // check NextLB and NextUB 1937 // CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]], 1938 // CHECK-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], 1939 // CHECK-DAG: 
[[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]] 1940 // CHECK: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]], 1941 // CHECK-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]], 1942 // CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], 1943 // CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]] 1944 // CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]], 1945 // CHECK: br label %[[DIST_OUTER_LOOP_HEADER]] 1946 1947 // outer loop exit 1948 // CHECK: [[DIST_OUTER_LOOP_END]]: 1949 // CHECK-DAG: call void @__kmpc_for_static_fini( 1950 // CHECK: ret 1951 1952 // skip implementation of 'parallel for': using default scheduling and was tested above 1953 1954 // CHECK: define{{.+}} void [[OFFLOADING_FUN_4]]( 1955 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_4:@.+]] to {{.+}}) 1956 1957 // CHECK: define{{.+}} void [[OMP_OUTLINED_4]]( 1958 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca 1959 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca 1960 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca 1961 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca 1962 1963 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 1964 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_4:@.+]] to {{.+}}, 1965 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 1966 // CHECK: ret 1967 1968 // 'parallel for' implementation is the same as the case without schedule clase (static no chunk is the default) 1969 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_4]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 1970 1971 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 1972 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 1973 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 1974 1975 // initialize lb and ub to PrevLB and PrevUB 1976 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 1977 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 1978 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 1979 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 1980 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 1981 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 1982 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 1983 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 1984 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 1985 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 1986 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 1987 1988 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used 1989 // In this case we use EUB 1990 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 1991 // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}}, 1992 // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] 1993 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 1994 // CHECK: [[PF_EUB_TRUE]]: 1995 // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}}, 1996 // CHECK: br label %[[PF_EUB_END:.+]] 1997 // CHECK-DAG: 
[[PF_EUB_FALSE]]: 1998 // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], 1999 // CHECK: br label %[[PF_EUB_END]] 2000 // CHECK-DAG: [[PF_EUB_END]]: 2001 // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] 2002 // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], 2003 2004 // initialize omp.iv 2005 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 2006 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 2007 // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]] 2008 2009 // check exit condition 2010 // CHECK: [[OMP_PF_JUMP_BACK]]: 2011 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], 2012 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], 2013 // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 2014 // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] 2015 2016 // check that PrevLB and PrevUB are passed to the 'for' 2017 // CHECK: [[PF_BODY]]: 2018 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 2019 // CHECK: br label {{.+}} 2020 2021 // check stride 1 for 'for' in 'distribute parallel for simd' 2022 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], 2023 // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 2024 // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], 2025 // CHECK: br label %[[OMP_PF_JUMP_BACK]] 2026 2027 // CHECK-DAG: call void @__kmpc_for_static_fini( 2028 // CHECK: ret 2029 2030 // CHECK: define{{.+}} void [[OFFLOADING_FUN_5]]( 2031 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_5:@.+]] to {{.+}}) 2032 2033 // CHECK: define{{.+}} void [[OMP_OUTLINED_5]]( 2034 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 2035 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_5:@.+]] to {{.+}}, 2036 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 2037 // CHECK: ret 2038 2039 // 'parallel for' implementation using outer and inner loops and PrevEUB 2040 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_5]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}}) 2041 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 2042 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 2043 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 2044 // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, 2045 2046 // initialize lb and ub to PrevLB and PrevUB 2047 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 2048 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 2049 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 2050 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 2051 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 2052 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 2053 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 2054 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 2055 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 2056 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 2057 // CHECK: call void 
@__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 2058 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] 2059 2060 // check PrevEUB (using PrevUB instead of NumIt as upper bound) 2061 // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]: 2062 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 2063 // CHECK-64-DAG: [[OMP_PF_UB_VAL_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_1]] to 2064 // CHECK: [[PF_PREV_UB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 2065 // CHECK-64-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_CONV]], [[PF_PREV_UB_VAL_1]] 2066 // CHECK-32-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_PREV_UB_VAL_1]] 2067 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 2068 // CHECK: [[PF_EUB_TRUE]]: 2069 // CHECK: [[PF_PREV_UB_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 2070 // CHECK: br label %[[PF_EUB_END:.+]] 2071 // CHECK-DAG: [[PF_EUB_FALSE]]: 2072 // CHECK: [[OMP_PF_UB_VAL_2:%.+]] = load{{.+}} [[OMP_PF_UB]], 2073 // CHECK-64: [[OMP_PF_UB_VAL_2_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_2]] to 2074 // CHECK: br label %[[PF_EUB_END]] 2075 // CHECK-DAG: [[PF_EUB_END]]: 2076 // CHECK-64-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2_CONV]], %[[PF_EUB_FALSE]] ] 2077 // CHECK-32-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2]], %[[PF_EUB_FALSE]] ] 2078 // CHECK-64-DAG: [[PF_EUB_RES_CONV:%.+]] = trunc{{.+}} [[PF_EUB_RES]] to 2079 // CHECK-64: store{{.+}} [[PF_EUB_RES_CONV]],{{.+}} [[OMP_PF_UB]], 2080 // CHECK-32: store{{.+}} [[PF_EUB_RES]], {{.+}} [[OMP_PF_UB]], 2081 2082 // initialize omp.iv (IV = LB) 2083 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 2084 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 2085 2086 // outer loop: while (IV < UB) { 2087 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 2088 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 2089 // CHECK: [[PF_CMP_IV_UB_1:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 2090 // CHECK: br{{.+}} [[PF_CMP_IV_UB_1]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] 2091 2092 // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]: 2093 // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER:.+]] 2094 2095 // CHECK: [[OMP_PF_INNER_FOR_HEADER]]: 2096 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 2097 // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 2098 // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] 2099 // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] 2100 2101 // CHECK: [[OMP_PF_INNER_LOOP_BODY]]: 2102 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 2103 // skip body branch 2104 // CHECK: br{{.+}} 2105 // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]] 2106 2107 // IV = IV + 1 and inner loop latch 2108 // CHECK: [[OMP_PF_INNER_LOOP_INC]]: 2109 // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], 2110 // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 2111 // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], 2112 // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER]] 2113 2114 // check NextLB and NextUB 2115 // CHECK: [[OMP_PF_INNER_LOOP_END]]: 2116 // CHECK: br 
label %[[OMP_PF_OUTER_LOOP_INC:.+]] 2117 2118 // CHECK: [[OMP_PF_OUTER_LOOP_INC]]: 2119 // CHECK-DAG: [[OMP_PF_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 2120 // CHECK-DAG: [[OMP_PF_ST_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], 2121 // CHECK-DAG: [[OMP_PF_LB_NEXT:%.+]] = add{{.+}} [[OMP_PF_LB_VAL_2]], [[OMP_PF_ST_VAL_1]] 2122 // CHECK: store{{.+}} [[OMP_PF_LB_NEXT]], {{.+}}* [[OMP_PF_LB]], 2123 // CHECK-DAG: [[OMP_PF_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], 2124 // CHECK-DAG: [[OMP_PF_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], 2125 // CHECK-DAG: [[OMP_PF_UB_NEXT:%.+]] = add{{.+}} [[OMP_PF_UB_VAL_5]], [[OMP_PF_ST_VAL_2]] 2126 // CHECK: store{{.+}} [[OMP_PF_UB_NEXT]], {{.+}}* [[OMP_PF_UB]], 2127 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]] 2128 2129 // CHECK: [[OMP_PF_OUTER_LOOP_END]]: 2130 // CHECK-DAG: call void @__kmpc_for_static_fini( 2131 // CHECK: ret 2132 2133 // CHECK: define{{.+}} void [[OFFLOADING_FUN_6]]( 2134 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_6:@.+]] to {{.+}}) 2135 2136 // CHECK: define{{.+}} void [[OMP_OUTLINED_6]]( 2137 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 2138 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_6:@.+]] to {{.+}}, 2139 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 2140 // CHECK: ret 2141 2142 // 'parallel for' implementation using outer and inner loops and PrevEUB 2143 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_6]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 2144 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 2145 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 2146 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 2147 // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, 2148 2149 // initialize lb and ub to PrevLB and PrevUB 2150 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 2151 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 2152 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 2153 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 2154 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 2155 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 2156 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 2157 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 2158 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 2159 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 2160 // CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 2161 // CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], 2162 // CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}}) 2163 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] 2164 2165 // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]: 2166 // CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]]) 2167 // CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0 2168 // CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] 

// initialize omp.iv (IV = LB)
// CHECK: [[OMP_PF_OUTER_LOOP_BODY]]:
// CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
// CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
// CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]]

// CHECK: [[OMP_PF_INNER_LOOP_HEADER]]:
// CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
// CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
// CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
// CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]

// CHECK: [[OMP_PF_INNER_LOOP_BODY]]:
// CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
// skip body branch
// CHECK: br{{.+}}
// CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]]

// IV = IV + 1 and inner loop latch
// CHECK: [[OMP_PF_INNER_LOOP_INC]]:
// CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
// CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
// CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_PF_IV]],
// CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER]]

// check NextLB and NextUB
// CHECK: [[OMP_PF_INNER_LOOP_END]]:
// CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]

// CHECK: [[OMP_PF_OUTER_LOOP_INC]]:
// CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]]

// CHECK: [[OMP_PF_OUTER_LOOP_END]]:
// CHECK: ret
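
// A minimal sketch of the control flow the directives above describe, assuming
// the standard libomp dispatch protocol (illustrative pseudo-C in comments,
// not compiled or checked; 'body' stands for the user loop body):
//
//   __kmpc_dispatch_init_4(loc, tid, /*kmp_sch_dynamic_chunked=*/35, lb, ub, /*st=*/1, chunk);
//   while (__kmpc_dispatch_next_4(loc, tid, &last, &lb, &ub, &stride)) {  // outer loop
//     for (iv = lb; iv <= ub; ++iv)                                       // inner loop
//       body(iv);
//   }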

// CHECK: define{{.+}} void [[OFFLOADING_FUN_7]](
// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_7:@.+]] to {{.+}})

// CHECK: define{{.+}} void [[OMP_OUTLINED_7]](
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
// CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_7:@.+]] to {{.+}},
// skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
// CHECK: ret

// 'parallel for' implementation using outer and inner loops and PrevEUB
// CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_7]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}})
// CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
// CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
// CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
// CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},

// initialize lb and ub to PrevLB and PrevUB
// CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
// CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
// CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
// CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
// CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
// CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
// CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
// CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
// CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
// CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
// CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
// CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
// CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}})
// CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]

// CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]:
// CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]])
// CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0
// CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]

// initialize omp.iv (IV = LB)
// CHECK: [[OMP_PF_OUTER_LOOP_BODY]]:
// CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
// CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
// CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]]

// CHECK: [[OMP_PF_INNER_LOOP_HEADER]]:
// CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
// CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
// CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
// CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]

// CHECK: [[OMP_PF_INNER_LOOP_BODY]]:
// CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
// skip body branch
// CHECK: br{{.+}}
// CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]]

// IV = IV + 1 and inner loop latch
// CHECK: [[OMP_PF_INNER_LOOP_INC]]:
// CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
// CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
// CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_PF_IV]],
// CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER]]

// check NextLB and NextUB
// CHECK: [[OMP_PF_INNER_LOOP_END]]:
// CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]

// CHECK: [[OMP_PF_OUTER_LOOP_INC]]:
// CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]]

// CHECK: [[OMP_PF_OUTER_LOOP_END]]:
// CHECK: ret

// CHECK: !{!"llvm.loop.vectorize.enable", i1 true}
#endif
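
// The final metadata check above matches a string that is emitted as an
// operand of a loop's !llvm.loop ID node. A hedged sketch of that IR shape,
// assuming the usual loop-metadata layout (illustrative only; label names and
// metadata numbers are placeholders, not checked by this test):
//
//   br i1 %cmp, label %loop.body, label %loop.end, !llvm.loop !7
//   ...
//   !7 = distinct !{!7, !8}
//   !8 = !{!"llvm.loop.vectorize.enable", i1 true}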