1 // Test host code gen 2 // RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 3 // RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s 4 // RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64 5 // RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32 6 // RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s 7 // RUN: %clang_cc1 -DLAMBDA -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32 8 9 // RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s 10 // RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s 11 // RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s 12 // RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s 13 // RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s 14 // RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s 15 // SIMD-ONLY0-NOT: {{__kmpc|__tgt}} 16 17 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 18 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s 19 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 20 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 21 // 
RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s 22 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 23 24 // RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s 25 // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s 26 // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s 27 // RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s 28 // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s 29 // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s 30 // SIMD-ONLY1-NOT: {{__kmpc|__tgt}} 31 // expected-no-diagnostics 32 #ifndef HEADER 33 #define HEADER 34 35 36 template <typename T> 37 T tmain() { 38 T *a, *b, *c; 39 int n = 10000; 40 int ch = 100; 41 42 // no schedule clauses 43 #pragma omp target 44 #pragma omp teams 45 #pragma omp distribute parallel for 46 for (int i = 0; i < n; ++i) { 47 #pragma omp cancel for 48 a[i] = b[i] + c[i]; 49 } 50 51 // dist_schedule: static no chunk 52 #pragma omp target 53 #pragma omp teams 54 #pragma omp distribute parallel for dist_schedule(static) 55 for (int i = 0; i < n; ++i) { 56 a[i] = b[i] + c[i]; 57 } 58 59 // dist_schedule: static chunk 60 #pragma omp target 61 #pragma omp teams 62 #pragma omp distribute parallel for dist_schedule(static, ch) 63 for (int i = 0; i < n; ++i) { 64 a[i] = b[i] + c[i]; 65 } 66 67 // schedule: static no chunk 68 #pragma omp target 69 #pragma omp teams 70 #pragma omp distribute parallel for schedule(static) 71 for (int i = 0; i < n; ++i) { 72 a[i] = b[i] + c[i]; 73 } 74 75 // schedule: static chunk 76 #pragma omp target 77 #pragma omp teams 78 #pragma omp distribute parallel for schedule(static, ch) 79 for (int i = 0; i < n; ++i) { 80 a[i] = b[i] + c[i]; 81 } 82 83 // schedule: dynamic no chunk 84 #pragma omp target 85 #pragma omp teams 86 #pragma omp distribute parallel for schedule(dynamic) 87 for (int i = 0; i < n; ++i) { 88 a[i] = b[i] + c[i]; 89 } 90 91 // schedule: dynamic chunk 92 #pragma omp target 93 #pragma omp teams 94 #pragma omp distribute parallel for schedule(dynamic, ch) 95 for (int i = 0; i < n; ++i) { 96 a[i] = b[i] + c[i]; 97 } 98 99 return T(); 100 } 101 102 int main() { 103 double *a, *b, *c; 104 int n = 10000; 105 int ch = 100; 106 107 #ifdef LAMBDA 108 // LAMBDA-LABEL: @main 109 // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]]( 110 [&]() { 111 // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( 112 113 // LAMBDA: call i{{[0-9]+}} 
@__tgt_target_teams( 114 // LAMBDA: call void [[OFFLOADING_FUN_1:@.+]]( 115 116 // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( 117 // LAMBDA: call void [[OFFLOADING_FUN_2:@.+]]( 118 119 // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( 120 // LAMBDA: call void [[OFFLOADING_FUN_3:@.+]]( 121 122 // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( 123 // LAMBDA: call void [[OFFLOADING_FUN_4:@.+]]( 124 125 // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( 126 // LAMBDA: call void [[OFFLOADING_FUN_5:@.+]]( 127 128 // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( 129 // LAMBDA: call void [[OFFLOADING_FUN_6:@.+]]( 130 131 // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams( 132 // LAMBDA: call void [[OFFLOADING_FUN_7:@.+]]( 133 134 // no schedule clauses 135 #pragma omp target 136 #pragma omp teams 137 // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_1]]( 138 // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}}) 139 140 #pragma omp distribute parallel for 141 for (int i = 0; i < n; ++i) { 142 a[i] = b[i] + c[i]; 143 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_1]]( 144 // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca 145 // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca 146 // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca 147 // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca 148 149 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 150 151 // check EUB for distribute 152 // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], 153 // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}}, 154 // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] 155 // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] 156 // LAMBDA-DAG: [[EUB_TRUE]]: 157 // LAMBDA: [[NUM_IT_2:%.+]] = load{{.+}}, 158 // LAMBDA: br label %[[EUB_END:.+]] 159 // LAMBDA-DAG: [[EUB_FALSE]]: 160 // LAMBDA: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], 161 // LAMBDA: br label %[[EUB_END]] 162 // LAMBDA-DAG: [[EUB_END]]: 163 // LAMBDA-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] 164 // LAMBDA: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], 165 166 // initialize omp.iv 167 // LAMBDA: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], 168 // LAMBDA: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], 169 // LAMBDA: br label %[[OMP_JUMP_BACK:.+]] 170 171 // check exit condition 172 // LAMBDA: [[OMP_JUMP_BACK]]: 173 // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], 174 // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], 175 // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] 176 // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]] 177 178 // check that PrevLB and PrevUB are passed to the 'for' 179 // LAMBDA: [[DIST_BODY]]: 180 // LAMBDA-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], 181 // LAMBDA-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to 182 // LAMBDA-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], 183 // LAMBDA-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to 184 // check that distlb and distub are properly passed to fork_call 185 // LAMBDA-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) 186 // LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} 
[[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) 187 // LAMBDA: br label %[[DIST_INC:.+]] 188 189 // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch 190 // LAMBDA: [[DIST_INC]]: 191 // LAMBDA-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], 192 // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], 193 // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] 194 // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], 195 // LAMBDA: br label %[[OMP_JUMP_BACK]] 196 197 // LAMBDA-DAG: call void @__kmpc_for_static_fini( 198 // LAMBDA: ret 199 200 // implementation of 'parallel for' 201 // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_1]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 202 203 // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 204 // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 205 // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 206 207 // initialize lb and ub to PrevLB and PrevUB 208 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 209 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 210 // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 211 // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 212 // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 213 // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 214 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 215 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 216 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 217 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 218 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 219 220 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used 221 // In this case we use EUB 222 // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 223 // LAMBDA: [[PF_NUM_IT_1:%.+]] = load{{.+}}, 224 // LAMBDA-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] 225 // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 226 // LAMBDA: [[PF_EUB_TRUE]]: 227 // LAMBDA: [[PF_NUM_IT_2:%.+]] = load{{.+}}, 228 // LAMBDA: br label %[[PF_EUB_END:.+]] 229 // LAMBDA-DAG: [[PF_EUB_FALSE]]: 230 // LAMBDA: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], 231 // LAMBDA: br label %[[PF_EUB_END]] 232 // LAMBDA-DAG: [[PF_EUB_END]]: 233 // LAMBDA-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] 234 // LAMBDA: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], 235 236 // initialize omp.iv 237 // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 238 // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 239 // LAMBDA: br label %[[OMP_PF_JUMP_BACK:.+]] 240 241 // check exit condition 242 // LAMBDA: [[OMP_PF_JUMP_BACK]]: 243 // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], 244 // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], 245 // LAMBDA: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 246 // LAMBDA: br {{.+}} 
[[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] 247 248 // check that PrevLB and PrevUB are passed to the 'for' 249 // LAMBDA: [[PF_BODY]]: 250 // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 251 // LAMBDA: br label {{.+}} 252 253 // check stride 1 for 'for' in 'distribute parallel for' 254 // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], 255 // LAMBDA: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 256 // LAMBDA: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], 257 // LAMBDA: br label %[[OMP_PF_JUMP_BACK]] 258 259 // LAMBDA-DAG: call void @__kmpc_for_static_fini( 260 // LAMBDA: ret 261 262 [&]() { 263 a[i] = b[i] + c[i]; 264 }(); 265 } 266 267 // dist_schedule: static no chunk (same sa default - no dist_schedule) 268 #pragma omp target 269 #pragma omp teams 270 // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_2]]( 271 // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_2:@.+]] to {{.+}}) 272 273 #pragma omp distribute parallel for dist_schedule(static) 274 for (int i = 0; i < n; ++i) { 275 a[i] = b[i] + c[i]; 276 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_2]]( 277 // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca 278 // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca 279 // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca 280 // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca 281 282 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 283 284 // check EUB for distribute 285 // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], 286 // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}}, 287 // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] 288 // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] 289 // LAMBDA-DAG: [[EUB_TRUE]]: 290 // LAMBDA: [[NUM_IT_2:%.+]] = load{{.+}}, 291 // LAMBDA: br label %[[EUB_END:.+]] 292 // LAMBDA-DAG: [[EUB_FALSE]]: 293 // LAMBDA: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], 294 // LAMBDA: br label %[[EUB_END]] 295 // LAMBDA-DAG: [[EUB_END]]: 296 // LAMBDA-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] 297 // LAMBDA: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], 298 299 // initialize omp.iv 300 // LAMBDA: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], 301 // LAMBDA: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], 302 // LAMBDA: br label %[[OMP_JUMP_BACK:.+]] 303 304 // check exit condition 305 // LAMBDA: [[OMP_JUMP_BACK]]: 306 // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], 307 // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], 308 // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] 309 // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]] 310 311 // check that PrevLB and PrevUB are passed to the 'for' 312 // LAMBDA: [[DIST_BODY]]: 313 // LAMBDA-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], 314 // LAMBDA-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to 315 // LAMBDA-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], 316 // LAMBDA-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to 317 // check that distlb and distub are properly passed to fork_call 318 // LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) 319 // LAMBDA-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, 
{{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) 320 // LAMBDA: br label %[[DIST_INC:.+]] 321 322 // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch 323 // LAMBDA: [[DIST_INC]]: 324 // LAMBDA-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], 325 // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], 326 // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] 327 // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], 328 // LAMBDA: br label %[[OMP_JUMP_BACK]] 329 330 // LAMBDA-DAG: call void @__kmpc_for_static_fini( 331 // LAMBDA: ret 332 333 // implementation of 'parallel for' 334 // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_2]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 335 336 // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 337 // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 338 // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 339 340 // initialize lb and ub to PrevLB and PrevUB 341 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 342 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 343 // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 344 // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 345 // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 346 // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 347 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 348 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 349 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 350 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 351 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 352 353 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used 354 // In this case we use EUB 355 // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 356 // LAMBDA: [[PF_NUM_IT_1:%.+]] = load{{.+}}, 357 // LAMBDA-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] 358 // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 359 // LAMBDA: [[PF_EUB_TRUE]]: 360 // LAMBDA: [[PF_NUM_IT_2:%.+]] = load{{.+}}, 361 // LAMBDA: br label %[[PF_EUB_END:.+]] 362 // LAMBDA-DAG: [[PF_EUB_FALSE]]: 363 // LAMBDA: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], 364 // LAMBDA: br label %[[PF_EUB_END]] 365 // LAMBDA-DAG: [[PF_EUB_END]]: 366 // LAMBDA-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] 367 // LAMBDA: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], 368 369 // initialize omp.iv 370 // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 371 // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 372 // LAMBDA: br label %[[OMP_PF_JUMP_BACK:.+]] 373 374 // check exit condition 375 // LAMBDA: [[OMP_PF_JUMP_BACK]]: 376 // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], 377 // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], 378 // LAMBDA: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], 
[[OMP_PF_UB_VAL_3]] 379 // LAMBDA: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] 380 381 // check that PrevLB and PrevUB are passed to the 'for' 382 // LAMBDA: [[PF_BODY]]: 383 // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 384 // LAMBDA: br label {{.+}} 385 386 // check stride 1 for 'for' in 'distribute parallel for' 387 // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], 388 // LAMBDA: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 389 // LAMBDA: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], 390 // LAMBDA: br label %[[OMP_PF_JUMP_BACK]] 391 392 // LAMBDA-DAG: call void @__kmpc_for_static_fini( 393 // LAMBDA: ret 394 [&]() { 395 a[i] = b[i] + c[i]; 396 }(); 397 } 398 399 // dist_schedule: static chunk 400 #pragma omp target 401 #pragma omp teams 402 // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_3]]( 403 // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}}) 404 405 #pragma omp distribute parallel for dist_schedule(static, ch) 406 for (int i = 0; i < n; ++i) { 407 a[i] = b[i] + c[i]; 408 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_3]]( 409 // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca 410 // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca 411 // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca 412 // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca 413 414 // unlike the previous tests, in this one we have a outer and inner loop for 'distribute' 415 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91, 416 // LAMBDA: br label %[[DIST_OUTER_LOOP_HEADER:.+]] 417 418 // LAMBDA: [[DIST_OUTER_LOOP_HEADER]]: 419 // check EUB for distribute 420 // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], 421 // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}}, 422 // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] 423 // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] 424 // LAMBDA-DAG: [[EUB_TRUE]]: 425 // LAMBDA: [[NUM_IT_2:%.+]] = load{{.+}}, 426 // LAMBDA: br label %[[EUB_END:.+]] 427 // LAMBDA-DAG: [[EUB_FALSE]]: 428 // LAMBDA: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], 429 // LAMBDA: br label %[[EUB_END]] 430 // LAMBDA-DAG: [[EUB_END]]: 431 // LAMBDA-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] 432 // LAMBDA: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], 433 434 // initialize omp.iv 435 // LAMBDA: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], 436 // LAMBDA: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], 437 438 // check exit condition 439 // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], 440 // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], 441 // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] 442 // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]] 443 444 // LAMBDA: [[DIST_OUTER_LOOP_BODY]]: 445 // LAMBDA: br label %[[DIST_INNER_LOOP_HEADER:.+]] 446 447 // LAMBDA: [[DIST_INNER_LOOP_HEADER]]: 448 // LAMBDA-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]], 449 // LAMBDA-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]], 450 // LAMBDA: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]] 451 // LAMBDA: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]] 452 453 // check that PrevLB and PrevUB are passed to the 'for' 454 // LAMBDA: 
[[DIST_INNER_LOOP_BODY]]: 455 // LAMBDA-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], 456 // LAMBDA-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} 457 // LAMBDA-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], 458 // LAMBDA-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} 459 // check that distlb and distub are properly passed to fork_call 460 // LAMBDA-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) 461 // LAMBDA-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) 462 // LAMBDA: br label %[[DIST_INNER_LOOP_INC:.+]] 463 464 // check DistInc 465 // LAMBDA: [[DIST_INNER_LOOP_INC]]: 466 // LAMBDA-DAG: [[OMP_IV_VAL_3:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], 467 // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], 468 // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]] 469 // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], 470 // LAMBDA: br label %[[DIST_INNER_LOOP_HEADER]] 471 472 // LAMBDA: [[DIST_INNER_LOOP_END]]: 473 // LAMBDA: br label %[[DIST_OUTER_LOOP_INC:.+]] 474 475 // LAMBDA: [[DIST_OUTER_LOOP_INC]]: 476 // check NextLB and NextUB 477 // LAMBDA-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]], 478 // LAMBDA-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], 479 // LAMBDA-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]] 480 // LAMBDA: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]], 481 // LAMBDA-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]], 482 // LAMBDA-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], 483 // LAMBDA-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]] 484 // LAMBDA: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]], 485 // LAMBDA: br label %[[DIST_OUTER_LOOP_HEADER]] 486 487 // outer loop exit 488 // LAMBDA: [[DIST_OUTER_LOOP_END]]: 489 // LAMBDA-DAG: call void @__kmpc_for_static_fini( 490 // LAMBDA: ret 491 492 // skip implementation of 'parallel for': using default scheduling and was tested above 493 [&]() { 494 a[i] = b[i] + c[i]; 495 }(); 496 } 497 498 // schedule: static no chunk 499 #pragma omp target 500 #pragma omp teams 501 // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_4]]( 502 // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_4:@.+]] to {{.+}}) 503 504 #pragma omp distribute parallel for schedule(static) 505 for (int i = 0; i < n; ++i) { 506 a[i] = b[i] + c[i]; 507 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_4]]( 508 // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca 509 // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca 510 // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca 511 // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca 512 513 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 514 // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_4:@.+]] to {{.+}}, 515 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 516 // LAMBDA: ret 517 518 // 'parallel for' implementation is the same as the case without schedule clase (static no chunk is the default) 519 // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_4]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, 
{{.+}}, {{.+}}, {{.+}}) 520 521 // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 522 // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 523 // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 524 525 // initialize lb and ub to PrevLB and PrevUB 526 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 527 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 528 // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 529 // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 530 // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 531 // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 532 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 533 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 534 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 535 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 536 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 537 538 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used 539 // In this case we use EUB 540 // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 541 // LAMBDA: [[PF_NUM_IT_1:%.+]] = load{{.+}}, 542 // LAMBDA-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] 543 // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 544 // LAMBDA: [[PF_EUB_TRUE]]: 545 // LAMBDA: [[PF_NUM_IT_2:%.+]] = load{{.+}}, 546 // LAMBDA: br label %[[PF_EUB_END:.+]] 547 // LAMBDA-DAG: [[PF_EUB_FALSE]]: 548 // LAMBDA: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], 549 // LAMBDA: br label %[[PF_EUB_END]] 550 // LAMBDA-DAG: [[PF_EUB_END]]: 551 // LAMBDA-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] 552 // LAMBDA: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], 553 554 // initialize omp.iv 555 // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 556 // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 557 // LAMBDA: br label %[[OMP_PF_JUMP_BACK:.+]] 558 559 // check exit condition 560 // LAMBDA: [[OMP_PF_JUMP_BACK]]: 561 // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], 562 // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], 563 // LAMBDA: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 564 // LAMBDA: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] 565 566 // check that PrevLB and PrevUB are passed to the 'for' 567 // LAMBDA: [[PF_BODY]]: 568 // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 569 // LAMBDA: br label {{.+}} 570 571 // check stride 1 for 'for' in 'distribute parallel for' 572 // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], 573 // LAMBDA: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 574 // LAMBDA: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], 575 // LAMBDA: br label %[[OMP_PF_JUMP_BACK]] 576 577 // LAMBDA-DAG: call void @__kmpc_for_static_fini( 578 // LAMBDA: ret 579 580 [&]() { 581 a[i] = b[i] + c[i]; 582 }(); 583 } 584 585 // schedule: static chunk 586 #pragma omp target 587 #pragma omp teams 588 // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_5]]( 589 // 
LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_5:@.+]] to {{.+}}) 590 591 #pragma omp distribute parallel for schedule(static, ch) 592 for (int i = 0; i < n; ++i) { 593 a[i] = b[i] + c[i]; 594 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_5]]( 595 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 596 // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_5:@.+]] to {{.+}}, 597 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 598 // LAMBDA: ret 599 600 // 'parallel for' implementation using outer and inner loops and PrevEUB 601 // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_5]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}}) 602 // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 603 // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 604 // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 605 // LAMBDA-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, 606 607 // initialize lb and ub to PrevLB and PrevUB 608 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 609 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 610 // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 611 // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 612 // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 613 // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 614 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 615 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 616 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 617 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 618 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 619 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] 620 621 // check PrevEUB (using PrevUB instead of NumIt as upper bound) 622 // LAMBDA: [[OMP_PF_OUTER_LOOP_HEADER]]: 623 // LAMBDA-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 624 // LAMBDA-64-DAG: [[OMP_PF_UB_VAL_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_1]] to 625 // LAMBDA: [[PF_PREV_UB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 626 // LAMBDA-64-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_CONV]], [[PF_PREV_UB_VAL_1]] 627 // LAMBDA-32-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_PREV_UB_VAL_1]] 628 // LAMBDA: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 629 // LAMBDA: [[PF_EUB_TRUE]]: 630 // LAMBDA: [[PF_PREV_UB_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 631 // LAMBDA: br label %[[PF_EUB_END:.+]] 632 // LAMBDA-DAG: [[PF_EUB_FALSE]]: 633 // LAMBDA: [[OMP_PF_UB_VAL_2:%.+]] = load{{.+}} [[OMP_PF_UB]], 634 // LAMBDA-64: [[OMP_PF_UB_VAL_2_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_2]] to 635 // LAMBDA: br label %[[PF_EUB_END]] 636 // LAMBDA-DAG: [[PF_EUB_END]]: 637 // LAMBDA-64-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2_CONV]], %[[PF_EUB_FALSE]] ] 638 // LAMBDA-32-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2]], %[[PF_EUB_FALSE]] ] 639 // LAMBDA-64-DAG: 
[[PF_EUB_RES_CONV:%.+]] = trunc{{.+}} [[PF_EUB_RES]] to 640 // LAMBDA-64: store{{.+}} [[PF_EUB_RES_CONV]],{{.+}} [[OMP_PF_UB]], 641 // LAMBDA-32: store{{.+}} [[PF_EUB_RES]], {{.+}} [[OMP_PF_UB]], 642 643 // initialize omp.iv (IV = LB) 644 // LAMBDA: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 645 // LAMBDA: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 646 647 // outer loop: while (IV < UB) { 648 // LAMBDA-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 649 // LAMBDA-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 650 // LAMBDA: [[PF_CMP_IV_UB_1:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 651 // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_1]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] 652 653 // LAMBDA: [[OMP_PF_OUTER_LOOP_BODY]]: 654 // LAMBDA: br label %[[OMP_PF_INNER_FOR_HEADER:.+]] 655 656 // LAMBDA: [[OMP_PF_INNER_FOR_HEADER]]: 657 // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 658 // LAMBDA-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 659 // LAMBDA: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] 660 // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] 661 662 // LAMBDA: [[OMP_PF_INNER_LOOP_BODY]]: 663 // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 664 // skip body branch 665 // LAMBDA: br{{.+}} 666 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_INC:.+]] 667 668 // IV = IV + 1 and inner loop latch 669 // LAMBDA: [[OMP_PF_INNER_LOOP_INC]]: 670 // LAMBDA-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], 671 // LAMBDA-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 672 // LAMBDA-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], 673 // LAMBDA: br label %[[OMP_PF_INNER_FOR_HEADER]] 674 675 // check NextLB and NextUB 676 // LAMBDA: [[OMP_PF_INNER_LOOP_END]]: 677 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] 678 679 // LAMBDA: [[OMP_PF_OUTER_LOOP_INC]]: 680 // LAMBDA-DAG: [[OMP_PF_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 681 // LAMBDA-DAG: [[OMP_PF_ST_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], 682 // LAMBDA-DAG: [[OMP_PF_LB_NEXT:%.+]] = add{{.+}} [[OMP_PF_LB_VAL_2]], [[OMP_PF_ST_VAL_1]] 683 // LAMBDA: store{{.+}} [[OMP_PF_LB_NEXT]], {{.+}}* [[OMP_PF_LB]], 684 // LAMBDA-DAG: [[OMP_PF_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], 685 // LAMBDA-DAG: [[OMP_PF_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], 686 // LAMBDA-DAG: [[OMP_PF_UB_NEXT:%.+]] = add{{.+}} [[OMP_PF_UB_VAL_5]], [[OMP_PF_ST_VAL_2]] 687 // LAMBDA: store{{.+}} [[OMP_PF_UB_NEXT]], {{.+}}* [[OMP_PF_UB]], 688 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER]] 689 690 // LAMBDA: [[OMP_PF_OUTER_LOOP_END]]: 691 // LAMBDA-DAG: call void @__kmpc_for_static_fini( 692 // LAMBDA: ret 693 [&]() { 694 a[i] = b[i] + c[i]; 695 }(); 696 } 697 698 // schedule: dynamic no chunk 699 #pragma omp target 700 #pragma omp teams 701 // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_6]]( 702 // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_6:@.+]] to {{.+}}) 703 704 #pragma omp distribute parallel for schedule(dynamic) 705 for (int i = 0; i < n; ++i) { 706 a[i] = b[i] + c[i]; 707 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_6]]( 708 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 709 // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_6:@.+]] to {{.+}}, 710 // 
skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 711 // LAMBDA: ret 712 713 // 'parallel for' implementation using outer and inner loops and PrevEUB 714 // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_6]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 715 // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 716 // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 717 // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 718 // LAMBDA-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, 719 720 // initialize lb and ub to PrevLB and PrevUB 721 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 722 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 723 // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 724 // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 725 // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 726 // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 727 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 728 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 729 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 730 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 731 // LAMBDA-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 732 // LAMBDA-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], 733 // LAMBDA: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}}) 734 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] 735 736 // LAMBDA: [[OMP_PF_OUTER_LOOP_HEADER]]: 737 // LAMBDA: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]]) 738 // LAMBDA: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0 739 // LAMBDA: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] 740 741 // initialize omp.iv (IV = LB) 742 // LAMBDA: [[OMP_PF_OUTER_LOOP_BODY]]: 743 // LAMBDA-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 744 // LAMBDA-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 745 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]] 746 747 // LAMBDA: [[OMP_PF_INNER_LOOP_HEADER]]: 748 // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 749 // LAMBDA-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 750 // LAMBDA: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] 751 // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] 752 753 // LAMBDA: [[OMP_PF_INNER_LOOP_BODY]]: 754 // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 755 // skip body branch 756 // LAMBDA: br{{.+}} 757 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_INC:.+]] 758 759 // IV = IV + 1 and inner loop latch 760 // LAMBDA: [[OMP_PF_INNER_LOOP_INC]]: 761 // LAMBDA-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], 762 // LAMBDA-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 763 // LAMBDA-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], 764 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER]] 765 766 // check NextLB and NextUB 767 
// LAMBDA: [[OMP_PF_INNER_LOOP_END]]: 768 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] 769 770 // LAMBDA: [[OMP_PF_OUTER_LOOP_INC]]: 771 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER]] 772 773 // LAMBDA: [[OMP_PF_OUTER_LOOP_END]]: 774 // LAMBDA: ret 775 [&]() { 776 a[i] = b[i] + c[i]; 777 }(); 778 } 779 780 // schedule: dynamic chunk 781 #pragma omp target 782 #pragma omp teams 783 // LAMBDA: define{{.+}} void [[OFFLOADING_FUN_7]]( 784 // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_7:@.+]] to {{.+}}) 785 786 #pragma omp distribute parallel for schedule(dynamic, ch) 787 for (int i = 0; i < n; ++i) { 788 a[i] = b[i] + c[i]; 789 // LAMBDA: define{{.+}} void [[OMP_OUTLINED_7]]( 790 // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 791 // LAMBDA: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_7:@.+]] to {{.+}}, 792 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 793 // LAMBDA: ret 794 795 // 'parallel for' implementation using outer and inner loops and PrevEUB 796 // LAMBDA: define{{.+}} void [[OMP_PARFOR_OUTLINED_7]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}}) 797 // LAMBDA-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 798 // LAMBDA-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 799 // LAMBDA-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 800 // LAMBDA-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, 801 802 // initialize lb and ub to PrevLB and PrevUB 803 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 804 // LAMBDA-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 805 // LAMBDA-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 806 // LAMBDA-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 807 // LAMBDA-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 808 // LAMBDA-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 809 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 810 // LAMBDA-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 811 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 812 // LAMBDA-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 813 // LAMBDA-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 814 // LAMBDA-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], 815 // LAMBDA: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}}) 816 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] 817 818 // LAMBDA: [[OMP_PF_OUTER_LOOP_HEADER]]: 819 // LAMBDA: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]]) 820 // LAMBDA: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0 821 // LAMBDA: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] 822 823 // initialize omp.iv (IV = LB) 824 // LAMBDA: [[OMP_PF_OUTER_LOOP_BODY]]: 825 // LAMBDA-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 826 // LAMBDA-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 827 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]] 828 829 // LAMBDA: [[OMP_PF_INNER_LOOP_HEADER]]: 830 // LAMBDA-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, 
{{.+}}* [[OMP_PF_IV]], 831 // LAMBDA-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 832 // LAMBDA: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] 833 // LAMBDA: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] 834 835 // LAMBDA: [[OMP_PF_INNER_LOOP_BODY]]: 836 // LAMBDA-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 837 // skip body branch 838 // LAMBDA: br{{.+}} 839 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_INC:.+]] 840 841 // IV = IV + 1 and inner loop latch 842 // LAMBDA: [[OMP_PF_INNER_LOOP_INC]]: 843 // LAMBDA-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], 844 // LAMBDA-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 845 // LAMBDA-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], 846 // LAMBDA: br label %[[OMP_PF_INNER_LOOP_HEADER]] 847 848 // check NextLB and NextUB 849 // LAMBDA: [[OMP_PF_INNER_LOOP_END]]: 850 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] 851 852 // LAMBDA: [[OMP_PF_OUTER_LOOP_INC]]: 853 // LAMBDA: br label %[[OMP_PF_OUTER_LOOP_HEADER]] 854 855 // LAMBDA: [[OMP_PF_OUTER_LOOP_END]]: 856 // LAMBDA: ret 857 [&]() { 858 a[i] = b[i] + c[i]; 859 }(); 860 } 861 }(); 862 return 0; 863 #else 864 // CHECK-LABEL: @main 865 866 // CHECK: call i{{[0-9]+}} @__tgt_target_teams( 867 // CHECK: call void [[OFFLOADING_FUN_1:@.+]]( 868 869 // CHECK: call i{{[0-9]+}} @__tgt_target_teams( 870 // CHECK: call void [[OFFLOADING_FUN_2:@.+]]( 871 872 // CHECK: call i{{[0-9]+}} @__tgt_target_teams( 873 // CHECK: call void [[OFFLOADING_FUN_3:@.+]]( 874 875 // CHECK: call i{{[0-9]+}} @__tgt_target_teams( 876 // CHECK: call void [[OFFLOADING_FUN_4:@.+]]( 877 878 // CHECK: call i{{[0-9]+}} @__tgt_target_teams( 879 // CHECK: call void [[OFFLOADING_FUN_5:@.+]]( 880 881 // CHECK: call i{{[0-9]+}} @__tgt_target_teams( 882 // CHECK: call void [[OFFLOADING_FUN_6:@.+]]( 883 884 // CHECK: call i{{[0-9]+}} @__tgt_target_teams( 885 // CHECK: call void [[OFFLOADING_FUN_7:@.+]]( 886 887 // CHECK: call{{.+}} [[TMAIN:@.+]]() 888 889 // no schedule clauses 890 #pragma omp target 891 #pragma omp teams 892 // CHECK: define internal void [[OFFLOADING_FUN_1]]( 893 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}}) 894 895 #pragma omp distribute parallel for 896 for (int i = 0; i < n; ++i) { 897 a[i] = b[i] + c[i]; 898 // CHECK: define{{.+}} void [[OMP_OUTLINED_1]]( 899 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca 900 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca 901 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca 902 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca 903 904 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 905 906 // check EUB for distribute 907 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], 908 // CHECK: [[NUM_IT_1:%.+]] = load{{.+}}, 909 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] 910 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] 911 // CHECK-DAG: [[EUB_TRUE]]: 912 // CHECK: [[NUM_IT_2:%.+]] = load{{.+}}, 913 // CHECK: br label %[[EUB_END:.+]] 914 // CHECK-DAG: [[EUB_FALSE]]: 915 // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], 916 // CHECK: br label %[[EUB_END]] 917 // CHECK-DAG: [[EUB_END]]: 918 // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] 919 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], 920 921 // 
initialize omp.iv 922 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], 923 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], 924 // CHECK: br label %[[OMP_JUMP_BACK:.+]] 925 926 // check exit condition 927 // CHECK: [[OMP_JUMP_BACK]]: 928 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], 929 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], 930 // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] 931 // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]] 932 933 // check that PrevLB and PrevUB are passed to the 'for' 934 // CHECK: [[DIST_BODY]]: 935 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], 936 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} 937 // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], 938 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} 939 // check that distlb and distub are properly passed to fork_call 940 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) 941 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) 942 // CHECK: br label %[[DIST_INC:.+]] 943 944 // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch 945 // CHECK: [[DIST_INC]]: 946 // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], 947 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], 948 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] 949 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], 950 // CHECK: br label %[[OMP_JUMP_BACK]] 951 952 // CHECK-DAG: call void @__kmpc_for_static_fini( 953 // CHECK: ret 954 955 // implementation of 'parallel for' 956 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_1]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 957 958 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 959 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 960 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 961 962 // initialize lb and ub to PrevLB and PrevUB 963 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 964 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 965 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 966 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 967 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 968 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 969 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 970 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 971 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 972 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 973 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 974 975 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used 976 // In this case we use EUB 977 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 
978 // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}}, 979 // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] 980 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 981 // CHECK: [[PF_EUB_TRUE]]: 982 // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}}, 983 // CHECK: br label %[[PF_EUB_END:.+]] 984 // CHECK-DAG: [[PF_EUB_FALSE]]: 985 // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], 986 // CHECK: br label %[[PF_EUB_END]] 987 // CHECK-DAG: [[PF_EUB_END]]: 988 // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] 989 // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], 990 991 // initialize omp.iv 992 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 993 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 994 // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]] 995 996 // check exit condition 997 // CHECK: [[OMP_PF_JUMP_BACK]]: 998 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], 999 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], 1000 // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 1001 // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] 1002 1003 // check that PrevLB and PrevUB are passed to the 'for' 1004 // CHECK: [[PF_BODY]]: 1005 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1006 // CHECK: br label {{.+}} 1007 1008 // check stride 1 for 'for' in 'distribute parallel for' 1009 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], 1010 // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 1011 // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], 1012 // CHECK: br label %[[OMP_PF_JUMP_BACK]] 1013 1014 // CHECK-DAG: call void @__kmpc_for_static_fini( 1015 // CHECK: ret 1016 } 1017 1018 // dist_schedule: static no chunk 1019 #pragma omp target 1020 #pragma omp teams 1021 // CHECK: define{{.+}} void [[OFFLOADING_FUN_2]]( 1022 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_2:@.+]] to {{.+}}) 1023 1024 #pragma omp distribute parallel for dist_schedule(static) 1025 for (int i = 0; i < n; ++i) { 1026 a[i] = b[i] + c[i]; 1027 // CHECK: define{{.+}} void [[OMP_OUTLINED_2]]( 1028 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca 1029 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca 1030 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca 1031 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca 1032 1033 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 1034 1035 // check EUB for distribute 1036 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], 1037 // CHECK: [[NUM_IT_1:%.+]] = load{{.+}}, 1038 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] 1039 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] 1040 // CHECK-DAG: [[EUB_TRUE]]: 1041 // CHECK: [[NUM_IT_2:%.+]] = load{{.+}}, 1042 // CHECK: br label %[[EUB_END:.+]] 1043 // CHECK-DAG: [[EUB_FALSE]]: 1044 // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], 1045 // CHECK: br label %[[EUB_END]] 1046 // CHECK-DAG: [[EUB_END]]: 1047 // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] 1048 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], 1049 1050 // initialize omp.iv 1051 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* 
[[OMP_LB]], 1052 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], 1053 // CHECK: br label %[[OMP_JUMP_BACK:.+]] 1054 1055 // check exit condition 1056 // CHECK: [[OMP_JUMP_BACK]]: 1057 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], 1058 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], 1059 // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] 1060 // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]] 1061 1062 // check that PrevLB and PrevUB are passed to the 'for' 1063 // CHECK: [[DIST_BODY]]: 1064 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], 1065 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} 1066 // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], 1067 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} 1068 // check that distlb and distub are properly passed to fork_call 1069 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) 1070 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) 1071 // CHECK: br label %[[DIST_INC:.+]] 1072 1073 // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch 1074 // CHECK: [[DIST_INC]]: 1075 // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], 1076 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], 1077 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] 1078 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], 1079 // CHECK: br label %[[OMP_JUMP_BACK]] 1080 1081 // CHECK-DAG: call void @__kmpc_for_static_fini( 1082 // CHECK: ret 1083 1084 // implementation of 'parallel for' 1085 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_2]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 1086 1087 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 1088 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 1089 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 1090 1091 // initialize lb and ub to PrevLB and PrevUB 1092 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 1093 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 1094 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 1095 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 1096 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 1097 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 1098 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 1099 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 1100 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 1101 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 1102 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 1103 1104 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used 1105 // In this case we use EUB 1106 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 1107 // CHECK: 
[[PF_NUM_IT_1:%.+]] = load{{.+}},
// CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
// CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
// CHECK: [[PF_EUB_TRUE]]:
// CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}},
// CHECK: br label %[[PF_EUB_END:.+]]
// CHECK-DAG: [[PF_EUB_FALSE]]:
// CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
// CHECK: br label %[[PF_EUB_END]]
// CHECK-DAG: [[PF_EUB_END]]:
// CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
// CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]],

// initialize omp.iv
// CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
// CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
// CHECK: br label %[[OMP_PF_JUMP_BACK:.+]]

// check exit condition
// CHECK: [[OMP_PF_JUMP_BACK]]:
// CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
// CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
// CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
// CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]

// check that PrevLB and PrevUB are passed to the 'for'
// CHECK: [[PF_BODY]]:
// CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
// CHECK: br label {{.+}}

// check stride 1 for 'for' in 'distribute parallel for'
// CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
// CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
// CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
// CHECK: br label %[[OMP_PF_JUMP_BACK]]

// CHECK-DAG: call void @__kmpc_for_static_fini(
// CHECK: ret
}

// dist_schedule: static chunk
#pragma omp target
#pragma omp teams
// CHECK: define{{.+}} void [[OFFLOADING_FUN_3]](
// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}})

#pragma omp distribute parallel for dist_schedule(static, ch)
for (int i = 0; i < n; ++i) {
a[i] = b[i] + c[i];
// CHECK: define{{.+}} void [[OMP_OUTLINED_3]](
// CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
// CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
// CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
// CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca

// unlike the previous tests, in this one we have an outer and an inner loop for 'distribute'
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91,
// CHECK: br label %[[DIST_OUTER_LOOP_HEADER:.+]]

// CHECK: [[DIST_OUTER_LOOP_HEADER]]:
// check EUB for distribute
// CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
// CHECK: [[NUM_IT_1:%.+]] = load{{.+}},
// CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]]
// CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
// CHECK-DAG: [[EUB_TRUE]]:
// CHECK: [[NUM_IT_2:%.+]] = load{{.+}},
// CHECK: br label %[[EUB_END:.+]]
// CHECK-DAG: [[EUB_FALSE]]:
// CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
// CHECK: br label %[[EUB_END]]
// CHECK-DAG: [[EUB_END]]:
// CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]],
%[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] 1180 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], 1181 1182 // initialize omp.iv 1183 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], 1184 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], 1185 1186 // check exit condition 1187 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], 1188 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], 1189 // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] 1190 // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]] 1191 1192 // CHECK: [[DIST_OUTER_LOOP_BODY]]: 1193 // CHECK: br label %[[DIST_INNER_LOOP_HEADER:.+]] 1194 1195 // CHECK: [[DIST_INNER_LOOP_HEADER]]: 1196 // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]], 1197 // CHECK-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]], 1198 // CHECK: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]] 1199 // CHECK: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]] 1200 1201 // check that PrevLB and PrevUB are passed to the 'for' 1202 // CHECK: [[DIST_INNER_LOOP_BODY]]: 1203 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], 1204 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} 1205 // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], 1206 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} 1207 // check that distlb and distub are properly passed to fork_call 1208 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) 1209 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) 1210 // CHECK: br label %[[DIST_INNER_LOOP_INC:.+]] 1211 1212 // check DistInc 1213 // CHECK: [[DIST_INNER_LOOP_INC]]: 1214 // CHECK-DAG: [[OMP_IV_VAL_3:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], 1215 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], 1216 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]] 1217 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], 1218 // CHECK: br label %[[DIST_INNER_LOOP_HEADER]] 1219 1220 // CHECK: [[DIST_INNER_LOOP_END]]: 1221 // CHECK: br label %[[DIST_OUTER_LOOP_INC:.+]] 1222 1223 // CHECK: [[DIST_OUTER_LOOP_INC]]: 1224 // check NextLB and NextUB 1225 // CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]], 1226 // CHECK-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], 1227 // CHECK-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]] 1228 // CHECK: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]], 1229 // CHECK-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]], 1230 // CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], 1231 // CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]] 1232 // CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]], 1233 // CHECK: br label %[[DIST_OUTER_LOOP_HEADER]] 1234 1235 // outer loop exit 1236 // CHECK: [[DIST_OUTER_LOOP_END]]: 1237 // CHECK-DAG: call void @__kmpc_for_static_fini( 1238 // CHECK: ret 1239 1240 // skip implementation of 'parallel for': using default scheduling and was tested above 1241 } 1242 1243 // schedule: static no chunk 1244 
#pragma omp target
#pragma omp teams
// CHECK: define{{.+}} void [[OFFLOADING_FUN_4]](
// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_4:@.+]] to {{.+}})

#pragma omp distribute parallel for schedule(static)
for (int i = 0; i < n; ++i) {
a[i] = b[i] + c[i];
// CHECK: define{{.+}} void [[OMP_OUTLINED_4]](
// CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
// CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
// CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
// CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca

// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
// CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_4:@.+]] to {{.+}},
// skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
// CHECK: ret

// 'parallel for' implementation is the same as the case without a schedule clause (static no chunk is the default)
// CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_4]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})

// CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
// CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
// CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},

// initialize lb and ub to PrevLB and PrevUB
// CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
// CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
// CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
// CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
// CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
// CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
// CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
// CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
// CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
// CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})

// PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
// In this case we use EUB
// CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
// CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}},
// CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
// CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
// CHECK: [[PF_EUB_TRUE]]:
// CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}},
// CHECK: br label %[[PF_EUB_END:.+]]
// CHECK-DAG: [[PF_EUB_FALSE]]:
// CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
// CHECK: br label %[[PF_EUB_END]]
// CHECK-DAG: [[PF_EUB_END]]:
// CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
// CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]],

// initialize omp.iv
// CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
// CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
// CHECK: br label
%[[OMP_PF_JUMP_BACK:.+]] 1303 1304 // check exit condition 1305 // CHECK: [[OMP_PF_JUMP_BACK]]: 1306 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], 1307 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], 1308 // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 1309 // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] 1310 1311 // check that PrevLB and PrevUB are passed to the 'for' 1312 // CHECK: [[PF_BODY]]: 1313 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1314 // CHECK: br label {{.+}} 1315 1316 // check stride 1 for 'for' in 'distribute parallel for' 1317 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], 1318 // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 1319 // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], 1320 // CHECK: br label %[[OMP_PF_JUMP_BACK]] 1321 1322 // CHECK-DAG: call void @__kmpc_for_static_fini( 1323 // CHECK: ret 1324 } 1325 1326 // schedule: static chunk 1327 #pragma omp target 1328 #pragma omp teams 1329 // CHECK: define{{.+}} void [[OFFLOADING_FUN_5]]( 1330 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_5:@.+]] to {{.+}}) 1331 1332 #pragma omp distribute parallel for schedule(static, ch) 1333 for (int i = 0; i < n; ++i) { 1334 a[i] = b[i] + c[i]; 1335 // CHECK: define{{.+}} void [[OMP_OUTLINED_5]]( 1336 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 1337 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_5:@.+]] to {{.+}}, 1338 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 1339 // CHECK: ret 1340 1341 // 'parallel for' implementation using outer and inner loops and PrevEUB 1342 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_5]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}}) 1343 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 1344 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 1345 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 1346 // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, 1347 1348 // initialize lb and ub to PrevLB and PrevUB 1349 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 1350 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 1351 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 1352 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 1353 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 1354 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 1355 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 1356 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 1357 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 1358 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 1359 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 1360 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] 1361 1362 // check PrevEUB (using PrevUB instead of NumIt as upper bound) 1363 // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]: 1364 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 1365 // 
CHECK-64-DAG: [[OMP_PF_UB_VAL_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_1]] to 1366 // CHECK: [[PF_PREV_UB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 1367 // CHECK-64-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_CONV]], [[PF_PREV_UB_VAL_1]] 1368 // CHECK-32-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_PREV_UB_VAL_1]] 1369 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 1370 // CHECK: [[PF_EUB_TRUE]]: 1371 // CHECK: [[PF_PREV_UB_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 1372 // CHECK: br label %[[PF_EUB_END:.+]] 1373 // CHECK-DAG: [[PF_EUB_FALSE]]: 1374 // CHECK: [[OMP_PF_UB_VAL_2:%.+]] = load{{.+}} [[OMP_PF_UB]], 1375 // CHECK-64: [[OMP_PF_UB_VAL_2_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_2]] to 1376 // CHECK: br label %[[PF_EUB_END]] 1377 // CHECK-DAG: [[PF_EUB_END]]: 1378 // CHECK-64-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2_CONV]], %[[PF_EUB_FALSE]] ] 1379 // CHECK-32-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2]], %[[PF_EUB_FALSE]] ] 1380 // CHECK-64-DAG: [[PF_EUB_RES_CONV:%.+]] = trunc{{.+}} [[PF_EUB_RES]] to 1381 // CHECK-64: store{{.+}} [[PF_EUB_RES_CONV]],{{.+}} [[OMP_PF_UB]], 1382 // CHECK-32: store{{.+}} [[PF_EUB_RES]], {{.+}} [[OMP_PF_UB]], 1383 1384 // initialize omp.iv (IV = LB) 1385 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 1386 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 1387 1388 // outer loop: while (IV < UB) { 1389 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1390 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 1391 // CHECK: [[PF_CMP_IV_UB_1:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 1392 // CHECK: br{{.+}} [[PF_CMP_IV_UB_1]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] 1393 1394 // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]: 1395 // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER:.+]] 1396 1397 // CHECK: [[OMP_PF_INNER_FOR_HEADER]]: 1398 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1399 // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 1400 // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] 1401 // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] 1402 1403 // CHECK: [[OMP_PF_INNER_LOOP_BODY]]: 1404 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1405 // skip body branch 1406 // CHECK: br{{.+}} 1407 // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]] 1408 1409 // IV = IV + 1 and inner loop latch 1410 // CHECK: [[OMP_PF_INNER_LOOP_INC]]: 1411 // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], 1412 // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 1413 // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], 1414 // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER]] 1415 1416 // check NextLB and NextUB 1417 // CHECK: [[OMP_PF_INNER_LOOP_END]]: 1418 // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] 1419 1420 // CHECK: [[OMP_PF_OUTER_LOOP_INC]]: 1421 // CHECK-DAG: [[OMP_PF_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 1422 // CHECK-DAG: [[OMP_PF_ST_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], 1423 // CHECK-DAG: [[OMP_PF_LB_NEXT:%.+]] = add{{.+}} [[OMP_PF_LB_VAL_2]], [[OMP_PF_ST_VAL_1]] 1424 // CHECK: store{{.+}} 
[[OMP_PF_LB_NEXT]], {{.+}}* [[OMP_PF_LB]], 1425 // CHECK-DAG: [[OMP_PF_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], 1426 // CHECK-DAG: [[OMP_PF_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], 1427 // CHECK-DAG: [[OMP_PF_UB_NEXT:%.+]] = add{{.+}} [[OMP_PF_UB_VAL_5]], [[OMP_PF_ST_VAL_2]] 1428 // CHECK: store{{.+}} [[OMP_PF_UB_NEXT]], {{.+}}* [[OMP_PF_UB]], 1429 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]] 1430 1431 // CHECK: [[OMP_PF_OUTER_LOOP_END]]: 1432 // CHECK-DAG: call void @__kmpc_for_static_fini( 1433 // CHECK: ret 1434 } 1435 1436 // schedule: dynamic no chunk 1437 #pragma omp target 1438 #pragma omp teams 1439 // CHECK: define{{.+}} void [[OFFLOADING_FUN_6]]( 1440 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_6:@.+]] to {{.+}}) 1441 1442 #pragma omp distribute parallel for schedule(dynamic) 1443 for (int i = 0; i < n; ++i) { 1444 a[i] = b[i] + c[i]; 1445 // CHECK: define{{.+}} void [[OMP_OUTLINED_6]]( 1446 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 1447 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_6:@.+]] to {{.+}}, 1448 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 1449 // CHECK: ret 1450 1451 // 'parallel for' implementation using outer and inner loops and PrevEUB 1452 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_6]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 1453 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 1454 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 1455 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 1456 // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, 1457 1458 // initialize lb and ub to PrevLB and PrevUB 1459 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 1460 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 1461 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 1462 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 1463 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 1464 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 1465 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 1466 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 1467 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 1468 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 1469 // CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 1470 // CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], 1471 // CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}}) 1472 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] 1473 1474 // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]: 1475 // CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]]) 1476 // CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0 1477 // CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] 1478 1479 // initialize omp.iv (IV = LB) 1480 // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]: 1481 // CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} 
[[OMP_PF_LB]], 1482 // CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 1483 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]] 1484 1485 // CHECK: [[OMP_PF_INNER_LOOP_HEADER]]: 1486 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1487 // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 1488 // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] 1489 // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] 1490 1491 // CHECK: [[OMP_PF_INNER_LOOP_BODY]]: 1492 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1493 // skip body branch 1494 // CHECK: br{{.+}} 1495 // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]] 1496 1497 // IV = IV + 1 and inner loop latch 1498 // CHECK: [[OMP_PF_INNER_LOOP_INC]]: 1499 // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], 1500 // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 1501 // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], 1502 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER]] 1503 1504 // check NextLB and NextUB 1505 // CHECK: [[OMP_PF_INNER_LOOP_END]]: 1506 // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] 1507 1508 // CHECK: [[OMP_PF_OUTER_LOOP_INC]]: 1509 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]] 1510 1511 // CHECK: [[OMP_PF_OUTER_LOOP_END]]: 1512 // CHECK: ret 1513 } 1514 1515 // schedule: dynamic chunk 1516 #pragma omp target 1517 #pragma omp teams 1518 // CHECK: define{{.+}} void [[OFFLOADING_FUN_7]]( 1519 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_7:@.+]] to {{.+}}) 1520 1521 #pragma omp distribute parallel for schedule(dynamic, ch) 1522 for (int i = 0; i < n; ++i) { 1523 a[i] = b[i] + c[i]; 1524 // CHECK: define{{.+}} void [[OMP_OUTLINED_7]]( 1525 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 1526 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_7:@.+]] to {{.+}}, 1527 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 1528 // CHECK: ret 1529 1530 // 'parallel for' implementation using outer and inner loops and PrevEUB 1531 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_7]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}}) 1532 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 1533 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 1534 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 1535 // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, 1536 1537 // initialize lb and ub to PrevLB and PrevUB 1538 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 1539 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 1540 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 1541 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 1542 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 1543 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 1544 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 1545 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 1546 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 1547 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], 
{{.+}}* [[OMP_PF_UB]], 1548 // CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 1549 // CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], 1550 // CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}}) 1551 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] 1552 1553 // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]: 1554 // CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]]) 1555 // CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0 1556 // CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] 1557 1558 // initialize omp.iv (IV = LB) 1559 // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]: 1560 // CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 1561 // CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 1562 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]] 1563 1564 // CHECK: [[OMP_PF_INNER_LOOP_HEADER]]: 1565 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1566 // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 1567 // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] 1568 // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] 1569 1570 // CHECK: [[OMP_PF_INNER_LOOP_BODY]]: 1571 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1572 // skip body branch 1573 // CHECK: br{{.+}} 1574 // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]] 1575 1576 // IV = IV + 1 and inner loop latch 1577 // CHECK: [[OMP_PF_INNER_LOOP_INC]]: 1578 // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], 1579 // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 1580 // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], 1581 // CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER]] 1582 1583 // check NextLB and NextUB 1584 // CHECK: [[OMP_PF_INNER_LOOP_END]]: 1585 // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] 1586 1587 // CHECK: [[OMP_PF_OUTER_LOOP_INC]]: 1588 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]] 1589 1590 // CHECK: [[OMP_PF_OUTER_LOOP_END]]: 1591 // CHECK: ret 1592 } 1593 1594 return tmain<int>(); 1595 #endif 1596 } 1597 1598 // check code 1599 // CHECK: define{{.+}} [[TMAIN]]() 1600 1601 // CHECK: call i{{[0-9]+}} @__tgt_target_teams( 1602 // CHECK: call void [[OFFLOADING_FUN_1:@.+]]( 1603 1604 // CHECK: call i{{[0-9]+}} @__tgt_target_teams( 1605 // CHECK: call void [[OFFLOADING_FUN_2:@.+]]( 1606 1607 // CHECK: call i{{[0-9]+}} @__tgt_target_teams( 1608 // CHECK: call void [[OFFLOADING_FUN_3:@.+]]( 1609 1610 // CHECK: call i{{[0-9]+}} @__tgt_target_teams( 1611 // CHECK: call void [[OFFLOADING_FUN_4:@.+]]( 1612 1613 // CHECK: call i{{[0-9]+}} @__tgt_target_teams( 1614 // CHECK: call void [[OFFLOADING_FUN_5:@.+]]( 1615 1616 // CHECK: call i{{[0-9]+}} @__tgt_target_teams( 1617 // CHECK: call void [[OFFLOADING_FUN_6:@.+]]( 1618 1619 // CHECK: call i{{[0-9]+}} @__tgt_target_teams( 1620 // CHECK: call void [[OFFLOADING_FUN_7:@.+]]( 1621 1622 // CHECK: define{{.+}} void [[OFFLOADING_FUN_1]]( 1623 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_1:@.+]] to {{.+}}) 1624 1625 // CHECK: define{{.+}} void [[OMP_OUTLINED_1]]( 1626 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca 1627 // CHECK-DAG: 
[[OMP_LB:%.omp.comb.lb]] = alloca 1628 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca 1629 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca 1630 1631 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 1632 1633 // check EUB for distribute 1634 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], 1635 // CHECK: [[NUM_IT_1:%.+]] = load{{.+}}, 1636 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] 1637 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] 1638 // CHECK-DAG: [[EUB_TRUE]]: 1639 // CHECK: [[NUM_IT_2:%.+]] = load{{.+}}, 1640 // CHECK: br label %[[EUB_END:.+]] 1641 // CHECK-DAG: [[EUB_FALSE]]: 1642 // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], 1643 // CHECK: br label %[[EUB_END]] 1644 // CHECK-DAG: [[EUB_END]]: 1645 // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] 1646 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], 1647 1648 // initialize omp.iv 1649 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], 1650 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], 1651 // CHECK: br label %[[OMP_JUMP_BACK:.+]] 1652 1653 // check exit condition 1654 // CHECK: [[OMP_JUMP_BACK]]: 1655 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], 1656 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], 1657 // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] 1658 // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]] 1659 1660 // check that PrevLB and PrevUB are passed to the 'for' 1661 // CHECK: [[DIST_BODY]]: 1662 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], 1663 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} 1664 // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], 1665 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} 1666 // check that distlb and distub are properly passed to fork_call 1667 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) 1668 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_1:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) 1669 // CHECK: br label %[[DIST_INC:.+]] 1670 1671 // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch 1672 // CHECK: [[DIST_INC]]: 1673 // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], 1674 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], 1675 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] 1676 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], 1677 // CHECK: br label %[[OMP_JUMP_BACK]] 1678 1679 // CHECK-DAG: call void @__kmpc_for_static_fini( 1680 // CHECK: ret 1681 1682 // implementation of 'parallel for' 1683 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_1]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 1684 1685 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 1686 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 1687 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 1688 1689 // initialize lb and ub to PrevLB and PrevUB 1690 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* 
[[PREV_LB_ADDR:%.+]], 1691 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 1692 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 1693 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 1694 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 1695 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 1696 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 1697 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 1698 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 1699 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 1700 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}}) 1701 1702 // PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used 1703 // In this case we use EUB 1704 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 1705 // CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}}, 1706 // CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]] 1707 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 1708 // CHECK: [[PF_EUB_TRUE]]: 1709 // CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}}, 1710 // CHECK: br label %[[PF_EUB_END:.+]] 1711 // CHECK-DAG: [[PF_EUB_FALSE]]: 1712 // CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]], 1713 // CHECK: br label %[[PF_EUB_END]] 1714 // CHECK-DAG: [[PF_EUB_END]]: 1715 // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] 1716 // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], 1717 1718 // initialize omp.iv 1719 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 1720 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 1721 // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]] 1722 1723 // check exit condition 1724 // CHECK: [[OMP_PF_JUMP_BACK]]: 1725 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], 1726 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], 1727 // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 1728 // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] 1729 1730 // check that PrevLB and PrevUB are passed to the 'for' 1731 // CHECK: [[PF_BODY]]: 1732 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 1733 // CHECK: br label {{.+}} 1734 1735 // check stride 1 for 'for' in 'distribute parallel for' 1736 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], 1737 // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 1738 // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], 1739 // CHECK: br label %[[OMP_PF_JUMP_BACK]] 1740 1741 // CHECK-DAG: call void @__kmpc_for_static_fini( 1742 // CHECK: ret 1743 1744 // CHECK: define{{.+}} void [[OFFLOADING_FUN_2]]( 1745 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_2:@.+]] to {{.+}}) 1746 1747 // CHECK: define{{.+}} void [[OMP_OUTLINED_2]]( 1748 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca 1749 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca 1750 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca 1751 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca 1752 1753 // CHECK: call void 
@__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 1754 1755 // check EUB for distribute 1756 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], 1757 // CHECK: [[NUM_IT_1:%.+]] = load{{.+}}, 1758 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] 1759 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] 1760 // CHECK-DAG: [[EUB_TRUE]]: 1761 // CHECK: [[NUM_IT_2:%.+]] = load{{.+}}, 1762 // CHECK: br label %[[EUB_END:.+]] 1763 // CHECK-DAG: [[EUB_FALSE]]: 1764 // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], 1765 // CHECK: br label %[[EUB_END]] 1766 // CHECK-DAG: [[EUB_END]]: 1767 // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] 1768 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], 1769 1770 // initialize omp.iv 1771 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], 1772 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], 1773 // CHECK: br label %[[OMP_JUMP_BACK:.+]] 1774 1775 // check exit condition 1776 // CHECK: [[OMP_JUMP_BACK]]: 1777 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], 1778 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], 1779 // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] 1780 // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_BODY:.+]], label %[[DIST_END:.+]] 1781 1782 // check that PrevLB and PrevUB are passed to the 'for' 1783 // CHECK: [[DIST_BODY]]: 1784 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], 1785 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} 1786 // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], 1787 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} 1788 // check that distlb and distub are properly passed to fork_call 1789 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) 1790 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_2:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) 1791 // CHECK: br label %[[DIST_INC:.+]] 1792 1793 // increment by stride (distInc - 'parallel for' executes the whole chunk) and latch 1794 // CHECK: [[DIST_INC]]: 1795 // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], 1796 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], 1797 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_2]], [[OMP_ST_VAL_1]] 1798 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], 1799 // CHECK: br label %[[OMP_JUMP_BACK]] 1800 1801 // CHECK-DAG: call void @__kmpc_for_static_fini( 1802 // CHECK: ret 1803 1804 // implementation of 'parallel for' 1805 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_2]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 1806 1807 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 1808 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 1809 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 1810 1811 // initialize lb and ub to PrevLB and PrevUB 1812 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 1813 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 1814 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 
// CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
// CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
// CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
// CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
// CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
// CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
// CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})

// PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
// In this case we use EUB
// CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
// CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}},
// CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
// CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
// CHECK: [[PF_EUB_TRUE]]:
// CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}},
// CHECK: br label %[[PF_EUB_END:.+]]
// CHECK-DAG: [[PF_EUB_FALSE]]:
// CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}} [[OMP_PF_UB]],
// CHECK: br label %[[PF_EUB_END]]
// CHECK-DAG: [[PF_EUB_END]]:
// CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ]
// CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]],

// initialize omp.iv
// CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
// CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
// CHECK: br label %[[OMP_PF_JUMP_BACK:.+]]

// check exit condition
// CHECK: [[OMP_PF_JUMP_BACK]]:
// CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]],
// CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]],
// CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]]
// CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]]

// check that PrevLB and PrevUB are passed to the 'for'
// CHECK: [[PF_BODY]]:
// CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
// CHECK: br label {{.+}}

// check stride 1 for 'for' in 'distribute parallel for'
// CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]],
// CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1
// CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]],
// CHECK: br label %[[OMP_PF_JUMP_BACK]]

// CHECK-DAG: call void @__kmpc_for_static_fini(
// CHECK: ret

// CHECK: define{{.+}} void [[OFFLOADING_FUN_3]](
// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}})

// CHECK: define{{.+}} void [[OMP_OUTLINED_3]](
// CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
// CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
// CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
// CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca

// unlike the previous tests, in this one we have an outer and an inner loop for 'distribute'
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91,
// CHECK: br label %[[DIST_OUTER_LOOP_HEADER:.+]]

//
CHECK: [[DIST_OUTER_LOOP_HEADER]]: 1880 // check EUB for distribute 1881 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]], 1882 // CHECK: [[NUM_IT_1:%.+]] = load{{.+}}, 1883 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]] 1884 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]] 1885 // CHECK-DAG: [[EUB_TRUE]]: 1886 // CHECK: [[NUM_IT_2:%.+]] = load{{.+}}, 1887 // CHECK: br label %[[EUB_END:.+]] 1888 // CHECK-DAG: [[EUB_FALSE]]: 1889 // CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]], 1890 // CHECK: br label %[[EUB_END]] 1891 // CHECK-DAG: [[EUB_END]]: 1892 // CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ [[NUM_IT_2]], %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ] 1893 // CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]], 1894 1895 // initialize omp.iv 1896 // CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]], 1897 // CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]], 1898 1899 // check exit condition 1900 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]], 1901 // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]], 1902 // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]] 1903 // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]] 1904 1905 // CHECK: [[DIST_OUTER_LOOP_BODY]]: 1906 // CHECK: br label %[[DIST_INNER_LOOP_HEADER:.+]] 1907 1908 // CHECK: [[DIST_INNER_LOOP_HEADER]]: 1909 // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]], 1910 // CHECK-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]], 1911 // CHECK: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]] 1912 // CHECK: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]] 1913 1914 // check that PrevLB and PrevUB are passed to the 'for' 1915 // CHECK: [[DIST_INNER_LOOP_BODY]]: 1916 // CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]], 1917 // CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}} 1918 // CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]], 1919 // CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}} 1920 // check that distlb and distub are properly passed to fork_call 1921 // CHECK-64: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_EXT]], i{{[0-9]+}} [[OMP_PREV_UB_EXT]], {{.+}}) 1922 // CHECK-32: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_3:@.+]] to {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB]], i{{[0-9]+}} [[OMP_PREV_UB]], {{.+}}) 1923 // CHECK: br label %[[DIST_INNER_LOOP_INC:.+]] 1924 1925 // check DistInc 1926 // CHECK: [[DIST_INNER_LOOP_INC]]: 1927 // CHECK-DAG: [[OMP_IV_VAL_3:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]], 1928 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]], 1929 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]] 1930 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]], 1931 // CHECK: br label %[[DIST_INNER_LOOP_HEADER]] 1932 1933 // CHECK: [[DIST_INNER_LOOP_END]]: 1934 // CHECK: br label %[[DIST_OUTER_LOOP_INC:.+]] 1935 1936 // CHECK: [[DIST_OUTER_LOOP_INC]]: 1937 // check NextLB and NextUB 1938 // CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]], 1939 // CHECK-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]], 1940 // CHECK-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]] 
// CHECK: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]],
// CHECK-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
// CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
// CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]]
// CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]],
// CHECK: br label %[[DIST_OUTER_LOOP_HEADER]]

// outer loop exit
// CHECK: [[DIST_OUTER_LOOP_END]]:
// CHECK-DAG: call void @__kmpc_for_static_fini(
// CHECK: ret

// skip implementation of 'parallel for': using default scheduling and was tested above

// CHECK: define{{.+}} void [[OFFLOADING_FUN_4]](
// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_4:@.+]] to {{.+}})

// CHECK: define{{.+}} void [[OMP_OUTLINED_4]](
// CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
// CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
// CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
// CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca

// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
// CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_4:@.+]] to {{.+}},
// skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case
// CHECK: ret

// 'parallel for' implementation is the same as the case without a schedule clause (static no chunk is the default)
// CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_4]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}})

// CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
// CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
// CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},

// initialize lb and ub to PrevLB and PrevUB
// CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
// CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
// CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
// CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
// CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
// CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
// CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
// CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
// CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
// CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]],{{.+}})

// PrevEUB is only used when 'for' has a chunked schedule, otherwise EUB is used
// In this case we use EUB
// CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]],
// CHECK: [[PF_NUM_IT_1:%.+]] = load{{.+}},
// CHECK-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_NUM_IT_1]]
// CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]]
// CHECK: [[PF_EUB_TRUE]]:
// CHECK: [[PF_NUM_IT_2:%.+]] = load{{.+}},
// CHECK: br label %[[PF_EUB_END:.+]]
// CHECK-DAG: [[PF_EUB_FALSE]]:
// CHECK: [[OMP_PF_UB_VAL2:%.+]] = load{{.+}}
[[OMP_PF_UB]], 2000 // CHECK: br label %[[PF_EUB_END]] 2001 // CHECK-DAG: [[PF_EUB_END]]: 2002 // CHECK-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_NUM_IT_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL2]], %[[PF_EUB_FALSE]] ] 2003 // CHECK: store{{.+}} [[PF_EUB_RES]],{{.+}} [[OMP_PF_UB]], 2004 2005 // initialize omp.iv 2006 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 2007 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 2008 // CHECK: br label %[[OMP_PF_JUMP_BACK:.+]] 2009 2010 // check exit condition 2011 // CHECK: [[OMP_PF_JUMP_BACK]]: 2012 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load {{.+}} [[OMP_PF_IV]], 2013 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load {{.+}} [[OMP_PF_UB]], 2014 // CHECK: [[PF_CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 2015 // CHECK: br {{.+}} [[PF_CMP_IV_UB]], label %[[PF_BODY:.+]], label %[[PF_END:.+]] 2016 2017 // check that PrevLB and PrevUB are passed to the 'for' 2018 // CHECK: [[PF_BODY]]: 2019 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 2020 // CHECK: br label {{.+}} 2021 2022 // check stride 1 for 'for' in 'distribute parallel for' 2023 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load {{.+}}, {{.+}}* [[OMP_PF_IV]], 2024 // CHECK: [[OMP_PF_IV_INC:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_2]], 1 2025 // CHECK: store{{.+}} [[OMP_PF_IV_INC]], {{.+}}* [[OMP_PF_IV]], 2026 // CHECK: br label %[[OMP_PF_JUMP_BACK]] 2027 2028 // CHECK-DAG: call void @__kmpc_for_static_fini( 2029 // CHECK: ret 2030 2031 // CHECK: define{{.+}} void [[OFFLOADING_FUN_5]]( 2032 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_5:@.+]] to {{.+}}) 2033 2034 // CHECK: define{{.+}} void [[OMP_OUTLINED_5]]( 2035 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 2036 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_5:@.+]] to {{.+}}, 2037 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 2038 // CHECK: ret 2039 2040 // 'parallel for' implementation using outer and inner loops and PrevEUB 2041 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_5]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}}) 2042 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 2043 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 2044 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 2045 // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, 2046 2047 // initialize lb and ub to PrevLB and PrevUB 2048 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 2049 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 2050 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 2051 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 2052 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 2053 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 2054 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 2055 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 2056 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 2057 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 2058 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* 
[[OMP_PF_UB]],{{.+}}) 2059 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] 2060 2061 // check PrevEUB (using PrevUB instead of NumIt as upper bound) 2062 // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]: 2063 // CHECK-DAG: [[OMP_PF_UB_VAL_1:%.+]] = load{{.+}} [[OMP_PF_UB]], 2064 // CHECK-64-DAG: [[OMP_PF_UB_VAL_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_1]] to 2065 // CHECK: [[PF_PREV_UB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 2066 // CHECK-64-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_CONV]], [[PF_PREV_UB_VAL_1]] 2067 // CHECK-32-DAG: [[PF_CMP_UB_NUM_IT:%.+]] = icmp{{.+}} [[OMP_PF_UB_VAL_1]], [[PF_PREV_UB_VAL_1]] 2068 // CHECK: br i1 [[PF_CMP_UB_NUM_IT]], label %[[PF_EUB_TRUE:.+]], label %[[PF_EUB_FALSE:.+]] 2069 // CHECK: [[PF_EUB_TRUE]]: 2070 // CHECK: [[PF_PREV_UB_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 2071 // CHECK: br label %[[PF_EUB_END:.+]] 2072 // CHECK-DAG: [[PF_EUB_FALSE]]: 2073 // CHECK: [[OMP_PF_UB_VAL_2:%.+]] = load{{.+}} [[OMP_PF_UB]], 2074 // CHECK-64: [[OMP_PF_UB_VAL_2_CONV:%.+]] = sext{{.+}} [[OMP_PF_UB_VAL_2]] to 2075 // CHECK: br label %[[PF_EUB_END]] 2076 // CHECK-DAG: [[PF_EUB_END]]: 2077 // CHECK-64-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2_CONV]], %[[PF_EUB_FALSE]] ] 2078 // CHECK-32-DAG: [[PF_EUB_RES:%.+]] = phi{{.+}} [ [[PF_PREV_UB_VAL_2]], %[[PF_EUB_TRUE]] ], [ [[OMP_PF_UB_VAL_2]], %[[PF_EUB_FALSE]] ] 2079 // CHECK-64-DAG: [[PF_EUB_RES_CONV:%.+]] = trunc{{.+}} [[PF_EUB_RES]] to 2080 // CHECK-64: store{{.+}} [[PF_EUB_RES_CONV]],{{.+}} [[OMP_PF_UB]], 2081 // CHECK-32: store{{.+}} [[PF_EUB_RES]], {{.+}} [[OMP_PF_UB]], 2082 2083 // initialize omp.iv (IV = LB) 2084 // CHECK: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 2085 // CHECK: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]], 2086 2087 // outer loop: while (IV < UB) { 2088 // CHECK-DAG: [[OMP_PF_IV_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 2089 // CHECK-DAG: [[OMP_PF_UB_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 2090 // CHECK: [[PF_CMP_IV_UB_1:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_1]], [[OMP_PF_UB_VAL_3]] 2091 // CHECK: br{{.+}} [[PF_CMP_IV_UB_1]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] 2092 2093 // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]: 2094 // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER:.+]] 2095 2096 // CHECK: [[OMP_PF_INNER_FOR_HEADER]]: 2097 // CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 2098 // CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]], 2099 // CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]] 2100 // CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]] 2101 2102 // CHECK: [[OMP_PF_INNER_LOOP_BODY]]: 2103 // CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]], 2104 // skip body branch 2105 // CHECK: br{{.+}} 2106 // CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]] 2107 2108 // IV = IV + 1 and inner loop latch 2109 // CHECK: [[OMP_PF_INNER_LOOP_INC]]: 2110 // CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]], 2111 // CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1 2112 // CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]], 2113 // CHECK: br label %[[OMP_PF_INNER_FOR_HEADER]] 2114 2115 // check NextLB and NextUB 2116 // CHECK: [[OMP_PF_INNER_LOOP_END]]: 2117 // CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]] 2118 2119 // CHECK: [[OMP_PF_OUTER_LOOP_INC]]: 2120 // 
CHECK-DAG: [[OMP_PF_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 2121 // CHECK-DAG: [[OMP_PF_ST_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], 2122 // CHECK-DAG: [[OMP_PF_LB_NEXT:%.+]] = add{{.+}} [[OMP_PF_LB_VAL_2]], [[OMP_PF_ST_VAL_1]] 2123 // CHECK: store{{.+}} [[OMP_PF_LB_NEXT]], {{.+}}* [[OMP_PF_LB]], 2124 // CHECK-DAG: [[OMP_PF_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], 2125 // CHECK-DAG: [[OMP_PF_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_ST]], 2126 // CHECK-DAG: [[OMP_PF_UB_NEXT:%.+]] = add{{.+}} [[OMP_PF_UB_VAL_5]], [[OMP_PF_ST_VAL_2]] 2127 // CHECK: store{{.+}} [[OMP_PF_UB_NEXT]], {{.+}}* [[OMP_PF_UB]], 2128 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]] 2129 2130 // CHECK: [[OMP_PF_OUTER_LOOP_END]]: 2131 // CHECK-DAG: call void @__kmpc_for_static_fini( 2132 // CHECK: ret 2133 2134 // CHECK: define{{.+}} void [[OFFLOADING_FUN_6]]( 2135 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED_6:@.+]] to {{.+}}) 2136 2137 // CHECK: define{{.+}} void [[OMP_OUTLINED_6]]( 2138 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92, 2139 // CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_6:@.+]] to {{.+}}, 2140 // skip rest of implementation of 'distribute' as it is tested above for default dist_schedule case 2141 // CHECK: ret 2142 2143 // 'parallel for' implementation using outer and inner loops and PrevEUB 2144 // CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_6]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}) 2145 // CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}}, 2146 // CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}}, 2147 // CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}}, 2148 // CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}}, 2149 2150 // initialize lb and ub to PrevLB and PrevUB 2151 // CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]], 2152 // CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]], 2153 // CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]], 2154 // CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}} 2155 // CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]], 2156 // CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}} 2157 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]], 2158 // CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]], 2159 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]], 2160 // CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]], 2161 // CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]], 2162 // CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]], 2163 // CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}}) 2164 // CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]] 2165 2166 // CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]: 2167 // CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]]) 2168 // CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0 2169 // CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]] 2170 2171 // initialize omp.iv (IV = LB) 2172 // CHECK: [[OMP_PF_OUTER_LOOP_BODY]]: 2173 // 

// CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]:
// CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]])
// CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0
// CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]

// initialize omp.iv (IV = LB)
// CHECK: [[OMP_PF_OUTER_LOOP_BODY]]:
// CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
// CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
// CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]]

// CHECK: [[OMP_PF_INNER_LOOP_HEADER]]:
// CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
// CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
// CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
// CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]

// CHECK: [[OMP_PF_INNER_LOOP_BODY]]:
// CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
// skip body branch
// CHECK: br{{.+}}
// CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]]

// IV = IV + 1 and inner loop latch
// CHECK: [[OMP_PF_INNER_LOOP_INC]]:
// CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]],
// CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
// CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]],
// CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER]]

// check NextLB and NextUB
// CHECK: [[OMP_PF_INNER_LOOP_END]]:
// CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]

// CHECK: [[OMP_PF_OUTER_LOOP_INC]]:
// CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]]

// CHECK: [[OMP_PF_OUTER_LOOP_END]]:
// CHECK: ret

// CHECK: define{{.+}} void [[OFFLOADING_FUN_7]](
// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_7:@.+]] to {{.+}})

// CHECK: define{{.+}} void [[OMP_OUTLINED_7]](
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 92,
// CHECK: call{{.+}} @__kmpc_fork_call({{.+}}, {{.+}}, {{.+}}[[OMP_PARFOR_OUTLINED_7:@.+]] to {{.+}},
// skip the rest of the 'distribute' implementation as it is tested above for the default dist_schedule case
// CHECK: ret
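
// For orientation, a non-compiled sketch of the 'distribute' initialization that
// the i32 92 argument above refers to (92 is assumed to be the runtime's unchunked
// static distribute schedule encoding; a chunked dist_schedule would use a
// different value). Names and literal arguments are illustrative only:
//
//   __kmpc_for_static_init_4(&loc, tid, /*schedule=*/92,
//                            &last, &lb, &ub, &stride, /*incr=*/1, /*chunk=*/1);
//   // each team gets one contiguous [lb, ub] slice of the distribute iteration
//   // space, which reaches the 'parallel for' region as PrevLB/PrevUB below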

// 'parallel for' implementation using outer and inner loops and PrevEUB
// CHECK: define{{.+}} void [[OMP_PARFOR_OUTLINED_7]]({{.+}}, {{.+}}, i{{[0-9]+}} [[OMP_PREV_LB_IN:%.+]], i{{[0-9]+}} [[OMP_PREV_UB_IN:%.+]], {{.+}}, {{.+}}, {{.+}}, {{.+}}, {{.+}})
// CHECK-DAG: [[OMP_PF_LB:%.omp.lb]] = alloca{{.+}},
// CHECK-DAG: [[OMP_PF_UB:%.omp.ub]] = alloca{{.+}},
// CHECK-DAG: [[OMP_PF_IV:%.omp.iv]] = alloca{{.+}},
// CHECK-DAG: [[OMP_PF_ST:%.omp.stride]] = alloca{{.+}},

// initialize lb and ub to PrevLB and PrevUB
// CHECK-DAG: store{{.+}} [[OMP_PREV_LB_IN]], {{.+}}* [[PREV_LB_ADDR:%.+]],
// CHECK-DAG: store{{.+}} [[OMP_PREV_UB_IN]], {{.+}}* [[PREV_UB_ADDR:%.+]],
// CHECK-DAG: [[OMP_PREV_LB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_LB_ADDR]],
// CHECK-64-DAG: [[OMP_PREV_LB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_LB_VAL]] to {{.+}}
// CHECK-DAG: [[OMP_PREV_UB_VAL:%.+]] = load{{.+}}, {{.+}}* [[PREV_UB_ADDR]],
// CHECK-64-DAG: [[OMP_PREV_UB_TRC:%.+]] = trunc{{.+}} [[OMP_PREV_UB_VAL]] to {{.+}}
// CHECK-64-DAG: store{{.+}} [[OMP_PREV_LB_TRC]], {{.+}}* [[OMP_PF_LB]],
// CHECK-64-DAG: store{{.+}} [[OMP_PREV_UB_TRC]], {{.+}}* [[OMP_PF_UB]],
// CHECK-32-DAG: store{{.+}} [[OMP_PREV_LB_VAL]], {{.+}}* [[OMP_PF_LB]],
// CHECK-32-DAG: store{{.+}} [[OMP_PREV_UB_VAL]], {{.+}}* [[OMP_PF_UB]],
// CHECK-DAG: [[OMP_PF_LB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
// CHECK-DAG: [[OMP_PF_UB_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_UB]],
// CHECK: call void @__kmpc_dispatch_init_4({{.+}}, {{.+}}, {{.+}} 35, {{.+}} [[OMP_PF_LB_VAL]], {{.+}} [[OMP_PF_UB_VAL]], {{.+}}, {{.+}})
// CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER:.+]]

// CHECK: [[OMP_PF_OUTER_LOOP_HEADER]]:
// CHECK: [[IS_FIN:%.+]] = call{{.+}} @__kmpc_dispatch_next_4({{.+}}, {{.+}}, {{.+}}, {{.+}}* [[OMP_PF_LB]], {{.+}}* [[OMP_PF_UB]], {{.+}}* [[OMP_PF_ST]])
// CHECK: [[IS_FIN_CMP:%.+]] = icmp{{.+}} [[IS_FIN]], 0
// CHECK: br{{.+}} [[IS_FIN_CMP]], label %[[OMP_PF_OUTER_LOOP_BODY:.+]], label %[[OMP_PF_OUTER_LOOP_END:.+]]

// initialize omp.iv (IV = LB)
// CHECK: [[OMP_PF_OUTER_LOOP_BODY]]:
// CHECK-DAG: [[OMP_PF_LB_VAL_1:%.+]] = load{{.+}}, {{.+}} [[OMP_PF_LB]],
// CHECK-DAG: store {{.+}} [[OMP_PF_LB_VAL_1]], {{.+}}* [[OMP_PF_IV]],
// CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER:.+]]

// CHECK: [[OMP_PF_INNER_LOOP_HEADER]]:
// CHECK-DAG: [[OMP_PF_IV_VAL_2:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
// CHECK-DAG: [[OMP_PF_UB_VAL_4:%.+]] = load{{.+}}, {{.+}}* [[OMP_PF_UB]],
// CHECK: [[PF_CMP_IV_UB_2:%.+]] = icmp{{.+}} [[OMP_PF_IV_VAL_2]], [[OMP_PF_UB_VAL_4]]
// CHECK: br{{.+}} [[PF_CMP_IV_UB_2]], label %[[OMP_PF_INNER_LOOP_BODY:.+]], label %[[OMP_PF_INNER_LOOP_END:.+]]

// CHECK: [[OMP_PF_INNER_LOOP_BODY]]:
// CHECK-DAG: {{.+}} = load{{.+}}, {{.+}}* [[OMP_PF_IV]],
// skip body branch
// CHECK: br{{.+}}
// CHECK: br label %[[OMP_PF_INNER_LOOP_INC:.+]]

// IV = IV + 1 and inner loop latch
// CHECK: [[OMP_PF_INNER_LOOP_INC]]:
// CHECK-DAG: [[OMP_PF_IV_VAL_3:%.+]] = load{{.+}}, {{.+}}* [[OMP_IV]],
// CHECK-DAG: [[OMP_PF_NEXT_IV:%.+]] = add{{.+}} [[OMP_PF_IV_VAL_3]], 1
// CHECK-DAG: store{{.+}} [[OMP_PF_NEXT_IV]], {{.+}}* [[OMP_IV]],
// CHECK: br label %[[OMP_PF_INNER_LOOP_HEADER]]

// check NextLB and NextUB
// CHECK: [[OMP_PF_INNER_LOOP_END]]:
// CHECK: br label %[[OMP_PF_OUTER_LOOP_INC:.+]]

// CHECK: [[OMP_PF_OUTER_LOOP_INC]]:
// CHECK: br label %[[OMP_PF_OUTER_LOOP_HEADER]]

// CHECK: [[OMP_PF_OUTER_LOOP_END]]:
// CHECK: ret
#endif
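
// Note on the dispatch-based loops above: the OMP_PF_OUTER_LOOP_INC block only
// branches back to the outer loop header, because the next [lb, ub] chunk comes
// from __kmpc_dispatch_next_4 rather than from explicit bound arithmetic. This is
// in contrast with the static-chunked lowering checked earlier in the file, which
// advances the bounds itself, roughly:
//
//   lb += stride;   // OMP_PF_LB_NEXT
//   ub += stride;   // OMP_PF_UB_NEXT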