// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - -femit-all-decls | FileCheck %s
// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - -femit-all-decls | FileCheck %s

// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -x c++ -emit-llvm %s -o - -femit-all-decls | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - -femit-all-decls | FileCheck --check-prefix SIMD-ONLY0 %s
// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
// expected-no-diagnostics

// Codegen test for the combined `parallel master taskloop` directive.
// The -fopenmp runs (direct and via PCH) match the emitted LLVM IR against the
// patterns below; the -fopenmp-simd runs only require that neither `__kmpc`
// nor `__tgt` appears anywhere in the output.
#ifndef HEADER
#define HEADER

// CHECK-LABEL: @main
int main(int argc, char **argv) {
// Caller side: one fork call per directive in this function. For the third
// directive the patterns additionally expect a serialized-parallel sequence
// that calls the outlined function directly.
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[DEFLOC:@.+]])
// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[DEFLOC]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OMP_OUTLINED1:@.+]] to void (i32*, i32*, ...)*))
// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[DEFLOC]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OMP_OUTLINED2:@.+]] to void (i32*, i32*, ...)*))
// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[DEFLOC]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***, i64)* [[OMP_OUTLINED3:@.+]] to void (i32*, i32*, ...)*), i32* [[ARGC:%.+]], i8*** [[ARGV:%.+]], i64 [[COND:%.+]])
// CHECK: call void @__kmpc_serialized_parallel(%struct.ident_t* [[DEFLOC]], i32 [[GTID]])
// CHECK: call void [[OMP_OUTLINED3]](i32* %{{.+}}, i32* %{{.+}}, i32* [[ARGC]], i8*** [[ARGV]], i64 [[COND]])
// CHECK: call void @__kmpc_end_serialized_parallel(%struct.ident_t* [[DEFLOC]], i32 [[GTID]])


// First directive, priority(4): the master region is expected to wrap the
// taskloop runtime call in a taskgroup begin/end pair.
// CHECK: define internal void [[OMP_OUTLINED1]](i32* noalias %{{.+}}, i32* noalias %{{.+}})
// CHECK: [[RES:%.+]] = call {{.*}}i32 @__kmpc_master(%struct.ident_t* [[DEFLOC]], i32 [[GTID:%.+]])
// CHECK-NEXT: [[IS_MASTER:%.+]] = icmp ne i32 [[RES]], 0
// CHECK-NEXT: br i1 [[IS_MASTER]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]]
// CHECK: [[THEN]]
// CHECK: call void @__kmpc_taskgroup(%struct.ident_t* [[DEFLOC]], i32 [[GTID]])
// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* [[DEFLOC]], i32 [[GTID]], i32 33, i64 80, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK1:@.+]] to i32 (i32, i8*)*))
// CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]*
// CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0
// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 5
// CHECK: store i64 0, i64* [[DOWN]],
// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 6
// CHECK: store i64 9, i64* [[UP]],
// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 7
// CHECK: store i64 1, i64* [[ST]],
// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
// CHECK: call void @__kmpc_taskloop(%struct.ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 1, i32 0, i64 0, i8* null)
// CHECK: call void @__kmpc_end_taskgroup(%struct.ident_t* [[DEFLOC]], i32 [[GTID]])
// CHECK-NEXT: call {{.*}}void @__kmpc_end_master(%struct.ident_t* [[DEFLOC]], i32 [[GTID]])
// CHECK-NEXT: br label {{%?}}[[EXIT]]
// CHECK: [[EXIT]]


// CHECK: define internal i32 [[TASK1]](
// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* %{{.+}}, i32 0, i32 5
// CHECK: [[DOWN_VAL:%.+]] = load i64, i64* [[DOWN]],
// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 6
// CHECK: [[UP_VAL:%.+]] = load i64, i64* [[UP]],
// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 7
// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
// CHECK: [[LITER:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 8
// CHECK: [[LITER_VAL:%.+]] = load i32, i32* [[LITER]],
// CHECK: store i64 [[DOWN_VAL]], i64* [[LB:%[^,]+]],
// CHECK: store i64 [[UP_VAL]], i64* [[UB:%[^,]+]],
// CHECK: store i64 [[ST_VAL]], i64* [[ST:%[^,]+]],
// CHECK: store i32 [[LITER_VAL]], i32* [[LITER:%[^,]+]],
// CHECK: [[LB_VAL:%.+]] = load i64, i64* [[LB]],
// CHECK: [[LB_I32:%.+]] = trunc i64 [[LB_VAL]] to i32
// CHECK: store i32 [[LB_I32]], i32* [[CNT:%.+]],
// CHECK: br label
// CHECK: [[VAL:%.+]] = load i32, i32* [[CNT]],
// CHECK: [[VAL_I64:%.+]] = sext i32 [[VAL]] to i64
// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]],
// CHECK: [[CMP:%.+]] = icmp ule i64 [[VAL_I64]], [[UB_VAL]]
// CHECK: br i1 [[CMP]], label %{{.+}}, label %{{.+}}
// CHECK: load i32, i32* %
// CHECK: store i32 %
// CHECK: load i32, i32* %
// CHECK: add nsw i32 %{{.+}}, 1
// CHECK: store i32 %{{.+}}, i32* %
// CHECK: br label %
// CHECK: ret i32 0

#pragma omp parallel master taskloop priority(4)
  for (int i = 0; i < 10; ++i)
    ;
// Second directive, nogroup grainsize(4): no taskgroup calls are listed, and
// the end-of-master call must directly follow the taskloop runtime call.
// CHECK: define internal void [[OMP_OUTLINED2]](i32* noalias %{{.+}}, i32* noalias %{{.+}})
// CHECK: [[RES:%.+]] = call {{.*}}i32 @__kmpc_master(%struct.ident_t* [[DEFLOC]], i32 [[GTID:%.+]])
// CHECK-NEXT: [[IS_MASTER:%.+]] = icmp ne i32 [[RES]], 0
// CHECK-NEXT: br i1 [[IS_MASTER]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]]
// CHECK: [[THEN]]
// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 80, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK2:@.+]] to i32 (i32, i8*)*))
// CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]*
// CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0
// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 5
// CHECK: store i64 0, i64* [[DOWN]],
// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 6
// CHECK: store i64 9, i64* [[UP]],
// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 7
// CHECK: store i64 1, i64* [[ST]],
// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
// CHECK: call void @__kmpc_taskloop(%struct.ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 1, i32 1, i64 4, i8* null)
// CHECK-NEXT: call {{.*}}void @__kmpc_end_master(%struct.ident_t* [[DEFLOC]], i32 [[GTID]])
// CHECK-NEXT: br label {{%?}}[[EXIT]]
// CHECK: [[EXIT]]


// CHECK: define internal i32 [[TASK2]](
// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* %{{.+}}, i32 0, i32 5
// CHECK: [[DOWN_VAL:%.+]] = load i64, i64* [[DOWN]],
// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 6
// CHECK: [[UP_VAL:%.+]] = load i64, i64* [[UP]],
// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 7
// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
// CHECK: [[LITER:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 8
// CHECK: [[LITER_VAL:%.+]] = load i32, i32* [[LITER]],
// CHECK: store i64 [[DOWN_VAL]], i64* [[LB:%[^,]+]],
// CHECK: store i64 [[UP_VAL]], i64* [[UB:%[^,]+]],
// CHECK: store i64 [[ST_VAL]], i64* [[ST:%[^,]+]],
// CHECK: store i32 [[LITER_VAL]], i32* [[LITER:%[^,]+]],
// CHECK: [[LB_VAL:%.+]] = load i64, i64* [[LB]],
// CHECK: [[LB_I32:%.+]] = trunc i64 [[LB_VAL]] to i32
// CHECK: store i32 [[LB_I32]], i32* [[CNT:%.+]],
// CHECK: br label
// CHECK: [[VAL:%.+]] = load i32, i32* [[CNT]],
// CHECK: [[VAL_I64:%.+]] = sext i32 [[VAL]] to i64
// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]],
// CHECK: [[CMP:%.+]] = icmp ule i64 [[VAL_I64]], [[UB_VAL]]
// CHECK: br i1 [[CMP]], label %{{.+}}, label %{{.+}}
// CHECK: load i32, i32* %
// CHECK: store i32 %
// CHECK: load i32, i32* %
// CHECK: add nsw i32 %{{.+}}, 1
// CHECK: store i32 %{{.+}}, i32* %
// CHECK: br label %
// CHECK: ret i32 0

#pragma omp parallel master taskloop nogroup grainsize(4)
  for (int i = 0; i < 10; ++i)
    ;
// Third directive, if(argc) shared(argc, argv) collapse(2) num_tasks(4): the
// if-clause value is loaded, converted to i32 and passed to the taskloop
// runtime call; the collapsed loop uses an i64 counter in the task entry.
// CHECK: define internal void [[OMP_OUTLINED3]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* dereferenceable(4) %{{.+}}, i8*** dereferenceable(8) %{{.+}}, i64 %{{.+}})
// CHECK: [[RES:%.+]] = call {{.*}}i32 @__kmpc_master(%struct.ident_t* [[DEFLOC]], i32 [[GTID:%.+]])
// CHECK-NEXT: [[IS_MASTER:%.+]] = icmp ne i32 [[RES]], 0
// CHECK-NEXT: br i1 [[IS_MASTER]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]]
// CHECK: [[THEN]]
// CHECK: call void @__kmpc_taskgroup(%struct.ident_t* [[DEFLOC]], i32 [[GTID]])
// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 80, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK3:@.+]] to i32 (i32, i8*)*))
// CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]*
// CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0
// CHECK: [[COND_VAL:%.+]] = load i8, i8* %{{.+}},
// CHECK: [[COND_BOOL:%.+]] = trunc i8 [[COND_VAL]] to i1
// CHECK: [[IF_INT:%.+]] = sext i1 [[COND_BOOL]] to i32
// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 5
// CHECK: store i64 0, i64* [[DOWN]],
// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 6
// CHECK: store i64 %{{.+}}, i64* [[UP]],
// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 7
// CHECK: store i64 1, i64* [[ST]],
// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
// CHECK: call void @__kmpc_taskloop(%struct.ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 [[IF_INT]], i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 1, i32 2, i64 4, i8* null)
// CHECK: call void @__kmpc_end_taskgroup(%struct.ident_t* [[DEFLOC]], i32 [[GTID]])
// CHECK-NEXT: call {{.*}}void @__kmpc_end_master(%struct.ident_t* [[DEFLOC]], i32 [[GTID]])
// CHECK-NEXT: br label {{%?}}[[EXIT]]
// CHECK: [[EXIT]]

// CHECK: define internal i32 [[TASK3]](
// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* %{{.+}}, i32 0, i32 5
// CHECK: [[DOWN_VAL:%.+]] = load i64, i64* [[DOWN]],
// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 6
// CHECK: [[UP_VAL:%.+]] = load i64, i64* [[UP]],
// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 7
// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
// CHECK: [[LITER:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 8
// CHECK: [[LITER_VAL:%.+]] = load i32, i32* [[LITER]],
// CHECK: store i64 [[DOWN_VAL]], i64* [[LB:%[^,]+]],
// CHECK: store i64 [[UP_VAL]], i64* [[UB:%[^,]+]],
// CHECK: store i64 [[ST_VAL]], i64* [[ST:%[^,]+]],
// CHECK: store i32 [[LITER_VAL]], i32* [[LITER:%[^,]+]],
// CHECK: [[LB_VAL:%.+]] = load i64, i64* [[LB]],
// CHECK: store i64 [[LB_VAL]], i64* [[CNT:%.+]],
// CHECK: br label
// CHECK: ret i32 0

  int i;
#pragma omp parallel master taskloop if(argc) shared(argc, argv) collapse(2) num_tasks(4)
  for (i = 0; i < argc; ++i)
    for (int j = argc; j < argv[argc][argc]; ++j)
      ;
}

// CHECK-LABEL: @_ZN1SC2Ei
struct S {
  int a;
  S(int c) {
// Directive inside a constructor, using the data member `a` as the loop
// counter: the fork call is expected to pass the object pointer and `c`.
// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[DEFLOC]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S*, i32*)* [[OMP_OUTLINED4:@.+]] to void (i32*, i32*, ...)*), %struct.S* %{{.+}}, i32* %{{.+}})

// CHECK: define internal void [[OMP_OUTLINED4]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, %struct.S* %{{.+}}, i32* dereferenceable(4) %{{.+}})
// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* [[DEFLOC]], i32 [[GTID:%.+]], i32 1, i64 80, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK4:@.+]] to i32 (i32, i8*)*))
// CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]*
// CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0
// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 5
// CHECK: store i64 0, i64* [[DOWN]],
// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 6
// CHECK: store i64 %{{.+}}, i64* [[UP]],
// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 7
// CHECK: store i64 1, i64* [[ST]],
// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
// CHECK: call void @__kmpc_taskloop(%struct.ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 1, i32 2, i64 4, i8* null)
#pragma omp parallel master taskloop shared(c) num_tasks(4)
    for (a = 0; a < c; ++a)
      ;
  }
} s(1);

// CHECK: define internal i32 [[TASK4]](
// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* %{{.+}}, i32 0, i32 5
// CHECK: [[DOWN_VAL:%.+]] = load i64, i64* [[DOWN]],
// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 6
// CHECK: [[UP_VAL:%.+]] = load i64, i64* [[UP]],
// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 7
// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
// CHECK: [[LITER:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 8
// CHECK: [[LITER_VAL:%.+]] = load i32, i32* [[LITER]],
// CHECK: store i64 [[DOWN_VAL]], i64* [[LB:%[^,]+]],
// CHECK: store i64 [[UP_VAL]], i64* [[UB:%[^,]+]],
// CHECK: store i64 [[ST_VAL]], i64* [[ST:%[^,]+]],
// CHECK: store i32 [[LITER_VAL]], i32* [[LITER:%[^,]+]],
// CHECK: [[LB_VAL:%.+]] = load i64, i64* [[LB]],
// CHECK: [[LB_I32:%.+]] = trunc i64 [[LB_VAL]] to i32
// CHECK: store i32 [[LB_I32]], i32* [[CNT:%.+]],
// CHECK: br label
// CHECK: [[VAL:%.+]] = load i32, i32* [[CNT]],
// CHECK: [[VAL_I64:%.+]] = sext i32 [[VAL]] to i64
// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]],
// CHECK: [[CMP:%.+]] = icmp ule i64 [[VAL_I64]], [[UB_VAL]]
// CHECK: br i1 [[CMP]], label %{{.+}}, label %{{.+}}
// CHECK: load i32, i32* %
// CHECK: store i32 %
// CHECK: load i32, i32* %
// CHECK: add nsw i32 %{{.+}}, 1
// CHECK: store i32 %{{.+}}, i32* %
// CHECK: br label %
// CHECK: ret i32 0

#endif