1 // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - -femit-all-decls | FileCheck %s
2 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
3 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - -femit-all-decls | FileCheck %s
4 // expected-no-diagnostics
5 // REQUIRES: x86-registered-target
6 #ifndef HEADER
7 #define HEADER
8 
9 // CHECK-LABEL: @main
// Host driver containing three 'taskloop simd' regions; the CHECK lines are
// FileCheck assertions on the emitted IR (plain // comments like this one are
// ignored by FileCheck and only annotate the test for readers).
10 int main(int argc, char **argv) {
// Region 1: no clauses. Expect task allocation followed by __kmpc_taskloop
// with if=1, lb=0, ub=9, st=1, nogroup=0, sched=0, grainsize/num_tasks=0.
11 // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%ident_t* [[DEFLOC:@.+]])
12 // CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 64, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK1:@.+]] to i32 (i32, i8*)*))
13 // CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]*
14 // CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0
15 // CHECK: getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 3
16 // CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** %{{.+}}
17 // CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 4
18 // CHECK: store i64 0, i64* [[DOWN]],
19 // CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 5
20 // CHECK: store i64 9, i64* [[UP]],
21 // CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 6
22 // CHECK: store i64 1, i64* [[ST]],
23 // CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
24 // CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 0, i32 0, i64 0, i8* null)
25 #pragma omp taskloop simd
26   for (int i = 0; i < 10; ++i)
27     ;
// Region 2: nogroup + grainsize(argc) + simdlen(4). Expect nogroup=1 and
// sched=1 with the zero-extended grainsize value; simdlen(4) is verified via
// the llvm.loop.vectorize.width metadata checked at the end of this file.
28 // CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 64, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK2:@.+]] to i32 (i32, i8*)*))
29 // CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]*
30 // CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0
31 // CHECK: getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 3
32 // CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** %{{.+}}
33 // CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 4
34 // CHECK: store i64 0, i64* [[DOWN]],
35 // CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 5
36 // CHECK: store i64 9, i64* [[UP]],
37 // CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 6
38 // CHECK: store i64 1, i64* [[ST]],
39 // CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
40 // CHECK: [[GRAINSIZE:%.+]] = zext i32 %{{.+}} to i64
41 // CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 1, i32 1, i64 [[GRAINSIZE]], i8* null)
42 #pragma omp taskloop simd nogroup grainsize(argc) simdlen(4)
43   for (int i = 0; i < 10; ++i)
44     ;
// Region 3: if(argc) + shared + collapse(2) + num_tasks(4) + safelen(32).
// Expect the if-condition lowered to an i32 flag passed to __kmpc_taskloop,
// sched=2 (num_tasks) with value 4, and a runtime-computed upper bound for
// the collapsed 2-level loop nest (note 24 bytes of shareds in task_alloc).
45 // CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 64, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK3:@.+]] to i32 (i32, i8*)*))
46 // CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]*
47 // CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0
48 // CHECK: getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 3
49 // CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** %{{.+}}
50 // CHECK: [[IF:%.+]] = icmp ne i32 %{{.+}}, 0
51 // CHECK: [[IF_INT:%.+]] = sext i1 [[IF]] to i32
52 // CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 4
53 // CHECK: store i64 0, i64* [[DOWN]],
54 // CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 5
55 // CHECK: store i64 %{{.+}}, i64* [[UP]],
56 // CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 6
57 // CHECK: store i64 1, i64* [[ST]],
58 // CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
59 // CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 [[IF_INT]], i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 0, i32 2, i64 4, i8* null)
60   int i;
61 #pragma omp taskloop simd if(argc) shared(argc, argv) collapse(2) num_tasks(4) safelen(32)
62   for (i = 0; i < argc; ++i)
63   for (int j = argc; j < argv[argc][argc]; ++j)
64     ;
65 }
66 
67 // CHECK: define internal i32 [[TASK1]](
68 // CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* %{{.+}}, i32 0, i32 4
69 // CHECK: [[DOWN_VAL:%.+]] = load i64, i64* [[DOWN]],
70 // CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 5
71 // CHECK: [[UP_VAL:%.+]] = load i64, i64* [[UP]],
72 // CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 6
73 // CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
74 // CHECK: [[LITER:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 7
75 // CHECK: [[LITER_VAL:%.+]] = load i32, i32* [[LITER]],
76 // CHECK: store i64 [[DOWN_VAL]], i64* [[LB:%[^,]+]],
77 // CHECK: store i64 [[UP_VAL]], i64* [[UB:%[^,]+]],
78 // CHECK: store i64 [[ST_VAL]], i64* [[ST:%[^,]+]],
79 // CHECK: store i32 [[LITER_VAL]], i32* [[LITER:%[^,]+]],
80 // CHECK: [[LB_VAL:%.+]] = load i64, i64* [[LB]],
81 // CHECK: [[LB_I32:%.+]] = trunc i64 [[LB_VAL]] to i32
82 // CHECK: store i32 [[LB_I32]], i32* [[CNT:%.+]],
83 // CHECK: br label
84 // CHECK: [[VAL:%.+]] = load i32, i32* [[CNT]],{{.*}}!llvm.mem.parallel_loop_access [[LOOP1:!.+]]
85 // CHECK: [[VAL_I64:%.+]] = sext i32 [[VAL]] to i64
86 // CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]],{{.*}}!llvm.mem.parallel_loop_access [[LOOP1]]
87 // CHECK: [[CMP:%.+]] = icmp ule i64 [[VAL_I64]], [[UB_VAL]]
88 // CHECK: br i1 [[CMP]], label %{{.+}}, label %{{.+}}
89 // CHECK: load i32, i32* %{{.*}}!llvm.mem.parallel_loop_access [[LOOP1]]
90 // CHECK: store i32 %{{.*}}!llvm.mem.parallel_loop_access [[LOOP1]]
91 // CHECK: load i32, i32* %{{.*}}!llvm.mem.parallel_loop_access [[LOOP1]]
92 // CHECK: add nsw i32 %{{.+}}, 1
93 // CHECK: store i32 %{{.+}}, i32* %{{.*}}!llvm.mem.parallel_loop_access [[LOOP1]]
94 // CHECK: br label %{{.*}}!llvm.loop [[LOOP1]]
95 // CHECK: ret i32 0
96 
97 // CHECK: define internal i32 [[TASK2]](
98 // CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* %{{.+}}, i32 0, i32 4
99 // CHECK: [[DOWN_VAL:%.+]] = load i64, i64* [[DOWN]],
100 // CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 5
101 // CHECK: [[UP_VAL:%.+]] = load i64, i64* [[UP]],
102 // CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 6
103 // CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
104 // CHECK: [[LITER:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 7
105 // CHECK: [[LITER_VAL:%.+]] = load i32, i32* [[LITER]],
106 // CHECK: store i64 [[DOWN_VAL]], i64* [[LB:%[^,]+]],
107 // CHECK: store i64 [[UP_VAL]], i64* [[UB:%[^,]+]],
108 // CHECK: store i64 [[ST_VAL]], i64* [[ST:%[^,]+]],
109 // CHECK: store i32 [[LITER_VAL]], i32* [[LITER:%[^,]+]],
110 // CHECK: [[LB_VAL:%.+]] = load i64, i64* [[LB]],
111 // CHECK: [[LB_I32:%.+]] = trunc i64 [[LB_VAL]] to i32
112 // CHECK: store i32 [[LB_I32]], i32* [[CNT:%.+]],
113 // CHECK: br label
114 // CHECK: [[VAL:%.+]] = load i32, i32* [[CNT]],{{.*}}!llvm.mem.parallel_loop_access [[LOOP2:!.+]]
115 // CHECK: [[VAL_I64:%.+]] = sext i32 [[VAL]] to i64
116 // CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]],{{.*}}!llvm.mem.parallel_loop_access [[LOOP2]]
117 // CHECK: [[CMP:%.+]] = icmp ule i64 [[VAL_I64]], [[UB_VAL]]
118 // CHECK: br i1 [[CMP]], label %{{.+}}, label %{{.+}}
119 // CHECK: load i32, i32* %{{.*}}!llvm.mem.parallel_loop_access [[LOOP2]]
120 // CHECK: store i32 %{{.*}}!llvm.mem.parallel_loop_access [[LOOP2]]
121 // CHECK: load i32, i32* %{{.*}}!llvm.mem.parallel_loop_access [[LOOP2]]
122 // CHECK: add nsw i32 %{{.+}}, 1
123 // CHECK: store i32 %{{.+}}, i32* %{{.*}}!llvm.mem.parallel_loop_access [[LOOP2]]
124 // CHECK: br label %{{.*}}!llvm.loop [[LOOP2]]
125 // CHECK: ret i32 0
126 
127 // CHECK: define internal i32 [[TASK3]](
128 // CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* %{{.+}}, i32 0, i32 4
129 // CHECK: [[DOWN_VAL:%.+]] = load i64, i64* [[DOWN]],
130 // CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 5
131 // CHECK: [[UP_VAL:%.+]] = load i64, i64* [[UP]],
132 // CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 6
133 // CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
134 // CHECK: [[LITER:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 7
135 // CHECK: [[LITER_VAL:%.+]] = load i32, i32* [[LITER]],
136 // CHECK: store i64 [[DOWN_VAL]], i64* [[LB:%[^,]+]],
137 // CHECK: store i64 [[UP_VAL]], i64* [[UB:%[^,]+]],
138 // CHECK: store i64 [[ST_VAL]], i64* [[ST:%[^,]+]],
139 // CHECK: store i32 [[LITER_VAL]], i32* [[LITER:%[^,]+]],
140 // CHECK: [[LB_VAL:%.+]] = load i64, i64* [[LB]],
141 // CHECK: store i64 [[LB_VAL]], i64* [[CNT:%.+]],
142 // CHECK: br label
143 // CHECK-NOT: !llvm.mem.parallel_loop_access
144 // CHECK: br label %{{.*}}!llvm.loop
145 // CHECK: ret i32 0
146 
147 // CHECK-LABEL: @_ZN1SC2Ei
// Region 4: 'taskloop simd' inside a constructor (checked against the mangled
// name @_ZN1SC2Ei above). Verifies num_tasks(a) is zero-extended and passed
// with sched=2 and nogroup=0; the trip count depends on 'c', so the upper
// bound store is a runtime value. simdlen(64)/safelen(8) are verified via the
// llvm.loop.vectorize.width metadata checked at the end of this file.
148 struct S {
149   int a;
150   S(int c) {
151 // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%ident_t* [[DEFLOC:@.+]])
152 // CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 64, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK4:@.+]] to i32 (i32, i8*)*))
153 // CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]*
154 // CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0
155 // CHECK: getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 3
156 // CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** %{{.+}}
157 // CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 4
158 // CHECK: store i64 0, i64* [[DOWN]],
159 // CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 5
160 // CHECK: store i64 %{{.+}}, i64* [[UP]],
161 // CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 6
162 // CHECK: store i64 1, i64* [[ST]],
163 // CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
164 // CHECK: [[NUM_TASKS:%.+]] = zext i32 %{{.+}} to i64
165 // CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 0, i32 2, i64 [[NUM_TASKS]], i8* null)
166 #pragma omp taskloop simd shared(c) num_tasks(a) simdlen(64) safelen(8)
167     for (a = 0; a < c; ++a)
168       ;
169   }
// Global instance so the constructor (and its taskloop region) is emitted.
170 } s(1);
171 
172 // CHECK: define internal i32 [[TASK4]](
173 // CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* %{{.+}}, i32 0, i32 4
174 // CHECK: [[DOWN_VAL:%.+]] = load i64, i64* [[DOWN]],
175 // CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 5
176 // CHECK: [[UP_VAL:%.+]] = load i64, i64* [[UP]],
177 // CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 6
178 // CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
179 // CHECK: [[LITER:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 7
180 // CHECK: [[LITER_VAL:%.+]] = load i32, i32* [[LITER]],
181 // CHECK: store i64 [[DOWN_VAL]], i64* [[LB:%[^,]+]],
182 // CHECK: store i64 [[UP_VAL]], i64* [[UB:%[^,]+]],
183 // CHECK: store i64 [[ST_VAL]], i64* [[ST:%[^,]+]],
184 // CHECK: store i32 [[LITER_VAL]], i32* [[LITER:%[^,]+]],
185 // CHECK: [[LB_VAL:%.+]] = load i64, i64* [[LB]],
186 // CHECK: [[LB_I32:%.+]] = trunc i64 [[LB_VAL]] to i32
187 // CHECK: store i32 [[LB_I32]], i32* [[CNT:%.+]],
188 // CHECK: br label
189 // CHECK: [[VAL:%.+]] = load i32, i32* [[CNT]],
190 // CHECK: [[VAL_I64:%.+]] = sext i32 [[VAL]] to i64
191 // CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]],
192 // CHECK: [[CMP:%.+]] = icmp ule i64 [[VAL_I64]], [[UB_VAL]]
193 // CHECK: br i1 [[CMP]], label %{{.+}}, label %{{.+}}
194 // CHECK: load i32, i32* %
195 // CHECK-NOT: !llvm.mem.parallel_loop_access
196 // CHECK: store i32 %
197 // CHECK-NOT: !llvm.mem.parallel_loop_access
198 // CHECK: load i32, i32* %
199 // CHECK-NOT: !llvm.mem.parallel_loop_access
200 // CHECK: add nsw i32 %{{.+}}, 1
201 // CHECK: store i32 %{{.+}}, i32* %
202 // CHECK-NOT: !llvm.mem.parallel_loop_access
203 // CHECK: br label %{{.*}}!llvm.loop
204 // CHECK: ret i32 0
205 
206 // CHECK: !{!"llvm.loop.vectorize.enable", i1 true}
207 // CHECK: !{!"llvm.loop.vectorize.width", i32 4}
208 // CHECK: !{!"llvm.loop.vectorize.width", i32 32}
209 // CHECK: !{!"llvm.loop.vectorize.width", i32 64}
210 
211 #endif
212