1 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
2 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
3 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
4 // expected-no-diagnostics
5 #ifndef HEADER
6 #define HEADER
7 
8 // CHECK: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* }
9 // CHECK: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 66, i32 0, i32 0, i8*
10 // CHECK-LABEL: define {{.*void}} @{{.*}}static_not_chunked{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}})
// Codegen test: ordered worksharing loop with schedule(static).
//
// The loop writes a[i] = b[i] * c[i] * d[i] inside an 'omp ordered' region
// while i counts down from 32000000 in steps of 7 as long as i > 33. That is
// 4571424 iterations, so the FileCheck patterns below expect the 32-bit
// dispatch-init call to receive the constants 66, 0, 4571423, 1, 1 after the
// thread id (4571423 being the index of the last iteration).
// NOTE(review): 66 is presumably the runtime's enum value for an ordered
// static schedule -- confirm against the OpenMP runtime headers.
//
// The patterns further expect the store to a[i] to be bracketed by
// __kmpc_ordered / __kmpc_end_ordered, the induction-variable increment to be
// followed by __kmpc_dispatch_fini_4, and a __kmpc_barrier call before return.
//
// a: destination array; b, c, d: source arrays. All are indexed by i.
11 void static_not_chunked(float *a, float *b, float *c, float *d) {
12 // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:[@%].+]])
13   #pragma omp for schedule(static) ordered
14 // CHECK: call void @__kmpc_dispatch_init_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 66, i32 0, i32 4571423, i32 1, i32 1)
15 //
16 // CHECK: [[HASWORK:%.+]] = call i32 @__kmpc_dispatch_next_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32* [[OMP_ISLAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]])
17 // CHECK-NEXT: [[O_CMP:%.+]] = icmp ne i32 [[HASWORK]], 0
18 // CHECK-NEXT: br i1 [[O_CMP]], label %[[O_LOOP1_BODY:[^,]+]], label %[[O_LOOP1_END:[^,]+]]
19 
20 // Loop header
21 // CHECK: [[O_LOOP1_BODY]]
22 // CHECK: [[LB:%.+]] = load i32, i32* [[OMP_LB]]
23 // CHECK-NEXT: store i32 [[LB]], i32* [[OMP_IV:[^,]+]]
24 // CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]
25 
26 // CHECK-NEXT: [[UB:%.+]] = load i32, i32* [[OMP_UB]]
27 // CHECK-NEXT: [[CMP:%.+]] = icmp sle i32 [[IV]], [[UB]]
28 // CHECK-NEXT: br i1 [[CMP]], label %[[LOOP1_BODY:[^,]+]], label %[[LOOP1_END:[^,]+]]
// Downward-counting source loop; the patterns below expect i to be rebuilt
// from the normalized induction variable as 32000000 - 7 * IV.
29   for (int i = 32000000; i > 33; i += -7) {
30 // CHECK: [[LOOP1_BODY]]
31 // Start of body: calculate i from IV:
32 // CHECK: [[IV1_1:%.+]] = load i32, i32* [[OMP_IV]]
33 // CHECK-NEXT: [[CALC_I_1:%.+]] = mul nsw i32 [[IV1_1]], 7
34 // CHECK-NEXT: [[CALC_I_2:%.+]] = sub nsw i32 32000000, [[CALC_I_1]]
35 // CHECK-NEXT: store i32 [[CALC_I_2]], i32* [[LC_I:.+]]
36 
37 // ... start of ordered region ...
38 // CHECK-NEXT: call void @__kmpc_ordered([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
39 // ... loop body ...
40 // End of body: store into a[i]:
41 // CHECK: store float [[RESULT:%.+]], float* {{%.+}}
42 // CHECK-NOT: !llvm.mem.parallel_loop_access
43 // CHECK-NEXT: call void @__kmpc_end_ordered([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
44 // ... end of ordered region ...
45     #pragma omp ordered
46     a[i] = b[i] * c[i] * d[i];
47 // CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}
48 // CHECK-NEXT: [[ADD1_2:%.+]] = add nsw i32 [[IV1_2]], 1
49 // CHECK-NEXT: store i32 [[ADD1_2]], i32* [[OMP_IV]]
// ... end iteration for ordered loop ...
50 // CHECK-NEXT: call void @__kmpc_dispatch_fini_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
51 // CHECK-NEXT: br label %{{.+}}
52   }
53 // CHECK: [[LOOP1_END]]
54 // CHECK: [[O_LOOP1_END]]
55 // CHECK: call {{.+}} @__kmpc_barrier([[IDENT_T_TY]]* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID]])
56 // CHECK: ret void
57 }
58 
59 // CHECK-LABEL: define {{.*void}} @{{.*}}dynamic1{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}})
// Codegen test: ordered worksharing loop with schedule(dynamic) and an
// unsigned 64-bit loop variable.
//
// i runs from 131071 while i < 2147483647 in steps of 127. The distance
// 2147483647 - 131071 = 2147352576 is exactly 127 * 16908288, giving 16908288
// iterations; the FileCheck patterns therefore expect the unsigned 64-bit
// dispatch-init call to receive 67, 0, 16908287, 1, 1 after the thread id.
// NOTE(review): 67 is presumably the runtime's enum value for an ordered
// dynamic schedule -- confirm against the OpenMP runtime headers.
//
// The body store is wrapped in 'omp ordered threads'; the patterns expect the
// same __kmpc_ordered / __kmpc_end_ordered bracketing around the store to
// a[i], then __kmpc_dispatch_fini_8u after the induction-variable increment.
//
// a: destination array; b, c, d: source arrays. All are indexed by i.
60 void dynamic1(float *a, float *b, float *c, float *d) {
61 // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:[@%].+]])
62   #pragma omp for schedule(dynamic) ordered
63 // CHECK: call void @__kmpc_dispatch_init_8u([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 67, i64 0, i64 16908287, i64 1, i64 1)
64 //
65 // CHECK: [[HASWORK:%.+]] = call i32 @__kmpc_dispatch_next_8u([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32* [[OMP_ISLAST:%[^,]+]], i64* [[OMP_LB:%[^,]+]], i64* [[OMP_UB:%[^,]+]], i64* [[OMP_ST:%[^,]+]])
66 // CHECK-NEXT: [[O_CMP:%.+]] = icmp ne i32 [[HASWORK]], 0
67 // CHECK-NEXT: br i1 [[O_CMP]], label %[[O_LOOP1_BODY:[^,]+]], label %[[O_LOOP1_END:[^,]+]]
68 
69 // Loop header
70 // CHECK: [[O_LOOP1_BODY]]
71 // CHECK: [[LB:%.+]] = load i64, i64* [[OMP_LB]]
72 // CHECK-NEXT: store i64 [[LB]], i64* [[OMP_IV:[^,]+]]
73 // CHECK: [[IV:%.+]] = load i64, i64* [[OMP_IV]]
74 
75 // CHECK-NEXT: [[UB:%.+]] = load i64, i64* [[OMP_UB]]
76 // CHECK-NEXT: [[CMP:%.+]] = icmp ule i64 [[IV]], [[UB]]
77 // CHECK-NEXT: br i1 [[CMP]], label %[[LOOP1_BODY:[^,]+]], label %[[LOOP1_END:[^,]+]]
// Unsigned loop variable: the comparison above is expected to be 'ule' and the
// arithmetic below is expected without the 'nsw' flag, unlike the signed tests.
78   for (unsigned long long i = 131071; i < 2147483647; i += 127) {
79 // CHECK: [[LOOP1_BODY]]
80 // Start of body: calculate i from IV:
81 // CHECK: [[IV1_1:%.+]] = load i64, i64* [[OMP_IV]]
82 // CHECK-NEXT: [[CALC_I_1:%.+]] = mul i64 [[IV1_1]], 127
83 // CHECK-NEXT: [[CALC_I_2:%.+]] = add i64 131071, [[CALC_I_1]]
84 // CHECK-NEXT: store i64 [[CALC_I_2]], i64* [[LC_I:.+]]
85 
86 // ... start of ordered region ...
87 // CHECK-NEXT: call void @__kmpc_ordered([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
88 // ... loop body ...
89 // End of body: store into a[i]:
90 // CHECK: store float [[RESULT:%.+]], float* {{%.+}}
91 // CHECK-NOT: !llvm.mem.parallel_loop_access
92 // CHECK-NEXT: call void @__kmpc_end_ordered([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
93 // ... end of ordered region ...
94     #pragma omp ordered threads
95     a[i] = b[i] * c[i] * d[i];
96 // CHECK: [[IV1_2:%.+]] = load i64, i64* [[OMP_IV]]{{.*}}
97 // CHECK-NEXT: [[ADD1_2:%.+]] = add i64 [[IV1_2]], 1
98 // CHECK-NEXT: store i64 [[ADD1_2]], i64* [[OMP_IV]]
99 
100 // ... end iteration for ordered loop ...
101 // CHECK-NEXT: call void @__kmpc_dispatch_fini_8u([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
102 // CHECK-NEXT: br label %{{.+}}
103   }
104 // CHECK: [[LOOP1_END]]
105 // CHECK: [[O_LOOP1_END]]
106 // CHECK: call {{.+}} @__kmpc_barrier([[IDENT_T_TY]]* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID]])
107 // CHECK: ret void
108 }
109 
110 // CHECK-LABEL: define {{.*void}} @{{.*}}test_auto{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}})
// Codegen test: ordered worksharing loop with schedule(auto) and collapse(2).
//
// Two nested loops (char i starting from the local y up to '9', and the
// unsigned local x counting down from 11 while x > 0) are collapsed into one
// iteration space. The outer start comes from a variable, so the FileCheck
// pattern for the signed 64-bit dispatch-init call captures the last-iteration
// argument as an IR value (LAST_ITER) rather than matching a literal.
// NOTE(review): the expected schedule argument 70 is presumably the runtime's
// enum value for an ordered auto schedule -- confirm against the OpenMP
// runtime headers.
//
// The body indexes the arrays with i only; x takes part in the iteration space
// but is not used in the assignment.
//
// a: destination array; b, c, d: source arrays.
111 void test_auto(float *a, float *b, float *c, float *d) {
112   unsigned int x = 0;
113   unsigned int y = 0;
114 // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:[@%].+]])
115   #pragma omp for schedule(auto) collapse(2) ordered
116 // CHECK: call void @__kmpc_dispatch_init_8([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 70, i64 0, i64 [[LAST_ITER:%[^,]+]], i64 1, i64 1)
117 //
118 // CHECK: [[HASWORK:%.+]] = call i32 @__kmpc_dispatch_next_8([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32* [[OMP_ISLAST:%[^,]+]], i64* [[OMP_LB:%[^,]+]], i64* [[OMP_UB:%[^,]+]], i64* [[OMP_ST:%[^,]+]])
119 // CHECK-NEXT: [[O_CMP:%.+]] = icmp ne i32 [[HASWORK]], 0
120 // CHECK-NEXT: br i1 [[O_CMP]], label %[[O_LOOP1_BODY:[^,]+]], label %[[O_LOOP1_END:[^,]+]]
121 
122 // Loop header
123 // CHECK: [[O_LOOP1_BODY]]
124 // CHECK: [[LB:%.+]] = load i64, i64* [[OMP_LB]]
125 // CHECK-NEXT: store i64 [[LB]], i64* [[OMP_IV:[^,]+]]
126 // CHECK: [[IV:%.+]] = load i64, i64* [[OMP_IV]]
127 
128 // CHECK-NEXT: [[UB:%.+]] = load i64, i64* [[OMP_UB]]
129 // CHECK-NEXT: [[CMP:%.+]] = icmp sle i64 [[IV]], [[UB]]
130 // CHECK-NEXT: br i1 [[CMP]], label %[[LOOP1_BODY:[^,]+]], label %[[LOOP1_END:[^,]+]]
131 // FIXME: When the iteration count of some nested loop is not a known constant,
132 // we should pre-calculate it, like we do for the total number of iterations!
// Only the inner loop is braced; the outer 'for' has the inner loop as its
// single statement, and both are covered by collapse(2) above.
133   for (char i = static_cast<char>(y); i <= '9'; ++i)
134     for (x = 11; x > 0; --x) {
135 // CHECK: [[LOOP1_BODY]]
136 // Start of body: indices are calculated from IV:
137 // CHECK: store i8 {{%[^,]+}}, i8* {{%[^,]+}}
138 // CHECK: store i32 {{%[^,]+}}, i32* {{%[^,]+}}
139 
140 // ... start of ordered region ...
141 // CHECK: call void @__kmpc_ordered([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
142 // ... loop body ...
143 // End of body: store into a[i]:
144 // CHECK: store float [[RESULT:%.+]], float* {{%.+}}
145 // CHECK-NOT: !llvm.mem.parallel_loop_access
146 // CHECK-NEXT: call void @__kmpc_end_ordered([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
147 // ... end of ordered region ...
148     #pragma omp ordered
149     a[i] = b[i] * c[i] * d[i];
150 // CHECK: [[IV1_2:%.+]] = load i64, i64* [[OMP_IV]]{{.*}}
151 // CHECK-NEXT: [[ADD1_2:%.+]] = add nsw i64 [[IV1_2]], 1
152 // CHECK-NEXT: store i64 [[ADD1_2]], i64* [[OMP_IV]]
153 
154 // ... end iteration for ordered loop ...
155 // CHECK-NEXT: call void @__kmpc_dispatch_fini_8([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
156 // CHECK-NEXT: br label %{{.+}}
// Closes the inner (x) loop.
157   }
158 // CHECK: [[LOOP1_END]]
159 // CHECK: [[O_LOOP1_END]]
160 // CHECK: call {{.+}} @__kmpc_barrier([[IDENT_T_TY]]* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID]])
161 // CHECK: ret void
162 }
163 
164 // CHECK-LABEL: define {{.*void}} @{{.*}}runtime{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}})
// Codegen test: ordered worksharing loop with schedule(runtime) and
// collapse(2).
//
// The outer loop visits the 10 values '0'..'9' of an unsigned char and the
// inner loop the 20 values -10..9 of the local x, so the collapsed space has
// 200 iterations. The FileCheck patterns therefore expect the 32-bit
// dispatch-init call to receive 69, 0, 199, 1, 1 after the thread id.
// NOTE(review): 69 is presumably the runtime's enum value for an ordered
// runtime schedule -- confirm against the OpenMP runtime headers.
//
// The body is wrapped in 'omp ordered threads' and indexes the arrays with i
// only; x is not used in the assignment.
//
// a: destination array; b, c, d: source arrays.
165 void runtime(float *a, float *b, float *c, float *d) {
166   int x = 0;
167 // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:[@%].+]])
168   #pragma omp for collapse(2) schedule(runtime) ordered
169 // CHECK: call void @__kmpc_dispatch_init_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 69, i32 0, i32 199, i32 1, i32 1)
170 //
171 // CHECK: [[HASWORK:%.+]] = call i32 @__kmpc_dispatch_next_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32* [[OMP_ISLAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]])
172 // CHECK-NEXT: [[O_CMP:%.+]] = icmp ne i32 [[HASWORK]], 0
173 // CHECK-NEXT: br i1 [[O_CMP]], label %[[O_LOOP1_BODY:[^,]+]], label %[[O_LOOP1_END:[^,]+]]
174 
175 // Loop header
176 // CHECK: [[O_LOOP1_BODY]]
177 // CHECK: [[LB:%.+]] = load i32, i32* [[OMP_LB]]
178 // CHECK-NEXT: store i32 [[LB]], i32* [[OMP_IV:[^,]+]]
179 // CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]
180 
181 // CHECK-NEXT: [[UB:%.+]] = load i32, i32* [[OMP_UB]]
182 // CHECK-NEXT: [[CMP:%.+]] = icmp sle i32 [[IV]], [[UB]]
183 // CHECK-NEXT: br i1 [[CMP]], label %[[LOOP1_BODY:[^,]+]], label %[[LOOP1_END:[^,]+]]
// Only the inner loop is braced; both loops have constant bounds, which is why
// the upper bound in the dispatch-init pattern above is the literal 199.
184   for (unsigned char i = '0' ; i <= '9'; ++i)
185     for (x = -10; x < 10; ++x) {
186 // CHECK: [[LOOP1_BODY]]
187 // Start of body: indices are calculated from IV:
188 // CHECK: store i8 {{%[^,]+}}, i8* {{%[^,]+}}
189 // CHECK: store i32 {{%[^,]+}}, i32* {{%[^,]+}}
190 
191 // ... start of ordered region ...
192 // CHECK: call void @__kmpc_ordered([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
193 // ... loop body ...
194 // End of body: store into a[i]:
195 // CHECK: store float [[RESULT:%.+]], float* {{%.+}}
196 // CHECK-NOT: !llvm.mem.parallel_loop_access
197 // CHECK-NEXT: call void @__kmpc_end_ordered([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
198 // ... end of ordered region ...
199     #pragma omp ordered threads
200     a[i] = b[i] * c[i] * d[i];
201 // CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}
202 // CHECK-NEXT: [[ADD1_2:%.+]] = add nsw i32 [[IV1_2]], 1
203 // CHECK-NEXT: store i32 [[ADD1_2]], i32* [[OMP_IV]]
204 
205 // ... end iteration for ordered loop ...
206 // CHECK-NEXT: call void @__kmpc_dispatch_fini_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
207 // CHECK-NEXT: br label %{{.+}}
// Closes the inner (x) loop.
208   }
209 // CHECK: [[LOOP1_END]]
210 // CHECK: [[O_LOOP1_END]]
211 // CHECK: call {{.+}} @__kmpc_barrier([[IDENT_T_TY]]* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID]])
212 // CHECK: ret void
213 }
214 
// Global array written by foo_simd below. It has 10 elements and the loops do
// no bounds checking; the RUN lines only emit LLVM IR, so the function is
// compiled but not executed by this test.
215 float f[10];
216 // CHECK-LABEL: foo_simd
// Codegen test for 'omp ordered simd' regions nested in simd loops.
//
// Both loops iterate i over [low, up), store 0.0 to f[i], and then store 1.0
// to f[i] inside an 'omp ordered simd' region. For each loop the FileCheck
// patterns expect the 0.0 store to be followed immediately by a call to a
// separate function taking an i32*; the patterns after this function expect
// that function to contain the 1.0 store.
//
// low: first index (inclusive); up: end index (exclusive).
217 void foo_simd(int low, int up) {
// First loop ('omp simd'): the expected store and call both carry
// !llvm.mem.parallel_loop_access metadata.
218   // CHECK: store float 0.000000e+00, float* %{{.+}}, align {{[0-9]+}}, !llvm.mem.parallel_loop_access !
219   // CHECK-NEXT: call void [[CAP_FUNC:@.+]](i32* %{{.+}}) #{{[0-9]+}}, !llvm.mem.parallel_loop_access !
220 #pragma omp simd
221   for (int i = low; i < up; ++i) {
222     f[i] = 0.0;
223 #pragma omp ordered simd
224     f[i] = 1.0;
225   }
// Second loop ('omp for simd ordered'): the patterns for the store and the
// call do not mention the parallel-loop-access metadata.
226   // CHECK: store float 0.000000e+00, float* %{{.+}}, align {{[0-9]+}}
227   // CHECK-NEXT: call void [[CAP_FUNC:@.+]](i32* %{{.+}}) #{{[0-9]+}}
228 #pragma omp for simd ordered
229   for (int i = low; i < up; ++i) {
230     f[i] = 0.0;
231 #pragma omp ordered simd
232     f[i] = 1.0;
233   }
234 }
235 
236 // CHECK: define internal void [[CAP_FUNC]](i32* dereferenceable({{[0-9]+}}) %{{.+}}) #
237 // CHECK: store float 1.000000e+00, float* %{{.+}}, align
238 // CHECK-NEXT: ret void
239 
240 #endif // HEADER
241 
242