1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs
2 // RUN: %clang_cc1 -no-opaque-pointers -fopenmp-enable-irbuilder -verify -fopenmp -fopenmp-version=51 -x c -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
3 // expected-no-diagnostics
4
5 #ifndef HEADER
6 #define HEADER
7
8 // CHECK-LABEL: define {{.*}}@unroll_partial_heuristic_for(
9 // CHECK-NEXT: [[ENTRY:.*]]:
10 // CHECK-NEXT: %[[N_ADDR:.+]] = alloca i32, align 4
11 // CHECK-NEXT: %[[A_ADDR:.+]] = alloca float*, align 8
12 // CHECK-NEXT: %[[B_ADDR:.+]] = alloca float*, align 8
13 // CHECK-NEXT: %[[C_ADDR:.+]] = alloca float*, align 8
14 // CHECK-NEXT: %[[D_ADDR:.+]] = alloca float*, align 8
15 // CHECK-NEXT: %[[I:.+]] = alloca i32, align 4
16 // CHECK-NEXT: %[[AGG_CAPTURED:.+]] = alloca %struct.anon, align 8
17 // CHECK-NEXT: %[[AGG_CAPTURED1:.+]] = alloca %struct.anon.0, align 4
18 // CHECK-NEXT: %[[DOTCOUNT_ADDR:.+]] = alloca i32, align 4
19 // CHECK-NEXT: %[[P_LASTITER:.+]] = alloca i32, align 4
20 // CHECK-NEXT: %[[P_LOWERBOUND:.+]] = alloca i32, align 4
21 // CHECK-NEXT: %[[P_UPPERBOUND:.+]] = alloca i32, align 4
22 // CHECK-NEXT: %[[P_STRIDE:.+]] = alloca i32, align 4
23 // CHECK-NEXT: store i32 %[[N:.+]], i32* %[[N_ADDR]], align 4
24 // CHECK-NEXT: store float* %[[A:.+]], float** %[[A_ADDR]], align 8
25 // CHECK-NEXT: store float* %[[B:.+]], float** %[[B_ADDR]], align 8
26 // CHECK-NEXT: store float* %[[C:.+]], float** %[[C_ADDR]], align 8
27 // CHECK-NEXT: store float* %[[D:.+]], float** %[[D_ADDR]], align 8
28 // CHECK-NEXT: store i32 0, i32* %[[I]], align 4
29 // CHECK-NEXT: %[[TMP0:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[AGG_CAPTURED]], i32 0, i32 0
30 // CHECK-NEXT: store i32* %[[I]], i32** %[[TMP0]], align 8
31 // CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[AGG_CAPTURED]], i32 0, i32 1
32 // CHECK-NEXT: store i32* %[[N_ADDR]], i32** %[[TMP1]], align 8
33 // CHECK-NEXT: %[[TMP2:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[AGG_CAPTURED1]], i32 0, i32 0
34 // CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[I]], align 4
35 // CHECK-NEXT: store i32 %[[TMP3]], i32* %[[TMP2]], align 4
36 // CHECK-NEXT: call void @__captured_stmt(i32* %[[DOTCOUNT_ADDR]], %struct.anon* %[[AGG_CAPTURED]])
37 // CHECK-NEXT: %[[DOTCOUNT:.+]] = load i32, i32* %[[DOTCOUNT_ADDR]], align 4
38 // CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER:.+]]
39 // CHECK-EMPTY:
40 // CHECK-NEXT: [[OMP_LOOP_PREHEADER]]:
41 // CHECK-NEXT: %[[TMP4:.+]] = udiv i32 %[[DOTCOUNT]], 13
42 // CHECK-NEXT: %[[TMP5:.+]] = urem i32 %[[DOTCOUNT]], 13
43 // CHECK-NEXT: %[[TMP6:.+]] = icmp ne i32 %[[TMP5]], 0
44 // CHECK-NEXT: %[[TMP7:.+]] = zext i1 %[[TMP6]] to i32
45 // CHECK-NEXT: %[[OMP_FLOOR0_TRIPCOUNT:.+]] = add nuw i32 %[[TMP4]], %[[TMP7]]
46 // CHECK-NEXT: br label %[[OMP_FLOOR0_PREHEADER:.+]]
47 // CHECK-EMPTY:
48 // CHECK-NEXT: [[OMP_FLOOR0_PREHEADER]]:
49 // CHECK-NEXT: store i32 0, i32* %[[P_LOWERBOUND]], align 4
50 // CHECK-NEXT: %[[TMP8:.+]] = sub i32 %[[OMP_FLOOR0_TRIPCOUNT]], 1
51 // CHECK-NEXT: store i32 %[[TMP8]], i32* %[[P_UPPERBOUND]], align 4
52 // CHECK-NEXT: store i32 1, i32* %[[P_STRIDE]], align 4
53 // CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
54 // CHECK-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* %[[P_LASTITER]], i32* %[[P_LOWERBOUND]], i32* %[[P_UPPERBOUND]], i32* %[[P_STRIDE]], i32 1, i32 0)
55 // CHECK-NEXT: %[[TMP9:.+]] = load i32, i32* %[[P_LOWERBOUND]], align 4
56 // CHECK-NEXT: %[[TMP10:.+]] = load i32, i32* %[[P_UPPERBOUND]], align 4
57 // CHECK-NEXT: %[[TMP11:.+]] = sub i32 %[[TMP10]], %[[TMP9]]
58 // CHECK-NEXT: %[[TMP12:.+]] = add i32 %[[TMP11]], 1
59 // CHECK-NEXT: br label %[[OMP_FLOOR0_HEADER:.+]]
60 // CHECK-EMPTY:
61 // CHECK-NEXT: [[OMP_FLOOR0_HEADER]]:
62 // CHECK-NEXT: %[[OMP_FLOOR0_IV:.+]] = phi i32 [ 0, %[[OMP_FLOOR0_PREHEADER]] ], [ %[[OMP_FLOOR0_NEXT:.+]], %[[OMP_FLOOR0_INC:.+]] ]
63 // CHECK-NEXT: br label %[[OMP_FLOOR0_COND:.+]]
64 // CHECK-EMPTY:
65 // CHECK-NEXT: [[OMP_FLOOR0_COND]]:
66 // CHECK-NEXT: %[[OMP_FLOOR0_CMP:.+]] = icmp ult i32 %[[OMP_FLOOR0_IV]], %[[TMP12]]
67 // CHECK-NEXT: br i1 %[[OMP_FLOOR0_CMP]], label %[[OMP_FLOOR0_BODY:.+]], label %[[OMP_FLOOR0_EXIT:.+]]
68 // CHECK-EMPTY:
69 // CHECK-NEXT: [[OMP_FLOOR0_BODY]]:
70 // CHECK-NEXT: %[[TMP13:.+]] = add i32 %[[OMP_FLOOR0_IV]], %[[TMP9]]
71 // CHECK-NEXT: %[[TMP14:.+]] = icmp eq i32 %[[TMP13]], %[[OMP_FLOOR0_TRIPCOUNT]]
72 // CHECK-NEXT: %[[TMP15:.+]] = select i1 %[[TMP14]], i32 %[[TMP5]], i32 13
73 // CHECK-NEXT: br label %[[OMP_TILE0_PREHEADER:.+]]
74 // CHECK-EMPTY:
75 // CHECK-NEXT: [[OMP_TILE0_PREHEADER]]:
76 // CHECK-NEXT: br label %[[OMP_TILE0_HEADER:.+]]
77 // CHECK-EMPTY:
78 // CHECK-NEXT: [[OMP_TILE0_HEADER]]:
79 // CHECK-NEXT: %[[OMP_TILE0_IV:.+]] = phi i32 [ 0, %[[OMP_TILE0_PREHEADER]] ], [ %[[OMP_TILE0_NEXT:.+]], %[[OMP_TILE0_INC:.+]] ]
80 // CHECK-NEXT: br label %[[OMP_TILE0_COND:.+]]
81 // CHECK-EMPTY:
82 // CHECK-NEXT: [[OMP_TILE0_COND]]:
83 // CHECK-NEXT: %[[OMP_TILE0_CMP:.+]] = icmp ult i32 %[[OMP_TILE0_IV]], %[[TMP15]]
84 // CHECK-NEXT: br i1 %[[OMP_TILE0_CMP]], label %[[OMP_TILE0_BODY:.+]], label %[[OMP_TILE0_EXIT:.+]]
85 // CHECK-EMPTY:
86 // CHECK-NEXT: [[OMP_TILE0_BODY]]:
87 // CHECK-NEXT: %[[TMP16:.+]] = mul nuw i32 13, %[[TMP13]]
88 // CHECK-NEXT: %[[TMP17:.+]] = add nuw i32 %[[TMP16]], %[[OMP_TILE0_IV]]
89 // CHECK-NEXT: br label %[[OMP_LOOP_BODY:.+]]
90 // CHECK-EMPTY:
91 // CHECK-NEXT: [[OMP_LOOP_BODY]]:
92 // CHECK-NEXT: call void @__captured_stmt.1(i32* %[[I]], i32 %[[TMP17]], %struct.anon.0* %[[AGG_CAPTURED1]])
93 // CHECK-NEXT: %[[TMP18:.+]] = load float*, float** %[[B_ADDR]], align 8
94 // CHECK-NEXT: %[[TMP19:.+]] = load i32, i32* %[[I]], align 4
95 // CHECK-NEXT: %[[IDXPROM:.+]] = sext i32 %[[TMP19]] to i64
96 // CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, float* %[[TMP18]], i64 %[[IDXPROM]]
97 // CHECK-NEXT: %[[TMP20:.+]] = load float, float* %[[ARRAYIDX]], align 4
98 // CHECK-NEXT: %[[TMP21:.+]] = load float*, float** %[[C_ADDR]], align 8
99 // CHECK-NEXT: %[[TMP22:.+]] = load i32, i32* %[[I]], align 4
100 // CHECK-NEXT: %[[IDXPROM2:.+]] = sext i32 %[[TMP22]] to i64
101 // CHECK-NEXT: %[[ARRAYIDX3:.+]] = getelementptr inbounds float, float* %[[TMP21]], i64 %[[IDXPROM2]]
102 // CHECK-NEXT: %[[TMP23:.+]] = load float, float* %[[ARRAYIDX3]], align 4
103 // CHECK-NEXT: %[[MUL:.+]] = fmul float %[[TMP20]], %[[TMP23]]
104 // CHECK-NEXT: %[[TMP24:.+]] = load float*, float** %[[D_ADDR]], align 8
105 // CHECK-NEXT: %[[TMP25:.+]] = load i32, i32* %[[I]], align 4
106 // CHECK-NEXT: %[[IDXPROM4:.+]] = sext i32 %[[TMP25]] to i64
107 // CHECK-NEXT: %[[ARRAYIDX5:.+]] = getelementptr inbounds float, float* %[[TMP24]], i64 %[[IDXPROM4]]
108 // CHECK-NEXT: %[[TMP26:.+]] = load float, float* %[[ARRAYIDX5]], align 4
109 // CHECK-NEXT: %[[MUL6:.+]] = fmul float %[[MUL]], %[[TMP26]]
110 // CHECK-NEXT: %[[TMP27:.+]] = load float*, float** %[[A_ADDR]], align 8
111 // CHECK-NEXT: %[[TMP28:.+]] = load i32, i32* %[[I]], align 4
112 // CHECK-NEXT: %[[IDXPROM7:.+]] = sext i32 %[[TMP28]] to i64
113 // CHECK-NEXT: %[[ARRAYIDX8:.+]] = getelementptr inbounds float, float* %[[TMP27]], i64 %[[IDXPROM7]]
114 // CHECK-NEXT: store float %[[MUL6]], float* %[[ARRAYIDX8]], align 4
115 // CHECK-NEXT: br label %[[OMP_TILE0_INC]]
116 // CHECK-EMPTY:
117 // CHECK-NEXT: [[OMP_TILE0_INC]]:
118 // CHECK-NEXT: %[[OMP_TILE0_NEXT]] = add nuw i32 %[[OMP_TILE0_IV]], 1
119 // CHECK-NEXT: br label %[[OMP_TILE0_HEADER]], !llvm.loop ![[LOOP3:[0-9]+]]
120 // CHECK-EMPTY:
121 // CHECK-NEXT: [[OMP_TILE0_EXIT]]:
122 // CHECK-NEXT: br label %[[OMP_TILE0_AFTER:.+]]
123 // CHECK-EMPTY:
124 // CHECK-NEXT: [[OMP_TILE0_AFTER]]:
125 // CHECK-NEXT: br label %[[OMP_FLOOR0_INC]]
126 // CHECK-EMPTY:
127 // CHECK-NEXT: [[OMP_FLOOR0_INC]]:
128 // CHECK-NEXT: %[[OMP_FLOOR0_NEXT]] = add nuw i32 %[[OMP_FLOOR0_IV]], 1
129 // CHECK-NEXT: br label %[[OMP_FLOOR0_HEADER]]
130 // CHECK-EMPTY:
131 // CHECK-NEXT: [[OMP_FLOOR0_EXIT]]:
132 // CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]])
133 // CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM9:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
134 // CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @2, i32 %[[OMP_GLOBAL_THREAD_NUM9]])
135 // CHECK-NEXT: br label %[[OMP_FLOOR0_AFTER:.+]]
136 // CHECK-EMPTY:
137 // CHECK-NEXT: [[OMP_FLOOR0_AFTER]]:
138 // CHECK-NEXT: br label %[[OMP_LOOP_AFTER:.+]]
139 // CHECK-EMPTY:
140 // CHECK-NEXT: [[OMP_LOOP_AFTER]]:
141 // CHECK-NEXT: ret void
142 // CHECK-NEXT: }
143
// Function under test: an element-wise product a[i] = b[i] * c[i] * d[i]
// over the first n elements, written as a worksharing loop (`omp for`)
// whose associated loop is produced by `omp unroll partial(13)`.
//
// The FileCheck expectations in this file match the IR emitted for exactly
// this loop: a floor/tile loop pair with tile size 13, a static worksharing
// init/fini pair around the floor loop, and `llvm.loop.unroll.count 13`
// metadata on the tile loop. Keep the pragmas, the loop header and the body
// expression unchanged, or the CHECK lines will no longer match.
//
// Parameters:
//   n - number of elements to process; the loop body does not run if n <= 0.
//   a - output array, written at indices [0, n).
//   b, c, d - input arrays, read at indices [0, n).
void unroll_partial_heuristic_for(int n, float *a, float *b, float *c, float *d) {
#pragma omp for
#pragma omp unroll partial(13)
  for (int i = 0; i < n; i++) {
    a[i] = b[i] * c[i] * d[i];
  }
}
151
152 #endif // HEADER
153
154 // CHECK-LABEL: define {{.*}}@__captured_stmt(
155 // CHECK-NEXT: [[ENTRY:.*]]:
156 // CHECK-NEXT: %[[DISTANCE_ADDR:.+]] = alloca i32*, align 8
157 // CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca %struct.anon*, align 8
158 // CHECK-NEXT: %[[DOTSTART:.+]] = alloca i32, align 4
159 // CHECK-NEXT: %[[DOTSTOP:.+]] = alloca i32, align 4
160 // CHECK-NEXT: %[[DOTSTEP:.+]] = alloca i32, align 4
161 // CHECK-NEXT: store i32* %[[DISTANCE:.+]], i32** %[[DISTANCE_ADDR]], align 8
162 // CHECK-NEXT: store %struct.anon* %[[__CONTEXT:.+]], %struct.anon** %[[__CONTEXT_ADDR]], align 8
163 // CHECK-NEXT: %[[TMP0:.+]] = load %struct.anon*, %struct.anon** %[[__CONTEXT_ADDR]], align 8
164 // CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[TMP0]], i32 0, i32 0
165 // CHECK-NEXT: %[[TMP2:.+]] = load i32*, i32** %[[TMP1]], align 8
166 // CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[TMP2]], align 4
167 // CHECK-NEXT: store i32 %[[TMP3]], i32* %[[DOTSTART]], align 4
168 // CHECK-NEXT: %[[TMP4:.+]] = getelementptr inbounds %struct.anon, %struct.anon* %[[TMP0]], i32 0, i32 1
169 // CHECK-NEXT: %[[TMP5:.+]] = load i32*, i32** %[[TMP4]], align 8
170 // CHECK-NEXT: %[[TMP6:.+]] = load i32, i32* %[[TMP5]], align 4
171 // CHECK-NEXT: store i32 %[[TMP6]], i32* %[[DOTSTOP]], align 4
172 // CHECK-NEXT: store i32 1, i32* %[[DOTSTEP]], align 4
173 // CHECK-NEXT: %[[TMP7:.+]] = load i32, i32* %[[DOTSTART]], align 4
174 // CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[DOTSTOP]], align 4
175 // CHECK-NEXT: %[[CMP:.+]] = icmp slt i32 %[[TMP7]], %[[TMP8]]
176 // CHECK-NEXT: br i1 %[[CMP]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]]
177 // CHECK-EMPTY:
178 // CHECK-NEXT: [[COND_TRUE]]:
179 // CHECK-NEXT: %[[TMP9:.+]] = load i32, i32* %[[DOTSTOP]], align 4
180 // CHECK-NEXT: %[[TMP10:.+]] = load i32, i32* %[[DOTSTART]], align 4
181 // CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP9]], %[[TMP10]]
182 // CHECK-NEXT: %[[TMP11:.+]] = load i32, i32* %[[DOTSTEP]], align 4
183 // CHECK-NEXT: %[[SUB1:.+]] = sub i32 %[[TMP11]], 1
184 // CHECK-NEXT: %[[ADD:.+]] = add i32 %[[SUB]], %[[SUB1]]
185 // CHECK-NEXT: %[[TMP12:.+]] = load i32, i32* %[[DOTSTEP]], align 4
186 // CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP12]]
187 // CHECK-NEXT: br label %[[COND_END:.+]]
188 // CHECK-EMPTY:
189 // CHECK-NEXT: [[COND_FALSE]]:
190 // CHECK-NEXT: br label %[[COND_END]]
191 // CHECK-EMPTY:
192 // CHECK-NEXT: [[COND_END]]:
193 // CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ]
194 // CHECK-NEXT: %[[TMP13:.+]] = load i32*, i32** %[[DISTANCE_ADDR]], align 8
195 // CHECK-NEXT: store i32 %[[COND]], i32* %[[TMP13]], align 4
196 // CHECK-NEXT: ret void
197 // CHECK-NEXT: }
198
199
200 // CHECK-LABEL: define {{.*}}@__captured_stmt.1(
201 // CHECK-NEXT: [[ENTRY:.*]]:
202 // CHECK-NEXT: %[[LOOPVAR_ADDR:.+]] = alloca i32*, align 8
203 // CHECK-NEXT: %[[LOGICAL_ADDR:.+]] = alloca i32, align 4
204 // CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca %struct.anon.0*, align 8
205 // CHECK-NEXT: store i32* %[[LOOPVAR:.+]], i32** %[[LOOPVAR_ADDR]], align 8
206 // CHECK-NEXT: store i32 %[[LOGICAL:.+]], i32* %[[LOGICAL_ADDR]], align 4
207 // CHECK-NEXT: store %struct.anon.0* %[[__CONTEXT:.+]], %struct.anon.0** %[[__CONTEXT_ADDR]], align 8
208 // CHECK-NEXT: %[[TMP0:.+]] = load %struct.anon.0*, %struct.anon.0** %[[__CONTEXT_ADDR]], align 8
209 // CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* %[[TMP0]], i32 0, i32 0
210 // CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[TMP1]], align 4
211 // CHECK-NEXT: %[[TMP3:.+]] = load i32, i32* %[[LOGICAL_ADDR]], align 4
212 // CHECK-NEXT: %[[MUL:.+]] = mul i32 1, %[[TMP3]]
213 // CHECK-NEXT: %[[ADD:.+]] = add i32 %[[TMP2]], %[[MUL]]
214 // CHECK-NEXT: %[[TMP4:.+]] = load i32*, i32** %[[LOOPVAR_ADDR]], align 8
215 // CHECK-NEXT: store i32 %[[ADD]], i32* %[[TMP4]], align 4
216 // CHECK-NEXT: ret void
217 // CHECK-NEXT: }
218
219
220 // CHECK: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4}
221 // CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 51}
222 // CHECK: ![[META2:[0-9]+]] =
223 // CHECK: ![[LOOP3]] = distinct !{![[LOOP3]], ![[LOOPPROP4:[0-9]+]], ![[LOOPPROP5:[0-9]+]]}
224 // CHECK: ![[LOOPPROP4]] = !{!"llvm.loop.unroll.enable"}
225 // CHECK: ![[LOOPPROP5]] = !{!"llvm.loop.unroll.count", i32 13}
226