1; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
2
3target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
4
5;CHECK-LABEL: @sqrt_f32(
6;CHECK: llvm.sqrt.v4f32
7;CHECK: ret void
; Scalar loop: x[i] = llvm.sqrt.f32(y[i]) for i = 0 .. n-1.
; The loop is bypassed entirely when n <= 0.
define void @sqrt_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src = getelementptr inbounds float, float* %y, i64 %i
  %val = load float, float* %src, align 4
  %res = tail call float @llvm.sqrt.f32(float %val) nounwind readnone
  %dst = getelementptr inbounds float, float* %x, i64 %i
  store float %res, float* %dst, align 4
  %i.next = add i64 %i, 1
  ; The exit test is done in i32 against the original trip count.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
28
29declare float @llvm.sqrt.f32(float) nounwind readnone
30
31;CHECK-LABEL: @sqrt_f64(
32;CHECK: llvm.sqrt.v4f64
33;CHECK: ret void
; Scalar loop: x[i] = llvm.sqrt.f64(y[i]) for i = 0 .. n-1.
; The loop is bypassed entirely when n <= 0.
define void @sqrt_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src = getelementptr inbounds double, double* %y, i64 %i
  %val = load double, double* %src, align 8
  %res = tail call double @llvm.sqrt.f64(double %val) nounwind readnone
  %dst = getelementptr inbounds double, double* %x, i64 %i
  store double %res, double* %dst, align 8
  %i.next = add i64 %i, 1
  ; The exit test is done in i32 against the original trip count.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
54
55declare double @llvm.sqrt.f64(double) nounwind readnone
56
57;CHECK-LABEL: @sin_f32(
58;CHECK: llvm.sin.v4f32
59;CHECK: ret void
; Scalar loop: x[i] = llvm.sin.f32(y[i]) for i = 0 .. n-1.
; The loop is bypassed entirely when n <= 0.
define void @sin_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src = getelementptr inbounds float, float* %y, i64 %i
  %val = load float, float* %src, align 4
  %res = tail call float @llvm.sin.f32(float %val) nounwind readnone
  %dst = getelementptr inbounds float, float* %x, i64 %i
  store float %res, float* %dst, align 4
  %i.next = add i64 %i, 1
  ; The exit test is done in i32 against the original trip count.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
80
81declare float @llvm.sin.f32(float) nounwind readnone
82
83;CHECK-LABEL: @sin_f64(
84;CHECK: llvm.sin.v4f64
85;CHECK: ret void
; Scalar loop: x[i] = llvm.sin.f64(y[i]) for i = 0 .. n-1.
; The loop is bypassed entirely when n <= 0.
define void @sin_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src = getelementptr inbounds double, double* %y, i64 %i
  %val = load double, double* %src, align 8
  %res = tail call double @llvm.sin.f64(double %val) nounwind readnone
  %dst = getelementptr inbounds double, double* %x, i64 %i
  store double %res, double* %dst, align 8
  %i.next = add i64 %i, 1
  ; The exit test is done in i32 against the original trip count.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
106
107declare double @llvm.sin.f64(double) nounwind readnone
108
109;CHECK-LABEL: @cos_f32(
110;CHECK: llvm.cos.v4f32
111;CHECK: ret void
; Scalar loop: x[i] = llvm.cos.f32(y[i]) for i = 0 .. n-1.
; The loop is bypassed entirely when n <= 0.
define void @cos_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src = getelementptr inbounds float, float* %y, i64 %i
  %val = load float, float* %src, align 4
  %res = tail call float @llvm.cos.f32(float %val) nounwind readnone
  %dst = getelementptr inbounds float, float* %x, i64 %i
  store float %res, float* %dst, align 4
  %i.next = add i64 %i, 1
  ; The exit test is done in i32 against the original trip count.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
132
133declare float @llvm.cos.f32(float) nounwind readnone
134
135;CHECK-LABEL: @cos_f64(
136;CHECK: llvm.cos.v4f64
137;CHECK: ret void
; Scalar loop: x[i] = llvm.cos.f64(y[i]) for i = 0 .. n-1.
; The loop is bypassed entirely when n <= 0.
define void @cos_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src = getelementptr inbounds double, double* %y, i64 %i
  %val = load double, double* %src, align 8
  %res = tail call double @llvm.cos.f64(double %val) nounwind readnone
  %dst = getelementptr inbounds double, double* %x, i64 %i
  store double %res, double* %dst, align 8
  %i.next = add i64 %i, 1
  ; The exit test is done in i32 against the original trip count.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
158
159declare double @llvm.cos.f64(double) nounwind readnone
160
161;CHECK-LABEL: @exp_f32(
162;CHECK: llvm.exp.v4f32
163;CHECK: ret void
; Scalar loop: x[i] = llvm.exp.f32(y[i]) for i = 0 .. n-1.
; The loop is bypassed entirely when n <= 0.
define void @exp_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src = getelementptr inbounds float, float* %y, i64 %i
  %val = load float, float* %src, align 4
  %res = tail call float @llvm.exp.f32(float %val) nounwind readnone
  %dst = getelementptr inbounds float, float* %x, i64 %i
  store float %res, float* %dst, align 4
  %i.next = add i64 %i, 1
  ; The exit test is done in i32 against the original trip count.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
184
185declare float @llvm.exp.f32(float) nounwind readnone
186
187;CHECK-LABEL: @exp_f64(
188;CHECK: llvm.exp.v4f64
189;CHECK: ret void
; Scalar loop: x[i] = llvm.exp.f64(y[i]) for i = 0 .. n-1.
; The loop is bypassed entirely when n <= 0.
define void @exp_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src = getelementptr inbounds double, double* %y, i64 %i
  %val = load double, double* %src, align 8
  %res = tail call double @llvm.exp.f64(double %val) nounwind readnone
  %dst = getelementptr inbounds double, double* %x, i64 %i
  store double %res, double* %dst, align 8
  %i.next = add i64 %i, 1
  ; The exit test is done in i32 against the original trip count.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
210
211declare double @llvm.exp.f64(double) nounwind readnone
212
213;CHECK-LABEL: @exp2_f32(
214;CHECK: llvm.exp2.v4f32
215;CHECK: ret void
; Scalar loop: x[i] = llvm.exp2.f32(y[i]) for i = 0 .. n-1.
; The loop is bypassed entirely when n <= 0.
define void @exp2_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src = getelementptr inbounds float, float* %y, i64 %i
  %val = load float, float* %src, align 4
  %res = tail call float @llvm.exp2.f32(float %val) nounwind readnone
  %dst = getelementptr inbounds float, float* %x, i64 %i
  store float %res, float* %dst, align 4
  %i.next = add i64 %i, 1
  ; The exit test is done in i32 against the original trip count.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
236
237declare float @llvm.exp2.f32(float) nounwind readnone
238
239;CHECK-LABEL: @exp2_f64(
240;CHECK: llvm.exp2.v4f64
241;CHECK: ret void
; Scalar loop: x[i] = llvm.exp2.f64(y[i]) for i = 0 .. n-1.
; The loop is bypassed entirely when n <= 0.
define void @exp2_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src = getelementptr inbounds double, double* %y, i64 %i
  %val = load double, double* %src, align 8
  %res = tail call double @llvm.exp2.f64(double %val) nounwind readnone
  %dst = getelementptr inbounds double, double* %x, i64 %i
  store double %res, double* %dst, align 8
  %i.next = add i64 %i, 1
  ; The exit test is done in i32 against the original trip count.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
262
263declare double @llvm.exp2.f64(double) nounwind readnone
264
265;CHECK-LABEL: @log_f32(
266;CHECK: llvm.log.v4f32
267;CHECK: ret void
; Scalar loop: x[i] = llvm.log.f32(y[i]) for i = 0 .. n-1.
; The loop is bypassed entirely when n <= 0.
define void @log_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src = getelementptr inbounds float, float* %y, i64 %i
  %val = load float, float* %src, align 4
  %res = tail call float @llvm.log.f32(float %val) nounwind readnone
  %dst = getelementptr inbounds float, float* %x, i64 %i
  store float %res, float* %dst, align 4
  %i.next = add i64 %i, 1
  ; The exit test is done in i32 against the original trip count.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
288
289declare float @llvm.log.f32(float) nounwind readnone
290
291;CHECK-LABEL: @log_f64(
292;CHECK: llvm.log.v4f64
293;CHECK: ret void
; Scalar loop: x[i] = llvm.log.f64(y[i]) for i = 0 .. n-1.
; The loop is bypassed entirely when n <= 0.
define void @log_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src = getelementptr inbounds double, double* %y, i64 %i
  %val = load double, double* %src, align 8
  %res = tail call double @llvm.log.f64(double %val) nounwind readnone
  %dst = getelementptr inbounds double, double* %x, i64 %i
  store double %res, double* %dst, align 8
  %i.next = add i64 %i, 1
  ; The exit test is done in i32 against the original trip count.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
314
315declare double @llvm.log.f64(double) nounwind readnone
316
317;CHECK-LABEL: @log10_f32(
318;CHECK: llvm.log10.v4f32
319;CHECK: ret void
; Scalar loop: x[i] = llvm.log10.f32(y[i]) for i = 0 .. n-1.
; The loop is bypassed entirely when n <= 0.
define void @log10_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src = getelementptr inbounds float, float* %y, i64 %i
  %val = load float, float* %src, align 4
  %res = tail call float @llvm.log10.f32(float %val) nounwind readnone
  %dst = getelementptr inbounds float, float* %x, i64 %i
  store float %res, float* %dst, align 4
  %i.next = add i64 %i, 1
  ; The exit test is done in i32 against the original trip count.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
340
341declare float @llvm.log10.f32(float) nounwind readnone
342
343;CHECK-LABEL: @log10_f64(
344;CHECK: llvm.log10.v4f64
345;CHECK: ret void
; Scalar loop: x[i] = llvm.log10.f64(y[i]) for i = 0 .. n-1.
; The loop is bypassed entirely when n <= 0.
define void @log10_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src = getelementptr inbounds double, double* %y, i64 %i
  %val = load double, double* %src, align 8
  %res = tail call double @llvm.log10.f64(double %val) nounwind readnone
  %dst = getelementptr inbounds double, double* %x, i64 %i
  store double %res, double* %dst, align 8
  %i.next = add i64 %i, 1
  ; The exit test is done in i32 against the original trip count.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
366
367declare double @llvm.log10.f64(double) nounwind readnone
368
369;CHECK-LABEL: @log2_f32(
370;CHECK: llvm.log2.v4f32
371;CHECK: ret void
; Scalar loop: x[i] = llvm.log2.f32(y[i]) for i = 0 .. n-1.
; The loop is bypassed entirely when n <= 0.
define void @log2_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src = getelementptr inbounds float, float* %y, i64 %i
  %val = load float, float* %src, align 4
  %res = tail call float @llvm.log2.f32(float %val) nounwind readnone
  %dst = getelementptr inbounds float, float* %x, i64 %i
  store float %res, float* %dst, align 4
  %i.next = add i64 %i, 1
  ; The exit test is done in i32 against the original trip count.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
392
393declare float @llvm.log2.f32(float) nounwind readnone
394
395;CHECK-LABEL: @log2_f64(
396;CHECK: llvm.log2.v4f64
397;CHECK: ret void
; Scalar loop: x[i] = llvm.log2.f64(y[i]) for i = 0 .. n-1.
; The loop is bypassed entirely when n <= 0.
define void @log2_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src = getelementptr inbounds double, double* %y, i64 %i
  %val = load double, double* %src, align 8
  %res = tail call double @llvm.log2.f64(double %val) nounwind readnone
  %dst = getelementptr inbounds double, double* %x, i64 %i
  store double %res, double* %dst, align 8
  %i.next = add i64 %i, 1
  ; The exit test is done in i32 against the original trip count.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
418
419declare double @llvm.log2.f64(double) nounwind readnone
420
421;CHECK-LABEL: @fabs_f32(
422;CHECK: llvm.fabs.v4f32
423;CHECK: ret void
; Scalar loop: x[i] = llvm.fabs.f32(y[i]) for i = 0 .. n-1.
; The loop is bypassed entirely when n <= 0.
define void @fabs_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src = getelementptr inbounds float, float* %y, i64 %i
  %val = load float, float* %src, align 4
  %res = tail call float @llvm.fabs.f32(float %val) nounwind readnone
  %dst = getelementptr inbounds float, float* %x, i64 %i
  store float %res, float* %dst, align 4
  %i.next = add i64 %i, 1
  ; The exit test is done in i32 against the original trip count.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
444
445declare float @llvm.fabs.f32(float) nounwind readnone
446
;CHECK-LABEL: @fabs_f64(
;CHECK: llvm.fabs.v4f64
;CHECK: ret void
; Scalar loop: x[i] = fabs(y[i]) on doubles for i = 0 .. n-1.
; The loop is bypassed entirely when n <= 0.
; The FileCheck directives above were missing, so vectorization of this
; function was never verified, unlike its f32 counterpart.
; NOTE(review): the call uses the unsuffixed name @llvm.fabs(double), matching
; the file's declaration; presumably the IR reader remangles it to
; llvm.fabs.f64 on load -- confirm before renaming it.
define void @fabs_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
entry:
  %cmp6 = icmp sgt i32 %n, 0
  br i1 %cmp6, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds double, double* %y, i64 %indvars.iv
  %0 = load double, double* %arrayidx, align 8
  %call = tail call double @llvm.fabs(double %0) nounwind readnone
  %arrayidx2 = getelementptr inbounds double, double* %x, i64 %indvars.iv
  store double %call, double* %arrayidx2, align 8
  %indvars.iv.next = add i64 %indvars.iv, 1
  ; The exit test is done in i32 against the original trip count.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ret void
}
467
468declare double @llvm.fabs(double) nounwind readnone
469
470;CHECK-LABEL: @copysign_f32(
471;CHECK: llvm.copysign.v4f32
472;CHECK: ret void
; Scalar loop: x[i] = llvm.copysign.f32(y[i], z[i]) for i = 0 .. n-1,
; i.e. the first operand is loaded from y and the second from z.
; The loop is bypassed entirely when n <= 0.
define void @copysign_f32(i32 %n, float* noalias %y, float* noalias %x, float* noalias %z) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %mag.ptr = getelementptr inbounds float, float* %y, i64 %i
  %mag = load float, float* %mag.ptr, align 4
  %sgn.ptr = getelementptr inbounds float, float* %z, i64 %i
  %sgn = load float, float* %sgn.ptr, align 4
  %res = tail call float @llvm.copysign.f32(float %mag, float %sgn) nounwind readnone
  %dst = getelementptr inbounds float, float* %x, i64 %i
  store float %res, float* %dst, align 4
  %i.next = add i64 %i, 1
  ; The exit test is done in i32 against the original trip count.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
495
496declare float @llvm.copysign.f32(float, float) nounwind readnone
497
;CHECK-LABEL: @copysign_f64(
;CHECK: llvm.copysign.v4f64
;CHECK: ret void
; Scalar loop: x[i] = copysign(y[i], z[i]) on doubles for i = 0 .. n-1.
; The loop is bypassed entirely when n <= 0.
; Fixes relative to the previous revision:
;  * the second operand is now loaded through %arrayidx1 (z[i]); it used to be
;    re-loaded through %arrayidx (y[i]), leaving %arrayidx1 and %z unused and
;    making both copysign operands the same value;
;  * FileCheck directives added so the vectorized call is actually verified,
;    matching the f32 variant.
define void @copysign_f64(i32 %n, double* noalias %y, double* noalias %x, double* noalias %z) nounwind uwtable {
entry:
  %cmp6 = icmp sgt i32 %n, 0
  br i1 %cmp6, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds double, double* %y, i64 %indvars.iv
  %0 = load double, double* %arrayidx, align 8
  %arrayidx1 = getelementptr inbounds double, double* %z, i64 %indvars.iv
  ; Second operand comes from z[i].
  %1 = load double, double* %arrayidx1, align 8
  %call = tail call double @llvm.copysign(double %0, double %1) nounwind readnone
  %arrayidx2 = getelementptr inbounds double, double* %x, i64 %indvars.iv
  store double %call, double* %arrayidx2, align 8
  %indvars.iv.next = add i64 %indvars.iv, 1
  ; The exit test is done in i32 against the original trip count.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ret void
}
520
521declare double @llvm.copysign(double, double) nounwind readnone
522
523;CHECK-LABEL: @floor_f32(
524;CHECK: llvm.floor.v4f32
525;CHECK: ret void
; Scalar loop: x[i] = llvm.floor.f32(y[i]) for i = 0 .. n-1.
; The loop is bypassed entirely when n <= 0.
define void @floor_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src = getelementptr inbounds float, float* %y, i64 %i
  %val = load float, float* %src, align 4
  %res = tail call float @llvm.floor.f32(float %val) nounwind readnone
  %dst = getelementptr inbounds float, float* %x, i64 %i
  store float %res, float* %dst, align 4
  %i.next = add i64 %i, 1
  ; The exit test is done in i32 against the original trip count.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
546
547declare float @llvm.floor.f32(float) nounwind readnone
548
549;CHECK-LABEL: @floor_f64(
550;CHECK: llvm.floor.v4f64
551;CHECK: ret void
; Scalar loop: x[i] = llvm.floor.f64(y[i]) for i = 0 .. n-1.
; The loop is bypassed entirely when n <= 0.
define void @floor_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src = getelementptr inbounds double, double* %y, i64 %i
  %val = load double, double* %src, align 8
  %res = tail call double @llvm.floor.f64(double %val) nounwind readnone
  %dst = getelementptr inbounds double, double* %x, i64 %i
  store double %res, double* %dst, align 8
  %i.next = add i64 %i, 1
  ; The exit test is done in i32 against the original trip count.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
572
573declare double @llvm.floor.f64(double) nounwind readnone
574
575;CHECK-LABEL: @ceil_f32(
576;CHECK: llvm.ceil.v4f32
577;CHECK: ret void
; Scalar loop: x[i] = llvm.ceil.f32(y[i]) for i = 0 .. n-1.
; The loop is bypassed entirely when n <= 0.
define void @ceil_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src = getelementptr inbounds float, float* %y, i64 %i
  %val = load float, float* %src, align 4
  %res = tail call float @llvm.ceil.f32(float %val) nounwind readnone
  %dst = getelementptr inbounds float, float* %x, i64 %i
  store float %res, float* %dst, align 4
  %i.next = add i64 %i, 1
  ; The exit test is done in i32 against the original trip count.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
598
599declare float @llvm.ceil.f32(float) nounwind readnone
600
601;CHECK-LABEL: @ceil_f64(
602;CHECK: llvm.ceil.v4f64
603;CHECK: ret void
; Scalar loop: x[i] = llvm.ceil.f64(y[i]) for i = 0 .. n-1.
; The loop is bypassed entirely when n <= 0.
define void @ceil_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src = getelementptr inbounds double, double* %y, i64 %i
  %val = load double, double* %src, align 8
  %res = tail call double @llvm.ceil.f64(double %val) nounwind readnone
  %dst = getelementptr inbounds double, double* %x, i64 %i
  store double %res, double* %dst, align 8
  %i.next = add i64 %i, 1
  ; The exit test is done in i32 against the original trip count.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
624
625declare double @llvm.ceil.f64(double) nounwind readnone
626
627;CHECK-LABEL: @trunc_f32(
628;CHECK: llvm.trunc.v4f32
629;CHECK: ret void
; Scalar loop: x[i] = llvm.trunc.f32(y[i]) for i = 0 .. n-1.
; The loop is bypassed entirely when n <= 0.
define void @trunc_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src = getelementptr inbounds float, float* %y, i64 %i
  %val = load float, float* %src, align 4
  %res = tail call float @llvm.trunc.f32(float %val) nounwind readnone
  %dst = getelementptr inbounds float, float* %x, i64 %i
  store float %res, float* %dst, align 4
  %i.next = add i64 %i, 1
  ; The exit test is done in i32 against the original trip count.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
650
651declare float @llvm.trunc.f32(float) nounwind readnone
652
653;CHECK-LABEL: @trunc_f64(
654;CHECK: llvm.trunc.v4f64
655;CHECK: ret void
; Scalar loop: x[i] = llvm.trunc.f64(y[i]) for i = 0 .. n-1.
; The loop is bypassed entirely when n <= 0.
define void @trunc_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src = getelementptr inbounds double, double* %y, i64 %i
  %val = load double, double* %src, align 8
  %res = tail call double @llvm.trunc.f64(double %val) nounwind readnone
  %dst = getelementptr inbounds double, double* %x, i64 %i
  store double %res, double* %dst, align 8
  %i.next = add i64 %i, 1
  ; The exit test is done in i32 against the original trip count.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
676
677declare double @llvm.trunc.f64(double) nounwind readnone
678
679;CHECK-LABEL: @rint_f32(
680;CHECK: llvm.rint.v4f32
681;CHECK: ret void
; Scalar loop: x[i] = llvm.rint.f32(y[i]) for i = 0 .. n-1.
; The loop is bypassed entirely when n <= 0.
define void @rint_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src = getelementptr inbounds float, float* %y, i64 %i
  %val = load float, float* %src, align 4
  %res = tail call float @llvm.rint.f32(float %val) nounwind readnone
  %dst = getelementptr inbounds float, float* %x, i64 %i
  store float %res, float* %dst, align 4
  %i.next = add i64 %i, 1
  ; The exit test is done in i32 against the original trip count.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
702
703declare float @llvm.rint.f32(float) nounwind readnone
704
705;CHECK-LABEL: @rint_f64(
706;CHECK: llvm.rint.v4f64
707;CHECK: ret void
; Scalar loop: x[i] = llvm.rint.f64(y[i]) for i = 0 .. n-1.
; The loop is bypassed entirely when n <= 0.
define void @rint_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src = getelementptr inbounds double, double* %y, i64 %i
  %val = load double, double* %src, align 8
  %res = tail call double @llvm.rint.f64(double %val) nounwind readnone
  %dst = getelementptr inbounds double, double* %x, i64 %i
  store double %res, double* %dst, align 8
  %i.next = add i64 %i, 1
  ; The exit test is done in i32 against the original trip count.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
728
729declare double @llvm.rint.f64(double) nounwind readnone
730
731;CHECK-LABEL: @nearbyint_f32(
732;CHECK: llvm.nearbyint.v4f32
733;CHECK: ret void
; Scalar loop: x[i] = llvm.nearbyint.f32(y[i]) for i = 0 .. n-1.
; The loop is bypassed entirely when n <= 0.
define void @nearbyint_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src = getelementptr inbounds float, float* %y, i64 %i
  %val = load float, float* %src, align 4
  %res = tail call float @llvm.nearbyint.f32(float %val) nounwind readnone
  %dst = getelementptr inbounds float, float* %x, i64 %i
  store float %res, float* %dst, align 4
  %i.next = add i64 %i, 1
  ; The exit test is done in i32 against the original trip count.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
754
755declare float @llvm.nearbyint.f32(float) nounwind readnone
756
; Test: x[i] = llvm.nearbyint.f64(y[i]) for 0 <= i < n (the loop is bypassed
; when n <= 0). The scalar call is expected to be widened to the
; <4 x double> overload of the intrinsic.
;CHECK-LABEL: @nearbyint_f64(
;CHECK: llvm.nearbyint.v4f64
;CHECK: ret void
define void @nearbyint_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:                                             ; preds = %entry, %loop
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src = getelementptr inbounds double, double* %y, i64 %i
  %in = load double, double* %src, align 8
  %res = tail call double @llvm.nearbyint.f64(double %in) nounwind readnone
  %dst = getelementptr inbounds double, double* %x, i64 %i
  store double %res, double* %dst, align 8
  %i.next = add i64 %i, 1
  ; Narrow the 64-bit counter to compare against the i32 trip count.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop, %entry
  ret void
}

declare double @llvm.nearbyint.f64(double) nounwind readnone
782
; Test: x[i] = llvm.round.f32(y[i]) for 0 <= i < n (the loop is bypassed when
; n <= 0). The scalar call is expected to be widened to the <4 x float>
; overload of the intrinsic.
;CHECK-LABEL: @round_f32(
;CHECK: llvm.round.v4f32
;CHECK: ret void
define void @round_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:                                             ; preds = %entry, %loop
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src = getelementptr inbounds float, float* %y, i64 %i
  %in = load float, float* %src, align 4
  %res = tail call float @llvm.round.f32(float %in) nounwind readnone
  %dst = getelementptr inbounds float, float* %x, i64 %i
  store float %res, float* %dst, align 4
  %i.next = add i64 %i, 1
  ; Narrow the 64-bit counter to compare against the i32 trip count.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop, %entry
  ret void
}

declare float @llvm.round.f32(float) nounwind readnone
808
; Test: x[i] = llvm.round.f64(y[i]) for 0 <= i < n (the loop is bypassed when
; n <= 0). The scalar call is expected to be widened to the <4 x double>
; overload of the intrinsic.
;CHECK-LABEL: @round_f64(
;CHECK: llvm.round.v4f64
;CHECK: ret void
define void @round_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:                                             ; preds = %entry, %loop
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src = getelementptr inbounds double, double* %y, i64 %i
  %in = load double, double* %src, align 8
  %res = tail call double @llvm.round.f64(double %in) nounwind readnone
  %dst = getelementptr inbounds double, double* %x, i64 %i
  store double %res, double* %dst, align 8
  %i.next = add i64 %i, 1
  ; Narrow the 64-bit counter to compare against the i32 trip count.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop, %entry
  ret void
}

declare double @llvm.round.f64(double) nounwind readnone
834
; Test: x[i] = llvm.fma.f32(y[i], z[i], w[i]) for 0 <= i < n (the loop is
; bypassed when n <= 0). The three-operand scalar call is expected to be
; widened to the <4 x float> overload of the intrinsic.
;CHECK-LABEL: @fma_f32(
;CHECK: llvm.fma.v4f32
;CHECK: ret void
define void @fma_f32(i32 %n, float* noalias %y, float* noalias %x, float* noalias %z, float* noalias %w) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:                                             ; preds = %entry, %loop
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %y.addr = getelementptr inbounds float, float* %y, i64 %i
  %mul.lhs = load float, float* %y.addr, align 4
  %w.addr = getelementptr inbounds float, float* %w, i64 %i
  %addend = load float, float* %w.addr, align 4
  %z.addr = getelementptr inbounds float, float* %z, i64 %i
  %mul.rhs = load float, float* %z.addr, align 4
  %fused = tail call float @llvm.fma.f32(float %mul.lhs, float %mul.rhs, float %addend)
  %x.addr = getelementptr inbounds float, float* %x, i64 %i
  store float %fused, float* %x.addr, align 4
  %i.next = add i64 %i, 1
  ; Narrow the 64-bit counter to compare against the i32 trip count.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop, %entry
  ret void
}

declare float @llvm.fma.f32(float, float, float) nounwind readnone
864
; Test: x[i] = llvm.fma.f64(y[i], z[i], w[i]) for 0 <= i < n (the loop is
; bypassed when n <= 0). The three-operand scalar call is expected to be
; widened to the <4 x double> overload of the intrinsic.
;CHECK-LABEL: @fma_f64(
;CHECK: llvm.fma.v4f64
;CHECK: ret void
define void @fma_f64(i32 %n, double* noalias %y, double* noalias %x, double* noalias %z, double* noalias %w) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:                                             ; preds = %entry, %loop
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %y.addr = getelementptr inbounds double, double* %y, i64 %i
  %mul.lhs = load double, double* %y.addr, align 8
  %w.addr = getelementptr inbounds double, double* %w, i64 %i
  %addend = load double, double* %w.addr, align 8
  %z.addr = getelementptr inbounds double, double* %z, i64 %i
  %mul.rhs = load double, double* %z.addr, align 8
  %fused = tail call double @llvm.fma.f64(double %mul.lhs, double %mul.rhs, double %addend)
  %x.addr = getelementptr inbounds double, double* %x, i64 %i
  store double %fused, double* %x.addr, align 8
  %i.next = add i64 %i, 1
  ; Narrow the 64-bit counter to compare against the i32 trip count.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop, %entry
  ret void
}

declare double @llvm.fma.f64(double, double, double) nounwind readnone
894
; Test: x[i] = llvm.fmuladd.f32(y[i], z[i], w[i]) for 0 <= i < n (the loop is
; bypassed when n <= 0). The scalar call is expected to be widened to the
; <4 x float> overload of the intrinsic.
;CHECK-LABEL: @fmuladd_f32(
;CHECK: llvm.fmuladd.v4f32
;CHECK: ret void
define void @fmuladd_f32(i32 %n, float* noalias %y, float* noalias %x, float* noalias %z, float* noalias %w) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:                                             ; preds = %entry, %loop
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %y.addr = getelementptr inbounds float, float* %y, i64 %i
  %mul.lhs = load float, float* %y.addr, align 4
  %w.addr = getelementptr inbounds float, float* %w, i64 %i
  %addend = load float, float* %w.addr, align 4
  %z.addr = getelementptr inbounds float, float* %z, i64 %i
  %mul.rhs = load float, float* %z.addr, align 4
  %muladd = tail call float @llvm.fmuladd.f32(float %mul.lhs, float %mul.rhs, float %addend)
  %x.addr = getelementptr inbounds float, float* %x, i64 %i
  store float %muladd, float* %x.addr, align 4
  %i.next = add i64 %i, 1
  ; Narrow the 64-bit counter to compare against the i32 trip count.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop, %entry
  ret void
}

declare float @llvm.fmuladd.f32(float, float, float) nounwind readnone
924
; Test: x[i] = llvm.fmuladd.f64(y[i], z[i], w[i]) for 0 <= i < n (the loop is
; bypassed when n <= 0). The scalar call is expected to be widened to the
; <4 x double> overload of the intrinsic.
;CHECK-LABEL: @fmuladd_f64(
;CHECK: llvm.fmuladd.v4f64
;CHECK: ret void
define void @fmuladd_f64(i32 %n, double* noalias %y, double* noalias %x, double* noalias %z, double* noalias %w) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:                                             ; preds = %entry, %loop
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %y.addr = getelementptr inbounds double, double* %y, i64 %i
  %mul.lhs = load double, double* %y.addr, align 8
  %w.addr = getelementptr inbounds double, double* %w, i64 %i
  %addend = load double, double* %w.addr, align 8
  %z.addr = getelementptr inbounds double, double* %z, i64 %i
  %mul.rhs = load double, double* %z.addr, align 8
  %muladd = tail call double @llvm.fmuladd.f64(double %mul.lhs, double %mul.rhs, double %addend)
  %x.addr = getelementptr inbounds double, double* %x, i64 %i
  store double %muladd, double* %x.addr, align 8
  %i.next = add i64 %i, 1
  ; Narrow the 64-bit counter to compare against the i32 trip count.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop, %entry
  ret void
}

declare double @llvm.fmuladd.f64(double, double, double) nounwind readnone
954
; Test: x[i] = llvm.pow.f32(y[i], z[i]) for 0 <= i < n (the loop is bypassed
; when n <= 0). Both operands vary per iteration; the scalar call is expected
; to be widened to the <4 x float> overload of the intrinsic.
;CHECK-LABEL: @pow_f32(
;CHECK: llvm.pow.v4f32
;CHECK: ret void
define void @pow_f32(i32 %n, float* noalias %y, float* noalias %x, float* noalias %z) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:                                             ; preds = %entry, %loop
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %base.addr = getelementptr inbounds float, float* %y, i64 %i
  %base = load float, float* %base.addr, align 4
  %exp.addr = getelementptr inbounds float, float* %z, i64 %i
  %exp = load float, float* %exp.addr, align 4
  %res = tail call float @llvm.pow.f32(float %base, float %exp) nounwind readnone
  %dst = getelementptr inbounds float, float* %x, i64 %i
  store float %res, float* %dst, align 4
  %i.next = add i64 %i, 1
  ; Narrow the 64-bit counter to compare against the i32 trip count.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop, %entry
  ret void
}

declare float @llvm.pow.f32(float, float) nounwind readnone
982
; Test: x[i] = llvm.pow.f64(y[i], z[i]) for 0 <= i < n (the loop is bypassed
; when n <= 0). Both operands vary per iteration; the scalar call is expected
; to be widened to the <4 x double> overload of the intrinsic.
;CHECK-LABEL: @pow_f64(
;CHECK: llvm.pow.v4f64
;CHECK: ret void
define void @pow_f64(i32 %n, double* noalias %y, double* noalias %x, double* noalias %z) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:                                             ; preds = %entry, %loop
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %base.addr = getelementptr inbounds double, double* %y, i64 %i
  %base = load double, double* %base.addr, align 8
  %exp.addr = getelementptr inbounds double, double* %z, i64 %i
  %exp = load double, double* %exp.addr, align 8
  %res = tail call double @llvm.pow.f64(double %base, double %exp) nounwind readnone
  %dst = getelementptr inbounds double, double* %x, i64 %i
  store double %res, double* %dst, align 8
  %i.next = add i64 %i, 1
  ; Narrow the 64-bit counter to compare against the i32 trip count.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop, %entry
  ret void
}
1008
; Test: x[i] = fabsf(x[i]) in place for 0 <= i < 1024, where @fabsf is an
; external declaration with the signature float(float). The vectorized loop
; is expected to call the <4 x float> llvm.fabs intrinsic instead.
; A label directive naming the function (as the other tests in this file
; use) keeps the checks below confined to this function's output.
; CHECK-LABEL: @fabs_libm(
; CHECK:  call <4 x float> @llvm.fabs.v4f32
; CHECK: ret void
define void @fabs_libm(float* nocapture %x) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %entry, %loop
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %elt.addr = getelementptr inbounds float, float* %x, i64 %i
  %elt = load float, float* %elt.addr, align 4
  %abs = tail call float @fabsf(float %elt) nounwind readnone
  ; The result overwrites the element that was just read.
  store float %abs, float* %elt.addr, align 4
  %i.next = add i64 %i, 1
  %i.next.i32 = trunc i64 %i.next to i32
  ; Constant trip count of 1024 iterations.
  %done = icmp eq i32 %i.next.i32, 1024
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void
}

declare float @fabsf(float) nounwind readnone

; Scalar intrinsic called by @pow_f64 earlier in this file.
declare double @llvm.pow.f64(double, double) nounwind readnone
1034
1035
1036
; Make sure we don't replace calls to functions that have a standard library
; function's signature but are defined in this module with internal linkage.

; Local stand-in named like the libm function; it ignores its argument and
; always returns 0.0.
define internal float @roundf(float %x) nounwind readnone {
  ret float 0.000000e+00
}

; Test: x[i] = roundf(x[i]) in place for 0 <= i < 1024, calling the internal
; @roundf above. No <4 x float> load may appear between the function label
; and its return.
; CHECK-LABEL: @internal_round(
; CHECK-NOT:  load <4 x float>
; CHECK: ret void

define void @internal_round(float* nocapture %x) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %entry, %loop
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %elt.addr = getelementptr inbounds float, float* %x, i64 %i
  %elt = load float, float* %elt.addr, align 4
  %res = tail call float @roundf(float %elt) nounwind readnone
  ; The result overwrites the element that was just read.
  store float %res, float* %elt.addr, align 4
  %i.next = add i64 %i, 1
  %i.next.i32 = trunc i64 %i.next to i32
  ; Constant trip count of 1024 iterations.
  %done = icmp eq i32 %i.next.i32, 1024
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void
}
1064
; Make sure we don't replace calls to functions that have a standard library
; name but a different signature.

; Named like the libm function, but declared here as returning void.
declare void @round(double %f)

; Test: for 0 <= i < 1024, x[i] is loaded, stored back unchanged, and passed
; to the void @round declared above. No <4 x double> load may appear between
; the function label and its return.
; CHECK-LABEL: @wrong_signature(
; CHECK-NOT:  load <4 x double>
; CHECK: ret void

define void @wrong_signature(double* nocapture %x) nounwind {
entry:
  br label %loop

loop:                                             ; preds = %entry, %loop
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %elt.addr = getelementptr inbounds double, double* %x, i64 %i
  ; Note: both accesses specify align 4 although the element type is double.
  %elt = load double, double* %elt.addr, align 4
  store double %elt, double* %elt.addr, align 4
  tail call void @round(double %elt) nounwind readnone
  %i.next = add i64 %i, 1
  %i.next.i32 = trunc i64 %i.next to i32
  ; Constant trip count of 1024 iterations.
  %done = icmp eq i32 %i.next.i32, 1024
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop
  ret void
}
1091
declare double @llvm.powi.f64(double %Val, i32 %power) nounwind readnone

; Test: x[i] = llvm.powi.f64(y[i], P) for 0 <= i < n (the loop is bypassed
; when n <= 0). The exponent %P is a function argument, so it is the same on
; every iteration; the call is expected to be widened to the <4 x double>
; overload of the intrinsic.
;CHECK-LABEL: @powi_f64(
;CHECK: llvm.powi.v4f64
;CHECK: ret void
define void @powi_f64(i32 %n, double* noalias %y, double* noalias %x, i32 %P) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:                                             ; preds = %entry, %loop
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src = getelementptr inbounds double, double* %y, i64 %i
  %base = load double, double* %src, align 8
  %res = tail call double @llvm.powi.f64(double %base, i32 %P) nounwind readnone
  %dst = getelementptr inbounds double, double* %x, i64 %i
  store double %res, double* %dst, align 8
  %i.next = add i64 %i, 1
  ; Narrow the 64-bit counter to compare against the i32 trip count.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop, %entry
  ret void
}
1117
; Negative test: x[i] = llvm.powi.f64(y[i], (i32) i) for 0 <= i < n (the loop
; is bypassed when n <= 0). Here the exponent is derived from the induction
; variable, so it changes on every iteration, and the <4 x double> overload
; of the intrinsic must not appear in this function.
;CHECK-LABEL: @powi_f64_neg(
;CHECK-NOT: llvm.powi.v4f64
;CHECK: ret void
define void @powi_f64_neg(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:                                             ; preds = %entry, %loop
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src = getelementptr inbounds double, double* %y, i64 %i
  %base = load double, double* %src, align 8
  ; Per-iteration exponent: the current index narrowed to i32.
  %exp = trunc i64 %i to i32
  %res = tail call double @llvm.powi.f64(double %base, i32 %exp) nounwind readnone
  %dst = getelementptr inbounds double, double* %x, i64 %i
  store double %res, double* %dst, align 8
  %i.next = add i64 %i, 1
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop, %entry
  ret void
}
1142
declare i64 @llvm.cttz.i64(i64, i1) nounwind readnone

; Test: x[i] = llvm.cttz.i64(y[i], true) for 0 <= i < n (the loop is bypassed
; when n <= 0). Despite the _f64 suffix in the function name, the data is
; i64; the call is expected to be widened to the <4 x i64> overload.
;CHECK-LABEL: @cttz_f64(
;CHECK: llvm.cttz.v4i64
;CHECK: ret void
define void @cttz_f64(i32 %n, i64* noalias %y, i64* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:                                             ; preds = %entry, %loop
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src = getelementptr inbounds i64, i64* %y, i64 %i
  %bits = load i64, i64* %src, align 8
  ; The second operand is the constant i1 true on every iteration.
  %count = tail call i64 @llvm.cttz.i64(i64 %bits, i1 true) nounwind readnone
  %dst = getelementptr inbounds i64, i64* %x, i64 %i
  store i64 %count, i64* %dst, align 8
  %i.next = add i64 %i, 1
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop, %entry
  ret void
}
1168
declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone

; Test: x[i] = llvm.ctlz.i64(y[i], true) for 0 <= i < n (the loop is bypassed
; when n <= 0). Despite the _f64 suffix in the function name, the data is
; i64; the call is expected to be widened to the <4 x i64> overload.
;CHECK-LABEL: @ctlz_f64(
;CHECK: llvm.ctlz.v4i64
;CHECK: ret void
define void @ctlz_f64(i32 %n, i64* noalias %y, i64* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:                                             ; preds = %entry, %loop
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src = getelementptr inbounds i64, i64* %y, i64 %i
  %bits = load i64, i64* %src, align 8
  ; The second operand is the constant i1 true on every iteration.
  %count = tail call i64 @llvm.ctlz.i64(i64 %bits, i1 true) nounwind readnone
  %dst = getelementptr inbounds i64, i64* %x, i64 %i
  store i64 %count, i64* %dst, align 8
  %i.next = add i64 %i, 1
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop, %entry
  ret void
}
1194
declare float @llvm.minnum.f32(float, float) nounwind readnone

; Test: x[i] = llvm.minnum.f32(y[i], z[i]) for 0 <= i < n (the loop is
; bypassed when n <= 0). The scalar call is expected to be widened to the
; <4 x float> overload of the intrinsic.
;CHECK-LABEL: @minnum_f32(
;CHECK: llvm.minnum.v4f32
;CHECK: ret void
define void @minnum_f32(i32 %n, float* noalias %y, float* noalias %x, float* noalias %z) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:                                             ; preds = %entry, %loop
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %lhs.addr = getelementptr inbounds float, float* %y, i64 %i
  %lhs = load float, float* %lhs.addr, align 4
  %rhs.addr = getelementptr inbounds float, float* %z, i64 %i
  %rhs = load float, float* %rhs.addr, align 4
  %min = tail call float @llvm.minnum.f32(float %lhs, float %rhs) nounwind readnone
  %dst = getelementptr inbounds float, float* %x, i64 %i
  store float %min, float* %dst, align 4
  %i.next = add i64 %i, 1
  ; Narrow the 64-bit counter to compare against the i32 trip count.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop, %entry
  ret void
}
1222
declare float @llvm.maxnum.f32(float, float) nounwind readnone

; Test: x[i] = llvm.maxnum.f32(y[i], z[i]) for 0 <= i < n (the loop is
; bypassed when n <= 0). The scalar call is expected to be widened to the
; <4 x float> overload of the intrinsic.
;CHECK-LABEL: @maxnum_f32(
;CHECK: llvm.maxnum.v4f32
;CHECK: ret void
define void @maxnum_f32(i32 %n, float* noalias %y, float* noalias %x, float* noalias %z) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:                                             ; preds = %entry, %loop
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %lhs.addr = getelementptr inbounds float, float* %y, i64 %i
  %lhs = load float, float* %lhs.addr, align 4
  %rhs.addr = getelementptr inbounds float, float* %z, i64 %i
  %rhs = load float, float* %rhs.addr, align 4
  %max = tail call float @llvm.maxnum.f32(float %lhs, float %rhs) nounwind readnone
  %dst = getelementptr inbounds float, float* %x, i64 %i
  store float %max, float* %dst, align 4
  %i.next = add i64 %i, 1
  ; Narrow the 64-bit counter to compare against the i32 trip count.
  %i.next.i32 = trunc i64 %i.next to i32
  %done = icmp eq i32 %i.next.i32, %n
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop, %entry
  ret void
}
1250