; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -basic-aa -slp-vectorizer -slp-threshold=-999 -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"

declare double @sin(double) nounwind willreturn
declare double @cos(double) nounwind willreturn
declare double @pow(double, double) nounwind willreturn
declare double @exp2(double) nounwind willreturn
declare double @sqrt(double) nounwind willreturn
declare i64 @round(i64) nounwind willreturn
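
; Two adjacent scalar calls to @sin on consecutive elements are expected to be
; vectorized into a single call to the @llvm.sin.v2f64 intrinsic.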
define void @sin_libm(double* %a, double* %b) {
; CHECK-LABEL: @sin_libm(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.sin.v2f64(<2 x double> [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* [[B:%.*]] to <2 x double>*
; CHECK-NEXT:    store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8
; CHECK-NEXT:    ret void
;
  %a0 = load double, double* %a, align 8
  %idx1 = getelementptr inbounds double, double* %a, i64 1
  %a1 = load double, double* %idx1, align 8
  %sin1 = tail call double @sin(double %a0) nounwind readnone
  %sin2 = tail call double @sin(double %a1) nounwind readnone
  store double %sin1, double* %b, align 8
  %idx2 = getelementptr inbounds double, double* %b, i64 1
  store double %sin2, double* %idx2, align 8
  ret void
}

define void @cos_libm(double* %a, double* %b) {
; CHECK-LABEL: @cos_libm(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.cos.v2f64(<2 x double> [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* [[B:%.*]] to <2 x double>*
; CHECK-NEXT:    store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8
; CHECK-NEXT:    ret void
;
  %a0 = load double, double* %a, align 8
  %idx1 = getelementptr inbounds double, double* %a, i64 1
  %a1 = load double, double* %idx1, align 8
  %cos1 = tail call double @cos(double %a0) nounwind readnone
  %cos2 = tail call double @cos(double %a1) nounwind readnone
  store double %cos1, double* %b, align 8
  %idx2 = getelementptr inbounds double, double* %b, i64 1
  store double %cos2, double* %idx2, align 8
  ret void
}

define void @pow_libm(double* %a, double* %b) {
; CHECK-LABEL: @pow_libm(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> [[TMP2]], <2 x double> [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* [[B:%.*]] to <2 x double>*
; CHECK-NEXT:    store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8
; CHECK-NEXT:    ret void
;
  %a0 = load double, double* %a, align 8
  %idx1 = getelementptr inbounds double, double* %a, i64 1
  %a1 = load double, double* %idx1, align 8
  %pow1 = tail call double @pow(double %a0, double %a0) nounwind readnone
  %pow2 = tail call double @pow(double %a1, double %a1) nounwind readnone
  store double %pow1, double* %b, align 8
  %idx2 = getelementptr inbounds double, double* %b, i64 1
  store double %pow2, double* %idx2, align 8
  ret void
}
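; Note: despite its name, @exp_libm exercises exp2; the two scalar @exp2 calls
; are expected to become a single @llvm.exp2.v2f64 call.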
define void @exp_libm(double* %a, double* %b) {
; CHECK-LABEL: @exp_libm(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.exp2.v2f64(<2 x double> [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* [[B:%.*]] to <2 x double>*
; CHECK-NEXT:    store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8
; CHECK-NEXT:    ret void
;
  %a0 = load double, double* %a, align 8
  %idx1 = getelementptr inbounds double, double* %a, i64 1
  %a1 = load double, double* %idx1, align 8
  %exp1 = tail call double @exp2(double %a0) nounwind readnone
  %exp2 = tail call double @exp2(double %a1) nounwind readnone
  store double %exp1, double* %b, align 8
  %idx2 = getelementptr inbounds double, double* %b, i64 1
  store double %exp2, double* %idx2, align 8
  ret void
}

; No fast-math-flags are required to convert sqrt library calls to an intrinsic.
; We just need to know that errno is not set (readnone).
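;
; A rough C equivalent (hypothetical source shown for illustration; assumes
; -fno-math-errno so the compiler can mark the sqrt calls readnone):
;
;   void sqrt_libm_no_errno(double *a, double *b) {
;     b[0] = sqrt(a[0]);
;     b[1] = sqrt(a[1]);
;   }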

define void @sqrt_libm_no_errno(double* %a, double* %b) {
; CHECK-LABEL: @sqrt_libm_no_errno(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* [[B:%.*]] to <2 x double>*
; CHECK-NEXT:    store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8
; CHECK-NEXT:    ret void
;
  %a0 = load double, double* %a, align 8
  %idx1 = getelementptr inbounds double, double* %a, i64 1
  %a1 = load double, double* %idx1, align 8
  %sqrt1 = tail call double @sqrt(double %a0) nounwind readnone
  %sqrt2 = tail call double @sqrt(double %a1) nounwind readnone
  store double %sqrt1, double* %b, align 8
  %idx2 = getelementptr inbounds double, double* %b, i64 1
  store double %sqrt2, double* %idx2, align 8
  ret void
}

; The sqrt intrinsic does not set errno, but a non-constant sqrt call might, so this can't vectorize.
; The nnan on the call does not matter because there's no guarantee in the C standard that a negative
; input would result in a nan output ("On a domain error, the function returns an
; implementation-defined value.")
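; The calls below carry nounwind but not readnone, so they may write errno and
; must stay scalar.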

define void @sqrt_libm_errno(double* %a, double* %b) {
; CHECK-LABEL: @sqrt_libm_errno(
; CHECK-NEXT:    [[A0:%.*]] = load double, double* [[A:%.*]], align 8
; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds double, double* [[A]], i64 1
; CHECK-NEXT:    [[A1:%.*]] = load double, double* [[IDX1]], align 8
; CHECK-NEXT:    [[SQRT1:%.*]] = tail call nnan double @sqrt(double [[A0]]) #[[ATTR3:[0-9]+]]
; CHECK-NEXT:    [[SQRT2:%.*]] = tail call nnan double @sqrt(double [[A1]]) #[[ATTR3]]
; CHECK-NEXT:    store double [[SQRT1]], double* [[B:%.*]], align 8
; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds double, double* [[B]], i64 1
; CHECK-NEXT:    store double [[SQRT2]], double* [[IDX2]], align 8
; CHECK-NEXT:    ret void
;
  %a0 = load double, double* %a, align 8
  %idx1 = getelementptr inbounds double, double* %a, i64 1
  %a1 = load double, double* %idx1, align 8
  %sqrt1 = tail call nnan double @sqrt(double %a0) nounwind
  %sqrt2 = tail call nnan double @sqrt(double %a1) nounwind
  store double %sqrt1, double* %b, align 8
  %idx2 = getelementptr inbounds double, double* %b, i64 1
  store double %sqrt2, double* %idx2, align 8
  ret void
}

; Negative test case: this custom @round takes and returns i64, which does not
; match the libm signature double round(double), so it is not recognized as a
; vectorizable library call and the scalar calls must remain.
define void @round_custom(i64* %a, i64* %b) {
; CHECK-LABEL: @round_custom(
; CHECK-NEXT:    [[A0:%.*]] = load i64, i64* [[A:%.*]], align 8
; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 1
; CHECK-NEXT:    [[A1:%.*]] = load i64, i64* [[IDX1]], align 8
; CHECK-NEXT:    [[ROUND1:%.*]] = tail call i64 @round(i64 [[A0]]) #[[ATTR4:[0-9]+]]
; CHECK-NEXT:    [[ROUND2:%.*]] = tail call i64 @round(i64 [[A1]]) #[[ATTR4]]
; CHECK-NEXT:    store i64 [[ROUND1]], i64* [[B:%.*]], align 8
; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds i64, i64* [[B]], i64 1
; CHECK-NEXT:    store i64 [[ROUND2]], i64* [[IDX2]], align 8
; CHECK-NEXT:    ret void
;
  %a0 = load i64, i64* %a, align 8
  %idx1 = getelementptr inbounds i64, i64* %a, i64 1
  %a1 = load i64, i64* %idx1, align 8
  %round1 = tail call i64 @round(i64 %a0) nounwind readnone
  %round2 = tail call i64 @round(i64 %a1) nounwind readnone
  store i64 %round1, i64* %b, align 8
  %idx2 = getelementptr inbounds i64, i64* %b, i64 1
  store i64 %round2, i64* %idx2, align 8
  ret void
}