1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -inject-tli-mappings -slp-vectorizer -vector-library=Accelerate -S %s | FileCheck %s
3; RUN: opt -inject-tli-mappings -slp-vectorizer -S %s | FileCheck --check-prefix NOACCELERATE %s
4
; Module-level target configuration: the triple selects arm64 Apple iOS 14.
5target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
6target triple = "arm64-apple-ios14.0.0"
7
8declare float @llvm.sin.f32(float)
9
10; Accelerate provides sin() for <4 x float>
; Input: four scalar llvm.sin.f32 intrinsic calls, one per lane of a loaded
; <4 x float>, whose results are reassembled with insertelement.
; With -vector-library=Accelerate the expected output is a single @vsinf call
; on the whole vector. In the run without a vector library, lanes 0 and 1 are
; expected to stay scalar while lanes 2 and 3 are merged into one
; llvm.sin.v2f32 call.
11define <4 x float> @int_sin_4x(<4 x float>* %a) {
12; CHECK-LABEL: @int_sin_4x(
13; CHECK-NEXT:  entry:
14; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
15; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vsinf(<4 x float> [[TMP0]])
16; CHECK-NEXT:    ret <4 x float> [[TMP1]]
17;
18; NOACCELERATE-LABEL: @int_sin_4x(
19; NOACCELERATE-NEXT:  entry:
20; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
21; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
22; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT]])
23; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
24; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
25; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_1]])
26; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
27; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
28; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
29; NOACCELERATE-NEXT:    [[TMP3:%.*]] = insertelement <2 x float> poison, float [[VECEXT_2]], i32 0
30; NOACCELERATE-NEXT:    [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[VECEXT_3]], i32 1
31; NOACCELERATE-NEXT:    [[TMP5:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP4]])
32; NOACCELERATE-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
33; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
34; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
35;
36entry:
37  %0 = load <4 x float>, <4 x float>* %a, align 16
38  %vecext = extractelement <4 x float> %0, i32 0
39  %1 = tail call fast float @llvm.sin.f32(float %vecext)
40  %vecins = insertelement <4 x float> undef, float %1, i32 0
41  %vecext.1 = extractelement <4 x float> %0, i32 1
42  %2 = tail call fast float @llvm.sin.f32(float %vecext.1)
43  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
44  %vecext.2 = extractelement <4 x float> %0, i32 2
45  %3 = tail call fast float @llvm.sin.f32(float %vecext.2)
46  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
47  %vecext.3 = extractelement <4 x float> %0, i32 3
48  %4 = tail call fast float @llvm.sin.f32(float %vecext.3)
49  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
50  ret <4 x float> %vecins.3
51}
52
53declare float @ceilf(float) readonly nounwind willreturn
54
; Input: four scalar ceilf calls, one per lane of a loaded <4 x float>.
; Both run configurations expect the same output: a single llvm.ceil.v4f32
; intrinsic call on the whole vector (no Accelerate routine is used here).
55define <4 x float> @ceil_4x(<4 x float>* %a) {
56; CHECK-LABEL: @ceil_4x(
57; CHECK-NEXT:  entry:
58; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
59; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.ceil.v4f32(<4 x float> [[TMP0]])
60; CHECK-NEXT:    ret <4 x float> [[TMP1]]
61;
62; NOACCELERATE-LABEL: @ceil_4x(
63; NOACCELERATE-NEXT:  entry:
64; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
65; NOACCELERATE-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.ceil.v4f32(<4 x float> [[TMP0]])
66; NOACCELERATE-NEXT:    ret <4 x float> [[TMP1]]
67;
68entry:
69  %0 = load <4 x float>, <4 x float>* %a, align 16
70  %vecext = extractelement <4 x float> %0, i32 0
71  %1 = tail call fast float @ceilf(float %vecext)
72  %vecins = insertelement <4 x float> undef, float %1, i32 0
73  %vecext.1 = extractelement <4 x float> %0, i32 1
74  %2 = tail call fast float @ceilf(float %vecext.1)
75  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
76  %vecext.2 = extractelement <4 x float> %0, i32 2
77  %3 = tail call fast float @ceilf(float %vecext.2)
78  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
79  %vecext.3 = extractelement <4 x float> %0, i32 3
80  %4 = tail call fast float @ceilf(float %vecext.3)
81  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
82  ret <4 x float> %vecins.3
83}
84
85declare float @fabsf(float) readonly nounwind willreturn
86
; Input: four scalar fabsf library calls, one per lane of a loaded <4 x float>.
; Both run configurations expect a single llvm.fabs.v4f32 intrinsic call on
; the whole vector.
87define <4 x float> @fabs_4x(<4 x float>* %a) {
88; CHECK-LABEL: @fabs_4x(
89; CHECK-NEXT:  entry:
90; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
91; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
92; CHECK-NEXT:    ret <4 x float> [[TMP1]]
93;
94; NOACCELERATE-LABEL: @fabs_4x(
95; NOACCELERATE-NEXT:  entry:
96; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
97; NOACCELERATE-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
98; NOACCELERATE-NEXT:    ret <4 x float> [[TMP1]]
99;
100entry:
101  %0 = load <4 x float>, <4 x float>* %a, align 16
102  %vecext = extractelement <4 x float> %0, i32 0
103  %1 = tail call fast float @fabsf(float %vecext)
104  %vecins = insertelement <4 x float> undef, float %1, i32 0
105  %vecext.1 = extractelement <4 x float> %0, i32 1
106  %2 = tail call fast float @fabsf(float %vecext.1)
107  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
108  %vecext.2 = extractelement <4 x float> %0, i32 2
109  %3 = tail call fast float @fabsf(float %vecext.2)
110  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
111  %vecext.3 = extractelement <4 x float> %0, i32 3
112  %4 = tail call fast float @fabsf(float %vecext.3)
113  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
114  ret <4 x float> %vecins.3
115}
116declare float @llvm.fabs.f32(float)
; Same shape as @fabs_4x, but the scalar calls use the llvm.fabs.f32 intrinsic
; instead of the fabsf library function. Both run configurations expect a
; single llvm.fabs.v4f32 call on the whole vector.
117define <4 x float> @int_fabs_4x(<4 x float>* %a) {
118; CHECK-LABEL: @int_fabs_4x(
119; CHECK-NEXT:  entry:
120; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
121; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
122; CHECK-NEXT:    ret <4 x float> [[TMP1]]
123;
124; NOACCELERATE-LABEL: @int_fabs_4x(
125; NOACCELERATE-NEXT:  entry:
126; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
127; NOACCELERATE-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
128; NOACCELERATE-NEXT:    ret <4 x float> [[TMP1]]
129;
130entry:
131  %0 = load <4 x float>, <4 x float>* %a, align 16
132  %vecext = extractelement <4 x float> %0, i32 0
133  %1 = tail call fast float @llvm.fabs.f32(float %vecext)
134  %vecins = insertelement <4 x float> undef, float %1, i32 0
135  %vecext.1 = extractelement <4 x float> %0, i32 1
136  %2 = tail call fast float @llvm.fabs.f32(float %vecext.1)
137  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
138  %vecext.2 = extractelement <4 x float> %0, i32 2
139  %3 = tail call fast float @llvm.fabs.f32(float %vecext.2)
140  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
141  %vecext.3 = extractelement <4 x float> %0, i32 3
142  %4 = tail call fast float @llvm.fabs.f32(float %vecext.3)
143  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
144  ret <4 x float> %vecins.3
145}
146declare float @floorf(float) readonly nounwind willreturn
; Input: four scalar floorf calls, one per lane of a loaded <4 x float>.
; Both run configurations expect a single llvm.floor.v4f32 intrinsic call on
; the whole vector.
147define <4 x float> @floor_4x(<4 x float>* %a) {
148; CHECK-LABEL: @floor_4x(
149; CHECK-NEXT:  entry:
150; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
151; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.floor.v4f32(<4 x float> [[TMP0]])
152; CHECK-NEXT:    ret <4 x float> [[TMP1]]
153;
154; NOACCELERATE-LABEL: @floor_4x(
155; NOACCELERATE-NEXT:  entry:
156; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
157; NOACCELERATE-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.floor.v4f32(<4 x float> [[TMP0]])
158; NOACCELERATE-NEXT:    ret <4 x float> [[TMP1]]
159;
160entry:
161  %0 = load <4 x float>, <4 x float>* %a, align 16
162  %vecext = extractelement <4 x float> %0, i32 0
163  %1 = tail call fast float @floorf(float %vecext)
164  %vecins = insertelement <4 x float> undef, float %1, i32 0
165  %vecext.1 = extractelement <4 x float> %0, i32 1
166  %2 = tail call fast float @floorf(float %vecext.1)
167  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
168  %vecext.2 = extractelement <4 x float> %0, i32 2
169  %3 = tail call fast float @floorf(float %vecext.2)
170  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
171  %vecext.3 = extractelement <4 x float> %0, i32 3
172  %4 = tail call fast float @floorf(float %vecext.3)
173  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
174  ret <4 x float> %vecins.3
175}
176declare float @sqrtf(float) readonly nounwind willreturn
; Input: four scalar sqrtf calls, one per lane of a loaded <4 x float>.
; Both run configurations expect a single llvm.sqrt.v4f32 intrinsic call on
; the whole vector.
177define <4 x float> @sqrt_4x(<4 x float>* %a) {
178; CHECK-LABEL: @sqrt_4x(
179; CHECK-NEXT:  entry:
180; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
181; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]])
182; CHECK-NEXT:    ret <4 x float> [[TMP1]]
183;
184; NOACCELERATE-LABEL: @sqrt_4x(
185; NOACCELERATE-NEXT:  entry:
186; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
187; NOACCELERATE-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]])
188; NOACCELERATE-NEXT:    ret <4 x float> [[TMP1]]
189;
190entry:
191  %0 = load <4 x float>, <4 x float>* %a, align 16
192  %vecext = extractelement <4 x float> %0, i32 0
193  %1 = tail call fast float @sqrtf(float %vecext)
194  %vecins = insertelement <4 x float> undef, float %1, i32 0
195  %vecext.1 = extractelement <4 x float> %0, i32 1
196  %2 = tail call fast float @sqrtf(float %vecext.1)
197  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
198  %vecext.2 = extractelement <4 x float> %0, i32 2
199  %3 = tail call fast float @sqrtf(float %vecext.2)
200  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
201  %vecext.3 = extractelement <4 x float> %0, i32 3
202  %4 = tail call fast float @sqrtf(float %vecext.3)
203  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
204  ret <4 x float> %vecins.3
205}
206declare float @expf(float) readonly nounwind willreturn
; Input: four scalar expf calls, one per lane of a loaded <4 x float>.
; With -vector-library=Accelerate the expected output is a single @vexpf call
; on the whole vector. In the run without a vector library, lanes 0 and 1 are
; expected to stay scalar expf calls while lanes 2 and 3 are merged into one
; llvm.exp.v2f32 call.
207define <4 x float> @exp_4x(<4 x float>* %a) {
208; CHECK-LABEL: @exp_4x(
209; CHECK-NEXT:  entry:
210; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
211; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vexpf(<4 x float> [[TMP0]])
212; CHECK-NEXT:    ret <4 x float> [[TMP1]]
213;
214; NOACCELERATE-LABEL: @exp_4x(
215; NOACCELERATE-NEXT:  entry:
216; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
217; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
218; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @expf(float [[VECEXT]])
219; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
220; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
221; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @expf(float [[VECEXT_1]])
222; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
223; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
224; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
225; NOACCELERATE-NEXT:    [[TMP3:%.*]] = insertelement <2 x float> poison, float [[VECEXT_2]], i32 0
226; NOACCELERATE-NEXT:    [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[VECEXT_3]], i32 1
227; NOACCELERATE-NEXT:    [[TMP5:%.*]] = call fast <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP4]])
228; NOACCELERATE-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
229; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
230; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
231;
232entry:
233  %0 = load <4 x float>, <4 x float>* %a, align 16
234  %vecext = extractelement <4 x float> %0, i32 0
235  %1 = tail call fast float @expf(float %vecext)
236  %vecins = insertelement <4 x float> undef, float %1, i32 0
237  %vecext.1 = extractelement <4 x float> %0, i32 1
238  %2 = tail call fast float @expf(float %vecext.1)
239  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
240  %vecext.2 = extractelement <4 x float> %0, i32 2
241  %3 = tail call fast float @expf(float %vecext.2)
242  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
243  %vecext.3 = extractelement <4 x float> %0, i32 3
244  %4 = tail call fast float @expf(float %vecext.3)
245  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
246  ret <4 x float> %vecins.3
247}
248declare float @expm1f(float) readonly nounwind willreturn
; Input: four scalar expm1f calls, one per lane of a loaded <4 x float>.
; With -vector-library=Accelerate the expected output is a single @vexpm1f
; call on the whole vector. In the run without a vector library, all four
; scalar expm1f calls are expected to remain unchanged.
249define <4 x float> @expm1_4x(<4 x float>* %a) {
250; CHECK-LABEL: @expm1_4x(
251; CHECK-NEXT:  entry:
252; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
253; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vexpm1f(<4 x float> [[TMP0]])
254; CHECK-NEXT:    ret <4 x float> [[TMP1]]
255;
256; NOACCELERATE-LABEL: @expm1_4x(
257; NOACCELERATE-NEXT:  entry:
258; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
259; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
260; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @expm1f(float [[VECEXT]])
261; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
262; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
263; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @expm1f(float [[VECEXT_1]])
264; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
265; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
266; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @expm1f(float [[VECEXT_2]])
267; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
268; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
269; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @expm1f(float [[VECEXT_3]])
270; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
271; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
272;
273entry:
274  %0 = load <4 x float>, <4 x float>* %a, align 16
275  %vecext = extractelement <4 x float> %0, i32 0
276  %1 = tail call fast float @expm1f(float %vecext)
277  %vecins = insertelement <4 x float> undef, float %1, i32 0
278  %vecext.1 = extractelement <4 x float> %0, i32 1
279  %2 = tail call fast float @expm1f(float %vecext.1)
280  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
281  %vecext.2 = extractelement <4 x float> %0, i32 2
282  %3 = tail call fast float @expm1f(float %vecext.2)
283  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
284  %vecext.3 = extractelement <4 x float> %0, i32 3
285  %4 = tail call fast float @expm1f(float %vecext.3)
286  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
287  ret <4 x float> %vecins.3
288}
289declare float @logf(float) readonly nounwind willreturn
; Input: four scalar logf calls, one per lane of a loaded <4 x float>.
; With -vector-library=Accelerate the expected output is a single @vlogf call
; on the whole vector. In the run without a vector library, lanes 0 and 1 are
; expected to stay scalar logf calls while lanes 2 and 3 are merged into one
; llvm.log.v2f32 call.
290define <4 x float> @log_4x(<4 x float>* %a) {
291; CHECK-LABEL: @log_4x(
292; CHECK-NEXT:  entry:
293; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
294; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vlogf(<4 x float> [[TMP0]])
295; CHECK-NEXT:    ret <4 x float> [[TMP1]]
296;
297; NOACCELERATE-LABEL: @log_4x(
298; NOACCELERATE-NEXT:  entry:
299; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
300; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
301; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @logf(float [[VECEXT]])
302; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
303; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
304; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @logf(float [[VECEXT_1]])
305; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
306; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
307; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
308; NOACCELERATE-NEXT:    [[TMP3:%.*]] = insertelement <2 x float> poison, float [[VECEXT_2]], i32 0
309; NOACCELERATE-NEXT:    [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[VECEXT_3]], i32 1
310; NOACCELERATE-NEXT:    [[TMP5:%.*]] = call fast <2 x float> @llvm.log.v2f32(<2 x float> [[TMP4]])
311; NOACCELERATE-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
312; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
313; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
314;
315entry:
316  %0 = load <4 x float>, <4 x float>* %a, align 16
317  %vecext = extractelement <4 x float> %0, i32 0
318  %1 = tail call fast float @logf(float %vecext)
319  %vecins = insertelement <4 x float> undef, float %1, i32 0
320  %vecext.1 = extractelement <4 x float> %0, i32 1
321  %2 = tail call fast float @logf(float %vecext.1)
322  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
323  %vecext.2 = extractelement <4 x float> %0, i32 2
324  %3 = tail call fast float @logf(float %vecext.2)
325  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
326  %vecext.3 = extractelement <4 x float> %0, i32 3
327  %4 = tail call fast float @logf(float %vecext.3)
328  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
329  ret <4 x float> %vecins.3
330}
331declare float @log1pf(float) readonly nounwind willreturn
; Input: four scalar log1pf calls, one per lane of a loaded <4 x float>.
; With -vector-library=Accelerate the expected output is a single @vlog1pf
; call on the whole vector. In the run without a vector library, all four
; scalar log1pf calls are expected to remain unchanged.
332define <4 x float> @log1p_4x(<4 x float>* %a) {
333; CHECK-LABEL: @log1p_4x(
334; CHECK-NEXT:  entry:
335; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
336; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vlog1pf(<4 x float> [[TMP0]])
337; CHECK-NEXT:    ret <4 x float> [[TMP1]]
338;
339; NOACCELERATE-LABEL: @log1p_4x(
340; NOACCELERATE-NEXT:  entry:
341; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
342; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
343; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @log1pf(float [[VECEXT]])
344; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
345; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
346; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @log1pf(float [[VECEXT_1]])
347; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
348; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
349; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @log1pf(float [[VECEXT_2]])
350; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
351; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
352; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @log1pf(float [[VECEXT_3]])
353; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
354; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
355;
356entry:
357  %0 = load <4 x float>, <4 x float>* %a, align 16
358  %vecext = extractelement <4 x float> %0, i32 0
359  %1 = tail call fast float @log1pf(float %vecext)
360  %vecins = insertelement <4 x float> undef, float %1, i32 0
361  %vecext.1 = extractelement <4 x float> %0, i32 1
362  %2 = tail call fast float @log1pf(float %vecext.1)
363  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
364  %vecext.2 = extractelement <4 x float> %0, i32 2
365  %3 = tail call fast float @log1pf(float %vecext.2)
366  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
367  %vecext.3 = extractelement <4 x float> %0, i32 3
368  %4 = tail call fast float @log1pf(float %vecext.3)
369  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
370  ret <4 x float> %vecins.3
371}
372declare float @log10pf(float) readonly nounwind willreturn
; Input: four scalar log10pf calls, one per lane of a loaded <4 x float>.
; Both run configurations expect the four scalar calls to remain unchanged,
; i.e. no vectorization happens with or without -vector-library=Accelerate.
; NOTE(review): log10pf does not look like a standard libm name. Presumably
; this is either a deliberate negative case or was meant to be log10f --
; confirm before changing, and regenerate the assertions if the callee is
; renamed.
373define <4 x float> @log10p_4x(<4 x float>* %a) {
374; CHECK-LABEL: @log10p_4x(
375; CHECK-NEXT:  entry:
376; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
377; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
378; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @log10pf(float [[VECEXT]])
379; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
380; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
381; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @log10pf(float [[VECEXT_1]])
382; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
383; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
384; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @log10pf(float [[VECEXT_2]])
385; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
386; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
387; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @log10pf(float [[VECEXT_3]])
388; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
389; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
390;
391; NOACCELERATE-LABEL: @log10p_4x(
392; NOACCELERATE-NEXT:  entry:
393; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
394; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
395; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @log10pf(float [[VECEXT]])
396; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
397; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
398; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @log10pf(float [[VECEXT_1]])
399; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
400; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
401; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @log10pf(float [[VECEXT_2]])
402; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
403; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
404; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @log10pf(float [[VECEXT_3]])
405; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
406; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
407;
408entry:
409  %0 = load <4 x float>, <4 x float>* %a, align 16
410  %vecext = extractelement <4 x float> %0, i32 0
411  %1 = tail call fast float @log10pf(float %vecext)
412  %vecins = insertelement <4 x float> undef, float %1, i32 0
413  %vecext.1 = extractelement <4 x float> %0, i32 1
414  %2 = tail call fast float @log10pf(float %vecext.1)
415  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
416  %vecext.2 = extractelement <4 x float> %0, i32 2
417  %3 = tail call fast float @log10pf(float %vecext.2)
418  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
419  %vecext.3 = extractelement <4 x float> %0, i32 3
420  %4 = tail call fast float @log10pf(float %vecext.3)
421  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
422  ret <4 x float> %vecins.3
423}
424declare float @logbf(float) readonly nounwind willreturn
; Input: four scalar logbf calls, one per lane of a loaded <4 x float>.
; With -vector-library=Accelerate the expected output is a single @vlogbf
; call on the whole vector. In the run without a vector library, all four
; scalar logbf calls are expected to remain unchanged.
425define <4 x float> @logb_4x(<4 x float>* %a) {
426; CHECK-LABEL: @logb_4x(
427; CHECK-NEXT:  entry:
428; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
429; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vlogbf(<4 x float> [[TMP0]])
430; CHECK-NEXT:    ret <4 x float> [[TMP1]]
431;
432; NOACCELERATE-LABEL: @logb_4x(
433; NOACCELERATE-NEXT:  entry:
434; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
435; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
436; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @logbf(float [[VECEXT]])
437; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
438; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
439; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @logbf(float [[VECEXT_1]])
440; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
441; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
442; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @logbf(float [[VECEXT_2]])
443; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
444; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
445; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @logbf(float [[VECEXT_3]])
446; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
447; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
448;
449entry:
450  %0 = load <4 x float>, <4 x float>* %a, align 16
451  %vecext = extractelement <4 x float> %0, i32 0
452  %1 = tail call fast float @logbf(float %vecext)
453  %vecins = insertelement <4 x float> undef, float %1, i32 0
454  %vecext.1 = extractelement <4 x float> %0, i32 1
455  %2 = tail call fast float @logbf(float %vecext.1)
456  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
457  %vecext.2 = extractelement <4 x float> %0, i32 2
458  %3 = tail call fast float @logbf(float %vecext.2)
459  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
460  %vecext.3 = extractelement <4 x float> %0, i32 3
461  %4 = tail call fast float @logbf(float %vecext.3)
462  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
463  ret <4 x float> %vecins.3
464}
465declare float @sinf(float) readonly nounwind willreturn
; Library-call counterpart of @int_sin_4x: four scalar sinf calls, one per
; lane of a loaded <4 x float>.
; With -vector-library=Accelerate the expected output is a single @vsinf call
; on the whole vector. In the run without a vector library, lanes 0 and 1 are
; expected to stay scalar sinf calls while lanes 2 and 3 are merged into one
; llvm.sin.v2f32 call.
466define <4 x float> @sin_4x(<4 x float>* %a) {
467; CHECK-LABEL: @sin_4x(
468; CHECK-NEXT:  entry:
469; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
470; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vsinf(<4 x float> [[TMP0]])
471; CHECK-NEXT:    ret <4 x float> [[TMP1]]
472;
473; NOACCELERATE-LABEL: @sin_4x(
474; NOACCELERATE-NEXT:  entry:
475; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
476; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
477; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @sinf(float [[VECEXT]])
478; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
479; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
480; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @sinf(float [[VECEXT_1]])
481; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
482; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
483; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
484; NOACCELERATE-NEXT:    [[TMP3:%.*]] = insertelement <2 x float> poison, float [[VECEXT_2]], i32 0
485; NOACCELERATE-NEXT:    [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[VECEXT_3]], i32 1
486; NOACCELERATE-NEXT:    [[TMP5:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP4]])
487; NOACCELERATE-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
488; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
489; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
490;
491entry:
492  %0 = load <4 x float>, <4 x float>* %a, align 16
493  %vecext = extractelement <4 x float> %0, i32 0
494  %1 = tail call fast float @sinf(float %vecext)
495  %vecins = insertelement <4 x float> undef, float %1, i32 0
496  %vecext.1 = extractelement <4 x float> %0, i32 1
497  %2 = tail call fast float @sinf(float %vecext.1)
498  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
499  %vecext.2 = extractelement <4 x float> %0, i32 2
500  %3 = tail call fast float @sinf(float %vecext.2)
501  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
502  %vecext.3 = extractelement <4 x float> %0, i32 3
503  %4 = tail call fast float @sinf(float %vecext.3)
504  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
505  ret <4 x float> %vecins.3
506}
507declare float @cosf(float) readonly nounwind willreturn
; Input: four scalar cosf calls, one per lane of a loaded <4 x float>.
; With -vector-library=Accelerate the expected output is a single @vcosf call
; on the whole vector. In the run without a vector library, lanes 0 and 1 are
; expected to stay scalar cosf calls while lanes 2 and 3 are merged into one
; llvm.cos.v2f32 call.
508define <4 x float> @cos_4x(<4 x float>* %a) {
509; CHECK-LABEL: @cos_4x(
510; CHECK-NEXT:  entry:
511; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
512; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vcosf(<4 x float> [[TMP0]])
513; CHECK-NEXT:    ret <4 x float> [[TMP1]]
514;
515; NOACCELERATE-LABEL: @cos_4x(
516; NOACCELERATE-NEXT:  entry:
517; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
518; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
519; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @cosf(float [[VECEXT]])
520; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
521; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
522; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @cosf(float [[VECEXT_1]])
523; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
524; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
525; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
526; NOACCELERATE-NEXT:    [[TMP3:%.*]] = insertelement <2 x float> poison, float [[VECEXT_2]], i32 0
527; NOACCELERATE-NEXT:    [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[VECEXT_3]], i32 1
528; NOACCELERATE-NEXT:    [[TMP5:%.*]] = call fast <2 x float> @llvm.cos.v2f32(<2 x float> [[TMP4]])
529; NOACCELERATE-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
530; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
531; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
532;
533entry:
534  %0 = load <4 x float>, <4 x float>* %a, align 16
535  %vecext = extractelement <4 x float> %0, i32 0
536  %1 = tail call fast float @cosf(float %vecext)
537  %vecins = insertelement <4 x float> undef, float %1, i32 0
538  %vecext.1 = extractelement <4 x float> %0, i32 1
539  %2 = tail call fast float @cosf(float %vecext.1)
540  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
541  %vecext.2 = extractelement <4 x float> %0, i32 2
542  %3 = tail call fast float @cosf(float %vecext.2)
543  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
544  %vecext.3 = extractelement <4 x float> %0, i32 3
545  %4 = tail call fast float @cosf(float %vecext.3)
546  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
547  ret <4 x float> %vecins.3
548}
549declare float @tanf(float) readonly nounwind willreturn
; tan_4x: loads a <4 x float> from %a, calls the scalar library function
; tanf() (with fast-math flags) on each of the four lanes, and rebuilds the
; result vector with insertelement.
; With -vector-library=Accelerate the expected output is one <4 x float> call
; to @vtanf. Without a vector library the expected output leaves all four
; scalar tanf calls in place.
550define <4 x float> @tan_4x(<4 x float>* %a) {
551; CHECK-LABEL: @tan_4x(
552; CHECK-NEXT:  entry:
553; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
554; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vtanf(<4 x float> [[TMP0]])
555; CHECK-NEXT:    ret <4 x float> [[TMP1]]
556;
557; NOACCELERATE-LABEL: @tan_4x(
558; NOACCELERATE-NEXT:  entry:
559; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
560; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
561; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @tanf(float [[VECEXT]])
562; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
563; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
564; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @tanf(float [[VECEXT_1]])
565; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
566; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
567; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @tanf(float [[VECEXT_2]])
568; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
569; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
570; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @tanf(float [[VECEXT_3]])
571; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
572; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
573;
574entry:
575  %0 = load <4 x float>, <4 x float>* %a, align 16
576  %vecext = extractelement <4 x float> %0, i32 0
577  %1 = tail call fast float @tanf(float %vecext)
578  %vecins = insertelement <4 x float> undef, float %1, i32 0
579  %vecext.1 = extractelement <4 x float> %0, i32 1
580  %2 = tail call fast float @tanf(float %vecext.1)
581  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
582  %vecext.2 = extractelement <4 x float> %0, i32 2
583  %3 = tail call fast float @tanf(float %vecext.2)
584  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
585  %vecext.3 = extractelement <4 x float> %0, i32 3
586  %4 = tail call fast float @tanf(float %vecext.3)
587  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
588  ret <4 x float> %vecins.3
589}
590declare float @asinf(float) readonly nounwind willreturn
; asin_4x: loads a <4 x float> from %a, calls the scalar library function
; asinf() (with fast-math flags) on each of the four lanes, and rebuilds the
; result vector with insertelement.
; With -vector-library=Accelerate the expected output is one <4 x float> call
; to @vasinf. Without a vector library the expected output leaves all four
; scalar asinf calls in place.
591define <4 x float> @asin_4x(<4 x float>* %a) {
592; CHECK-LABEL: @asin_4x(
593; CHECK-NEXT:  entry:
594; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
595; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vasinf(<4 x float> [[TMP0]])
596; CHECK-NEXT:    ret <4 x float> [[TMP1]]
597;
598; NOACCELERATE-LABEL: @asin_4x(
599; NOACCELERATE-NEXT:  entry:
600; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
601; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
602; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @asinf(float [[VECEXT]])
603; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
604; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
605; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @asinf(float [[VECEXT_1]])
606; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
607; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
608; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @asinf(float [[VECEXT_2]])
609; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
610; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
611; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @asinf(float [[VECEXT_3]])
612; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
613; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
614;
615entry:
616  %0 = load <4 x float>, <4 x float>* %a, align 16
617  %vecext = extractelement <4 x float> %0, i32 0
618  %1 = tail call fast float @asinf(float %vecext)
619  %vecins = insertelement <4 x float> undef, float %1, i32 0
620  %vecext.1 = extractelement <4 x float> %0, i32 1
621  %2 = tail call fast float @asinf(float %vecext.1)
622  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
623  %vecext.2 = extractelement <4 x float> %0, i32 2
624  %3 = tail call fast float @asinf(float %vecext.2)
625  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
626  %vecext.3 = extractelement <4 x float> %0, i32 3
627  %4 = tail call fast float @asinf(float %vecext.3)
628  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
629  ret <4 x float> %vecins.3
630}
631declare float @acosf(float) readonly nounwind willreturn
; acos_4x: loads a <4 x float> from %a, calls the scalar library function
; acosf() (with fast-math flags) on each of the four lanes, and rebuilds the
; result vector with insertelement.
; With -vector-library=Accelerate the expected output is one <4 x float> call
; to @vacosf. Without a vector library the expected output leaves all four
; scalar acosf calls in place.
632define <4 x float> @acos_4x(<4 x float>* %a) {
633; CHECK-LABEL: @acos_4x(
634; CHECK-NEXT:  entry:
635; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
636; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vacosf(<4 x float> [[TMP0]])
637; CHECK-NEXT:    ret <4 x float> [[TMP1]]
638;
639; NOACCELERATE-LABEL: @acos_4x(
640; NOACCELERATE-NEXT:  entry:
641; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
642; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
643; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @acosf(float [[VECEXT]])
644; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
645; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
646; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @acosf(float [[VECEXT_1]])
647; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
648; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
649; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @acosf(float [[VECEXT_2]])
650; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
651; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
652; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @acosf(float [[VECEXT_3]])
653; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
654; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
655;
656entry:
657  %0 = load <4 x float>, <4 x float>* %a, align 16
658  %vecext = extractelement <4 x float> %0, i32 0
659  %1 = tail call fast float @acosf(float %vecext)
660  %vecins = insertelement <4 x float> undef, float %1, i32 0
661  %vecext.1 = extractelement <4 x float> %0, i32 1
662  %2 = tail call fast float @acosf(float %vecext.1)
663  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
664  %vecext.2 = extractelement <4 x float> %0, i32 2
665  %3 = tail call fast float @acosf(float %vecext.2)
666  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
667  %vecext.3 = extractelement <4 x float> %0, i32 3
668  %4 = tail call fast float @acosf(float %vecext.3)
669  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
670  ret <4 x float> %vecins.3
671}
672declare float @atanf(float) readonly nounwind willreturn
; atan_4x: loads a <4 x float> from %a, calls the scalar library function
; atanf() (with fast-math flags) on each of the four lanes, and rebuilds the
; result vector with insertelement.
; With -vector-library=Accelerate the expected output is one <4 x float> call
; to @vatanf. Without a vector library the expected output leaves all four
; scalar atanf calls in place.
673define <4 x float> @atan_4x(<4 x float>* %a) {
674; CHECK-LABEL: @atan_4x(
675; CHECK-NEXT:  entry:
676; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
677; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vatanf(<4 x float> [[TMP0]])
678; CHECK-NEXT:    ret <4 x float> [[TMP1]]
679;
680; NOACCELERATE-LABEL: @atan_4x(
681; NOACCELERATE-NEXT:  entry:
682; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
683; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
684; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @atanf(float [[VECEXT]])
685; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
686; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
687; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @atanf(float [[VECEXT_1]])
688; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
689; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
690; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @atanf(float [[VECEXT_2]])
691; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
692; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
693; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @atanf(float [[VECEXT_3]])
694; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
695; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
696;
697entry:
698  %0 = load <4 x float>, <4 x float>* %a, align 16
699  %vecext = extractelement <4 x float> %0, i32 0
700  %1 = tail call fast float @atanf(float %vecext)
701  %vecins = insertelement <4 x float> undef, float %1, i32 0
702  %vecext.1 = extractelement <4 x float> %0, i32 1
703  %2 = tail call fast float @atanf(float %vecext.1)
704  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
705  %vecext.2 = extractelement <4 x float> %0, i32 2
706  %3 = tail call fast float @atanf(float %vecext.2)
707  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
708  %vecext.3 = extractelement <4 x float> %0, i32 3
709  %4 = tail call fast float @atanf(float %vecext.3)
710  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
711  ret <4 x float> %vecins.3
712}
713declare float @sinhf(float) readonly nounwind willreturn
; sinh_4x: loads a <4 x float> from %a, calls the scalar library function
; sinhf() (with fast-math flags) on each of the four lanes, and rebuilds the
; result vector with insertelement.
; With -vector-library=Accelerate the expected output is one <4 x float> call
; to @vsinhf. Without a vector library the expected output leaves all four
; scalar sinhf calls in place.
714define <4 x float> @sinh_4x(<4 x float>* %a) {
715; CHECK-LABEL: @sinh_4x(
716; CHECK-NEXT:  entry:
717; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
718; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vsinhf(<4 x float> [[TMP0]])
719; CHECK-NEXT:    ret <4 x float> [[TMP1]]
720;
721; NOACCELERATE-LABEL: @sinh_4x(
722; NOACCELERATE-NEXT:  entry:
723; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
724; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
725; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @sinhf(float [[VECEXT]])
726; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
727; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
728; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @sinhf(float [[VECEXT_1]])
729; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
730; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
731; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @sinhf(float [[VECEXT_2]])
732; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
733; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
734; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @sinhf(float [[VECEXT_3]])
735; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
736; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
737;
738entry:
739  %0 = load <4 x float>, <4 x float>* %a, align 16
740  %vecext = extractelement <4 x float> %0, i32 0
741  %1 = tail call fast float @sinhf(float %vecext)
742  %vecins = insertelement <4 x float> undef, float %1, i32 0
743  %vecext.1 = extractelement <4 x float> %0, i32 1
744  %2 = tail call fast float @sinhf(float %vecext.1)
745  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
746  %vecext.2 = extractelement <4 x float> %0, i32 2
747  %3 = tail call fast float @sinhf(float %vecext.2)
748  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
749  %vecext.3 = extractelement <4 x float> %0, i32 3
750  %4 = tail call fast float @sinhf(float %vecext.3)
751  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
752  ret <4 x float> %vecins.3
753}
754declare float @coshf(float) readonly nounwind willreturn
; cosh_4x: loads a <4 x float> from %a, calls the scalar library function
; coshf() (with fast-math flags) on each of the four lanes, and rebuilds the
; result vector with insertelement.
; With -vector-library=Accelerate the expected output is one <4 x float> call
; to @vcoshf. Without a vector library the expected output leaves all four
; scalar coshf calls in place.
755define <4 x float> @cosh_4x(<4 x float>* %a) {
756; CHECK-LABEL: @cosh_4x(
757; CHECK-NEXT:  entry:
758; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
759; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vcoshf(<4 x float> [[TMP0]])
760; CHECK-NEXT:    ret <4 x float> [[TMP1]]
761;
762; NOACCELERATE-LABEL: @cosh_4x(
763; NOACCELERATE-NEXT:  entry:
764; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
765; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
766; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @coshf(float [[VECEXT]])
767; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
768; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
769; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @coshf(float [[VECEXT_1]])
770; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
771; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
772; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @coshf(float [[VECEXT_2]])
773; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
774; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
775; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @coshf(float [[VECEXT_3]])
776; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
777; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
778;
779entry:
780  %0 = load <4 x float>, <4 x float>* %a, align 16
781  %vecext = extractelement <4 x float> %0, i32 0
782  %1 = tail call fast float @coshf(float %vecext)
783  %vecins = insertelement <4 x float> undef, float %1, i32 0
784  %vecext.1 = extractelement <4 x float> %0, i32 1
785  %2 = tail call fast float @coshf(float %vecext.1)
786  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
787  %vecext.2 = extractelement <4 x float> %0, i32 2
788  %3 = tail call fast float @coshf(float %vecext.2)
789  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
790  %vecext.3 = extractelement <4 x float> %0, i32 3
791  %4 = tail call fast float @coshf(float %vecext.3)
792  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
793  ret <4 x float> %vecins.3
794}
795declare float @tanhf(float) readonly nounwind willreturn
; tanh_4x: loads a <4 x float> from %a, calls the scalar library function
; tanhf() (with fast-math flags) on each of the four lanes, and rebuilds the
; result vector with insertelement.
; With -vector-library=Accelerate the expected output is one <4 x float> call
; to @vtanhf. Without a vector library the expected output leaves all four
; scalar tanhf calls in place.
796define <4 x float> @tanh_4x(<4 x float>* %a) {
797; CHECK-LABEL: @tanh_4x(
798; CHECK-NEXT:  entry:
799; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
800; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vtanhf(<4 x float> [[TMP0]])
801; CHECK-NEXT:    ret <4 x float> [[TMP1]]
802;
803; NOACCELERATE-LABEL: @tanh_4x(
804; NOACCELERATE-NEXT:  entry:
805; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
806; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
807; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @tanhf(float [[VECEXT]])
808; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
809; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
810; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @tanhf(float [[VECEXT_1]])
811; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
812; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
813; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @tanhf(float [[VECEXT_2]])
814; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
815; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
816; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @tanhf(float [[VECEXT_3]])
817; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
818; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
819;
820entry:
821  %0 = load <4 x float>, <4 x float>* %a, align 16
822  %vecext = extractelement <4 x float> %0, i32 0
823  %1 = tail call fast float @tanhf(float %vecext)
824  %vecins = insertelement <4 x float> undef, float %1, i32 0
825  %vecext.1 = extractelement <4 x float> %0, i32 1
826  %2 = tail call fast float @tanhf(float %vecext.1)
827  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
828  %vecext.2 = extractelement <4 x float> %0, i32 2
829  %3 = tail call fast float @tanhf(float %vecext.2)
830  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
831  %vecext.3 = extractelement <4 x float> %0, i32 3
832  %4 = tail call fast float @tanhf(float %vecext.3)
833  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
834  ret <4 x float> %vecins.3
835}
836declare float @asinhf(float) readonly nounwind willreturn
; asinh_4x: loads a <4 x float> from %a, calls the scalar library function
; asinhf() (with fast-math flags) on each of the four lanes, and rebuilds the
; result vector with insertelement.
; With -vector-library=Accelerate the expected output is one <4 x float> call
; to @vasinhf. Without a vector library the expected output leaves all four
; scalar asinhf calls in place.
837define <4 x float> @asinh_4x(<4 x float>* %a) {
838; CHECK-LABEL: @asinh_4x(
839; CHECK-NEXT:  entry:
840; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
841; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vasinhf(<4 x float> [[TMP0]])
842; CHECK-NEXT:    ret <4 x float> [[TMP1]]
843;
844; NOACCELERATE-LABEL: @asinh_4x(
845; NOACCELERATE-NEXT:  entry:
846; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
847; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
848; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @asinhf(float [[VECEXT]])
849; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
850; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
851; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @asinhf(float [[VECEXT_1]])
852; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
853; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
854; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @asinhf(float [[VECEXT_2]])
855; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
856; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
857; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @asinhf(float [[VECEXT_3]])
858; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
859; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
860;
861entry:
862  %0 = load <4 x float>, <4 x float>* %a, align 16
863  %vecext = extractelement <4 x float> %0, i32 0
864  %1 = tail call fast float @asinhf(float %vecext)
865  %vecins = insertelement <4 x float> undef, float %1, i32 0
866  %vecext.1 = extractelement <4 x float> %0, i32 1
867  %2 = tail call fast float @asinhf(float %vecext.1)
868  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
869  %vecext.2 = extractelement <4 x float> %0, i32 2
870  %3 = tail call fast float @asinhf(float %vecext.2)
871  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
872  %vecext.3 = extractelement <4 x float> %0, i32 3
873  %4 = tail call fast float @asinhf(float %vecext.3)
874  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
875  ret <4 x float> %vecins.3
876}
877declare float @acoshf(float) readonly nounwind willreturn
; acosh_4x: loads a <4 x float> from %a, calls the scalar library function
; acoshf() (with fast-math flags) on each of the four lanes, and rebuilds the
; result vector with insertelement.
; With -vector-library=Accelerate the expected output is one <4 x float> call
; to @vacoshf. Without a vector library the expected output leaves all four
; scalar acoshf calls in place.
878define <4 x float> @acosh_4x(<4 x float>* %a) {
879; CHECK-LABEL: @acosh_4x(
880; CHECK-NEXT:  entry:
881; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
882; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vacoshf(<4 x float> [[TMP0]])
883; CHECK-NEXT:    ret <4 x float> [[TMP1]]
884;
885; NOACCELERATE-LABEL: @acosh_4x(
886; NOACCELERATE-NEXT:  entry:
887; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
888; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
889; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @acoshf(float [[VECEXT]])
890; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
891; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
892; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @acoshf(float [[VECEXT_1]])
893; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
894; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
895; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @acoshf(float [[VECEXT_2]])
896; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
897; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
898; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @acoshf(float [[VECEXT_3]])
899; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
900; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
901;
902entry:
903  %0 = load <4 x float>, <4 x float>* %a, align 16
904  %vecext = extractelement <4 x float> %0, i32 0
905  %1 = tail call fast float @acoshf(float %vecext)
906  %vecins = insertelement <4 x float> undef, float %1, i32 0
907  %vecext.1 = extractelement <4 x float> %0, i32 1
908  %2 = tail call fast float @acoshf(float %vecext.1)
909  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
910  %vecext.2 = extractelement <4 x float> %0, i32 2
911  %3 = tail call fast float @acoshf(float %vecext.2)
912  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
913  %vecext.3 = extractelement <4 x float> %0, i32 3
914  %4 = tail call fast float @acoshf(float %vecext.3)
915  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
916  ret <4 x float> %vecins.3
917}
918declare float @atanhf(float) readonly nounwind willreturn
; atanh_4x: loads a <4 x float> from %a, calls the scalar library function
; atanhf() (with fast-math flags) on each of the four lanes, and rebuilds the
; result vector with insertelement.
; With -vector-library=Accelerate the expected output is one <4 x float> call
; to @vatanhf. Without a vector library the expected output leaves all four
; scalar atanhf calls in place.
919define <4 x float> @atanh_4x(<4 x float>* %a) {
920; CHECK-LABEL: @atanh_4x(
921; CHECK-NEXT:  entry:
922; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
923; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vatanhf(<4 x float> [[TMP0]])
924; CHECK-NEXT:    ret <4 x float> [[TMP1]]
925;
926; NOACCELERATE-LABEL: @atanh_4x(
927; NOACCELERATE-NEXT:  entry:
928; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
929; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
930; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @atanhf(float [[VECEXT]])
931; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
932; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
933; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @atanhf(float [[VECEXT_1]])
934; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
935; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
936; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @atanhf(float [[VECEXT_2]])
937; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
938; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
939; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @atanhf(float [[VECEXT_3]])
940; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
941; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
942;
943entry:
944  %0 = load <4 x float>, <4 x float>* %a, align 16
945  %vecext = extractelement <4 x float> %0, i32 0
946  %1 = tail call fast float @atanhf(float %vecext)
947  %vecins = insertelement <4 x float> undef, float %1, i32 0
948  %vecext.1 = extractelement <4 x float> %0, i32 1
949  %2 = tail call fast float @atanhf(float %vecext.1)
950  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
951  %vecext.2 = extractelement <4 x float> %0, i32 2
952  %3 = tail call fast float @atanhf(float %vecext.2)
953  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
954  %vecext.3 = extractelement <4 x float> %0, i32 3
955  %4 = tail call fast float @atanhf(float %vecext.3)
956  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
957  ret <4 x float> %vecins.3
958}
959
960; Accelerate *does not* provide sin() for <2 x float>.
; sin_2x: loads a <2 x float> from %a, calls the @llvm.sin.f32 intrinsic (with
; fast-math flags) on each of the two lanes, and rebuilds the result vector
; with insertelement.
; Both configurations expect the two scalar calls to remain. The only
; difference is that the -vector-library=Accelerate expectations also match an
; attribute-group reference on each scalar call.
; NOTE(review): that attribute group is presumably the vector-variant mapping
; added by -inject-tli-mappings - confirm against the pass output.
961define <2 x float> @sin_2x(<2 x float>* %a) {
962; CHECK-LABEL: @sin_2x(
963; CHECK-NEXT:  entry:
964; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x float>, <2 x float>* [[A:%.*]], align 16
965; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <2 x float> [[TMP0]], i32 0
966; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT]]) #[[ATTR2:[0-9]+]]
967; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <2 x float> undef, float [[TMP1]], i32 0
968; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <2 x float> [[TMP0]], i32 1
969; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_1]]) #[[ATTR2]]
970; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <2 x float> [[VECINS]], float [[TMP2]], i32 1
971; CHECK-NEXT:    ret <2 x float> [[VECINS_1]]
972;
973; NOACCELERATE-LABEL: @sin_2x(
974; NOACCELERATE-NEXT:  entry:
975; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <2 x float>, <2 x float>* [[A:%.*]], align 16
976; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <2 x float> [[TMP0]], i32 0
977; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT]])
978; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <2 x float> undef, float [[TMP1]], i32 0
979; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <2 x float> [[TMP0]], i32 1
980; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_1]])
981; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <2 x float> [[VECINS]], float [[TMP2]], i32 1
982; NOACCELERATE-NEXT:    ret <2 x float> [[VECINS_1]]
983;
984entry:
985  %0 = load <2 x float>, <2 x float>* %a, align 16
986  %vecext = extractelement <2 x float> %0, i32 0
987  %1 = tail call fast float @llvm.sin.f32(float %vecext)
988  %vecins = insertelement <2 x float> undef, float %1, i32 0
989  %vecext.1 = extractelement <2 x float> %0, i32 1
990  %2 = tail call fast float @llvm.sin.f32(float %vecext.1)
991  %vecins.1 = insertelement <2 x float> %vecins, float %2, i32 1
992  ret <2 x float> %vecins.1
993}
994
995
996declare float @llvm.cos.f32(float)
997
998; Accelerate provides cos() for <4 x float>
; int_cos_4x: same shape as the libm-based cos_4x test, but the four per-lane
; calls use the @llvm.cos.f32 intrinsic instead of cosf().
; With -vector-library=Accelerate the expected output is one <4 x float> call
; to @vcosf. Without a vector library the expected output keeps lanes 0 and 1
; as scalar intrinsic calls and combines lanes 2 and 3 into a single
; <2 x float> @llvm.cos.v2f32 call.
999define <4 x float> @int_cos_4x(<4 x float>* %a) {
1000; CHECK-LABEL: @int_cos_4x(
1001; CHECK-NEXT:  entry:
1002; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
1003; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vcosf(<4 x float> [[TMP0]])
1004; CHECK-NEXT:    ret <4 x float> [[TMP1]]
1005;
1006; NOACCELERATE-LABEL: @int_cos_4x(
1007; NOACCELERATE-NEXT:  entry:
1008; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
1009; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
1010; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT]])
1011; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
1012; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
1013; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_1]])
1014; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
1015; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
1016; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
1017; NOACCELERATE-NEXT:    [[TMP3:%.*]] = insertelement <2 x float> poison, float [[VECEXT_2]], i32 0
1018; NOACCELERATE-NEXT:    [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[VECEXT_3]], i32 1
1019; NOACCELERATE-NEXT:    [[TMP5:%.*]] = call fast <2 x float> @llvm.cos.v2f32(<2 x float> [[TMP4]])
1020; NOACCELERATE-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
1021; NOACCELERATE-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
1022; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_31]]
1023;
1024entry:
1025  %0 = load <4 x float>, <4 x float>* %a, align 16
1026  %vecext = extractelement <4 x float> %0, i32 0
1027  %1 = tail call fast float @llvm.cos.f32(float %vecext)
1028  %vecins = insertelement <4 x float> undef, float %1, i32 0
1029  %vecext.1 = extractelement <4 x float> %0, i32 1
1030  %2 = tail call fast float @llvm.cos.f32(float %vecext.1)
1031  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
1032  %vecext.2 = extractelement <4 x float> %0, i32 2
1033  %3 = tail call fast float @llvm.cos.f32(float %vecext.2)
1034  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
1035  %vecext.3 = extractelement <4 x float> %0, i32 3
1036  %4 = tail call fast float @llvm.cos.f32(float %vecext.3)
1037  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
1038  ret <4 x float> %vecins.3
1039}
1040
1041; Accelerate *does not* provide cos() for <2 x float>.
; cos_2x: loads a <2 x float> from %a, calls the @llvm.cos.f32 intrinsic (with
; fast-math flags) on each of the two lanes, and rebuilds the result vector
; with insertelement.
; Both configurations expect the two scalar calls to remain. The only
; difference is that the -vector-library=Accelerate expectations also match an
; attribute-group reference on each scalar call.
; NOTE(review): that attribute group is presumably the vector-variant mapping
; added by -inject-tli-mappings - confirm against the pass output.
1042define <2 x float> @cos_2x(<2 x float>* %a) {
1043; CHECK-LABEL: @cos_2x(
1044; CHECK-NEXT:  entry:
1045; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x float>, <2 x float>* [[A:%.*]], align 16
1046; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <2 x float> [[TMP0]], i32 0
1047; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT]]) #[[ATTR3:[0-9]+]]
1048; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <2 x float> undef, float [[TMP1]], i32 0
1049; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <2 x float> [[TMP0]], i32 1
1050; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_1]]) #[[ATTR3]]
1051; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <2 x float> [[VECINS]], float [[TMP2]], i32 1
1052; CHECK-NEXT:    ret <2 x float> [[VECINS_1]]
1053;
1054; NOACCELERATE-LABEL: @cos_2x(
1055; NOACCELERATE-NEXT:  entry:
1056; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <2 x float>, <2 x float>* [[A:%.*]], align 16
1057; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <2 x float> [[TMP0]], i32 0
1058; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT]])
1059; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <2 x float> undef, float [[TMP1]], i32 0
1060; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <2 x float> [[TMP0]], i32 1
1061; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_1]])
1062; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <2 x float> [[VECINS]], float [[TMP2]], i32 1
1063; NOACCELERATE-NEXT:    ret <2 x float> [[VECINS_1]]
1064;
1065entry:
1066  %0 = load <2 x float>, <2 x float>* %a, align 16
1067  %vecext = extractelement <2 x float> %0, i32 0
1068  %1 = tail call fast float @llvm.cos.f32(float %vecext)
1069  %vecins = insertelement <2 x float> undef, float %1, i32 0
1070  %vecext.1 = extractelement <2 x float> %0, i32 1
1071  %2 = tail call fast float @llvm.cos.f32(float %vecext.1)
1072  %vecins.1 = insertelement <2 x float> %vecins, float %2, i32 1
1073  ret <2 x float> %vecins.1
1074}
1075