1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -fp-contract=fast | FileCheck %s --check-prefix=FMA --check-prefix=FMA-INFS
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -fp-contract=fast | FileCheck %s --check-prefix=FMA4 --check-prefix=FMA4-INFS
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -fp-contract=fast | FileCheck %s --check-prefix=FMA4 --check-prefix=FMA4-INFS
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq -fp-contract=fast | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512-INFS
6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefix=FMA --check-prefix=FMA-NOINFS
7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefix=FMA4 --check-prefix=FMA4-NOINFS
8; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefix=FMA4 --check-prefix=FMA4-NOINFS
9; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512-NOINFS
10
11;
12; Pattern: (fadd (fmul x, y), z) -> (fmadd x,y,z)
13;
14
; Verifies (a0 * a1) + a2 on <16 x float>. The checks expect one vfmadd per
; 256-bit half for the FMA and FMA4 runs, and a single 512-bit vfmadd for
; the AVX512 run.
define <16 x float> @test_16f32_fmadd(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; FMA-LABEL: test_16f32_fmadd:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd213ps %ymm4, %ymm2, %ymm0
; FMA-NEXT:    vfmadd213ps %ymm5, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_16f32_fmadd:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddps %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfmaddps %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_16f32_fmadd:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmadd213ps %zmm2, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %x = fmul <16 x float> %a0, %a1
  %res = fadd <16 x float> %x, %a2
  ret <16 x float> %res
}

; Verifies (a0 * a1) + a2 on <8 x double>. The checks expect one vfmadd per
; 256-bit half for the FMA and FMA4 runs, and a single 512-bit vfmadd for
; the AVX512 run.
define <8 x double> @test_8f64_fmadd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; FMA-LABEL: test_8f64_fmadd:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd213pd %ymm4, %ymm2, %ymm0
; FMA-NEXT:    vfmadd213pd %ymm5, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_8f64_fmadd:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddpd %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfmaddpd %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_8f64_fmadd:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmadd213pd %zmm2, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %x = fmul <8 x double> %a0, %a1
  %res = fadd <8 x double> %x, %a2
  ret <8 x double> %res
}

59;
60; Pattern: (fsub (fmul x, y), z) -> (fmsub x, y, z)
61;
62
; Verifies (a0 * a1) - a2 on <16 x float>. The checks expect one vfmsub per
; 256-bit half for the FMA and FMA4 runs, and a single 512-bit vfmsub for
; the AVX512 run.
define <16 x float> @test_16f32_fmsub(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; FMA-LABEL: test_16f32_fmsub:
; FMA:       # BB#0:
; FMA-NEXT:    vfmsub213ps %ymm4, %ymm2, %ymm0
; FMA-NEXT:    vfmsub213ps %ymm5, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_16f32_fmsub:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmsubps %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfmsubps %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_16f32_fmsub:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmsub213ps %zmm2, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %x = fmul <16 x float> %a0, %a1
  %res = fsub <16 x float> %x, %a2
  ret <16 x float> %res
}

; Verifies (a0 * a1) - a2 on <8 x double>. The checks expect one vfmsub per
; 256-bit half for the FMA and FMA4 runs, and a single 512-bit vfmsub for
; the AVX512 run.
define <8 x double> @test_8f64_fmsub(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; FMA-LABEL: test_8f64_fmsub:
; FMA:       # BB#0:
; FMA-NEXT:    vfmsub213pd %ymm4, %ymm2, %ymm0
; FMA-NEXT:    vfmsub213pd %ymm5, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_8f64_fmsub:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmsubpd %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfmsubpd %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_8f64_fmsub:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmsub213pd %zmm2, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %x = fmul <8 x double> %a0, %a1
  %res = fsub <8 x double> %x, %a2
  ret <8 x double> %res
}

107;
108; Pattern: (fsub z, (fmul x, y)) -> (fnmadd x, y, z)
109;
110
; Verifies a2 - (a0 * a1) on <16 x float>. The checks expect one vfnmadd per
; 256-bit half for the FMA and FMA4 runs, and a single 512-bit vfnmadd for
; the AVX512 run.
define <16 x float> @test_16f32_fnmadd(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; FMA-LABEL: test_16f32_fnmadd:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmadd213ps %ymm4, %ymm2, %ymm0
; FMA-NEXT:    vfnmadd213ps %ymm5, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_16f32_fnmadd:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmaddps %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfnmaddps %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_16f32_fnmadd:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmadd213ps %zmm2, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %x = fmul <16 x float> %a0, %a1
  %res = fsub <16 x float> %a2, %x
  ret <16 x float> %res
}

; Verifies a2 - (a0 * a1) on <8 x double>. The checks expect one vfnmadd per
; 256-bit half for the FMA and FMA4 runs, and a single 512-bit vfnmadd for
; the AVX512 run.
define <8 x double> @test_8f64_fnmadd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; FMA-LABEL: test_8f64_fnmadd:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmadd213pd %ymm4, %ymm2, %ymm0
; FMA-NEXT:    vfnmadd213pd %ymm5, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_8f64_fnmadd:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmaddpd %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfnmaddpd %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_8f64_fnmadd:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmadd213pd %zmm2, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %x = fmul <8 x double> %a0, %a1
  %res = fsub <8 x double> %a2, %x
  ret <8 x double> %res
}

155;
156; Pattern: (fsub (fneg (fmul x, y)), z) -> (fnmsub x, y, z)
157;
158
; Verifies -(a0 * a1) - a2 on <16 x float>; the negation is written as an fsub
; from a -0.0 splat. The checks expect one vfnmsub per 256-bit half for the
; FMA and FMA4 runs, and a single 512-bit vfnmsub for the AVX512 run.
define <16 x float> @test_16f32_fnmsub(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; FMA-LABEL: test_16f32_fnmsub:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmsub213ps %ymm4, %ymm2, %ymm0
; FMA-NEXT:    vfnmsub213ps %ymm5, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_16f32_fnmsub:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmsubps %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfnmsubps %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_16f32_fnmsub:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmsub213ps %zmm2, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %x = fmul <16 x float> %a0, %a1
  %y = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x
  %res = fsub <16 x float> %y, %a2
  ret <16 x float> %res
}

; Verifies -(a0 * a1) - a2 on <8 x double>; the negation is written as an fsub
; from a -0.0 splat. The checks expect one vfnmsub per 256-bit half for the
; FMA and FMA4 runs, and a single 512-bit vfnmsub for the AVX512 run.
define <8 x double> @test_8f64_fnmsub(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; FMA-LABEL: test_8f64_fnmsub:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmsub213pd %ymm4, %ymm2, %ymm0
; FMA-NEXT:    vfnmsub213pd %ymm5, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_8f64_fnmsub:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmsubpd %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfnmsubpd %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_8f64_fnmsub:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmsub213pd %zmm2, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %x = fmul <8 x double> %a0, %a1
  %y = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x
  %res = fsub <8 x double> %y, %a2
  ret <8 x double> %res
}

205;
206; Load Folding Patterns
207;
208
; Verifies (load(a0) * a1) + a2 on <16 x float>. The checks expect the load to
; be folded into the fused instruction as a memory operand: (%rdi) and
; 32(%rdi) for the two 256-bit halves, or (%rdi) alone for the 512-bit form.
define <16 x float> @test_16f32_fmadd_load(<16 x float>* %a0, <16 x float> %a1, <16 x float> %a2) {
; FMA-LABEL: test_16f32_fmadd_load:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd132ps (%rdi), %ymm2, %ymm0
; FMA-NEXT:    vfmadd132ps 32(%rdi), %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_16f32_fmadd_load:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddps %ymm2, (%rdi), %ymm0, %ymm0
; FMA4-NEXT:    vfmaddps %ymm3, 32(%rdi), %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_16f32_fmadd_load:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmadd132ps (%rdi), %zmm1, %zmm0
; AVX512-NEXT:    retq
  %x = load <16 x float>, <16 x float>* %a0
  %y = fmul <16 x float> %x, %a1
  %res = fadd <16 x float> %y, %a2
  ret <16 x float> %res
}

; Verifies (load(a0) * a1) - a2 on <8 x double>. The checks expect the load to
; be folded into the fused instruction as a memory operand: (%rdi) and
; 32(%rdi) for the two 256-bit halves, or (%rdi) alone for the 512-bit form.
define <8 x double> @test_8f64_fmsub_load(<8 x double>* %a0, <8 x double> %a1, <8 x double> %a2) {
; FMA-LABEL: test_8f64_fmsub_load:
; FMA:       # BB#0:
; FMA-NEXT:    vfmsub132pd (%rdi), %ymm2, %ymm0
; FMA-NEXT:    vfmsub132pd 32(%rdi), %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_8f64_fmsub_load:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmsubpd %ymm2, (%rdi), %ymm0, %ymm0
; FMA4-NEXT:    vfmsubpd %ymm3, 32(%rdi), %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_8f64_fmsub_load:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmsub132pd (%rdi), %zmm1, %zmm0
; AVX512-NEXT:    retq
  %x = load <8 x double>, <8 x double>* %a0
  %y = fmul <8 x double> %x, %a1
  %res = fsub <8 x double> %y, %a2
  ret <8 x double> %res
}

255;
256; Patterns (+ fneg variants): mul(add(1.0,x),y), mul(sub(1.0,x),y), mul(sub(x,1.0),y)
257;
258
; Verifies (x + 1.0) * y on <16 x float>. Runs without -enable-no-infs-fp-math
; (the *-INFS prefixes) expect a separate add and multiply; runs with it (the
; *-NOINFS prefixes) expect a single vfmadd that uses the same register as
; multiplicand and addend.
define <16 x float> @test_v16f32_mul_add_x_one_y(<16 x float> %x, <16 x float> %y) {
; FMA-INFS-LABEL: test_v16f32_mul_add_x_one_y:
; FMA-INFS:       # BB#0:
; FMA-INFS-NEXT:    vmovaps {{.*#+}} ymm4 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
; FMA-INFS-NEXT:    vaddps %ymm4, %ymm1, %ymm1
; FMA-INFS-NEXT:    vaddps %ymm4, %ymm0, %ymm0
; FMA-INFS-NEXT:    vmulps %ymm2, %ymm0, %ymm0
; FMA-INFS-NEXT:    vmulps %ymm3, %ymm1, %ymm1
; FMA-INFS-NEXT:    retq
;
; FMA4-INFS-LABEL: test_v16f32_mul_add_x_one_y:
; FMA4-INFS:       # BB#0:
; FMA4-INFS-NEXT:    vmovaps {{.*#+}} ymm4 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
; FMA4-INFS-NEXT:    vaddps %ymm4, %ymm1, %ymm1
; FMA4-INFS-NEXT:    vaddps %ymm4, %ymm0, %ymm0
; FMA4-INFS-NEXT:    vmulps %ymm2, %ymm0, %ymm0
; FMA4-INFS-NEXT:    vmulps %ymm3, %ymm1, %ymm1
; FMA4-INFS-NEXT:    retq
;
; AVX512-INFS-LABEL: test_v16f32_mul_add_x_one_y:
; AVX512-INFS:       # BB#0:
; AVX512-INFS-NEXT:    vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512-INFS-NEXT:    vmulps %zmm1, %zmm0, %zmm0
; AVX512-INFS-NEXT:    retq
;
; FMA-NOINFS-LABEL: test_v16f32_mul_add_x_one_y:
; FMA-NOINFS:       # BB#0:
; FMA-NOINFS-NEXT:    vfmadd213ps %ymm2, %ymm2, %ymm0
; FMA-NOINFS-NEXT:    vfmadd213ps %ymm3, %ymm3, %ymm1
; FMA-NOINFS-NEXT:    retq
;
; FMA4-NOINFS-LABEL: test_v16f32_mul_add_x_one_y:
; FMA4-NOINFS:       # BB#0:
; FMA4-NOINFS-NEXT:    vfmaddps %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NOINFS-NEXT:    vfmaddps %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NOINFS-NEXT:    retq
;
; AVX512-NOINFS-LABEL: test_v16f32_mul_add_x_one_y:
; AVX512-NOINFS:       # BB#0:
; AVX512-NOINFS-NEXT:    vfmadd213ps %zmm1, %zmm1, %zmm0
; AVX512-NOINFS-NEXT:    retq
  %a = fadd <16 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>
  %m = fmul <16 x float> %a, %y
  ret <16 x float> %m
}

; Verifies y * (x + 1.0) on <8 x double>, with y as the first fmul operand.
; Runs without -enable-no-infs-fp-math (the *-INFS prefixes) expect a separate
; add and multiply; runs with it (the *-NOINFS prefixes) expect a single
; vfmadd that uses the same register as multiplicand and addend.
define <8 x double> @test_v8f64_mul_y_add_x_one(<8 x double> %x, <8 x double> %y) {
; FMA-INFS-LABEL: test_v8f64_mul_y_add_x_one:
; FMA-INFS:       # BB#0:
; FMA-INFS-NEXT:    vmovapd {{.*#+}} ymm4 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
; FMA-INFS-NEXT:    vaddpd %ymm4, %ymm1, %ymm1
; FMA-INFS-NEXT:    vaddpd %ymm4, %ymm0, %ymm0
; FMA-INFS-NEXT:    vmulpd %ymm0, %ymm2, %ymm0
; FMA-INFS-NEXT:    vmulpd %ymm1, %ymm3, %ymm1
; FMA-INFS-NEXT:    retq
;
; FMA4-INFS-LABEL: test_v8f64_mul_y_add_x_one:
; FMA4-INFS:       # BB#0:
; FMA4-INFS-NEXT:    vmovapd {{.*#+}} ymm4 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
; FMA4-INFS-NEXT:    vaddpd %ymm4, %ymm1, %ymm1
; FMA4-INFS-NEXT:    vaddpd %ymm4, %ymm0, %ymm0
; FMA4-INFS-NEXT:    vmulpd %ymm0, %ymm2, %ymm0
; FMA4-INFS-NEXT:    vmulpd %ymm1, %ymm3, %ymm1
; FMA4-INFS-NEXT:    retq
;
; AVX512-INFS-LABEL: test_v8f64_mul_y_add_x_one:
; AVX512-INFS:       # BB#0:
; AVX512-INFS-NEXT:    vaddpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512-INFS-NEXT:    vmulpd %zmm0, %zmm1, %zmm0
; AVX512-INFS-NEXT:    retq
;
; FMA-NOINFS-LABEL: test_v8f64_mul_y_add_x_one:
; FMA-NOINFS:       # BB#0:
; FMA-NOINFS-NEXT:    vfmadd213pd %ymm2, %ymm2, %ymm0
; FMA-NOINFS-NEXT:    vfmadd213pd %ymm3, %ymm3, %ymm1
; FMA-NOINFS-NEXT:    retq
;
; FMA4-NOINFS-LABEL: test_v8f64_mul_y_add_x_one:
; FMA4-NOINFS:       # BB#0:
; FMA4-NOINFS-NEXT:    vfmaddpd %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NOINFS-NEXT:    vfmaddpd %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NOINFS-NEXT:    retq
;
; AVX512-NOINFS-LABEL: test_v8f64_mul_y_add_x_one:
; AVX512-NOINFS:       # BB#0:
; AVX512-NOINFS-NEXT:    vfmadd213pd %zmm1, %zmm1, %zmm0
; AVX512-NOINFS-NEXT:    retq
  %a = fadd <8 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>
  %m = fmul <8 x double> %y, %a
  ret <8 x double> %m
}

; Verifies (x + -1.0) * y on <16 x float>. Runs without -enable-no-infs-fp-math
; (the *-INFS prefixes) expect a separate add and multiply; runs with it (the
; *-NOINFS prefixes) expect a single vfmsub that uses the same register as
; multiplicand and subtrahend.
define <16 x float> @test_v16f32_mul_add_x_negone_y(<16 x float> %x, <16 x float> %y) {
; FMA-INFS-LABEL: test_v16f32_mul_add_x_negone_y:
; FMA-INFS:       # BB#0:
; FMA-INFS-NEXT:    vmovaps {{.*#+}} ymm4 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
; FMA-INFS-NEXT:    vaddps %ymm4, %ymm1, %ymm1
; FMA-INFS-NEXT:    vaddps %ymm4, %ymm0, %ymm0
; FMA-INFS-NEXT:    vmulps %ymm2, %ymm0, %ymm0
; FMA-INFS-NEXT:    vmulps %ymm3, %ymm1, %ymm1
; FMA-INFS-NEXT:    retq
;
; FMA4-INFS-LABEL: test_v16f32_mul_add_x_negone_y:
; FMA4-INFS:       # BB#0:
; FMA4-INFS-NEXT:    vmovaps {{.*#+}} ymm4 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
; FMA4-INFS-NEXT:    vaddps %ymm4, %ymm1, %ymm1
; FMA4-INFS-NEXT:    vaddps %ymm4, %ymm0, %ymm0
; FMA4-INFS-NEXT:    vmulps %ymm2, %ymm0, %ymm0
; FMA4-INFS-NEXT:    vmulps %ymm3, %ymm1, %ymm1
; FMA4-INFS-NEXT:    retq
;
; AVX512-INFS-LABEL: test_v16f32_mul_add_x_negone_y:
; AVX512-INFS:       # BB#0:
; AVX512-INFS-NEXT:    vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512-INFS-NEXT:    vmulps %zmm1, %zmm0, %zmm0
; AVX512-INFS-NEXT:    retq
;
; FMA-NOINFS-LABEL: test_v16f32_mul_add_x_negone_y:
; FMA-NOINFS:       # BB#0:
; FMA-NOINFS-NEXT:    vfmsub213ps %ymm2, %ymm2, %ymm0
; FMA-NOINFS-NEXT:    vfmsub213ps %ymm3, %ymm3, %ymm1
; FMA-NOINFS-NEXT:    retq
;
; FMA4-NOINFS-LABEL: test_v16f32_mul_add_x_negone_y:
; FMA4-NOINFS:       # BB#0:
; FMA4-NOINFS-NEXT:    vfmsubps %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NOINFS-NEXT:    vfmsubps %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NOINFS-NEXT:    retq
;
; AVX512-NOINFS-LABEL: test_v16f32_mul_add_x_negone_y:
; AVX512-NOINFS:       # BB#0:
; AVX512-NOINFS-NEXT:    vfmsub213ps %zmm1, %zmm1, %zmm0
; AVX512-NOINFS-NEXT:    retq
  %a = fadd <16 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0>
  %m = fmul <16 x float> %a, %y
  ret <16 x float> %m
}

; Verifies y * (x + -1.0) on <8 x double>, with y as the first fmul operand.
; Runs without -enable-no-infs-fp-math (the *-INFS prefixes) expect a separate
; add and multiply; runs with it (the *-NOINFS prefixes) expect a single
; vfmsub that uses the same register as multiplicand and subtrahend.
define <8 x double> @test_v8f64_mul_y_add_x_negone(<8 x double> %x, <8 x double> %y) {
; FMA-INFS-LABEL: test_v8f64_mul_y_add_x_negone:
; FMA-INFS:       # BB#0:
; FMA-INFS-NEXT:    vmovapd {{.*#+}} ymm4 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
; FMA-INFS-NEXT:    vaddpd %ymm4, %ymm1, %ymm1
; FMA-INFS-NEXT:    vaddpd %ymm4, %ymm0, %ymm0
; FMA-INFS-NEXT:    vmulpd %ymm0, %ymm2, %ymm0
; FMA-INFS-NEXT:    vmulpd %ymm1, %ymm3, %ymm1
; FMA-INFS-NEXT:    retq
;
; FMA4-INFS-LABEL: test_v8f64_mul_y_add_x_negone:
; FMA4-INFS:       # BB#0:
; FMA4-INFS-NEXT:    vmovapd {{.*#+}} ymm4 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
; FMA4-INFS-NEXT:    vaddpd %ymm4, %ymm1, %ymm1
; FMA4-INFS-NEXT:    vaddpd %ymm4, %ymm0, %ymm0
; FMA4-INFS-NEXT:    vmulpd %ymm0, %ymm2, %ymm0
; FMA4-INFS-NEXT:    vmulpd %ymm1, %ymm3, %ymm1
; FMA4-INFS-NEXT:    retq
;
; AVX512-INFS-LABEL: test_v8f64_mul_y_add_x_negone:
; AVX512-INFS:       # BB#0:
; AVX512-INFS-NEXT:    vaddpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512-INFS-NEXT:    vmulpd %zmm0, %zmm1, %zmm0
; AVX512-INFS-NEXT:    retq
;
; FMA-NOINFS-LABEL: test_v8f64_mul_y_add_x_negone:
; FMA-NOINFS:       # BB#0:
; FMA-NOINFS-NEXT:    vfmsub213pd %ymm2, %ymm2, %ymm0
; FMA-NOINFS-NEXT:    vfmsub213pd %ymm3, %ymm3, %ymm1
; FMA-NOINFS-NEXT:    retq
;
; FMA4-NOINFS-LABEL: test_v8f64_mul_y_add_x_negone:
; FMA4-NOINFS:       # BB#0:
; FMA4-NOINFS-NEXT:    vfmsubpd %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NOINFS-NEXT:    vfmsubpd %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NOINFS-NEXT:    retq
;
; AVX512-NOINFS-LABEL: test_v8f64_mul_y_add_x_negone:
; AVX512-NOINFS:       # BB#0:
; AVX512-NOINFS-NEXT:    vfmsub213pd %zmm1, %zmm1, %zmm0
; AVX512-NOINFS-NEXT:    retq
  %a = fadd <8 x double> %x, <double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0>
  %m = fmul <8 x double> %y, %a
  ret <8 x double> %m
}

; Verifies (1.0 - x) * y on <16 x float>. Runs without -enable-no-infs-fp-math
; (the *-INFS prefixes) expect a separate subtract and multiply; runs with it
; (the *-NOINFS prefixes) expect a single vfnmadd that uses the same register
; as multiplicand and addend.
define <16 x float> @test_v16f32_mul_sub_one_x_y(<16 x float> %x, <16 x float> %y) {
; FMA-INFS-LABEL: test_v16f32_mul_sub_one_x_y:
; FMA-INFS:       # BB#0:
; FMA-INFS-NEXT:    vmovaps {{.*#+}} ymm4 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
; FMA-INFS-NEXT:    vsubps %ymm1, %ymm4, %ymm1
; FMA-INFS-NEXT:    vsubps %ymm0, %ymm4, %ymm0
; FMA-INFS-NEXT:    vmulps %ymm2, %ymm0, %ymm0
; FMA-INFS-NEXT:    vmulps %ymm3, %ymm1, %ymm1
; FMA-INFS-NEXT:    retq
;
; FMA4-INFS-LABEL: test_v16f32_mul_sub_one_x_y:
; FMA4-INFS:       # BB#0:
; FMA4-INFS-NEXT:    vmovaps {{.*#+}} ymm4 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
; FMA4-INFS-NEXT:    vsubps %ymm1, %ymm4, %ymm1
; FMA4-INFS-NEXT:    vsubps %ymm0, %ymm4, %ymm0
; FMA4-INFS-NEXT:    vmulps %ymm2, %ymm0, %ymm0
; FMA4-INFS-NEXT:    vmulps %ymm3, %ymm1, %ymm1
; FMA4-INFS-NEXT:    retq
;
; AVX512-INFS-LABEL: test_v16f32_mul_sub_one_x_y:
; AVX512-INFS:       # BB#0:
; AVX512-INFS-NEXT:    vbroadcastss {{.*}}(%rip), %zmm2
; AVX512-INFS-NEXT:    vsubps %zmm0, %zmm2, %zmm0
; AVX512-INFS-NEXT:    vmulps %zmm1, %zmm0, %zmm0
; AVX512-INFS-NEXT:    retq
;
; FMA-NOINFS-LABEL: test_v16f32_mul_sub_one_x_y:
; FMA-NOINFS:       # BB#0:
; FMA-NOINFS-NEXT:    vfnmadd213ps %ymm2, %ymm2, %ymm0
; FMA-NOINFS-NEXT:    vfnmadd213ps %ymm3, %ymm3, %ymm1
; FMA-NOINFS-NEXT:    retq
;
; FMA4-NOINFS-LABEL: test_v16f32_mul_sub_one_x_y:
; FMA4-NOINFS:       # BB#0:
; FMA4-NOINFS-NEXT:    vfnmaddps %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NOINFS-NEXT:    vfnmaddps %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NOINFS-NEXT:    retq
;
; AVX512-NOINFS-LABEL: test_v16f32_mul_sub_one_x_y:
; AVX512-NOINFS:       # BB#0:
; AVX512-NOINFS-NEXT:    vfnmadd213ps %zmm1, %zmm1, %zmm0
; AVX512-NOINFS-NEXT:    retq
  %s = fsub <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
  %m = fmul <16 x float> %s, %y
  ret <16 x float> %m
}

; Verifies y * (1.0 - x) on <8 x double>, with y as the first fmul operand.
; Runs without -enable-no-infs-fp-math (the *-INFS prefixes) expect a separate
; subtract and multiply; runs with it (the *-NOINFS prefixes) expect a single
; vfnmadd that uses the same register as multiplicand and addend.
define <8 x double> @test_v8f64_mul_y_sub_one_x(<8 x double> %x, <8 x double> %y) {
; FMA-INFS-LABEL: test_v8f64_mul_y_sub_one_x:
; FMA-INFS:       # BB#0:
; FMA-INFS-NEXT:    vmovapd {{.*#+}} ymm4 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
; FMA-INFS-NEXT:    vsubpd %ymm1, %ymm4, %ymm1
; FMA-INFS-NEXT:    vsubpd %ymm0, %ymm4, %ymm0
; FMA-INFS-NEXT:    vmulpd %ymm0, %ymm2, %ymm0
; FMA-INFS-NEXT:    vmulpd %ymm1, %ymm3, %ymm1
; FMA-INFS-NEXT:    retq
;
; FMA4-INFS-LABEL: test_v8f64_mul_y_sub_one_x:
; FMA4-INFS:       # BB#0:
; FMA4-INFS-NEXT:    vmovapd {{.*#+}} ymm4 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
; FMA4-INFS-NEXT:    vsubpd %ymm1, %ymm4, %ymm1
; FMA4-INFS-NEXT:    vsubpd %ymm0, %ymm4, %ymm0
; FMA4-INFS-NEXT:    vmulpd %ymm0, %ymm2, %ymm0
; FMA4-INFS-NEXT:    vmulpd %ymm1, %ymm3, %ymm1
; FMA4-INFS-NEXT:    retq
;
; AVX512-INFS-LABEL: test_v8f64_mul_y_sub_one_x:
; AVX512-INFS:       # BB#0:
; AVX512-INFS-NEXT:    vbroadcastsd {{.*}}(%rip), %zmm2
; AVX512-INFS-NEXT:    vsubpd %zmm0, %zmm2, %zmm0
; AVX512-INFS-NEXT:    vmulpd %zmm0, %zmm1, %zmm0
; AVX512-INFS-NEXT:    retq
;
; FMA-NOINFS-LABEL: test_v8f64_mul_y_sub_one_x:
; FMA-NOINFS:       # BB#0:
; FMA-NOINFS-NEXT:    vfnmadd213pd %ymm2, %ymm2, %ymm0
; FMA-NOINFS-NEXT:    vfnmadd213pd %ymm3, %ymm3, %ymm1
; FMA-NOINFS-NEXT:    retq
;
; FMA4-NOINFS-LABEL: test_v8f64_mul_y_sub_one_x:
; FMA4-NOINFS:       # BB#0:
; FMA4-NOINFS-NEXT:    vfnmaddpd %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NOINFS-NEXT:    vfnmaddpd %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NOINFS-NEXT:    retq
;
; AVX512-NOINFS-LABEL: test_v8f64_mul_y_sub_one_x:
; AVX512-NOINFS:       # BB#0:
; AVX512-NOINFS-NEXT:    vfnmadd213pd %zmm1, %zmm1, %zmm0
; AVX512-NOINFS-NEXT:    retq
  %s = fsub <8 x double> <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>, %x
  %m = fmul <8 x double> %y, %s
  ret <8 x double> %m
}

; Verifies (-1.0 - x) * y on <16 x float>. Runs without -enable-no-infs-fp-math
; (the *-INFS prefixes) expect a separate subtract and multiply; runs with it
; (the *-NOINFS prefixes) expect a single vfnmsub that uses the same register
; as multiplicand and subtrahend.
define <16 x float> @test_v16f32_mul_sub_negone_x_y(<16 x float> %x, <16 x float> %y) {
; FMA-INFS-LABEL: test_v16f32_mul_sub_negone_x_y:
; FMA-INFS:       # BB#0:
; FMA-INFS-NEXT:    vmovaps {{.*#+}} ymm4 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
; FMA-INFS-NEXT:    vsubps %ymm1, %ymm4, %ymm1
; FMA-INFS-NEXT:    vsubps %ymm0, %ymm4, %ymm0
; FMA-INFS-NEXT:    vmulps %ymm2, %ymm0, %ymm0
; FMA-INFS-NEXT:    vmulps %ymm3, %ymm1, %ymm1
; FMA-INFS-NEXT:    retq
;
; FMA4-INFS-LABEL: test_v16f32_mul_sub_negone_x_y:
; FMA4-INFS:       # BB#0:
; FMA4-INFS-NEXT:    vmovaps {{.*#+}} ymm4 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
; FMA4-INFS-NEXT:    vsubps %ymm1, %ymm4, %ymm1
; FMA4-INFS-NEXT:    vsubps %ymm0, %ymm4, %ymm0
; FMA4-INFS-NEXT:    vmulps %ymm2, %ymm0, %ymm0
; FMA4-INFS-NEXT:    vmulps %ymm3, %ymm1, %ymm1
; FMA4-INFS-NEXT:    retq
;
; AVX512-INFS-LABEL: test_v16f32_mul_sub_negone_x_y:
; AVX512-INFS:       # BB#0:
; AVX512-INFS-NEXT:    vbroadcastss {{.*}}(%rip), %zmm2
; AVX512-INFS-NEXT:    vsubps %zmm0, %zmm2, %zmm0
; AVX512-INFS-NEXT:    vmulps %zmm1, %zmm0, %zmm0
; AVX512-INFS-NEXT:    retq
;
; FMA-NOINFS-LABEL: test_v16f32_mul_sub_negone_x_y:
; FMA-NOINFS:       # BB#0:
; FMA-NOINFS-NEXT:    vfnmsub213ps %ymm2, %ymm2, %ymm0
; FMA-NOINFS-NEXT:    vfnmsub213ps %ymm3, %ymm3, %ymm1
; FMA-NOINFS-NEXT:    retq
;
; FMA4-NOINFS-LABEL: test_v16f32_mul_sub_negone_x_y:
; FMA4-NOINFS:       # BB#0:
; FMA4-NOINFS-NEXT:    vfnmsubps %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NOINFS-NEXT:    vfnmsubps %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NOINFS-NEXT:    retq
;
; AVX512-NOINFS-LABEL: test_v16f32_mul_sub_negone_x_y:
; AVX512-NOINFS:       # BB#0:
; AVX512-NOINFS-NEXT:    vfnmsub213ps %zmm1, %zmm1, %zmm0
; AVX512-NOINFS-NEXT:    retq
  %s = fsub <16 x float> <float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0>, %x
  %m = fmul <16 x float> %s, %y
  ret <16 x float> %m
}

; Verifies y * (-1.0 - x) on <8 x double>, with y as the first fmul operand.
; Runs without -enable-no-infs-fp-math (the *-INFS prefixes) expect a separate
; subtract and multiply; runs with it (the *-NOINFS prefixes) expect a single
; vfnmsub that uses the same register as multiplicand and subtrahend.
define <8 x double> @test_v8f64_mul_y_sub_negone_x(<8 x double> %x, <8 x double> %y) {
; FMA-INFS-LABEL: test_v8f64_mul_y_sub_negone_x:
; FMA-INFS:       # BB#0:
; FMA-INFS-NEXT:    vmovapd {{.*#+}} ymm4 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
; FMA-INFS-NEXT:    vsubpd %ymm1, %ymm4, %ymm1
; FMA-INFS-NEXT:    vsubpd %ymm0, %ymm4, %ymm0
; FMA-INFS-NEXT:    vmulpd %ymm0, %ymm2, %ymm0
; FMA-INFS-NEXT:    vmulpd %ymm1, %ymm3, %ymm1
; FMA-INFS-NEXT:    retq
;
; FMA4-INFS-LABEL: test_v8f64_mul_y_sub_negone_x:
; FMA4-INFS:       # BB#0:
; FMA4-INFS-NEXT:    vmovapd {{.*#+}} ymm4 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
; FMA4-INFS-NEXT:    vsubpd %ymm1, %ymm4, %ymm1
; FMA4-INFS-NEXT:    vsubpd %ymm0, %ymm4, %ymm0
; FMA4-INFS-NEXT:    vmulpd %ymm0, %ymm2, %ymm0
; FMA4-INFS-NEXT:    vmulpd %ymm1, %ymm3, %ymm1
; FMA4-INFS-NEXT:    retq
;
; AVX512-INFS-LABEL: test_v8f64_mul_y_sub_negone_x:
; AVX512-INFS:       # BB#0:
; AVX512-INFS-NEXT:    vbroadcastsd {{.*}}(%rip), %zmm2
; AVX512-INFS-NEXT:    vsubpd %zmm0, %zmm2, %zmm0
; AVX512-INFS-NEXT:    vmulpd %zmm0, %zmm1, %zmm0
; AVX512-INFS-NEXT:    retq
;
; FMA-NOINFS-LABEL: test_v8f64_mul_y_sub_negone_x:
; FMA-NOINFS:       # BB#0:
; FMA-NOINFS-NEXT:    vfnmsub213pd %ymm2, %ymm2, %ymm0
; FMA-NOINFS-NEXT:    vfnmsub213pd %ymm3, %ymm3, %ymm1
; FMA-NOINFS-NEXT:    retq
;
; FMA4-NOINFS-LABEL: test_v8f64_mul_y_sub_negone_x:
; FMA4-NOINFS:       # BB#0:
; FMA4-NOINFS-NEXT:    vfnmsubpd %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NOINFS-NEXT:    vfnmsubpd %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NOINFS-NEXT:    retq
;
; AVX512-NOINFS-LABEL: test_v8f64_mul_y_sub_negone_x:
; AVX512-NOINFS:       # BB#0:
; AVX512-NOINFS-NEXT:    vfnmsub213pd %zmm1, %zmm1, %zmm0
; AVX512-NOINFS-NEXT:    retq
  %s = fsub <8 x double> <double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0>, %x
  %m = fmul <8 x double> %y, %s
  ret <8 x double> %m
}

; Verifies (x - 1.0) * y on <16 x float>. Runs without -enable-no-infs-fp-math
; (the *-INFS prefixes) expect a separate subtract and multiply; runs with it
; (the *-NOINFS prefixes) expect a single vfmsub that uses the same register
; as multiplicand and subtrahend.
define <16 x float> @test_v16f32_mul_sub_x_one_y(<16 x float> %x, <16 x float> %y) {
; FMA-INFS-LABEL: test_v16f32_mul_sub_x_one_y:
; FMA-INFS:       # BB#0:
; FMA-INFS-NEXT:    vmovaps {{.*#+}} ymm4 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
; FMA-INFS-NEXT:    vsubps %ymm4, %ymm1, %ymm1
; FMA-INFS-NEXT:    vsubps %ymm4, %ymm0, %ymm0
; FMA-INFS-NEXT:    vmulps %ymm2, %ymm0, %ymm0
; FMA-INFS-NEXT:    vmulps %ymm3, %ymm1, %ymm1
; FMA-INFS-NEXT:    retq
;
; FMA4-INFS-LABEL: test_v16f32_mul_sub_x_one_y:
; FMA4-INFS:       # BB#0:
; FMA4-INFS-NEXT:    vmovaps {{.*#+}} ymm4 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
; FMA4-INFS-NEXT:    vsubps %ymm4, %ymm1, %ymm1
; FMA4-INFS-NEXT:    vsubps %ymm4, %ymm0, %ymm0
; FMA4-INFS-NEXT:    vmulps %ymm2, %ymm0, %ymm0
; FMA4-INFS-NEXT:    vmulps %ymm3, %ymm1, %ymm1
; FMA4-INFS-NEXT:    retq
;
; AVX512-INFS-LABEL: test_v16f32_mul_sub_x_one_y:
; AVX512-INFS:       # BB#0:
; AVX512-INFS-NEXT:    vsubps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512-INFS-NEXT:    vmulps %zmm1, %zmm0, %zmm0
; AVX512-INFS-NEXT:    retq
;
; FMA-NOINFS-LABEL: test_v16f32_mul_sub_x_one_y:
; FMA-NOINFS:       # BB#0:
; FMA-NOINFS-NEXT:    vfmsub213ps %ymm2, %ymm2, %ymm0
; FMA-NOINFS-NEXT:    vfmsub213ps %ymm3, %ymm3, %ymm1
; FMA-NOINFS-NEXT:    retq
;
; FMA4-NOINFS-LABEL: test_v16f32_mul_sub_x_one_y:
; FMA4-NOINFS:       # BB#0:
; FMA4-NOINFS-NEXT:    vfmsubps %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NOINFS-NEXT:    vfmsubps %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NOINFS-NEXT:    retq
;
; AVX512-NOINFS-LABEL: test_v16f32_mul_sub_x_one_y:
; AVX512-NOINFS:       # BB#0:
; AVX512-NOINFS-NEXT:    vfmsub213ps %zmm1, %zmm1, %zmm0
; AVX512-NOINFS-NEXT:    retq
  %s = fsub <16 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>
  %m = fmul <16 x float> %s, %y
  ret <16 x float> %m
}

; Verifies y * (x - 1.0) on <8 x double>, with y as the first fmul operand.
; Runs without -enable-no-infs-fp-math (the *-INFS prefixes) expect a separate
; subtract and multiply; runs with it (the *-NOINFS prefixes) expect a single
; vfmsub that uses the same register as multiplicand and subtrahend.
define <8 x double> @test_v8f64_mul_y_sub_x_one(<8 x double> %x, <8 x double> %y) {
; FMA-INFS-LABEL: test_v8f64_mul_y_sub_x_one:
; FMA-INFS:       # BB#0:
; FMA-INFS-NEXT:    vmovapd {{.*#+}} ymm4 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
; FMA-INFS-NEXT:    vsubpd %ymm4, %ymm1, %ymm1
; FMA-INFS-NEXT:    vsubpd %ymm4, %ymm0, %ymm0
; FMA-INFS-NEXT:    vmulpd %ymm0, %ymm2, %ymm0
; FMA-INFS-NEXT:    vmulpd %ymm1, %ymm3, %ymm1
; FMA-INFS-NEXT:    retq
;
; FMA4-INFS-LABEL: test_v8f64_mul_y_sub_x_one:
; FMA4-INFS:       # BB#0:
; FMA4-INFS-NEXT:    vmovapd {{.*#+}} ymm4 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
; FMA4-INFS-NEXT:    vsubpd %ymm4, %ymm1, %ymm1
; FMA4-INFS-NEXT:    vsubpd %ymm4, %ymm0, %ymm0
; FMA4-INFS-NEXT:    vmulpd %ymm0, %ymm2, %ymm0
; FMA4-INFS-NEXT:    vmulpd %ymm1, %ymm3, %ymm1
; FMA4-INFS-NEXT:    retq
;
; AVX512-INFS-LABEL: test_v8f64_mul_y_sub_x_one:
; AVX512-INFS:       # BB#0:
; AVX512-INFS-NEXT:    vsubpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512-INFS-NEXT:    vmulpd %zmm0, %zmm1, %zmm0
; AVX512-INFS-NEXT:    retq
;
; FMA-NOINFS-LABEL: test_v8f64_mul_y_sub_x_one:
; FMA-NOINFS:       # BB#0:
; FMA-NOINFS-NEXT:    vfmsub213pd %ymm2, %ymm2, %ymm0
; FMA-NOINFS-NEXT:    vfmsub213pd %ymm3, %ymm3, %ymm1
; FMA-NOINFS-NEXT:    retq
;
; FMA4-NOINFS-LABEL: test_v8f64_mul_y_sub_x_one:
; FMA4-NOINFS:       # BB#0:
; FMA4-NOINFS-NEXT:    vfmsubpd %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NOINFS-NEXT:    vfmsubpd %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NOINFS-NEXT:    retq
;
; AVX512-NOINFS-LABEL: test_v8f64_mul_y_sub_x_one:
; AVX512-NOINFS:       # BB#0:
; AVX512-NOINFS-NEXT:    vfmsub213pd %zmm1, %zmm1, %zmm0
; AVX512-NOINFS-NEXT:    retq
  %s = fsub <8 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>
  %m = fmul <8 x double> %y, %s
  ret <8 x double> %m
}

; Verifies (x - -1.0) * y on <16 x float>. Runs without -enable-no-infs-fp-math
; (the *-INFS prefixes) expect a separate subtract and multiply; runs with it
; (the *-NOINFS prefixes) expect a single vfmadd that uses the same register
; as multiplicand and addend.
define <16 x float> @test_v16f32_mul_sub_x_negone_y(<16 x float> %x, <16 x float> %y) {
; FMA-INFS-LABEL: test_v16f32_mul_sub_x_negone_y:
; FMA-INFS:       # BB#0:
; FMA-INFS-NEXT:    vmovaps {{.*#+}} ymm4 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
; FMA-INFS-NEXT:    vsubps %ymm4, %ymm1, %ymm1
; FMA-INFS-NEXT:    vsubps %ymm4, %ymm0, %ymm0
; FMA-INFS-NEXT:    vmulps %ymm2, %ymm0, %ymm0
; FMA-INFS-NEXT:    vmulps %ymm3, %ymm1, %ymm1
; FMA-INFS-NEXT:    retq
;
; FMA4-INFS-LABEL: test_v16f32_mul_sub_x_negone_y:
; FMA4-INFS:       # BB#0:
; FMA4-INFS-NEXT:    vmovaps {{.*#+}} ymm4 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
; FMA4-INFS-NEXT:    vsubps %ymm4, %ymm1, %ymm1
; FMA4-INFS-NEXT:    vsubps %ymm4, %ymm0, %ymm0
; FMA4-INFS-NEXT:    vmulps %ymm2, %ymm0, %ymm0
; FMA4-INFS-NEXT:    vmulps %ymm3, %ymm1, %ymm1
; FMA4-INFS-NEXT:    retq
;
; AVX512-INFS-LABEL: test_v16f32_mul_sub_x_negone_y:
; AVX512-INFS:       # BB#0:
; AVX512-INFS-NEXT:    vsubps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512-INFS-NEXT:    vmulps %zmm1, %zmm0, %zmm0
; AVX512-INFS-NEXT:    retq
;
; FMA-NOINFS-LABEL: test_v16f32_mul_sub_x_negone_y:
; FMA-NOINFS:       # BB#0:
; FMA-NOINFS-NEXT:    vfmadd213ps %ymm2, %ymm2, %ymm0
; FMA-NOINFS-NEXT:    vfmadd213ps %ymm3, %ymm3, %ymm1
; FMA-NOINFS-NEXT:    retq
;
; FMA4-NOINFS-LABEL: test_v16f32_mul_sub_x_negone_y:
; FMA4-NOINFS:       # BB#0:
; FMA4-NOINFS-NEXT:    vfmaddps %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NOINFS-NEXT:    vfmaddps %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NOINFS-NEXT:    retq
;
; AVX512-NOINFS-LABEL: test_v16f32_mul_sub_x_negone_y:
; AVX512-NOINFS:       # BB#0:
; AVX512-NOINFS-NEXT:    vfmadd213ps %zmm1, %zmm1, %zmm0
; AVX512-NOINFS-NEXT:    retq
  %s = fsub <16 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0>
  %m = fmul <16 x float> %s, %y
  ret <16 x float> %m
}

; Test input: y * (x - splat(-1.0)) on <8 x double> (multiply operands are in
; the opposite order from the <16 x float> variant above).
; The assertions for the runs without -enable-no-infs-fp-math expect a separate
; subtract and multiply; the runs with that flag expect only fused multiply-add
; instructions.
769define <8 x double> @test_v8f64_mul_y_sub_x_negone(<8 x double> %x, <8 x double> %y) {
770; FMA-INFS-LABEL: test_v8f64_mul_y_sub_x_negone:
771; FMA-INFS:       # BB#0:
772; FMA-INFS-NEXT:    vmovapd {{.*#+}} ymm4 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
773; FMA-INFS-NEXT:    vsubpd %ymm4, %ymm1, %ymm1
774; FMA-INFS-NEXT:    vsubpd %ymm4, %ymm0, %ymm0
775; FMA-INFS-NEXT:    vmulpd %ymm0, %ymm2, %ymm0
776; FMA-INFS-NEXT:    vmulpd %ymm1, %ymm3, %ymm1
777; FMA-INFS-NEXT:    retq
778;
779; FMA4-INFS-LABEL: test_v8f64_mul_y_sub_x_negone:
780; FMA4-INFS:       # BB#0:
781; FMA4-INFS-NEXT:    vmovapd {{.*#+}} ymm4 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
782; FMA4-INFS-NEXT:    vsubpd %ymm4, %ymm1, %ymm1
783; FMA4-INFS-NEXT:    vsubpd %ymm4, %ymm0, %ymm0
784; FMA4-INFS-NEXT:    vmulpd %ymm0, %ymm2, %ymm0
785; FMA4-INFS-NEXT:    vmulpd %ymm1, %ymm3, %ymm1
786; FMA4-INFS-NEXT:    retq
787;
788; AVX512-INFS-LABEL: test_v8f64_mul_y_sub_x_negone:
789; AVX512-INFS:       # BB#0:
790; AVX512-INFS-NEXT:    vsubpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
791; AVX512-INFS-NEXT:    vmulpd %zmm0, %zmm1, %zmm0
792; AVX512-INFS-NEXT:    retq
793;
794; FMA-NOINFS-LABEL: test_v8f64_mul_y_sub_x_negone:
795; FMA-NOINFS:       # BB#0:
796; FMA-NOINFS-NEXT:    vfmadd213pd %ymm2, %ymm2, %ymm0
797; FMA-NOINFS-NEXT:    vfmadd213pd %ymm3, %ymm3, %ymm1
798; FMA-NOINFS-NEXT:    retq
799;
800; FMA4-NOINFS-LABEL: test_v8f64_mul_y_sub_x_negone:
801; FMA4-NOINFS:       # BB#0:
802; FMA4-NOINFS-NEXT:    vfmaddpd %ymm2, %ymm2, %ymm0, %ymm0
803; FMA4-NOINFS-NEXT:    vfmaddpd %ymm3, %ymm3, %ymm1, %ymm1
804; FMA4-NOINFS-NEXT:    retq
805;
806; AVX512-NOINFS-LABEL: test_v8f64_mul_y_sub_x_negone:
807; AVX512-NOINFS:       # BB#0:
808; AVX512-NOINFS-NEXT:    vfmadd213pd %zmm1, %zmm1, %zmm0
809; AVX512-NOINFS-NEXT:    retq
810  %s = fsub <8 x double> %x, <double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0>
811  %m = fmul <8 x double> %y, %s
812  ret <8 x double> %m
813}
814
815;
816; Interpolation Patterns: add(mul(x,t),mul(sub(1.0,t),y))
817;
818
; Test input: x*t + y*(splat(1.0) - t) on <16 x float>.
; Without -enable-no-infs-fp-math the assertions expect the (1.0 - t) subtract
; and the y multiply to stay separate, with only the final add fused. With the
; flag they expect a negated fused multiply-add feeding a fused multiply-add
; and no standalone subtract or multiply.
819define <16 x float> @test_v16f32_interp(<16 x float> %x, <16 x float> %y, <16 x float> %t) {
820; FMA-INFS-LABEL: test_v16f32_interp:
821; FMA-INFS:       # BB#0:
822; FMA-INFS-NEXT:    vmovaps {{.*#+}} ymm6 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
823; FMA-INFS-NEXT:    vsubps %ymm4, %ymm6, %ymm7
824; FMA-INFS-NEXT:    vsubps %ymm5, %ymm6, %ymm6
825; FMA-INFS-NEXT:    vmulps %ymm6, %ymm3, %ymm3
826; FMA-INFS-NEXT:    vmulps %ymm7, %ymm2, %ymm2
827; FMA-INFS-NEXT:    vfmadd213ps %ymm2, %ymm4, %ymm0
828; FMA-INFS-NEXT:    vfmadd213ps %ymm3, %ymm5, %ymm1
829; FMA-INFS-NEXT:    retq
830;
831; FMA4-INFS-LABEL: test_v16f32_interp:
832; FMA4-INFS:       # BB#0:
833; FMA4-INFS-NEXT:    vmovaps {{.*#+}} ymm6 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
834; FMA4-INFS-NEXT:    vsubps %ymm4, %ymm6, %ymm7
835; FMA4-INFS-NEXT:    vsubps %ymm5, %ymm6, %ymm6
836; FMA4-INFS-NEXT:    vmulps %ymm6, %ymm3, %ymm3
837; FMA4-INFS-NEXT:    vmulps %ymm7, %ymm2, %ymm2
838; FMA4-INFS-NEXT:    vfmaddps %ymm2, %ymm4, %ymm0, %ymm0
839; FMA4-INFS-NEXT:    vfmaddps %ymm3, %ymm5, %ymm1, %ymm1
840; FMA4-INFS-NEXT:    retq
841;
842; AVX512-INFS-LABEL: test_v16f32_interp:
843; AVX512-INFS:       # BB#0:
844; AVX512-INFS-NEXT:    vbroadcastss {{.*}}(%rip), %zmm3
845; AVX512-INFS-NEXT:    vsubps %zmm2, %zmm3, %zmm3
846; AVX512-INFS-NEXT:    vmulps %zmm3, %zmm1, %zmm1
847; AVX512-INFS-NEXT:    vfmadd213ps %zmm1, %zmm2, %zmm0
848; AVX512-INFS-NEXT:    retq
849;
850; FMA-NOINFS-LABEL: test_v16f32_interp:
851; FMA-NOINFS:       # BB#0:
852; FMA-NOINFS-NEXT:    vfnmadd213ps %ymm3, %ymm5, %ymm3
853; FMA-NOINFS-NEXT:    vfnmadd213ps %ymm2, %ymm4, %ymm2
854; FMA-NOINFS-NEXT:    vfmadd213ps %ymm2, %ymm4, %ymm0
855; FMA-NOINFS-NEXT:    vfmadd213ps %ymm3, %ymm5, %ymm1
856; FMA-NOINFS-NEXT:    retq
857;
858; FMA4-NOINFS-LABEL: test_v16f32_interp:
859; FMA4-NOINFS:       # BB#0:
860; FMA4-NOINFS-NEXT:    vfnmaddps %ymm3, %ymm3, %ymm5, %ymm3
861; FMA4-NOINFS-NEXT:    vfnmaddps %ymm2, %ymm2, %ymm4, %ymm2
862; FMA4-NOINFS-NEXT:    vfmaddps %ymm2, %ymm4, %ymm0, %ymm0
863; FMA4-NOINFS-NEXT:    vfmaddps %ymm3, %ymm5, %ymm1, %ymm1
864; FMA4-NOINFS-NEXT:    retq
865;
866; AVX512-NOINFS-LABEL: test_v16f32_interp:
867; AVX512-NOINFS:       # BB#0:
868; AVX512-NOINFS-NEXT:    vfnmadd213ps %zmm1, %zmm2, %zmm1
869; AVX512-NOINFS-NEXT:    vfmadd213ps %zmm1, %zmm2, %zmm0
870; AVX512-NOINFS-NEXT:    retq
871  %t1 = fsub <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %t
872  %tx = fmul <16 x float> %x, %t
873  %ty = fmul <16 x float> %y, %t1
874  %r = fadd <16 x float> %tx, %ty
875  ret <16 x float> %r
876}
877
; Test input: x*t + y*(splat(1.0) - t) on <8 x double>.
; Without -enable-no-infs-fp-math the assertions expect the (1.0 - t) subtract
; and the y multiply to stay separate, with only the final add fused. With the
; flag they expect a negated fused multiply-add feeding a fused multiply-add
; and no standalone subtract or multiply.
878define <8 x double> @test_v8f64_interp(<8 x double> %x, <8 x double> %y, <8 x double> %t) {
879; FMA-INFS-LABEL: test_v8f64_interp:
880; FMA-INFS:       # BB#0:
881; FMA-INFS-NEXT:    vmovapd {{.*#+}} ymm6 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
882; FMA-INFS-NEXT:    vsubpd %ymm4, %ymm6, %ymm7
883; FMA-INFS-NEXT:    vsubpd %ymm5, %ymm6, %ymm6
884; FMA-INFS-NEXT:    vmulpd %ymm6, %ymm3, %ymm3
885; FMA-INFS-NEXT:    vmulpd %ymm7, %ymm2, %ymm2
886; FMA-INFS-NEXT:    vfmadd213pd %ymm2, %ymm4, %ymm0
887; FMA-INFS-NEXT:    vfmadd213pd %ymm3, %ymm5, %ymm1
888; FMA-INFS-NEXT:    retq
889;
890; FMA4-INFS-LABEL: test_v8f64_interp:
891; FMA4-INFS:       # BB#0:
892; FMA4-INFS-NEXT:    vmovapd {{.*#+}} ymm6 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
893; FMA4-INFS-NEXT:    vsubpd %ymm4, %ymm6, %ymm7
894; FMA4-INFS-NEXT:    vsubpd %ymm5, %ymm6, %ymm6
895; FMA4-INFS-NEXT:    vmulpd %ymm6, %ymm3, %ymm3
896; FMA4-INFS-NEXT:    vmulpd %ymm7, %ymm2, %ymm2
897; FMA4-INFS-NEXT:    vfmaddpd %ymm2, %ymm4, %ymm0, %ymm0
898; FMA4-INFS-NEXT:    vfmaddpd %ymm3, %ymm5, %ymm1, %ymm1
899; FMA4-INFS-NEXT:    retq
900;
901; AVX512-INFS-LABEL: test_v8f64_interp:
902; AVX512-INFS:       # BB#0:
903; AVX512-INFS-NEXT:    vbroadcastsd {{.*}}(%rip), %zmm3
904; AVX512-INFS-NEXT:    vsubpd %zmm2, %zmm3, %zmm3
905; AVX512-INFS-NEXT:    vmulpd %zmm3, %zmm1, %zmm1
906; AVX512-INFS-NEXT:    vfmadd213pd %zmm1, %zmm2, %zmm0
907; AVX512-INFS-NEXT:    retq
908;
909; FMA-NOINFS-LABEL: test_v8f64_interp:
910; FMA-NOINFS:       # BB#0:
911; FMA-NOINFS-NEXT:    vfnmadd213pd %ymm3, %ymm5, %ymm3
912; FMA-NOINFS-NEXT:    vfnmadd213pd %ymm2, %ymm4, %ymm2
913; FMA-NOINFS-NEXT:    vfmadd213pd %ymm2, %ymm4, %ymm0
914; FMA-NOINFS-NEXT:    vfmadd213pd %ymm3, %ymm5, %ymm1
915; FMA-NOINFS-NEXT:    retq
916;
917; FMA4-NOINFS-LABEL: test_v8f64_interp:
918; FMA4-NOINFS:       # BB#0:
919; FMA4-NOINFS-NEXT:    vfnmaddpd %ymm3, %ymm3, %ymm5, %ymm3
920; FMA4-NOINFS-NEXT:    vfnmaddpd %ymm2, %ymm2, %ymm4, %ymm2
921; FMA4-NOINFS-NEXT:    vfmaddpd %ymm2, %ymm4, %ymm0, %ymm0
922; FMA4-NOINFS-NEXT:    vfmaddpd %ymm3, %ymm5, %ymm1, %ymm1
923; FMA4-NOINFS-NEXT:    retq
924;
925; AVX512-NOINFS-LABEL: test_v8f64_interp:
926; AVX512-NOINFS:       # BB#0:
927; AVX512-NOINFS-NEXT:    vfnmadd213pd %zmm1, %zmm2, %zmm1
928; AVX512-NOINFS-NEXT:    vfmadd213pd %zmm1, %zmm2, %zmm0
929; AVX512-NOINFS-NEXT:    retq
930  %t1 = fsub <8 x double> <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>, %t
931  %tx = fmul <8 x double> %x, %t
932  %ty = fmul <8 x double> %y, %t1
933  %r = fadd <8 x double> %tx, %ty
934  ret <8 x double> %r
935}
936
937;
938; Pattern: (fneg (fma x, y, z)) -> (fma x, -y, -z)
939;
940
; Test input: -((a0 * a1) + a2) on <16 x float>, with the negation written as
; a subtraction from a -0.0 splat. The function uses attribute group #0.
; Every run is expected to emit only the vfnmsub form of the fused instruction.
941define <16 x float> @test_v16f32_fneg_fmadd(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) #0 {
942; FMA-LABEL: test_v16f32_fneg_fmadd:
943; FMA:       # BB#0:
944; FMA-NEXT:    vfnmsub213ps %ymm4, %ymm2, %ymm0
945; FMA-NEXT:    vfnmsub213ps %ymm5, %ymm3, %ymm1
946; FMA-NEXT:    retq
947;
948; FMA4-LABEL: test_v16f32_fneg_fmadd:
949; FMA4:       # BB#0:
950; FMA4-NEXT:    vfnmsubps %ymm4, %ymm2, %ymm0, %ymm0
951; FMA4-NEXT:    vfnmsubps %ymm5, %ymm3, %ymm1, %ymm1
952; FMA4-NEXT:    retq
953;
954; AVX512-LABEL: test_v16f32_fneg_fmadd:
955; AVX512:       # BB#0:
956; AVX512-NEXT:    vfnmsub213ps %zmm2, %zmm1, %zmm0
957; AVX512-NEXT:    retq
958  %mul = fmul <16 x float> %a0, %a1
959  %add = fadd <16 x float> %mul, %a2
960  %neg = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %add
961  ret <16 x float> %neg
962}
963
; Test input: -((a0 * a1) - a2) on <8 x double>, with the negation written as
; a subtraction from a -0.0 splat. The function uses attribute group #0.
; Every run is expected to emit only the vfnmadd form of the fused instruction.
964define <8 x double> @test_v8f64_fneg_fmsub(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) #0 {
965; FMA-LABEL: test_v8f64_fneg_fmsub:
966; FMA:       # BB#0:
967; FMA-NEXT:    vfnmadd213pd %ymm4, %ymm2, %ymm0
968; FMA-NEXT:    vfnmadd213pd %ymm5, %ymm3, %ymm1
969; FMA-NEXT:    retq
970;
971; FMA4-LABEL: test_v8f64_fneg_fmsub:
972; FMA4:       # BB#0:
973; FMA4-NEXT:    vfnmaddpd %ymm4, %ymm2, %ymm0, %ymm0
974; FMA4-NEXT:    vfnmaddpd %ymm5, %ymm3, %ymm1, %ymm1
975; FMA4-NEXT:    retq
976;
977; AVX512-LABEL: test_v8f64_fneg_fmsub:
978; AVX512:       # BB#0:
979; AVX512-NEXT:    vfnmadd213pd %zmm2, %zmm1, %zmm0
980; AVX512-NEXT:    retq
981  %mul = fmul <8 x double> %a0, %a1
982  %sub = fsub <8 x double> %mul, %a2
983  %neg = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %sub
984  ret <8 x double> %neg
985}
986
; Test input: -(-(a0 * a1) + a2) on <16 x float>, with both negations written
; as subtractions from a -0.0 splat. The function uses attribute group #0.
; Every run is expected to emit only the vfmsub form of the fused instruction.
987define <16 x float> @test_v16f32_fneg_fnmadd(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) #0 {
988; FMA-LABEL: test_v16f32_fneg_fnmadd:
989; FMA:       # BB#0:
990; FMA-NEXT:    vfmsub213ps %ymm4, %ymm2, %ymm0
991; FMA-NEXT:    vfmsub213ps %ymm5, %ymm3, %ymm1
992; FMA-NEXT:    retq
993;
994; FMA4-LABEL: test_v16f32_fneg_fnmadd:
995; FMA4:       # BB#0:
996; FMA4-NEXT:    vfmsubps %ymm4, %ymm2, %ymm0, %ymm0
997; FMA4-NEXT:    vfmsubps %ymm5, %ymm3, %ymm1, %ymm1
998; FMA4-NEXT:    retq
999;
1000; AVX512-LABEL: test_v16f32_fneg_fnmadd:
1001; AVX512:       # BB#0:
1002; AVX512-NEXT:    vfmsub213ps %zmm2, %zmm1, %zmm0
1003; AVX512-NEXT:    retq
1004  %mul = fmul <16 x float> %a0, %a1
1005  %neg0 = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %mul
1006  %add = fadd <16 x float> %neg0, %a2
1007  %neg1 = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %add
1008  ret <16 x float> %neg1
1009}
1010
; Test input: -(-(a0 * a1) - a2) on <8 x double>, with both negations written
; as subtractions from a -0.0 splat. The function uses attribute group #0.
; Every run is expected to emit only the vfmadd form of the fused instruction.
1011define <8 x double> @test_v8f64_fneg_fnmsub(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) #0 {
1012; FMA-LABEL: test_v8f64_fneg_fnmsub:
1013; FMA:       # BB#0:
1014; FMA-NEXT:    vfmadd213pd %ymm4, %ymm2, %ymm0
1015; FMA-NEXT:    vfmadd213pd %ymm5, %ymm3, %ymm1
1016; FMA-NEXT:    retq
1017;
1018; FMA4-LABEL: test_v8f64_fneg_fnmsub:
1019; FMA4:       # BB#0:
1020; FMA4-NEXT:    vfmaddpd %ymm4, %ymm2, %ymm0, %ymm0
1021; FMA4-NEXT:    vfmaddpd %ymm5, %ymm3, %ymm1, %ymm1
1022; FMA4-NEXT:    retq
1023;
1024; AVX512-LABEL: test_v8f64_fneg_fnmsub:
1025; AVX512:       # BB#0:
1026; AVX512-NEXT:    vfmadd213pd %zmm2, %zmm1, %zmm0
1027; AVX512-NEXT:    retq
1028  %mul = fmul <8 x double> %a0, %a1
1029  %neg0 = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %mul
1030  %sub = fsub <8 x double> %neg0, %a2
1031  %neg1 = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %sub
1032  ret <8 x double> %neg1
1033}
1034
1035;
1036; Pattern: (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
1037;
1038
; Test input: (x * c1) + (x * c2) on <16 x float>, where c1 and c2 are two
; different non-splat constant vectors. The function uses attribute group #0.
; Every run is expected to emit only vmulps with a RIP-relative memory operand,
; with no add or fused instruction.
1039define <16 x float> @test_v16f32_fma_x_c1_fmul_x_c2(<16 x float> %x) #0 {
1040; FMA-LABEL: test_v16f32_fma_x_c1_fmul_x_c2:
1041; FMA:       # BB#0:
1042; FMA-NEXT:    vmulps {{.*}}(%rip), %ymm0, %ymm0
1043; FMA-NEXT:    vmulps {{.*}}(%rip), %ymm1, %ymm1
1044; FMA-NEXT:    retq
1045;
1046; FMA4-LABEL: test_v16f32_fma_x_c1_fmul_x_c2:
1047; FMA4:       # BB#0:
1048; FMA4-NEXT:    vmulps {{.*}}(%rip), %ymm0, %ymm0
1049; FMA4-NEXT:    vmulps {{.*}}(%rip), %ymm1, %ymm1
1050; FMA4-NEXT:    retq
1051;
1052; AVX512-LABEL: test_v16f32_fma_x_c1_fmul_x_c2:
1053; AVX512:       # BB#0:
1054; AVX512-NEXT:    vmulps {{.*}}(%rip), %zmm0, %zmm0
1055; AVX512-NEXT:    retq
1056  %m0 = fmul <16 x float> %x, <float 17.0, float 16.0, float 15.0, float 14.0, float 13.0, float 12.0, float 11.0, float 10.0, float 9.0, float 8.0, float 7.0, float 6.0, float 5.0, float 4.0, float 3.0, float 2.0>
1057  %m1 = fmul <16 x float> %x, <float 16.0, float 15.0, float 14.0, float 13.0, float 12.0, float 11.0, float 10.0, float 9.0, float 8.0, float 7.0, float 6.0, float 5.0, float 4.0, float 3.0, float 2.0, float 1.0>
1058  %a  = fadd <16 x float> %m0, %m1
1059  ret <16 x float> %a
1060}
1061
1062;
1063; Pattern: (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
1064;
1065
; Test input: ((x * c1) * c2) + y on <16 x float>, where c1 and c2 are two
; different non-splat constant vectors. The function uses attribute group #0.
; Every run is expected to emit only fused multiply-add instructions taking a
; RIP-relative memory operand, with no standalone multiply.
1066define <16 x float> @test_v16f32_fma_fmul_x_c1_c2_y(<16 x float> %x, <16 x float> %y) #0 {
1067; FMA-LABEL: test_v16f32_fma_fmul_x_c1_c2_y:
1068; FMA:       # BB#0:
1069; FMA-NEXT:    vfmadd132ps {{.*}}(%rip), %ymm2, %ymm0
1070; FMA-NEXT:    vfmadd132ps {{.*}}(%rip), %ymm3, %ymm1
1071; FMA-NEXT:    retq
1072;
1073; FMA4-LABEL: test_v16f32_fma_fmul_x_c1_c2_y:
1074; FMA4:       # BB#0:
1075; FMA4-NEXT:    vfmaddps %ymm2, {{.*}}(%rip), %ymm0, %ymm0
1076; FMA4-NEXT:    vfmaddps %ymm3, {{.*}}(%rip), %ymm1, %ymm1
1077; FMA4-NEXT:    retq
1078;
1079; AVX512-LABEL: test_v16f32_fma_fmul_x_c1_c2_y:
1080; AVX512:       # BB#0:
1081; AVX512-NEXT:    vfmadd132ps {{.*}}(%rip), %zmm1, %zmm0
1082; AVX512-NEXT:    retq
1083  %m0 = fmul <16 x float> %x,  <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>
1084  %m1 = fmul <16 x float> %m0, <float 16.0, float 15.0, float 14.0, float 13.0, float 12.0, float 11.0, float 10.0, float 9.0, float 8.0, float 7.0, float 6.0, float 5.0, float 4.0, float 3.0, float 2.0, float 1.0>
1085  %a  = fadd <16 x float> %m1, %y
1086  ret <16 x float> %a
1087}
1088
1089; Pattern: (fneg (fmul x, y)) -> (fnmsub x, y, 0)
1090
; Test input: -(x * y) on <16 x float>, where the multiply carries the nsz
; fast-math flag and the negation is a subtraction from a -0.0 splat. The
; function uses attribute group #0.
; Every run is expected to zero a register with vxorps and feed it to a
; vfnmsub instruction as the addend.
1091define <16 x float> @test_v16f32_fneg_fmul(<16 x float> %x, <16 x float> %y) #0 {
1092; FMA-LABEL: test_v16f32_fneg_fmul:
1093; FMA:       # BB#0:
1094; FMA-NEXT:    vxorps %ymm4, %ymm4, %ymm4
1095; FMA-NEXT:    vfnmsub213ps %ymm4, %ymm2, %ymm0
1096; FMA-NEXT:    vfnmsub213ps %ymm4, %ymm3, %ymm1
1097; FMA-NEXT:    retq
1098;
1099; FMA4-LABEL: test_v16f32_fneg_fmul:
1100; FMA4:       # BB#0:
1101; FMA4-NEXT:    vxorps %ymm4, %ymm4, %ymm4
1102; FMA4-NEXT:    vfnmsubps %ymm4, %ymm2, %ymm0, %ymm0
1103; FMA4-NEXT:    vfnmsubps %ymm4, %ymm3, %ymm1, %ymm1
1104; FMA4-NEXT:    retq
1105;
1106; AVX512-LABEL: test_v16f32_fneg_fmul:
1107; AVX512:       # BB#0:
1108; AVX512-NEXT:    vxorps %zmm2, %zmm2, %zmm2
1109; AVX512-NEXT:    vfnmsub213ps %zmm2, %zmm1, %zmm0
1110; AVX512-NEXT:    retq
1111  %m = fmul nsz <16 x float> %x, %y
1112  %n = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %m
1113  ret <16 x float> %n
1114}
1115
; Test input: -(x * y) on <8 x double>, where the multiply carries the nsz
; fast-math flag and the negation is a subtraction from a -0.0 splat. The
; function uses attribute group #0.
; Every run is expected to zero a register with vxorpd and feed it to a
; vfnmsub instruction as the addend.
1116define <8 x double> @test_v8f64_fneg_fmul(<8 x double> %x, <8 x double> %y) #0 {
1117; FMA-LABEL: test_v8f64_fneg_fmul:
1118; FMA:       # BB#0:
1119; FMA-NEXT:    vxorpd %ymm4, %ymm4, %ymm4
1120; FMA-NEXT:    vfnmsub213pd %ymm4, %ymm2, %ymm0
1121; FMA-NEXT:    vfnmsub213pd %ymm4, %ymm3, %ymm1
1122; FMA-NEXT:    retq
1123;
1124; FMA4-LABEL: test_v8f64_fneg_fmul:
1125; FMA4:       # BB#0:
1126; FMA4-NEXT:    vxorpd %ymm4, %ymm4, %ymm4
1127; FMA4-NEXT:    vfnmsubpd %ymm4, %ymm2, %ymm0, %ymm0
1128; FMA4-NEXT:    vfnmsubpd %ymm4, %ymm3, %ymm1, %ymm1
1129; FMA4-NEXT:    retq
1130;
1131; AVX512-LABEL: test_v8f64_fneg_fmul:
1132; AVX512:       # BB#0:
1133; AVX512-NEXT:    vxorpd %zmm2, %zmm2, %zmm2
1134; AVX512-NEXT:    vfnmsub213pd %zmm2, %zmm1, %zmm0
1135; AVX512-NEXT:    retq
1136  %m = fmul nsz <8 x double> %x, %y
1137  %n = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %m
1138  ret <8 x double> %n
1139}
1140
; Test input: -(x * y) on <8 x double>, identical to test_v8f64_fneg_fmul
; except that the multiply does NOT carry the nsz flag. The function still
; uses attribute group #0.
; Every run is expected to emit a plain vmulpd followed by a vxorpd against a
; -0.0 constant, with no fused instruction.
1141define <8 x double> @test_v8f64_fneg_fmul_no_nsz(<8 x double> %x, <8 x double> %y) #0 {
1142; FMA-LABEL: test_v8f64_fneg_fmul_no_nsz:
1143; FMA:       # BB#0:
1144; FMA-NEXT:    vmulpd %ymm3, %ymm1, %ymm1
1145; FMA-NEXT:    vmulpd %ymm2, %ymm0, %ymm0
1146; FMA-NEXT:    vmovapd {{.*#+}} ymm2 = [-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00]
1147; FMA-NEXT:    vxorpd %ymm2, %ymm0, %ymm0
1148; FMA-NEXT:    vxorpd %ymm2, %ymm1, %ymm1
1149; FMA-NEXT:    retq
1150;
1151; FMA4-LABEL: test_v8f64_fneg_fmul_no_nsz:
1152; FMA4:       # BB#0:
1153; FMA4-NEXT:    vmulpd %ymm3, %ymm1, %ymm1
1154; FMA4-NEXT:    vmulpd %ymm2, %ymm0, %ymm0
1155; FMA4-NEXT:    vmovapd {{.*#+}} ymm2 = [-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00]
1156; FMA4-NEXT:    vxorpd %ymm2, %ymm0, %ymm0
1157; FMA4-NEXT:    vxorpd %ymm2, %ymm1, %ymm1
1158; FMA4-NEXT:    retq
1159;
1160; AVX512-LABEL: test_v8f64_fneg_fmul_no_nsz:
1161; AVX512:       # BB#0:
1162; AVX512-NEXT:    vmulpd %zmm1, %zmm0, %zmm0
1163; AVX512-NEXT:    vxorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
1164; AVX512-NEXT:    retq
1165  %m = fmul <8 x double> %x, %y
1166  %n = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %m
1167  ret <8 x double> %n
1168}
1169
; Attribute group #0, referenced by the test functions above that are declared
; with a trailing "#0"; it sets the "unsafe-fp-math" function attribute to "true".
1170attributes #0 = { "unsafe-fp-math"="true" }
1171