; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X86 --check-prefix=X86-AVX
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=X86 --check-prefix=X86-AVX512VL
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefix=X86 --check-prefix=X86-AVX512VLDQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX512VLDQ

define <2 x double> @fabs_v2f64(<2 x double> %p) {
; X86-AVX-LABEL: fabs_v2f64:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X86-AVX512VL-LABEL: fabs_v2f64:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}{1to2}, %xmm0, %xmm0
; X86-AVX512VL-NEXT:    retl
;
; X86-AVX512VLDQ-LABEL: fabs_v2f64:
; X86-AVX512VLDQ:       # %bb.0:
; X86-AVX512VLDQ-NEXT:    vandpd {{\.?LCPI[0-9]+_[0-9]+}}{1to2}, %xmm0, %xmm0
; X86-AVX512VLDQ-NEXT:    retl
;
; X64-AVX-LABEL: fabs_v2f64:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    retq
;
; X64-AVX512VL-LABEL: fabs_v2f64:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; X64-AVX512VL-NEXT:    retq
;
; X64-AVX512VLDQ-LABEL: fabs_v2f64:
; X64-AVX512VLDQ:       # %bb.0:
; X64-AVX512VLDQ-NEXT:    vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; X64-AVX512VLDQ-NEXT:    retq
  %t = call <2 x double> @llvm.fabs.v2f64(<2 x double> %p)
  ret <2 x double> %t
}
declare <2 x double> @llvm.fabs.v2f64(<2 x double> %p)

define <4 x float> @fabs_v4f32(<4 x float> %p) {
; X86-AVX-LABEL: fabs_v4f32:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X86-AVX512VL-LABEL: fabs_v4f32:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %xmm0, %xmm0
; X86-AVX512VL-NEXT:    retl
;
; X86-AVX512VLDQ-LABEL: fabs_v4f32:
; X86-AVX512VLDQ:       # %bb.0:
; X86-AVX512VLDQ-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %xmm0, %xmm0
; X86-AVX512VLDQ-NEXT:    retl
;
; X64-AVX-LABEL: fabs_v4f32:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    retq
;
; X64-AVX512VL-LABEL: fabs_v4f32:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; X64-AVX512VL-NEXT:    retq
;
; X64-AVX512VLDQ-LABEL: fabs_v4f32:
; X64-AVX512VLDQ:       # %bb.0:
; X64-AVX512VLDQ-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; X64-AVX512VLDQ-NEXT:    retq
  %t = call <4 x float> @llvm.fabs.v4f32(<4 x float> %p)
  ret <4 x float> %t
}
declare <4 x float> @llvm.fabs.v4f32(<4 x float> %p)

define <4 x double> @fabs_v4f64(<4 x double> %p) {
; X86-AVX-LABEL: fabs_v4f64:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; X86-AVX-NEXT:    retl
;
; X86-AVX512VL-LABEL: fabs_v4f64:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %ymm0, %ymm0
; X86-AVX512VL-NEXT:    retl
;
; X86-AVX512VLDQ-LABEL: fabs_v4f64:
; X86-AVX512VLDQ:       # %bb.0:
; X86-AVX512VLDQ-NEXT:    vandpd {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %ymm0, %ymm0
; X86-AVX512VLDQ-NEXT:    retl
;
; X64-AVX-LABEL: fabs_v4f64:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; X64-AVX-NEXT:    retq
;
; X64-AVX512VL-LABEL: fabs_v4f64:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
; X64-AVX512VL-NEXT:    retq
;
; X64-AVX512VLDQ-LABEL: fabs_v4f64:
; X64-AVX512VLDQ:       # %bb.0:
; X64-AVX512VLDQ-NEXT:    vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
; X64-AVX512VLDQ-NEXT:    retq
  %t = call <4 x double> @llvm.fabs.v4f64(<4 x double> %p)
  ret <4 x double> %t
}
declare <4 x double> @llvm.fabs.v4f64(<4 x double> %p)

define <8 x float> @fabs_v8f32(<8 x float> %p) {
; X86-AVX-LABEL: fabs_v8f32:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; X86-AVX-NEXT:    retl
;
; X86-AVX512VL-LABEL: fabs_v8f32:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %ymm0
; X86-AVX512VL-NEXT:    retl
;
; X86-AVX512VLDQ-LABEL: fabs_v8f32:
; X86-AVX512VLDQ:       # %bb.0:
; X86-AVX512VLDQ-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %ymm0
; X86-AVX512VLDQ-NEXT:    retl
;
; X64-AVX-LABEL: fabs_v8f32:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; X64-AVX-NEXT:    retq
;
; X64-AVX512VL-LABEL: fabs_v8f32:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; X64-AVX512VL-NEXT:    retq
;
; X64-AVX512VLDQ-LABEL: fabs_v8f32:
; X64-AVX512VLDQ:       # %bb.0:
; X64-AVX512VLDQ-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; X64-AVX512VLDQ-NEXT:    retq
  %t = call <8 x float> @llvm.fabs.v8f32(<8 x float> %p)
  ret <8 x float> %t
}
declare <8 x float> @llvm.fabs.v8f32(<8 x float> %p)

define <8 x double> @fabs_v8f64(<8 x double> %p) {
; X86-AVX-LABEL: fabs_v8f64:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vmovaps {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN]
; X86-AVX-NEXT:    vandps %ymm2, %ymm0, %ymm0
; X86-AVX-NEXT:    vandps %ymm2, %ymm1, %ymm1
; X86-AVX-NEXT:    retl
;
; X86-AVX512VL-LABEL: fabs_v8f64:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %zmm0, %zmm0
; X86-AVX512VL-NEXT:    retl
;
; X86-AVX512VLDQ-LABEL: fabs_v8f64:
; X86-AVX512VLDQ:       # %bb.0:
; X86-AVX512VLDQ-NEXT:    vandpd {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %zmm0, %zmm0
; X86-AVX512VLDQ-NEXT:    retl
;
; X64-AVX-LABEL: fabs_v8f64:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovaps {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN]
; X64-AVX-NEXT:    vandps %ymm2, %ymm0, %ymm0
; X64-AVX-NEXT:    vandps %ymm2, %ymm1, %ymm1
; X64-AVX-NEXT:    retq
;
; X64-AVX512VL-LABEL: fabs_v8f64:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
; X64-AVX512VL-NEXT:    retq
;
; X64-AVX512VLDQ-LABEL: fabs_v8f64:
; X64-AVX512VLDQ:       # %bb.0:
; X64-AVX512VLDQ-NEXT:    vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
; X64-AVX512VLDQ-NEXT:    retq
  %t = call <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
  ret <8 x double> %t
}
declare <8 x double> @llvm.fabs.v8f64(<8 x double> %p)

define <16 x float> @fabs_v16f32(<16 x float> %p) {
; X86-AVX-LABEL: fabs_v16f32:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vmovaps {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X86-AVX-NEXT:    vandps %ymm2, %ymm0, %ymm0
; X86-AVX-NEXT:    vandps %ymm2, %ymm1, %ymm1
; X86-AVX-NEXT:    retl
;
; X86-AVX512VL-LABEL: fabs_v16f32:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}{1to16}, %zmm0, %zmm0
; X86-AVX512VL-NEXT:    retl
;
; X86-AVX512VLDQ-LABEL: fabs_v16f32:
; X86-AVX512VLDQ:       # %bb.0:
; X86-AVX512VLDQ-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}{1to16}, %zmm0, %zmm0
; X86-AVX512VLDQ-NEXT:    retl
;
; X64-AVX-LABEL: fabs_v16f32:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovaps {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X64-AVX-NEXT:    vandps %ymm2, %ymm0, %ymm0
; X64-AVX-NEXT:    vandps %ymm2, %ymm1, %ymm1
; X64-AVX-NEXT:    retq
;
; X64-AVX512VL-LABEL: fabs_v16f32:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; X64-AVX512VL-NEXT:    retq
;
; X64-AVX512VLDQ-LABEL: fabs_v16f32:
; X64-AVX512VLDQ:       # %bb.0:
; X64-AVX512VLDQ-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; X64-AVX512VLDQ-NEXT:    retq
  %t = call <16 x float> @llvm.fabs.v16f32(<16 x float> %p)
  ret <16 x float> %t
}
declare <16 x float> @llvm.fabs.v16f32(<16 x float> %p)

; PR20354: when generating code for a vector fabs op,
; make sure that we're only turning off the sign bit of each float value.
; No constant pool loads or vector ops are needed for the fabs of a
; bitcasted integer constant; we should just return an integer constant
; that has the sign bits turned off.
;
; So instead of something like this:
;    movabsq (constant pool load of mask for sign bits)
;    vmovq   (move from integer register to vector/fp register)
;    vandps  (mask off sign bits)
;    vmovq   (move vector/fp register back to integer return register)
;
; We should generate:
;    mov     (put constant value in return register)
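;
; As a worked reference for fabs_v2f32_1 below (a hand derivation from the
; constants already in this test, not part of the autogenerated checks):
;   i64 0xFFFFFFFF_00000000 bitcast to <2 x float> -> lanes 0x00000000, 0xFFFFFFFF
;   fabs clears bit 31 of each 32-bit lane         -> lanes 0x00000000, 0x7FFFFFFF
;   bitcast back to i64                            -> 0x7FFFFFFF_00000000
;                                                     = 9223372032559808512
; which matches the movabsq immediate checked for X64 and the
; xorl/movl $2147483647 pair checked for X86.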

define i64 @fabs_v2f32_1() {
; X86-LABEL: fabs_v2f32_1:
; X86:       # %bb.0:
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:    movl $2147483647, %edx # imm = 0x7FFFFFFF
; X86-NEXT:    retl
;
; X64-LABEL: fabs_v2f32_1:
; X64:       # %bb.0:
; X64-NEXT:    movabsq $9223372032559808512, %rax # imm = 0x7FFFFFFF00000000
; X64-NEXT:    retq
 %bitcast = bitcast i64 18446744069414584320 to <2 x float> ; 0xFFFF_FFFF_0000_0000
 %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %bitcast)
 %ret = bitcast <2 x float> %fabs to i64
 ret i64 %ret
}

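; The same derivation for fabs_v2f32_2 (again for reference only):
; i64 0x00000000_FFFFFFFF -> lanes 0xFFFFFFFF, 0x00000000; clearing the sign
; bits yields 0x00000000_7FFFFFFF = 2147483647. A single movl suffices on X64
; because a 32-bit register write implicitly zeroes the upper half of %rax.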
define i64 @fabs_v2f32_2() {
; X86-LABEL: fabs_v2f32_2:
; X86:       # %bb.0:
; X86-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    retl
;
; X64-LABEL: fabs_v2f32_2:
; X64:       # %bb.0:
; X64-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
; X64-NEXT:    retq
 %bitcast = bitcast i64 4294967295 to <2 x float> ; 0x0000_0000_FFFF_FFFF
 %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %bitcast)
 %ret = bitcast <2 x float> %fabs to i64
 ret i64 %ret
}

declare <2 x float> @llvm.fabs.v2f32(<2 x float> %p)
