1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X32 --check-prefix=X32_AVX
3; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=X32 --check-prefix=X32_AVX512VL
4; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefix=X32 --check-prefix=X32_AVX512VLDQ
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X64 --check-prefix=X64_AVX
6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=X64 --check-prefix=X64_AVX512VL
7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefix=X64 --check-prefix=X64_AVX512VLDQ
8
9; FIXME: Drop the regex pattern matching of 'nan' once we drop support for
10; MSVC 2013.
11
; fabs of <2 x double> is lowered to a sign-bit-clearing AND against a
; constant-pool mask: plain AVX and AVX512DQ+VL select the FP form (vandps),
; while AVX512VL without DQ falls back to the integer form (vpand).
12define <2 x double> @fabs_v2f64(<2 x double> %p) {
13; X32_AVX-LABEL: fabs_v2f64:
14; X32_AVX:       # BB#0:
15; X32_AVX-NEXT:    vandps {{\.LCPI.*}}, %xmm0, %xmm0
16; X32_AVX-NEXT:    retl
17;
18; X32_AVX512VL-LABEL: fabs_v2f64:
19; X32_AVX512VL:       # BB#0:
20; X32_AVX512VL-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
21; X32_AVX512VL-NEXT:    retl
22;
23; X32_AVX512VLDQ-LABEL: fabs_v2f64:
24; X32_AVX512VLDQ:       # BB#0:
25; X32_AVX512VLDQ-NEXT:    vandps {{\.LCPI.*}}, %xmm0, %xmm0
26; X32_AVX512VLDQ-NEXT:    retl
27;
28; X64_AVX-LABEL: fabs_v2f64:
29; X64_AVX:       # BB#0:
30; X64_AVX-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
31; X64_AVX-NEXT:    retq
32;
33; X64_AVX512VL-LABEL: fabs_v2f64:
34; X64_AVX512VL:       # BB#0:
35; X64_AVX512VL-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
36; X64_AVX512VL-NEXT:    retq
37;
38; X64_AVX512VLDQ-LABEL: fabs_v2f64:
39; X64_AVX512VLDQ:       # BB#0:
40; X64_AVX512VLDQ-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
41; X64_AVX512VLDQ-NEXT:    retq
42  %t = call <2 x double> @llvm.fabs.v2f64(<2 x double> %p)
43  ret <2 x double> %t
44}
45declare <2 x double> @llvm.fabs.v2f64(<2 x double> %p)
46
; fabs of <4 x float>: on AVX512 targets the mask is a broadcast constant
; ({1to4}) rather than a full 128-bit constant-pool load; VL-only targets use
; the integer vpandd form, DQ+VL targets the FP vandps form.
47define <4 x float> @fabs_v4f32(<4 x float> %p) {
48; X32_AVX-LABEL: fabs_v4f32:
49; X32_AVX:       # BB#0:
50; X32_AVX-NEXT:    vandps {{\.LCPI.*}}, %xmm0, %xmm0
51; X32_AVX-NEXT:    retl
52;
53; X32_AVX512VL-LABEL: fabs_v4f32:
54; X32_AVX512VL:       # BB#0:
55; X32_AVX512VL-NEXT:    vpandd {{\.LCPI.*}}{1to4}, %xmm0, %xmm0
56; X32_AVX512VL-NEXT:    retl
57;
58; X32_AVX512VLDQ-LABEL: fabs_v4f32:
59; X32_AVX512VLDQ:       # BB#0:
60; X32_AVX512VLDQ-NEXT:    vandps {{\.LCPI.*}}{1to4}, %xmm0, %xmm0
61; X32_AVX512VLDQ-NEXT:    retl
62;
63; X64_AVX-LABEL: fabs_v4f32:
64; X64_AVX:       # BB#0:
65; X64_AVX-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
66; X64_AVX-NEXT:    retq
67;
68; X64_AVX512VL-LABEL: fabs_v4f32:
69; X64_AVX512VL:       # BB#0:
70; X64_AVX512VL-NEXT:    vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0
71; X64_AVX512VL-NEXT:    retq
72;
73; X64_AVX512VLDQ-LABEL: fabs_v4f32:
74; X64_AVX512VLDQ:       # BB#0:
75; X64_AVX512VLDQ-NEXT:    vandps {{.*}}(%rip){1to4}, %xmm0, %xmm0
76; X64_AVX512VLDQ-NEXT:    retq
77  %t = call <4 x float> @llvm.fabs.v4f32(<4 x float> %p)
78  ret <4 x float> %t
79}
80declare <4 x float> @llvm.fabs.v4f32(<4 x float> %p)
81
; fabs of <4 x double> (256-bit): AVX uses a full ymm constant-pool mask;
; AVX512 targets broadcast a single 64-bit mask element ({1to4}) — vpandq on
; VL-only, vandpd on DQ+VL.
82define <4 x double> @fabs_v4f64(<4 x double> %p) {
83; X32_AVX-LABEL: fabs_v4f64:
84; X32_AVX:       # BB#0:
85; X32_AVX-NEXT:    vandps {{\.LCPI.*}}, %ymm0, %ymm0
86; X32_AVX-NEXT:    retl
87;
88; X32_AVX512VL-LABEL: fabs_v4f64:
89; X32_AVX512VL:       # BB#0:
90; X32_AVX512VL-NEXT:    vpandq {{\.LCPI.*}}{1to4}, %ymm0, %ymm0
91; X32_AVX512VL-NEXT:    retl
92;
93; X32_AVX512VLDQ-LABEL: fabs_v4f64:
94; X32_AVX512VLDQ:       # BB#0:
95; X32_AVX512VLDQ-NEXT:    vandpd {{\.LCPI.*}}{1to4}, %ymm0, %ymm0
96; X32_AVX512VLDQ-NEXT:    retl
97;
98; X64_AVX-LABEL: fabs_v4f64:
99; X64_AVX:       # BB#0:
100; X64_AVX-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
101; X64_AVX-NEXT:    retq
102;
103; X64_AVX512VL-LABEL: fabs_v4f64:
104; X64_AVX512VL:       # BB#0:
105; X64_AVX512VL-NEXT:    vpandq {{.*}}(%rip){1to4}, %ymm0, %ymm0
106; X64_AVX512VL-NEXT:    retq
107;
108; X64_AVX512VLDQ-LABEL: fabs_v4f64:
109; X64_AVX512VLDQ:       # BB#0:
110; X64_AVX512VLDQ-NEXT:    vandpd {{.*}}(%rip){1to4}, %ymm0, %ymm0
111; X64_AVX512VLDQ-NEXT:    retq
112  %t = call <4 x double> @llvm.fabs.v4f64(<4 x double> %p)
113  ret <4 x double> %t
114}
115declare <4 x double> @llvm.fabs.v4f64(<4 x double> %p)
116
; fabs of <8 x float> (256-bit): same shape as v4f32 but with a {1to8}
; broadcast of the 32-bit mask on AVX512 targets (vpandd on VL-only, vandps
; on DQ+VL); plain AVX loads the full ymm mask from the constant pool.
117define <8 x float> @fabs_v8f32(<8 x float> %p) {
118; X32_AVX-LABEL: fabs_v8f32:
119; X32_AVX:       # BB#0:
120; X32_AVX-NEXT:    vandps {{\.LCPI.*}}, %ymm0, %ymm0
121; X32_AVX-NEXT:    retl
122;
123; X32_AVX512VL-LABEL: fabs_v8f32:
124; X32_AVX512VL:       # BB#0:
125; X32_AVX512VL-NEXT:    vpandd {{\.LCPI.*}}{1to8}, %ymm0, %ymm0
126; X32_AVX512VL-NEXT:    retl
127;
128; X32_AVX512VLDQ-LABEL: fabs_v8f32:
129; X32_AVX512VLDQ:       # BB#0:
130; X32_AVX512VLDQ-NEXT:    vandps {{\.LCPI.*}}{1to8}, %ymm0, %ymm0
131; X32_AVX512VLDQ-NEXT:    retl
132;
133; X64_AVX-LABEL: fabs_v8f32:
134; X64_AVX:       # BB#0:
135; X64_AVX-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
136; X64_AVX-NEXT:    retq
137;
138; X64_AVX512VL-LABEL: fabs_v8f32:
139; X64_AVX512VL:       # BB#0:
140; X64_AVX512VL-NEXT:    vpandd {{.*}}(%rip){1to8}, %ymm0, %ymm0
141; X64_AVX512VL-NEXT:    retq
142;
143; X64_AVX512VLDQ-LABEL: fabs_v8f32:
144; X64_AVX512VLDQ:       # BB#0:
145; X64_AVX512VLDQ-NEXT:    vandps {{.*}}(%rip){1to8}, %ymm0, %ymm0
146; X64_AVX512VLDQ-NEXT:    retq
147  %t = call <8 x float> @llvm.fabs.v8f32(<8 x float> %p)
148  ret <8 x float> %t
149}
150declare <8 x float> @llvm.fabs.v8f32(<8 x float> %p)
151
; fabs of <8 x double> (512-bit): plain AVX has no 512-bit registers, so the
; op is split into two 256-bit ANDs sharing one ymm mask; that mask's elements
; print as 'nan' (or MSVC's '1.#QNAN0e+00' — hence the regex, see FIXME at the
; top of the file). AVX512 targets do a single zmm AND with a {1to8} broadcast.
152define <8 x double> @fabs_v8f64(<8 x double> %p) {
153; X32_AVX-LABEL: fabs_v8f64:
154; X32_AVX:       # BB#0:
155; X32_AVX-NEXT:    vmovaps {{.*#+}} ymm2 = [{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}}]
156; X32_AVX-NEXT:    vandps %ymm2, %ymm0, %ymm0
157; X32_AVX-NEXT:    vandps %ymm2, %ymm1, %ymm1
158; X32_AVX-NEXT:    retl
159;
160; X32_AVX512VL-LABEL: fabs_v8f64:
161; X32_AVX512VL:       # BB#0:
162; X32_AVX512VL-NEXT:    vpandq {{\.LCPI.*}}{1to8}, %zmm0, %zmm0
163; X32_AVX512VL-NEXT:    retl
164;
165; X32_AVX512VLDQ-LABEL: fabs_v8f64:
166; X32_AVX512VLDQ:       # BB#0:
167; X32_AVX512VLDQ-NEXT:    vandpd {{\.LCPI.*}}{1to8}, %zmm0, %zmm0
168; X32_AVX512VLDQ-NEXT:    retl
169;
170; X64_AVX-LABEL: fabs_v8f64:
171; X64_AVX:       # BB#0:
172; X64_AVX-NEXT:    vmovaps {{.*#+}} ymm2 = [{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}}]
173; X64_AVX-NEXT:    vandps %ymm2, %ymm0, %ymm0
174; X64_AVX-NEXT:    vandps %ymm2, %ymm1, %ymm1
175; X64_AVX-NEXT:    retq
176;
177; X64_AVX512VL-LABEL: fabs_v8f64:
178; X64_AVX512VL:       # BB#0:
179; X64_AVX512VL-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm0
180; X64_AVX512VL-NEXT:    retq
181;
182; X64_AVX512VLDQ-LABEL: fabs_v8f64:
183; X64_AVX512VLDQ:       # BB#0:
184; X64_AVX512VLDQ-NEXT:    vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
185; X64_AVX512VLDQ-NEXT:    retq
186  %t = call <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
187  ret <8 x double> %t
188}
189declare <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
190
; fabs of <16 x float> (512-bit): same split as v8f64 on plain AVX (two
; 256-bit ANDs against a shared ymm mask whose elements print as 'nan' /
; MSVC '1.#QNAN0e+00'); AVX512 targets use one zmm AND with a {1to16}
; broadcast — vpandd on VL-only, vandps on DQ+VL.
191define <16 x float> @fabs_v16f32(<16 x float> %p) {
192; X32_AVX-LABEL: fabs_v16f32:
193; X32_AVX:       # BB#0:
194; X32_AVX-NEXT:    vmovaps {{.*#+}} ymm2 = [{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}}]
195; X32_AVX-NEXT:    vandps %ymm2, %ymm0, %ymm0
196; X32_AVX-NEXT:    vandps %ymm2, %ymm1, %ymm1
197; X32_AVX-NEXT:    retl
198;
199; X32_AVX512VL-LABEL: fabs_v16f32:
200; X32_AVX512VL:       # BB#0:
201; X32_AVX512VL-NEXT:    vpandd {{\.LCPI.*}}{1to16}, %zmm0, %zmm0
202; X32_AVX512VL-NEXT:    retl
203;
204; X32_AVX512VLDQ-LABEL: fabs_v16f32:
205; X32_AVX512VLDQ:       # BB#0:
206; X32_AVX512VLDQ-NEXT:    vandps {{\.LCPI.*}}{1to16}, %zmm0, %zmm0
207; X32_AVX512VLDQ-NEXT:    retl
208;
209; X64_AVX-LABEL: fabs_v16f32:
210; X64_AVX:       # BB#0:
211; X64_AVX-NEXT:    vmovaps {{.*#+}} ymm2 = [{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}}]
212; X64_AVX-NEXT:    vandps %ymm2, %ymm0, %ymm0
213; X64_AVX-NEXT:    vandps %ymm2, %ymm1, %ymm1
214; X64_AVX-NEXT:    retq
215;
216; X64_AVX512VL-LABEL: fabs_v16f32:
217; X64_AVX512VL:       # BB#0:
218; X64_AVX512VL-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
219; X64_AVX512VL-NEXT:    retq
220;
221; X64_AVX512VLDQ-LABEL: fabs_v16f32:
222; X64_AVX512VLDQ:       # BB#0:
223; X64_AVX512VLDQ-NEXT:    vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0
224; X64_AVX512VLDQ-NEXT:    retq
225  %t = call <16 x float> @llvm.fabs.v16f32(<16 x float> %p)
226  ret <16 x float> %t
227}
228declare <16 x float> @llvm.fabs.v16f32(<16 x float> %p)
229
230; PR20354: when generating code for a vector fabs op,
231; make sure that we're only turning off the sign bit of each float value.
232; No constant pool loads or vector ops are needed for the fabs of a
233; bitcasted integer constant; we should just return an integer constant
234; that has the sign bits turned off.
235;
236; So instead of something like this:
237;    movabsq (constant pool load of mask for sign bits)
238;    vmovq   (move from integer register to vector/fp register)
239;    vandps  (mask off sign bits)
240;    vmovq   (move vector/fp register back to integer return register)
241;
242; We should generate:
243;    mov     (put constant value in return register)
244
; PR20354 case 1: fabs of a bitcasted all-ones constant must be folded to an
; integer immediate with the per-element sign bits cleared (upper lane here):
; 0xFFFF_FFFF_0000_0000 -> 0x7FFF_FFFF_0000_0000. On i686 the i64 result is
; returned in edx:eax (two movs); on x86-64 it is a single movabsq — no
; constant-pool load or vector op.
245define i64 @fabs_v2f32_1() {
246; X32-LABEL: fabs_v2f32_1:
247; X32:       # BB#0:
248; X32-NEXT:    xorl %eax, %eax
249; X32-NEXT:    movl $2147483647, %edx # imm = 0x7FFFFFFF
250; X32-NEXT:    retl
251;
252; X64-LABEL: fabs_v2f32_1:
253; X64:       # BB#0:
254; X64-NEXT:    movabsq $9223372032559808512, %rax # imm = 0x7FFFFFFF00000000
255; X64-NEXT:    retq
256 %bitcast = bitcast i64 18446744069414584320 to <2 x float> ; 0xFFFF_FFFF_0000_0000
257 %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %bitcast)
258 %ret = bitcast <2 x float> %fabs to i64
259 ret i64 %ret
260}
261
; PR20354 case 2: same fold with the all-ones bits in the low lane:
; 0x0000_0000_FFFF_FFFF -> 0x0000_0000_7FFF_FFFF. Both targets materialize
; the result with plain integer moves (edx:eax on i686, eax on x86-64).
262define i64 @fabs_v2f32_2() {
263; X32-LABEL: fabs_v2f32_2:
264; X32:       # BB#0:
265; X32-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
266; X32-NEXT:    xorl %edx, %edx
267; X32-NEXT:    retl
268;
269; X64-LABEL: fabs_v2f32_2:
270; X64:       # BB#0:
271; X64-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
272; X64-NEXT:    retq
273 %bitcast = bitcast i64 4294967295 to <2 x float> ; 0x0000_0000_FFFF_FFFF
274 %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %bitcast)
275 %ret = bitcast <2 x float> %fabs to i64
276 ret i64 %ret
277}
278
279declare <2 x float> @llvm.fabs.v2f32(<2 x float> %p)
280