; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2    | FileCheck %s --check-prefixes=SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.1  | FileCheck %s --check-prefixes=SSE,SSE41
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx     | FileCheck %s --check-prefixes=X64_AVX1
; RUN: llc < %s -mtriple=i686--   -mattr=+avx     | FileCheck %s --check-prefixes=X32_AVX1

declare i32 @llvm.fptoui.sat.i32.f32(float)
declare i64 @llvm.fptosi.sat.i64.f64(double)

define float @trunc_unsigned_f32(float %x) #0 {
; SSE2-LABEL: trunc_unsigned_f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    cvttss2si %xmm0, %rax
; SSE2-NEXT:    movl %eax, %eax
; SSE2-NEXT:    xorps %xmm0, %xmm0
; SSE2-NEXT:    cvtsi2ss %rax, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: trunc_unsigned_f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    roundss $11, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; X64_AVX1-LABEL: trunc_unsigned_f32:
; X64_AVX1:       # %bb.0:
; X64_AVX1-NEXT:    vroundss $11, %xmm0, %xmm0, %xmm0
; X64_AVX1-NEXT:    retq
;
; X32_AVX1-LABEL: trunc_unsigned_f32:
; X32_AVX1:       # %bb.0:
; X32_AVX1-NEXT:    pushl %eax
; X32_AVX1-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32_AVX1-NEXT:    vroundss $11, %xmm0, %xmm0, %xmm0
; X32_AVX1-NEXT:    vmovss %xmm0, (%esp)
; X32_AVX1-NEXT:    flds (%esp)
; X32_AVX1-NEXT:    popl %eax
; X32_AVX1-NEXT:    retl
  %i = fptoui float %x to i32
  %r = uitofp i32 %i to float
  ret float %r
}

define double @trunc_unsigned_f64(double %x) #0 {
; SSE2-LABEL: trunc_unsigned_f64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    cvttsd2si %xmm0, %rax
; SSE2-NEXT:    movq %rax, %rcx
; SSE2-NEXT:    sarq $63, %rcx
; SSE2-NEXT:    subsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    cvttsd2si %xmm0, %rdx
; SSE2-NEXT:    andq %rcx, %rdx
; SSE2-NEXT:    orq %rax, %rdx
; SSE2-NEXT:    movq %rdx, %xmm1
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
; SSE2-NEXT:    subpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    movapd %xmm1, %xmm0
; SSE2-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE2-NEXT:    addsd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: trunc_unsigned_f64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    roundsd $11, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; X64_AVX1-LABEL: trunc_unsigned_f64:
; X64_AVX1:       # %bb.0:
; X64_AVX1-NEXT:    vroundsd $11, %xmm0, %xmm0, %xmm0
; X64_AVX1-NEXT:    retq
;
; X32_AVX1-LABEL: trunc_unsigned_f64:
; X32_AVX1:       # %bb.0:
; X32_AVX1-NEXT:    pushl %ebp
; X32_AVX1-NEXT:    movl %esp, %ebp
; X32_AVX1-NEXT:    andl $-8, %esp
; X32_AVX1-NEXT:    subl $8, %esp
; X32_AVX1-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32_AVX1-NEXT:    vroundsd $11, %xmm0, %xmm0, %xmm0
; X32_AVX1-NEXT:    vmovsd %xmm0, (%esp)
; X32_AVX1-NEXT:    fldl (%esp)
; X32_AVX1-NEXT:    movl %ebp, %esp
; X32_AVX1-NEXT:    popl %ebp
; X32_AVX1-NEXT:    retl
  %i = fptoui double %x to i64
  %r = uitofp i64 %i to double
  ret double %r
}

define <4 x float> @trunc_unsigned_v4f32(<4 x float> %x) #0 {
; SSE2-LABEL: trunc_unsigned_v4f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    cvttps2dq %xmm0, %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    psrad $31, %xmm2
; SSE2-NEXT:    subps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
; SSE2-NEXT:    pand %xmm0, %xmm1
; SSE2-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    psrld $16, %xmm0
; SSE2-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    subps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    addps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: trunc_unsigned_v4f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    roundps $11, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; X64_AVX1-LABEL: trunc_unsigned_v4f32:
; X64_AVX1:       # %bb.0:
; X64_AVX1-NEXT:    vroundps $11, %xmm0, %xmm0
; X64_AVX1-NEXT:    retq
;
; X32_AVX1-LABEL: trunc_unsigned_v4f32:
; X32_AVX1:       # %bb.0:
; X32_AVX1-NEXT:    vroundps $11, %xmm0, %xmm0
; X32_AVX1-NEXT:    retl
  %i = fptoui <4 x float> %x to <4 x i32>
  %r = uitofp <4 x i32> %i to <4 x float>
  ret <4 x float> %r
}

define <2 x double> @trunc_unsigned_v2f64(<2 x double> %x) #0 {
; SSE2-LABEL: trunc_unsigned_v2f64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
; SSE2-NEXT:    movapd %xmm0, %xmm1
; SSE2-NEXT:    subsd %xmm2, %xmm1
; SSE2-NEXT:    cvttsd2si %xmm1, %rax
; SSE2-NEXT:    cvttsd2si %xmm0, %rcx
; SSE2-NEXT:    movq %rcx, %rdx
; SSE2-NEXT:    sarq $63, %rdx
; SSE2-NEXT:    andq %rax, %rdx
; SSE2-NEXT:    orq %rcx, %rdx
; SSE2-NEXT:    movq %rdx, %xmm1
; SSE2-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE2-NEXT:    cvttsd2si %xmm0, %rax
; SSE2-NEXT:    subsd %xmm2, %xmm0
; SSE2-NEXT:    cvttsd2si %xmm0, %rcx
; SSE2-NEXT:    movq %rax, %rdx
; SSE2-NEXT:    sarq $63, %rdx
; SSE2-NEXT:    andq %rcx, %rdx
; SSE2-NEXT:    orq %rax, %rdx
; SSE2-NEXT:    movq %rdx, %xmm0
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [4294967295,4294967295]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    psrlq $32, %xmm1
; SSE2-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    subpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    addpd %xmm0, %xmm1
; SSE2-NEXT:    movapd %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: trunc_unsigned_v2f64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    roundpd $11, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; X64_AVX1-LABEL: trunc_unsigned_v2f64:
; X64_AVX1:       # %bb.0:
; X64_AVX1-NEXT:    vroundpd $11, %xmm0, %xmm0
; X64_AVX1-NEXT:    retq
;
; X32_AVX1-LABEL: trunc_unsigned_v2f64:
; X32_AVX1:       # %bb.0:
; X32_AVX1-NEXT:    vroundpd $11, %xmm0, %xmm0
; X32_AVX1-NEXT:    retl
  %i = fptoui <2 x double> %x to <2 x i64>
  %r = uitofp <2 x i64> %i to <2 x double>
  ret <2 x double> %r
}

define <4 x double> @trunc_unsigned_v4f64(<4 x double> %x) #0 {
; SSE2-LABEL: trunc_unsigned_v4f64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movapd %xmm1, %xmm2
; SSE2-NEXT:    movsd {{.*#+}} xmm3 = mem[0],zero
; SSE2-NEXT:    subsd %xmm3, %xmm1
; SSE2-NEXT:    cvttsd2si %xmm1, %rax
; SSE2-NEXT:    cvttsd2si %xmm2, %rcx
; SSE2-NEXT:    movq %rcx, %rdx
; SSE2-NEXT:    sarq $63, %rdx
; SSE2-NEXT:    andq %rax, %rdx
; SSE2-NEXT:    orq %rcx, %rdx
; SSE2-NEXT:    movq %rdx, %xmm1
; SSE2-NEXT:    unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
; SSE2-NEXT:    cvttsd2si %xmm2, %rax
; SSE2-NEXT:    subsd %xmm3, %xmm2
; SSE2-NEXT:    cvttsd2si %xmm2, %rcx
; SSE2-NEXT:    movq %rax, %rdx
; SSE2-NEXT:    sarq $63, %rdx
; SSE2-NEXT:    andq %rcx, %rdx
; SSE2-NEXT:    orq %rax, %rdx
; SSE2-NEXT:    movq %rdx, %xmm2
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE2-NEXT:    movapd %xmm0, %xmm2
; SSE2-NEXT:    subsd %xmm3, %xmm2
; SSE2-NEXT:    cvttsd2si %xmm2, %rax
; SSE2-NEXT:    cvttsd2si %xmm0, %rcx
; SSE2-NEXT:    movq %rcx, %rdx
; SSE2-NEXT:    sarq $63, %rdx
; SSE2-NEXT:    andq %rax, %rdx
; SSE2-NEXT:    orq %rcx, %rdx
; SSE2-NEXT:    movq %rdx, %xmm2
; SSE2-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE2-NEXT:    cvttsd2si %xmm0, %rax
; SSE2-NEXT:    subsd %xmm3, %xmm0
; SSE2-NEXT:    cvttsd2si %xmm0, %rcx
; SSE2-NEXT:    movq %rax, %rdx
; SSE2-NEXT:    sarq $63, %rdx
; SSE2-NEXT:    andq %rcx, %rdx
; SSE2-NEXT:    orq %rax, %rdx
; SSE2-NEXT:    movq %rdx, %xmm0
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [4294967295,4294967295]
; SSE2-NEXT:    movdqa %xmm2, %xmm3
; SSE2-NEXT:    pand %xmm0, %xmm3
; SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200]
; SSE2-NEXT:    por %xmm4, %xmm3
; SSE2-NEXT:    psrlq $32, %xmm2
; SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072]
; SSE2-NEXT:    por %xmm5, %xmm2
; SSE2-NEXT:    movapd {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25]
; SSE2-NEXT:    subpd %xmm6, %xmm2
; SSE2-NEXT:    addpd %xmm3, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    por %xmm4, %xmm0
; SSE2-NEXT:    psrlq $32, %xmm1
; SSE2-NEXT:    por %xmm5, %xmm1
; SSE2-NEXT:    subpd %xmm6, %xmm1
; SSE2-NEXT:    addpd %xmm0, %xmm1
; SSE2-NEXT:    movapd %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: trunc_unsigned_v4f64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    roundpd $11, %xmm0, %xmm0
; SSE41-NEXT:    roundpd $11, %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; X64_AVX1-LABEL: trunc_unsigned_v4f64:
; X64_AVX1:       # %bb.0:
; X64_AVX1-NEXT:    vroundpd $11, %ymm0, %ymm0
; X64_AVX1-NEXT:    retq
;
; X32_AVX1-LABEL: trunc_unsigned_v4f64:
; X32_AVX1:       # %bb.0:
; X32_AVX1-NEXT:    vroundpd $11, %ymm0, %ymm0
; X32_AVX1-NEXT:    retl
  %i = fptoui <4 x double> %x to <4 x i64>
  %r = uitofp <4 x i64> %i to <4 x double>
  ret <4 x double> %r
}

define float @trunc_signed_f32_no_fast_math(float %x) {
; SSE-LABEL: trunc_signed_f32_no_fast_math:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; X64_AVX1-LABEL: trunc_signed_f32_no_fast_math:
; X64_AVX1:       # %bb.0:
; X64_AVX1-NEXT:    vcvttps2dq %xmm0, %xmm0
; X64_AVX1-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64_AVX1-NEXT:    retq
;
; X32_AVX1-LABEL: trunc_signed_f32_no_fast_math:
; X32_AVX1:       # %bb.0:
; X32_AVX1-NEXT:    pushl %eax
; X32_AVX1-NEXT:    .cfi_def_cfa_offset 8
; X32_AVX1-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32_AVX1-NEXT:    vcvttps2dq %xmm0, %xmm0
; X32_AVX1-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32_AVX1-NEXT:    vmovss %xmm0, (%esp)
; X32_AVX1-NEXT:    flds (%esp)
; X32_AVX1-NEXT:    popl %eax
; X32_AVX1-NEXT:    .cfi_def_cfa_offset 4
; X32_AVX1-NEXT:    retl
  %i = fptosi float %x to i32
  %r = sitofp i32 %i to float
  ret float %r
}

; Without -0.0, it is ok to use roundss if it is available.

define float @trunc_signed_f32_nsz(float %x) #0 {
; SSE2-LABEL: trunc_signed_f32_nsz:
; SSE2:       # %bb.0:
; SSE2-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE2-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: trunc_signed_f32_nsz:
; SSE41:       # %bb.0:
; SSE41-NEXT:    roundss $11, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; X64_AVX1-LABEL: trunc_signed_f32_nsz:
; X64_AVX1:       # %bb.0:
; X64_AVX1-NEXT:    vroundss $11, %xmm0, %xmm0, %xmm0
; X64_AVX1-NEXT:    retq
;
; X32_AVX1-LABEL: trunc_signed_f32_nsz:
; X32_AVX1:       # %bb.0:
; X32_AVX1-NEXT:    pushl %eax
; X32_AVX1-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32_AVX1-NEXT:    vroundss $11, %xmm0, %xmm0, %xmm0
; X32_AVX1-NEXT:    vmovss %xmm0, (%esp)
; X32_AVX1-NEXT:    flds (%esp)
; X32_AVX1-NEXT:    popl %eax
; X32_AVX1-NEXT:    retl
  %i = fptosi float %x to i32
  %r = sitofp i32 %i to float
  ret float %r
}

define double @trunc_signed32_f64_no_fast_math(double %x) {
; SSE-LABEL: trunc_signed32_f64_no_fast_math:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttpd2dq %xmm0, %xmm0
; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE-NEXT:    retq
;
; X64_AVX1-LABEL: trunc_signed32_f64_no_fast_math:
; X64_AVX1:       # %bb.0:
; X64_AVX1-NEXT:    vcvttpd2dq %xmm0, %xmm0
; X64_AVX1-NEXT:    vcvtdq2pd %xmm0, %xmm0
; X64_AVX1-NEXT:    retq
;
; X32_AVX1-LABEL: trunc_signed32_f64_no_fast_math:
; X32_AVX1:       # %bb.0:
; X32_AVX1-NEXT:    pushl %ebp
; X32_AVX1-NEXT:    .cfi_def_cfa_offset 8
; X32_AVX1-NEXT:    .cfi_offset %ebp, -8
; X32_AVX1-NEXT:    movl %esp, %ebp
; X32_AVX1-NEXT:    .cfi_def_cfa_register %ebp
; X32_AVX1-NEXT:    andl $-8, %esp
; X32_AVX1-NEXT:    subl $8, %esp
; X32_AVX1-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32_AVX1-NEXT:    vcvttpd2dq %xmm0, %xmm0
; X32_AVX1-NEXT:    vcvtdq2pd %xmm0, %xmm0
; X32_AVX1-NEXT:    vmovlps %xmm0, (%esp)
; X32_AVX1-NEXT:    fldl (%esp)
; X32_AVX1-NEXT:    movl %ebp, %esp
; X32_AVX1-NEXT:    popl %ebp
; X32_AVX1-NEXT:    .cfi_def_cfa %esp, 4
; X32_AVX1-NEXT:    retl
  %i = fptosi double %x to i32
  %r = sitofp i32 %i to double
  ret double %r
}

define double @trunc_signed32_f64_nsz(double %x) #0 {
; SSE2-LABEL: trunc_signed32_f64_nsz:
; SSE2:       # %bb.0:
; SSE2-NEXT:    cvttpd2dq %xmm0, %xmm0
; SSE2-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: trunc_signed32_f64_nsz:
; SSE41:       # %bb.0:
; SSE41-NEXT:    roundsd $11, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; X64_AVX1-LABEL: trunc_signed32_f64_nsz:
; X64_AVX1:       # %bb.0:
; X64_AVX1-NEXT:    vroundsd $11, %xmm0, %xmm0, %xmm0
; X64_AVX1-NEXT:    retq
;
; X32_AVX1-LABEL: trunc_signed32_f64_nsz:
; X32_AVX1:       # %bb.0:
; X32_AVX1-NEXT:    pushl %ebp
; X32_AVX1-NEXT:    movl %esp, %ebp
; X32_AVX1-NEXT:    andl $-8, %esp
; X32_AVX1-NEXT:    subl $8, %esp
; X32_AVX1-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32_AVX1-NEXT:    vroundsd $11, %xmm0, %xmm0, %xmm0
; X32_AVX1-NEXT:    vmovsd %xmm0, (%esp)
; X32_AVX1-NEXT:    fldl (%esp)
; X32_AVX1-NEXT:    movl %ebp, %esp
; X32_AVX1-NEXT:    popl %ebp
; X32_AVX1-NEXT:    retl
  %i = fptosi double %x to i32
  %r = sitofp i32 %i to double
  ret double %r
}

define double @trunc_f32_signed32_f64_no_fast_math(float %x) {
; SSE-LABEL: trunc_f32_signed32_f64_no_fast_math:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE-NEXT:    retq
;
; X64_AVX1-LABEL: trunc_f32_signed32_f64_no_fast_math:
; X64_AVX1:       # %bb.0:
; X64_AVX1-NEXT:    vcvttps2dq %xmm0, %xmm0
; X64_AVX1-NEXT:    vcvtdq2pd %xmm0, %xmm0
; X64_AVX1-NEXT:    retq
;
; X32_AVX1-LABEL: trunc_f32_signed32_f64_no_fast_math:
; X32_AVX1:       # %bb.0:
; X32_AVX1-NEXT:    pushl %ebp
; X32_AVX1-NEXT:    .cfi_def_cfa_offset 8
; X32_AVX1-NEXT:    .cfi_offset %ebp, -8
; X32_AVX1-NEXT:    movl %esp, %ebp
; X32_AVX1-NEXT:    .cfi_def_cfa_register %ebp
; X32_AVX1-NEXT:    andl $-8, %esp
; X32_AVX1-NEXT:    subl $8, %esp
; X32_AVX1-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32_AVX1-NEXT:    vcvttps2dq %xmm0, %xmm0
; X32_AVX1-NEXT:    vcvtdq2pd %xmm0, %xmm0
; X32_AVX1-NEXT:    vmovlps %xmm0, (%esp)
; X32_AVX1-NEXT:    fldl (%esp)
; X32_AVX1-NEXT:    movl %ebp, %esp
; X32_AVX1-NEXT:    popl %ebp
; X32_AVX1-NEXT:    .cfi_def_cfa %esp, 4
; X32_AVX1-NEXT:    retl
  %i = fptosi float %x to i32
  %r = sitofp i32 %i to double
  ret double %r
}

define double @trunc_f32_signed32_f64_nsz(float %x) #0 {
; SSE-LABEL: trunc_f32_signed32_f64_nsz:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
; SSE-NEXT:    retq
;
; X64_AVX1-LABEL: trunc_f32_signed32_f64_nsz:
; X64_AVX1:       # %bb.0:
; X64_AVX1-NEXT:    vcvttps2dq %xmm0, %xmm0
; X64_AVX1-NEXT:    vcvtdq2pd %xmm0, %xmm0
; X64_AVX1-NEXT:    retq
;
; X32_AVX1-LABEL: trunc_f32_signed32_f64_nsz:
; X32_AVX1:       # %bb.0:
; X32_AVX1-NEXT:    pushl %ebp
; X32_AVX1-NEXT:    movl %esp, %ebp
; X32_AVX1-NEXT:    andl $-8, %esp
; X32_AVX1-NEXT:    subl $8, %esp
; X32_AVX1-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32_AVX1-NEXT:    vcvttps2dq %xmm0, %xmm0
; X32_AVX1-NEXT:    vcvtdq2pd %xmm0, %xmm0
; X32_AVX1-NEXT:    vmovlps %xmm0, (%esp)
; X32_AVX1-NEXT:    fldl (%esp)
; X32_AVX1-NEXT:    movl %ebp, %esp
; X32_AVX1-NEXT:    popl %ebp
; X32_AVX1-NEXT:    retl
  %i = fptosi float %x to i32
  %r = sitofp i32 %i to double
  ret double %r
}

define float @trunc_f64_signed32_f32_no_fast_math(double %x) {
; SSE-LABEL: trunc_f64_signed32_f32_no_fast_math:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttpd2dq %xmm0, %xmm0
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; X64_AVX1-LABEL: trunc_f64_signed32_f32_no_fast_math:
; X64_AVX1:       # %bb.0:
; X64_AVX1-NEXT:    vcvttpd2dq %xmm0, %xmm0
; X64_AVX1-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64_AVX1-NEXT:    retq
;
; X32_AVX1-LABEL: trunc_f64_signed32_f32_no_fast_math:
; X32_AVX1:       # %bb.0:
; X32_AVX1-NEXT:    pushl %eax
; X32_AVX1-NEXT:    .cfi_def_cfa_offset 8
; X32_AVX1-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32_AVX1-NEXT:    vcvttpd2dq %xmm0, %xmm0
; X32_AVX1-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32_AVX1-NEXT:    vmovss %xmm0, (%esp)
; X32_AVX1-NEXT:    flds (%esp)
; X32_AVX1-NEXT:    popl %eax
; X32_AVX1-NEXT:    .cfi_def_cfa_offset 4
; X32_AVX1-NEXT:    retl
  %i = fptosi double %x to i32
  %r = sitofp i32 %i to float
  ret float %r
}

define float @trunc_f64_signed32_f32_nsz(double %x) #0 {
; SSE-LABEL: trunc_f64_signed32_f32_nsz:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttpd2dq %xmm0, %xmm0
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; X64_AVX1-LABEL: trunc_f64_signed32_f32_nsz:
; X64_AVX1:       # %bb.0:
; X64_AVX1-NEXT:    vcvttpd2dq %xmm0, %xmm0
; X64_AVX1-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64_AVX1-NEXT:    retq
;
; X32_AVX1-LABEL: trunc_f64_signed32_f32_nsz:
; X32_AVX1:       # %bb.0:
; X32_AVX1-NEXT:    pushl %eax
; X32_AVX1-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32_AVX1-NEXT:    vcvttpd2dq %xmm0, %xmm0
; X32_AVX1-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32_AVX1-NEXT:    vmovss %xmm0, (%esp)
; X32_AVX1-NEXT:    flds (%esp)
; X32_AVX1-NEXT:    popl %eax
; X32_AVX1-NEXT:    retl
  %i = fptosi double %x to i32
  %r = sitofp i32 %i to float
  ret float %r
}

define double @trunc_signed_f64_no_fast_math(double %x) {
; SSE-LABEL: trunc_signed_f64_no_fast_math:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    cvtsi2sd %rax, %xmm0
; SSE-NEXT:    retq
;
; X64_AVX1-LABEL: trunc_signed_f64_no_fast_math:
; X64_AVX1:       # %bb.0:
; X64_AVX1-NEXT:    vcvttsd2si %xmm0, %rax
; X64_AVX1-NEXT:    vcvtsi2sd %rax, %xmm1, %xmm0
; X64_AVX1-NEXT:    retq
;
; X32_AVX1-LABEL: trunc_signed_f64_no_fast_math:
; X32_AVX1:       # %bb.0:
; X32_AVX1-NEXT:    pushl %ebp
; X32_AVX1-NEXT:    .cfi_def_cfa_offset 8
; X32_AVX1-NEXT:    .cfi_offset %ebp, -8
; X32_AVX1-NEXT:    movl %esp, %ebp
; X32_AVX1-NEXT:    .cfi_def_cfa_register %ebp
; X32_AVX1-NEXT:    andl $-8, %esp
; X32_AVX1-NEXT:    subl $24, %esp
; X32_AVX1-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32_AVX1-NEXT:    vmovsd %xmm0, (%esp)
; X32_AVX1-NEXT:    fldl (%esp)
; X32_AVX1-NEXT:    fisttpll (%esp)
; X32_AVX1-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32_AVX1-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
; X32_AVX1-NEXT:    fildll {{[0-9]+}}(%esp)
; X32_AVX1-NEXT:    fstpl {{[0-9]+}}(%esp)
; X32_AVX1-NEXT:    fldl {{[0-9]+}}(%esp)
; X32_AVX1-NEXT:    movl %ebp, %esp
; X32_AVX1-NEXT:    popl %ebp
; X32_AVX1-NEXT:    .cfi_def_cfa %esp, 4
; X32_AVX1-NEXT:    retl
  %i = fptosi double %x to i64
  %r = sitofp i64 %i to double
  ret double %r
}

define double @trunc_signed_f64_nsz(double %x) #0 {
; SSE2-LABEL: trunc_signed_f64_nsz:
; SSE2:       # %bb.0:
; SSE2-NEXT:    cvttsd2si %xmm0, %rax
; SSE2-NEXT:    xorps %xmm0, %xmm0
; SSE2-NEXT:    cvtsi2sd %rax, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: trunc_signed_f64_nsz:
; SSE41:       # %bb.0:
; SSE41-NEXT:    roundsd $11, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; X64_AVX1-LABEL: trunc_signed_f64_nsz:
; X64_AVX1:       # %bb.0:
; X64_AVX1-NEXT:    vroundsd $11, %xmm0, %xmm0, %xmm0
; X64_AVX1-NEXT:    retq
;
; X32_AVX1-LABEL: trunc_signed_f64_nsz:
; X32_AVX1:       # %bb.0:
; X32_AVX1-NEXT:    pushl %ebp
; X32_AVX1-NEXT:    movl %esp, %ebp
; X32_AVX1-NEXT:    andl $-8, %esp
; X32_AVX1-NEXT:    subl $8, %esp
; X32_AVX1-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32_AVX1-NEXT:    vroundsd $11, %xmm0, %xmm0, %xmm0
; X32_AVX1-NEXT:    vmovsd %xmm0, (%esp)
; X32_AVX1-NEXT:    fldl (%esp)
; X32_AVX1-NEXT:    movl %ebp, %esp
; X32_AVX1-NEXT:    popl %ebp
; X32_AVX1-NEXT:    retl
  %i = fptosi double %x to i64
  %r = sitofp i64 %i to double
  ret double %r
}

define <4 x float> @trunc_signed_v4f32_nsz(<4 x float> %x) #0 {
; SSE2-LABEL: trunc_signed_v4f32_nsz:
; SSE2:       # %bb.0:
; SSE2-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE2-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: trunc_signed_v4f32_nsz:
; SSE41:       # %bb.0:
; SSE41-NEXT:    roundps $11, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; X64_AVX1-LABEL: trunc_signed_v4f32_nsz:
; X64_AVX1:       # %bb.0:
; X64_AVX1-NEXT:    vroundps $11, %xmm0, %xmm0
; X64_AVX1-NEXT:    retq
;
; X32_AVX1-LABEL: trunc_signed_v4f32_nsz:
; X32_AVX1:       # %bb.0:
; X32_AVX1-NEXT:    vroundps $11, %xmm0, %xmm0
; X32_AVX1-NEXT:    retl
  %i = fptosi <4 x float> %x to <4 x i32>
  %r = sitofp <4 x i32> %i to <4 x float>
  ret <4 x float> %r
}

define <2 x double> @trunc_signed_v2f64_nsz(<2 x double> %x) #0 {
; SSE2-LABEL: trunc_signed_v2f64_nsz:
; SSE2:       # %bb.0:
; SSE2-NEXT:    cvttsd2si %xmm0, %rax
; SSE2-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE2-NEXT:    cvttsd2si %xmm0, %rcx
; SSE2-NEXT:    xorps %xmm0, %xmm0
; SSE2-NEXT:    cvtsi2sd %rax, %xmm0
; SSE2-NEXT:    cvtsi2sd %rcx, %xmm1
; SSE2-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: trunc_signed_v2f64_nsz:
; SSE41:       # %bb.0:
; SSE41-NEXT:    roundpd $11, %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; X64_AVX1-LABEL: trunc_signed_v2f64_nsz:
; X64_AVX1:       # %bb.0:
; X64_AVX1-NEXT:    vroundpd $11, %xmm0, %xmm0
; X64_AVX1-NEXT:    retq
;
; X32_AVX1-LABEL: trunc_signed_v2f64_nsz:
; X32_AVX1:       # %bb.0:
; X32_AVX1-NEXT:    vroundpd $11, %xmm0, %xmm0
; X32_AVX1-NEXT:    retl
  %i = fptosi <2 x double> %x to <2 x i64>
  %r = sitofp <2 x i64> %i to <2 x double>
  ret <2 x double> %r
}

define <4 x double> @trunc_signed_v4f64_nsz(<4 x double> %x) #0 {
; SSE2-LABEL: trunc_signed_v4f64_nsz:
; SSE2:       # %bb.0:
; SSE2-NEXT:    cvttsd2si %xmm1, %rax
; SSE2-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
; SSE2-NEXT:    cvttsd2si %xmm1, %rcx
; SSE2-NEXT:    cvttsd2si %xmm0, %rdx
; SSE2-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE2-NEXT:    cvttsd2si %xmm0, %rsi
; SSE2-NEXT:    xorps %xmm0, %xmm0
; SSE2-NEXT:    cvtsi2sd %rdx, %xmm0
; SSE2-NEXT:    xorps %xmm1, %xmm1
; SSE2-NEXT:    cvtsi2sd %rsi, %xmm1
; SSE2-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    xorps %xmm1, %xmm1
; SSE2-NEXT:    cvtsi2sd %rax, %xmm1
; SSE2-NEXT:    cvtsi2sd %rcx, %xmm2
; SSE2-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: trunc_signed_v4f64_nsz:
; SSE41:       # %bb.0:
; SSE41-NEXT:    roundpd $11, %xmm0, %xmm0
; SSE41-NEXT:    roundpd $11, %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; X64_AVX1-LABEL: trunc_signed_v4f64_nsz:
; X64_AVX1:       # %bb.0:
; X64_AVX1-NEXT:    vroundpd $11, %ymm0, %ymm0
; X64_AVX1-NEXT:    retq
;
; X32_AVX1-LABEL: trunc_signed_v4f64_nsz:
; X32_AVX1:       # %bb.0:
; X32_AVX1-NEXT:    vroundpd $11, %ymm0, %ymm0
; X32_AVX1-NEXT:    retl
  %i = fptosi <4 x double> %x to <4 x i64>
  %r = sitofp <4 x i64> %i to <4 x double>
  ret <4 x double> %r
}

; The FTRUNC ("round**" x86 asm) fold relies on UB in the case of overflow.
; This used to be guarded with an attribute check. That allowed existing
; code to continue working based on its assumptions that float->int
; overflow had saturating behavior.
;
; Now, we expect a front-end to use IR intrinsics if it wants to avoid this
; transform.
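;
; For example, the opt-out pattern used by the tests below is:
;   %i = call i32 @llvm.fptoui.sat.i32.f32(float %x)
;   %r = uitofp i32 %i to float
; instead of the plain fptoui + uitofp pair used by the earlier tests.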

define float @trunc_unsigned_f32_disable_via_intrinsic(float %x) #0 {
; SSE-LABEL: trunc_unsigned_f32_disable_via_intrinsic:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    xorl %ecx, %ecx
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    ucomiss %xmm1, %xmm0
; SSE-NEXT:    cmovael %eax, %ecx
; SSE-NEXT:    ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    movl $-1, %eax
; SSE-NEXT:    cmovbel %ecx, %eax
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    cvtsi2ss %rax, %xmm0
; SSE-NEXT:    retq
;
; X64_AVX1-LABEL: trunc_unsigned_f32_disable_via_intrinsic:
; X64_AVX1:       # %bb.0:
; X64_AVX1-NEXT:    vcvttss2si %xmm0, %rax
; X64_AVX1-NEXT:    xorl %ecx, %ecx
; X64_AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; X64_AVX1-NEXT:    vucomiss %xmm1, %xmm0
; X64_AVX1-NEXT:    cmovael %eax, %ecx
; X64_AVX1-NEXT:    vucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64_AVX1-NEXT:    movl $-1, %eax
; X64_AVX1-NEXT:    cmovbel %ecx, %eax
; X64_AVX1-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm0
; X64_AVX1-NEXT:    retq
;
; X32_AVX1-LABEL: trunc_unsigned_f32_disable_via_intrinsic:
; X32_AVX1:       # %bb.0:
; X32_AVX1-NEXT:    pushl %eax
; X32_AVX1-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32_AVX1-NEXT:    vcvttss2si %xmm0, %eax
; X32_AVX1-NEXT:    movl %eax, %ecx
; X32_AVX1-NEXT:    sarl $31, %ecx
; X32_AVX1-NEXT:    vsubss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm1
; X32_AVX1-NEXT:    vcvttss2si %xmm1, %edx
; X32_AVX1-NEXT:    andl %ecx, %edx
; X32_AVX1-NEXT:    orl %eax, %edx
; X32_AVX1-NEXT:    xorl %eax, %eax
; X32_AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; X32_AVX1-NEXT:    vucomiss %xmm1, %xmm0
; X32_AVX1-NEXT:    cmovael %edx, %eax
; X32_AVX1-NEXT:    vucomiss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X32_AVX1-NEXT:    movl $-1, %ecx
; X32_AVX1-NEXT:    cmovbel %eax, %ecx
; X32_AVX1-NEXT:    vmovd %ecx, %xmm0
; X32_AVX1-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X32_AVX1-NEXT:    vsubsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X32_AVX1-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0
; X32_AVX1-NEXT:    vmovss %xmm0, (%esp)
; X32_AVX1-NEXT:    flds (%esp)
; X32_AVX1-NEXT:    popl %eax
; X32_AVX1-NEXT:    retl
  %i = call i32 @llvm.fptoui.sat.i32.f32(float %x)
  %r = uitofp i32 %i to float
  ret float %r
}

define double @trunc_signed_f64_disable_via_intrinsic(double %x) #0 {
; SSE-LABEL: trunc_signed_f64_disable_via_intrinsic:
; SSE:       # %bb.0:
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    ucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF
; SSE-NEXT:    cmovbeq %rax, %rcx
; SSE-NEXT:    xorl %eax, %eax
; SSE-NEXT:    ucomisd %xmm0, %xmm0
; SSE-NEXT:    cmovnpq %rcx, %rax
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    cvtsi2sd %rax, %xmm0
; SSE-NEXT:    retq
;
; X64_AVX1-LABEL: trunc_signed_f64_disable_via_intrinsic:
; X64_AVX1:       # %bb.0:
; X64_AVX1-NEXT:    vcvttsd2si %xmm0, %rax
; X64_AVX1-NEXT:    vucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64_AVX1-NEXT:    movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF
; X64_AVX1-NEXT:    cmovbeq %rax, %rcx
; X64_AVX1-NEXT:    xorl %eax, %eax
; X64_AVX1-NEXT:    vucomisd %xmm0, %xmm0
; X64_AVX1-NEXT:    cmovnpq %rcx, %rax
; X64_AVX1-NEXT:    vcvtsi2sd %rax, %xmm1, %xmm0
; X64_AVX1-NEXT:    retq
;
; X32_AVX1-LABEL: trunc_signed_f64_disable_via_intrinsic:
; X32_AVX1:       # %bb.0:
; X32_AVX1-NEXT:    pushl %ebp
; X32_AVX1-NEXT:    movl %esp, %ebp
; X32_AVX1-NEXT:    pushl %esi
; X32_AVX1-NEXT:    andl $-8, %esp
; X32_AVX1-NEXT:    subl $32, %esp
; X32_AVX1-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32_AVX1-NEXT:    vmovsd %xmm0, (%esp)
; X32_AVX1-NEXT:    fldl (%esp)
; X32_AVX1-NEXT:    fisttpll (%esp)
; X32_AVX1-NEXT:    xorl %eax, %eax
; X32_AVX1-NEXT:    vucomisd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X32_AVX1-NEXT:    movl $-2147483648, %ecx # imm = 0x80000000
; X32_AVX1-NEXT:    movl $0, %edx
; X32_AVX1-NEXT:    jb .LBB19_2
; X32_AVX1-NEXT:  # %bb.1:
; X32_AVX1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32_AVX1-NEXT:    movl (%esp), %edx
; X32_AVX1-NEXT:  .LBB19_2:
; X32_AVX1-NEXT:    vucomisd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X32_AVX1-NEXT:    movl $-1, %esi
; X32_AVX1-NEXT:    cmovbel %edx, %esi
; X32_AVX1-NEXT:    movl $2147483647, %edx # imm = 0x7FFFFFFF
; X32_AVX1-NEXT:    cmovbel %ecx, %edx
; X32_AVX1-NEXT:    vucomisd %xmm0, %xmm0
; X32_AVX1-NEXT:    cmovpl %eax, %edx
; X32_AVX1-NEXT:    cmovpl %eax, %esi
; X32_AVX1-NEXT:    vmovd %esi, %xmm0
; X32_AVX1-NEXT:    vpinsrd $1, %edx, %xmm0, %xmm0
; X32_AVX1-NEXT:    vmovq %xmm0, {{[0-9]+}}(%esp)
; X32_AVX1-NEXT:    fildll {{[0-9]+}}(%esp)
; X32_AVX1-NEXT:    fstpl {{[0-9]+}}(%esp)
; X32_AVX1-NEXT:    fldl {{[0-9]+}}(%esp)
; X32_AVX1-NEXT:    leal -4(%ebp), %esp
; X32_AVX1-NEXT:    popl %esi
; X32_AVX1-NEXT:    popl %ebp
; X32_AVX1-NEXT:    retl
  %i = call i64 @llvm.fptosi.sat.i64.f64(double %x)
  %r = sitofp i64 %i to double
  ret double %r
}

attributes #0 = { nounwind "no-signed-zeros-fp-math"="true" }