; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64
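; f1: with no fence, fast-math reassociation folds (a + a) + (a + a) into a
; single multiply by 4.0.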
define double @f1(double %a) {
; X86-LABEL: f1:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    .cfi_offset %ebp, -8
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    .cfi_def_cfa_register %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    mulsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT:    movsd %xmm0, (%esp)
; X86-NEXT:    fldl (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    .cfi_def_cfa %esp, 4
; X86-NEXT:    retl
;
; X64-LABEL: f1:
; X64:       # %bb.0:
; X64-NEXT:    mulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT:    retq
  %1 = fadd fast double %a, %a
  %2 = fadd fast double %a, %a
  %3 = fadd fast double %1, %2
  ret double %3
}
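; f2: the arithmetic fence keeps %t opaque to reassociation, so the result stays
; (a + a) + fence(a + a) instead of folding into a multiply by 4.0.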
define double @f2(double %a) {
; X86-LABEL: f2:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    .cfi_offset %ebp, -8
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    .cfi_def_cfa_register %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    addsd %xmm0, %xmm0
; X86-NEXT:    movapd %xmm0, %xmm1
; X86-NEXT:    #ARITH_FENCE
; X86-NEXT:    addsd %xmm0, %xmm1
; X86-NEXT:    movsd %xmm1, (%esp)
; X86-NEXT:    fldl (%esp)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    .cfi_def_cfa %esp, 4
; X86-NEXT:    retl
;
; X64-LABEL: f2:
; X64:       # %bb.0:
; X64-NEXT:    addsd %xmm0, %xmm0
; X64-NEXT:    movapd %xmm0, %xmm1
; X64-NEXT:    #ARITH_FENCE
; X64-NEXT:    addsd %xmm1, %xmm0
; X64-NEXT:    retq
  %1 = fadd fast double %a, %a
  %t = call double @llvm.arithmetic.fence.f64(double %1)
  %2 = fadd fast double %a, %a
  %3 = fadd fast double %t, %2
  ret double %3
}
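; f3: the <2 x float> version of f1; the sums still fold into a single mulps by
; a splat of 4.0.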
define <2 x float> @f3(<2 x float> %a) {
; X86-LABEL: f3:
; X86:       # %bb.0:
; X86-NEXT:    mulps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: f3:
; X64:       # %bb.0:
; X64-NEXT:    mulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT:    retq
  %1 = fadd fast <2 x float> %a, %a
  %2 = fadd fast <2 x float> %a, %a
  %3 = fadd fast <2 x float> %1, %2
  ret <2 x float> %3
}
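; f4: the <2 x float> version of f2; the fence keeps the fenced copy of a + a
; separate, so the sums are combined with addps instead of a mulps by 4.0.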
define <2 x float> @f4(<2 x float> %a) {
; X86-LABEL: f4:
; X86:       # %bb.0:
; X86-NEXT:    addps %xmm0, %xmm0
; X86-NEXT:    movaps %xmm0, %xmm1
; X86-NEXT:    #ARITH_FENCE
; X86-NEXT:    addps %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: f4:
; X64:       # %bb.0:
; X64-NEXT:    addps %xmm0, %xmm0
; X64-NEXT:    movaps %xmm0, %xmm1
; X64-NEXT:    #ARITH_FENCE
; X64-NEXT:    addps %xmm1, %xmm0
; X64-NEXT:    retq
  %1 = fadd fast <2 x float> %a, %a
  %t = call <2 x float> @llvm.arithmetic.fence.v2f32(<2 x float> %1)
  %2 = fadd fast <2 x float> %a, %a
  %3 = fadd fast <2 x float> %t, %2
  ret <2 x float> %3
}
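; f5: the <8 x float> version of f1; without AVX the value is split across two
; XMM registers, and each half folds into a mulps by a splat of 4.0.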
define <8 x float> @f5(<8 x float> %a) {
; X86-LABEL: f5:
; X86:       # %bb.0:
; X86-NEXT:    movaps {{.*#+}} xmm2 = [4.0E+0,4.0E+0,4.0E+0,4.0E+0]
; X86-NEXT:    mulps %xmm2, %xmm0
; X86-NEXT:    mulps %xmm2, %xmm1
; X86-NEXT:    retl
;
; X64-LABEL: f5:
; X64:       # %bb.0:
; X64-NEXT:    movaps {{.*#+}} xmm2 = [4.0E+0,4.0E+0,4.0E+0,4.0E+0]
; X64-NEXT:    mulps %xmm2, %xmm0
; X64-NEXT:    mulps %xmm2, %xmm1
; X64-NEXT:    retq
  %1 = fadd fast <8 x float> %a, %a
  %2 = fadd fast <8 x float> %a, %a
  %3 = fadd fast <8 x float> %1, %2
  ret <8 x float> %3
}
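; f6: the <8 x float> version of f2; each XMM half gets its own #ARITH_FENCE, so
; neither half folds into a multiply.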
define <8 x float> @f6(<8 x float> %a) {
; X86-LABEL: f6:
; X86:       # %bb.0:
; X86-NEXT:    addps %xmm0, %xmm0
; X86-NEXT:    addps %xmm1, %xmm1
; X86-NEXT:    movaps %xmm1, %xmm2
; X86-NEXT:    #ARITH_FENCE
; X86-NEXT:    movaps %xmm0, %xmm3
; X86-NEXT:    #ARITH_FENCE
; X86-NEXT:    addps %xmm3, %xmm0
; X86-NEXT:    addps %xmm2, %xmm1
; X86-NEXT:    retl
;
; X64-LABEL: f6:
; X64:       # %bb.0:
; X64-NEXT:    addps %xmm0, %xmm0
; X64-NEXT:    addps %xmm1, %xmm1
; X64-NEXT:    movaps %xmm1, %xmm2
; X64-NEXT:    #ARITH_FENCE
; X64-NEXT:    movaps %xmm0, %xmm3
; X64-NEXT:    #ARITH_FENCE
; X64-NEXT:    addps %xmm3, %xmm0
; X64-NEXT:    addps %xmm2, %xmm1
; X64-NEXT:    retq
  %1 = fadd fast <8 x float> %a, %a
  %t = call <8 x float> @llvm.arithmetic.fence.v8f32(<8 x float> %1)
  %2 = fadd fast <8 x float> %a, %a
  %3 = fadd fast <8 x float> %t, %2
  ret <8 x float> %3
}

declare float @llvm.arithmetic.fence.f32(float)
declare double @llvm.arithmetic.fence.f64(double)
declare <2 x float> @llvm.arithmetic.fence.v2f32(<2 x float>)
declare <8 x float> @llvm.arithmetic.fence.v8f32(<8 x float>)