1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i686-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=X86-NOSSE
3; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=sse -verify-machineinstrs | FileCheck %s --check-prefix=X86-SSE1
4; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=sse2 -verify-machineinstrs | FileCheck %s --check-prefix=X86-SSE2
5; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx -verify-machineinstrs | FileCheck %s --check-prefix=X86-AVX
6; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512f -verify-machineinstrs | FileCheck %s --check-prefix=X86-AVX
7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=X64-SSE
8; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx -verify-machineinstrs | FileCheck %s --check-prefix=X64-AVX
9; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f -verify-machineinstrs | FileCheck %s --check-prefix=X64-AVX
10
11; ----- FADD -----
12
13define dso_local void @fadd_32r(ptr %loc, float %val) nounwind {
14; X86-NOSSE-LABEL: fadd_32r:
15; X86-NOSSE:       # %bb.0:
16; X86-NOSSE-NEXT:    subl $8, %esp
17; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
18; X86-NOSSE-NEXT:    movl (%eax), %ecx
19; X86-NOSSE-NEXT:    movl %ecx, (%esp)
20; X86-NOSSE-NEXT:    flds (%esp)
21; X86-NOSSE-NEXT:    fadds {{[0-9]+}}(%esp)
22; X86-NOSSE-NEXT:    fstps {{[0-9]+}}(%esp)
23; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
24; X86-NOSSE-NEXT:    movl %ecx, (%eax)
25; X86-NOSSE-NEXT:    addl $8, %esp
26; X86-NOSSE-NEXT:    retl
27;
28; X86-SSE1-LABEL: fadd_32r:
29; X86-SSE1:       # %bb.0:
30; X86-SSE1-NEXT:    subl $8, %esp
31; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
32; X86-SSE1-NEXT:    movl (%eax), %ecx
33; X86-SSE1-NEXT:    movl %ecx, (%esp)
34; X86-SSE1-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
35; X86-SSE1-NEXT:    addss {{[0-9]+}}(%esp), %xmm0
36; X86-SSE1-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
37; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
38; X86-SSE1-NEXT:    movl %ecx, (%eax)
39; X86-SSE1-NEXT:    addl $8, %esp
40; X86-SSE1-NEXT:    retl
41;
42; X86-SSE2-LABEL: fadd_32r:
43; X86-SSE2:       # %bb.0:
44; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
45; X86-SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
46; X86-SSE2-NEXT:    addss (%eax), %xmm0
47; X86-SSE2-NEXT:    movss %xmm0, (%eax)
48; X86-SSE2-NEXT:    retl
49;
50; X86-AVX-LABEL: fadd_32r:
51; X86-AVX:       # %bb.0:
52; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
53; X86-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
54; X86-AVX-NEXT:    vaddss (%eax), %xmm0, %xmm0
55; X86-AVX-NEXT:    vmovss %xmm0, (%eax)
56; X86-AVX-NEXT:    retl
57;
58; X64-SSE-LABEL: fadd_32r:
59; X64-SSE:       # %bb.0:
60; X64-SSE-NEXT:    addss (%rdi), %xmm0
61; X64-SSE-NEXT:    movss %xmm0, (%rdi)
62; X64-SSE-NEXT:    retq
63;
64; X64-AVX-LABEL: fadd_32r:
65; X64-AVX:       # %bb.0:
66; X64-AVX-NEXT:    vaddss (%rdi), %xmm0, %xmm0
67; X64-AVX-NEXT:    vmovss %xmm0, (%rdi)
68; X64-AVX-NEXT:    retq
69  %1 = load atomic i32, ptr %loc seq_cst, align 4
70  %2 = bitcast i32 %1 to float
71  %add = fadd float %2, %val
72  %3 = bitcast float %add to i32
73  store atomic i32 %3, ptr %loc release, align 4
74  ret void
75}
76
; 64-bit variant of fadd_32r: atomic i64 load (seq_cst) / store (release)
; around a double fadd with the %val argument. On 32-bit x86 a single
; 64-bit atomic access needs fildll/fistpll (NOSSE), SSE movlps tricks
; (SSE1), or movsd (SSE2/AVX); on x86-64 it is a plain addsd/movsd.
define dso_local void @fadd_64r(ptr %loc, double %val) nounwind {
; X86-NOSSE-LABEL: fadd_64r:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    pushl %ebp
; X86-NOSSE-NEXT:    movl %esp, %ebp
; X86-NOSSE-NEXT:    andl $-8, %esp
; X86-NOSSE-NEXT:    subl $32, %esp
; X86-NOSSE-NEXT:    movl 8(%ebp), %eax
; X86-NOSSE-NEXT:    fildll (%eax)
; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fldl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    faddl 12(%ebp)
; X86-NOSSE-NEXT:    fstpl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT:    movl %ecx, (%esp)
; X86-NOSSE-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fildll (%esp)
; X86-NOSSE-NEXT:    fistpll (%eax)
; X86-NOSSE-NEXT:    movl %ebp, %esp
; X86-NOSSE-NEXT:    popl %ebp
; X86-NOSSE-NEXT:    retl
;
; X86-SSE1-LABEL: fadd_64r:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    pushl %ebp
; X86-SSE1-NEXT:    movl %esp, %ebp
; X86-SSE1-NEXT:    andl $-8, %esp
; X86-SSE1-NEXT:    subl $16, %esp
; X86-SSE1-NEXT:    movl 8(%ebp), %eax
; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
; X86-SSE1-NEXT:    xorps %xmm1, %xmm1
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
; X86-SSE1-NEXT:    movss %xmm1, (%esp)
; X86-SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
; X86-SSE1-NEXT:    movss %xmm1, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    fldl (%esp)
; X86-SSE1-NEXT:    faddl 12(%ebp)
; X86-SSE1-NEXT:    fstpl {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT:    movlps %xmm0, (%eax)
; X86-SSE1-NEXT:    movl %ebp, %esp
; X86-SSE1-NEXT:    popl %ebp
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: fadd_64r:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %ebp
; X86-SSE2-NEXT:    movl %esp, %ebp
; X86-SSE2-NEXT:    andl $-8, %esp
; X86-SSE2-NEXT:    subl $8, %esp
; X86-SSE2-NEXT:    movl 8(%ebp), %eax
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    addsd 12(%ebp), %xmm0
; X86-SSE2-NEXT:    movsd %xmm0, (%esp)
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    movlps %xmm0, (%eax)
; X86-SSE2-NEXT:    movl %ebp, %esp
; X86-SSE2-NEXT:    popl %ebp
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: fadd_64r:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    pushl %ebp
; X86-AVX-NEXT:    movl %esp, %ebp
; X86-AVX-NEXT:    andl $-8, %esp
; X86-AVX-NEXT:    subl $8, %esp
; X86-AVX-NEXT:    movl 8(%ebp), %eax
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vaddsd 12(%ebp), %xmm0, %xmm0
; X86-AVX-NEXT:    vmovsd %xmm0, (%esp)
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vmovlps %xmm0, (%eax)
; X86-AVX-NEXT:    movl %ebp, %esp
; X86-AVX-NEXT:    popl %ebp
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fadd_64r:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    addsd (%rdi), %xmm0
; X64-SSE-NEXT:    movsd %xmm0, (%rdi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fadd_64r:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vaddsd (%rdi), %xmm0, %xmm0
; X64-AVX-NEXT:    vmovsd %xmm0, (%rdi)
; X64-AVX-NEXT:    retq
  %1 = load atomic i64, ptr %loc seq_cst, align 8  ; atomic 64-bit read of the double's bits
  %2 = bitcast i64 %1 to double
  %add = fadd double %2, %val
  %3 = bitcast double %add to i64
  store atomic i64 %3, ptr %loc release, align 8   ; atomic 64-bit write of the result
  ret void
}
176
; Zero-initialized globals targeted by the fadd_32g/fadd_64g tests; names
; are matched literally by the CHECK lines.
@glob32 = dso_local global float 0.000000e+00, align 4
@glob64 = dso_local global double 0.000000e+00, align 8
179
180; Floating-point add to a global using an immediate.
; Add the constant 1.0 to @glob32 through monotonic atomic i32 load/store.
; Checks direct-to-global addressing (absolute on i686, RIP-relative on
; x86-64) and that NOSSE can use fld1 while SSE1 loads 1.0 from a
; constant pool.
define dso_local void @fadd_32g() nounwind {
; X86-NOSSE-LABEL: fadd_32g:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    subl $8, %esp
; X86-NOSSE-NEXT:    movl glob32, %eax
; X86-NOSSE-NEXT:    movl %eax, (%esp)
; X86-NOSSE-NEXT:    fld1
; X86-NOSSE-NEXT:    fadds (%esp)
; X86-NOSSE-NEXT:    fstps {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl %eax, glob32
; X86-NOSSE-NEXT:    addl $8, %esp
; X86-NOSSE-NEXT:    retl
;
; X86-SSE1-LABEL: fadd_32g:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    subl $8, %esp
; X86-SSE1-NEXT:    movl glob32, %eax
; X86-SSE1-NEXT:    movl %eax, (%esp)
; X86-SSE1-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE1-NEXT:    addss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE1-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    movl %eax, glob32
; X86-SSE1-NEXT:    addl $8, %esp
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: fadd_32g:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE2-NEXT:    addss glob32, %xmm0
; X86-SSE2-NEXT:    movss %xmm0, glob32
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: fadd_32g:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX-NEXT:    vaddss glob32, %xmm0, %xmm0
; X86-AVX-NEXT:    vmovss %xmm0, glob32
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fadd_32g:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-SSE-NEXT:    addss glob32(%rip), %xmm0
; X64-SSE-NEXT:    movss %xmm0, glob32(%rip)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fadd_32g:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-AVX-NEXT:    vaddss glob32(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vmovss %xmm0, glob32(%rip)
; X64-AVX-NEXT:    retq
  %i = load atomic i32, ptr @glob32 monotonic, align 4  ; weakest atomic ordering
  %f = bitcast i32 %i to float
  %add = fadd float %f, 1.000000e+00
  %s = bitcast float %add to i32
  store atomic i32 %s, ptr @glob32 monotonic, align 4
  ret void
}
242
; 64-bit variant of fadd_32g: monotonic atomic i64 load/store on @glob64
; around a double add of 1.0. 32-bit targets need fildll/fistpll or
; SSE movlps/movsd to keep the 8-byte access single-copy atomic.
define dso_local void @fadd_64g() nounwind {
; X86-NOSSE-LABEL: fadd_64g:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    pushl %ebp
; X86-NOSSE-NEXT:    movl %esp, %ebp
; X86-NOSSE-NEXT:    andl $-8, %esp
; X86-NOSSE-NEXT:    subl $32, %esp
; X86-NOSSE-NEXT:    fildll glob64
; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fld1
; X86-NOSSE-NEXT:    faddl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fstpl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl %eax, (%esp)
; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fildll (%esp)
; X86-NOSSE-NEXT:    fistpll glob64
; X86-NOSSE-NEXT:    movl %ebp, %esp
; X86-NOSSE-NEXT:    popl %ebp
; X86-NOSSE-NEXT:    retl
;
; X86-SSE1-LABEL: fadd_64g:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    pushl %ebp
; X86-SSE1-NEXT:    movl %esp, %ebp
; X86-SSE1-NEXT:    andl $-8, %esp
; X86-SSE1-NEXT:    subl $16, %esp
; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
; X86-SSE1-NEXT:    xorps %xmm1, %xmm1
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
; X86-SSE1-NEXT:    movss %xmm1, (%esp)
; X86-SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
; X86-SSE1-NEXT:    movss %xmm1, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    fld1
; X86-SSE1-NEXT:    faddl (%esp)
; X86-SSE1-NEXT:    fstpl {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT:    movlps %xmm0, glob64
; X86-SSE1-NEXT:    movl %ebp, %esp
; X86-SSE1-NEXT:    popl %ebp
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: fadd_64g:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %ebp
; X86-SSE2-NEXT:    movl %esp, %ebp
; X86-SSE2-NEXT:    andl $-8, %esp
; X86-SSE2-NEXT:    subl $8, %esp
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    addsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT:    movsd %xmm0, (%esp)
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    movlps %xmm0, glob64
; X86-SSE2-NEXT:    movl %ebp, %esp
; X86-SSE2-NEXT:    popl %ebp
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: fadd_64g:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    pushl %ebp
; X86-AVX-NEXT:    movl %esp, %ebp
; X86-AVX-NEXT:    andl $-8, %esp
; X86-AVX-NEXT:    subl $8, %esp
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vaddsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vmovsd %xmm0, (%esp)
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vmovlps %xmm0, glob64
; X86-AVX-NEXT:    movl %ebp, %esp
; X86-AVX-NEXT:    popl %ebp
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fadd_64g:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X64-SSE-NEXT:    addsd glob64(%rip), %xmm0
; X64-SSE-NEXT:    movsd %xmm0, glob64(%rip)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fadd_64g:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X64-AVX-NEXT:    vaddsd glob64(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vmovsd %xmm0, glob64(%rip)
; X64-AVX-NEXT:    retq
  %i = load atomic i64, ptr @glob64 monotonic, align 8
  %f = bitcast i64 %i to double
  %add = fadd double %f, 1.000000e+00
  %s = bitcast double %add to i64
  store atomic i64 %s, ptr @glob64 monotonic, align 8
  ret void
}
340
341; Floating-point add to a hard-coded immediate location using an immediate.
; Same as fadd_32g but the address is the hard-coded constant
; inttoptr(3735928559 = 0xDEADBEEF). i686 folds it as an absolute
; displacement (printed signed as -559038737); x86-64 materializes it in
; %rax first.
define dso_local void @fadd_32imm() nounwind {
; X86-NOSSE-LABEL: fadd_32imm:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    subl $8, %esp
; X86-NOSSE-NEXT:    movl -559038737, %eax
; X86-NOSSE-NEXT:    movl %eax, (%esp)
; X86-NOSSE-NEXT:    fld1
; X86-NOSSE-NEXT:    fadds (%esp)
; X86-NOSSE-NEXT:    fstps {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl %eax, -559038737
; X86-NOSSE-NEXT:    addl $8, %esp
; X86-NOSSE-NEXT:    retl
;
; X86-SSE1-LABEL: fadd_32imm:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    subl $8, %esp
; X86-SSE1-NEXT:    movl -559038737, %eax
; X86-SSE1-NEXT:    movl %eax, (%esp)
; X86-SSE1-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE1-NEXT:    addss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE1-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    movl %eax, -559038737
; X86-SSE1-NEXT:    addl $8, %esp
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: fadd_32imm:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE2-NEXT:    addss -559038737, %xmm0
; X86-SSE2-NEXT:    movss %xmm0, -559038737
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: fadd_32imm:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX-NEXT:    vaddss -559038737, %xmm0, %xmm0
; X86-AVX-NEXT:    vmovss %xmm0, -559038737
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fadd_32imm:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movl $3735928559, %eax # imm = 0xDEADBEEF
; X64-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-SSE-NEXT:    addss (%rax), %xmm0
; X64-SSE-NEXT:    movss %xmm0, (%rax)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fadd_32imm:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl $3735928559, %eax # imm = 0xDEADBEEF
; X64-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-AVX-NEXT:    vaddss (%rax), %xmm0, %xmm0
; X64-AVX-NEXT:    vmovss %xmm0, (%rax)
; X64-AVX-NEXT:    retq
  %i = load atomic i32, ptr inttoptr (i32 3735928559 to ptr) monotonic, align 4
  %f = bitcast i32 %i to float
  %add = fadd float %f, 1.000000e+00
  %s = bitcast float %add to i32
  store atomic i32 %s, ptr inttoptr (i32 3735928559 to ptr) monotonic, align 4
  ret void
}
405
; 64-bit variant of fadd_32imm: monotonic atomic i64 access at the
; fixed address 0xDEADBEEF, double add of 1.0.
define dso_local void @fadd_64imm() nounwind {
; X86-NOSSE-LABEL: fadd_64imm:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    pushl %ebp
; X86-NOSSE-NEXT:    movl %esp, %ebp
; X86-NOSSE-NEXT:    andl $-8, %esp
; X86-NOSSE-NEXT:    subl $32, %esp
; X86-NOSSE-NEXT:    fildll -559038737
; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fld1
; X86-NOSSE-NEXT:    faddl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fstpl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl %eax, (%esp)
; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fildll (%esp)
; X86-NOSSE-NEXT:    fistpll -559038737
; X86-NOSSE-NEXT:    movl %ebp, %esp
; X86-NOSSE-NEXT:    popl %ebp
; X86-NOSSE-NEXT:    retl
;
; X86-SSE1-LABEL: fadd_64imm:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    pushl %ebp
; X86-SSE1-NEXT:    movl %esp, %ebp
; X86-SSE1-NEXT:    andl $-8, %esp
; X86-SSE1-NEXT:    subl $16, %esp
; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
; X86-SSE1-NEXT:    xorps %xmm1, %xmm1
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
; X86-SSE1-NEXT:    movss %xmm1, (%esp)
; X86-SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
; X86-SSE1-NEXT:    movss %xmm1, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    fld1
; X86-SSE1-NEXT:    faddl (%esp)
; X86-SSE1-NEXT:    fstpl {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT:    movlps %xmm0, -559038737
; X86-SSE1-NEXT:    movl %ebp, %esp
; X86-SSE1-NEXT:    popl %ebp
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: fadd_64imm:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %ebp
; X86-SSE2-NEXT:    movl %esp, %ebp
; X86-SSE2-NEXT:    andl $-8, %esp
; X86-SSE2-NEXT:    subl $8, %esp
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    addsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT:    movsd %xmm0, (%esp)
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    movlps %xmm0, -559038737
; X86-SSE2-NEXT:    movl %ebp, %esp
; X86-SSE2-NEXT:    popl %ebp
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: fadd_64imm:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    pushl %ebp
; X86-AVX-NEXT:    movl %esp, %ebp
; X86-AVX-NEXT:    andl $-8, %esp
; X86-AVX-NEXT:    subl $8, %esp
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vaddsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vmovsd %xmm0, (%esp)
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vmovlps %xmm0, -559038737
; X86-AVX-NEXT:    movl %ebp, %esp
; X86-AVX-NEXT:    popl %ebp
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fadd_64imm:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movl $3735928559, %eax # imm = 0xDEADBEEF
; X64-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X64-SSE-NEXT:    addsd (%rax), %xmm0
; X64-SSE-NEXT:    movsd %xmm0, (%rax)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fadd_64imm:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl $3735928559, %eax # imm = 0xDEADBEEF
; X64-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X64-AVX-NEXT:    vaddsd (%rax), %xmm0, %xmm0
; X64-AVX-NEXT:    vmovsd %xmm0, (%rax)
; X64-AVX-NEXT:    retq
  %i = load atomic i64, ptr inttoptr (i64 3735928559 to ptr) monotonic, align 8
  %f = bitcast i64 %i to double
  %add = fadd double %f, 1.000000e+00
  %s = bitcast double %add to i64
  store atomic i64 %s, ptr inttoptr (i64 3735928559 to ptr) monotonic, align 8
  ret void
}
505
506; Floating-point add to a stack location.
; Acquire-load / release-store of i32 bits on a local alloca, plus 1.0.
; Checks that the stack slot is addressed directly (esp/rsp-relative) and,
; on x86-64, that the red zone (negative rsp offsets) is used.
define dso_local void @fadd_32stack() nounwind {
; X86-NOSSE-LABEL: fadd_32stack:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    subl $12, %esp
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl %eax, (%esp)
; X86-NOSSE-NEXT:    fld1
; X86-NOSSE-NEXT:    fadds (%esp)
; X86-NOSSE-NEXT:    fstps {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    addl $12, %esp
; X86-NOSSE-NEXT:    retl
;
; X86-SSE1-LABEL: fadd_32stack:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    subl $12, %esp
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    movl %eax, (%esp)
; X86-SSE1-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE1-NEXT:    addss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE1-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    addl $12, %esp
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: fadd_32stack:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %eax
; X86-SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE2-NEXT:    addss (%esp), %xmm0
; X86-SSE2-NEXT:    movss %xmm0, (%esp)
; X86-SSE2-NEXT:    popl %eax
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: fadd_32stack:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    pushl %eax
; X86-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX-NEXT:    vaddss (%esp), %xmm0, %xmm0
; X86-AVX-NEXT:    vmovss %xmm0, (%esp)
; X86-AVX-NEXT:    popl %eax
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fadd_32stack:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-SSE-NEXT:    addss -{{[0-9]+}}(%rsp), %xmm0
; X64-SSE-NEXT:    movss %xmm0, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fadd_32stack:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-AVX-NEXT:    vaddss -{{[0-9]+}}(%rsp), %xmm0, %xmm0
; X64-AVX-NEXT:    vmovss %xmm0, -{{[0-9]+}}(%rsp)
; X64-AVX-NEXT:    retq
  %ptr = alloca i32, align 4
  %load = load atomic i32, ptr %ptr acquire, align 4   ; acquire ordering on the load side
  %bc0 = bitcast i32 %load to float
  %fadd = fadd float 1.000000e+00, %bc0
  %bc1 = bitcast float %fadd to i32
  store atomic i32 %bc1, ptr %ptr release, align 4     ; release ordering on the store side
  ret void
}
573
; 64-bit variant of fadd_32stack: acquire/release atomic i64 access on an
; 8-byte alloca around a double add of 1.0.
define dso_local void @fadd_64stack() nounwind {
; X86-NOSSE-LABEL: fadd_64stack:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    pushl %ebp
; X86-NOSSE-NEXT:    movl %esp, %ebp
; X86-NOSSE-NEXT:    andl $-8, %esp
; X86-NOSSE-NEXT:    subl $40, %esp
; X86-NOSSE-NEXT:    fildll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fld1
; X86-NOSSE-NEXT:    faddl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fstpl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl %eax, (%esp)
; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fildll (%esp)
; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl %ebp, %esp
; X86-NOSSE-NEXT:    popl %ebp
; X86-NOSSE-NEXT:    retl
;
; X86-SSE1-LABEL: fadd_64stack:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    pushl %ebp
; X86-SSE1-NEXT:    movl %esp, %ebp
; X86-SSE1-NEXT:    andl $-8, %esp
; X86-SSE1-NEXT:    subl $24, %esp
; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
; X86-SSE1-NEXT:    xorps %xmm1, %xmm1
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
; X86-SSE1-NEXT:    movss %xmm1, (%esp)
; X86-SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
; X86-SSE1-NEXT:    movss %xmm1, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    fld1
; X86-SSE1-NEXT:    faddl (%esp)
; X86-SSE1-NEXT:    fstpl {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT:    movlps %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    movl %ebp, %esp
; X86-SSE1-NEXT:    popl %ebp
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: fadd_64stack:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %ebp
; X86-SSE2-NEXT:    movl %esp, %ebp
; X86-SSE2-NEXT:    andl $-8, %esp
; X86-SSE2-NEXT:    subl $16, %esp
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    addsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT:    movsd %xmm0, (%esp)
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    movlps %xmm0, {{[0-9]+}}(%esp)
; X86-SSE2-NEXT:    movl %ebp, %esp
; X86-SSE2-NEXT:    popl %ebp
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: fadd_64stack:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    pushl %ebp
; X86-AVX-NEXT:    movl %esp, %ebp
; X86-AVX-NEXT:    andl $-8, %esp
; X86-AVX-NEXT:    subl $16, %esp
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vaddsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vmovsd %xmm0, (%esp)
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
; X86-AVX-NEXT:    movl %ebp, %esp
; X86-AVX-NEXT:    popl %ebp
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fadd_64stack:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X64-SSE-NEXT:    addsd -{{[0-9]+}}(%rsp), %xmm0
; X64-SSE-NEXT:    movsd %xmm0, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fadd_64stack:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X64-AVX-NEXT:    vaddsd -{{[0-9]+}}(%rsp), %xmm0, %xmm0
; X64-AVX-NEXT:    vmovsd %xmm0, -{{[0-9]+}}(%rsp)
; X64-AVX-NEXT:    retq
  %ptr = alloca i64, align 8
  %load = load atomic i64, ptr %ptr acquire, align 8
  %bc0 = bitcast i64 %load to double
  %fadd = fadd double 1.000000e+00, %bc0
  %bc1 = bitcast double %fadd to i64
  store atomic i64 %bc1, ptr %ptr release, align 8
  ret void
}
672
; Monotonic atomic i64 load/store of an indexed array element
; (%arg[%arg2], via GEP) around a double add of %arg1. Checks that the
; scaled addressing mode (base + index*8) is folded into the atomic
; access on every target.
define dso_local void @fadd_array(ptr %arg, double %arg1, i64 %arg2) nounwind {
; X86-NOSSE-LABEL: fadd_array:
; X86-NOSSE:       # %bb.0: # %bb
; X86-NOSSE-NEXT:    pushl %ebp
; X86-NOSSE-NEXT:    movl %esp, %ebp
; X86-NOSSE-NEXT:    pushl %esi
; X86-NOSSE-NEXT:    andl $-8, %esp
; X86-NOSSE-NEXT:    subl $40, %esp
; X86-NOSSE-NEXT:    movl 20(%ebp), %eax
; X86-NOSSE-NEXT:    movl 8(%ebp), %ecx
; X86-NOSSE-NEXT:    fildll (%ecx,%eax,8)
; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NOSSE-NEXT:    movl %esi, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fldl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    faddl 12(%ebp)
; X86-NOSSE-NEXT:    fstpl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NOSSE-NEXT:    movl %edx, (%esp)
; X86-NOSSE-NEXT:    movl %esi, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    fildll (%esp)
; X86-NOSSE-NEXT:    fistpll (%ecx,%eax,8)
; X86-NOSSE-NEXT:    leal -4(%ebp), %esp
; X86-NOSSE-NEXT:    popl %esi
; X86-NOSSE-NEXT:    popl %ebp
; X86-NOSSE-NEXT:    retl
;
; X86-SSE1-LABEL: fadd_array:
; X86-SSE1:       # %bb.0: # %bb
; X86-SSE1-NEXT:    pushl %ebp
; X86-SSE1-NEXT:    movl %esp, %ebp
; X86-SSE1-NEXT:    andl $-8, %esp
; X86-SSE1-NEXT:    subl $16, %esp
; X86-SSE1-NEXT:    movl 20(%ebp), %eax
; X86-SSE1-NEXT:    movl 8(%ebp), %ecx
; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
; X86-SSE1-NEXT:    xorps %xmm1, %xmm1
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
; X86-SSE1-NEXT:    movss %xmm1, (%esp)
; X86-SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
; X86-SSE1-NEXT:    movss %xmm1, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    fldl (%esp)
; X86-SSE1-NEXT:    faddl 12(%ebp)
; X86-SSE1-NEXT:    fstpl {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT:    movlps %xmm0, (%ecx,%eax,8)
; X86-SSE1-NEXT:    movl %ebp, %esp
; X86-SSE1-NEXT:    popl %ebp
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: fadd_array:
; X86-SSE2:       # %bb.0: # %bb
; X86-SSE2-NEXT:    pushl %ebp
; X86-SSE2-NEXT:    movl %esp, %ebp
; X86-SSE2-NEXT:    andl $-8, %esp
; X86-SSE2-NEXT:    subl $8, %esp
; X86-SSE2-NEXT:    movl 20(%ebp), %eax
; X86-SSE2-NEXT:    movl 8(%ebp), %ecx
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    addsd 12(%ebp), %xmm0
; X86-SSE2-NEXT:    movsd %xmm0, (%esp)
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    movlps %xmm0, (%ecx,%eax,8)
; X86-SSE2-NEXT:    movl %ebp, %esp
; X86-SSE2-NEXT:    popl %ebp
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: fadd_array:
; X86-AVX:       # %bb.0: # %bb
; X86-AVX-NEXT:    pushl %ebp
; X86-AVX-NEXT:    movl %esp, %ebp
; X86-AVX-NEXT:    andl $-8, %esp
; X86-AVX-NEXT:    subl $8, %esp
; X86-AVX-NEXT:    movl 20(%ebp), %eax
; X86-AVX-NEXT:    movl 8(%ebp), %ecx
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vaddsd 12(%ebp), %xmm0, %xmm0
; X86-AVX-NEXT:    vmovsd %xmm0, (%esp)
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vmovlps %xmm0, (%ecx,%eax,8)
; X86-AVX-NEXT:    movl %ebp, %esp
; X86-AVX-NEXT:    popl %ebp
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fadd_array:
; X64-SSE:       # %bb.0: # %bb
; X64-SSE-NEXT:    addsd (%rdi,%rsi,8), %xmm0
; X64-SSE-NEXT:    movsd %xmm0, (%rdi,%rsi,8)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fadd_array:
; X64-AVX:       # %bb.0: # %bb
; X64-AVX-NEXT:    vaddsd (%rdi,%rsi,8), %xmm0, %xmm0
; X64-AVX-NEXT:    vmovsd %xmm0, (%rdi,%rsi,8)
; X64-AVX-NEXT:    retq
bb:
  %tmp4 = getelementptr inbounds i64, ptr %arg, i64 %arg2  ; &arg[arg2]
  %tmp6 = load atomic i64, ptr %tmp4 monotonic, align 8
  %tmp7 = bitcast i64 %tmp6 to double
  %tmp8 = fadd double %tmp7, %arg1
  %tmp9 = bitcast double %tmp8 to i64
  store atomic i64 %tmp9, ptr %tmp4 monotonic, align 8
  ret void
}
780