; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i386-linux-generic -verify-machineinstrs -mattr=sse | FileCheck %s --check-prefixes=X86,X86-SSE,X86-SSE1
; RUN: llc < %s -mtriple=i386-linux-generic -verify-machineinstrs -mattr=sse2 | FileCheck %s --check-prefixes=X86,X86-SSE,X86-SSE2
; RUN: llc < %s -mtriple=i386-linux-generic -verify-machineinstrs -mattr=avx | FileCheck %s --check-prefixes=X86,X86-AVX
; RUN: llc < %s -mtriple=i386-linux-generic -verify-machineinstrs -mattr=avx512f | FileCheck %s --check-prefixes=X86,X86-AVX
; RUN: llc < %s -mtriple=i386-linux-generic -verify-machineinstrs | FileCheck %s --check-prefixes=X86,X86-NOSSE
; RUN: llc < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mattr=sse2 | FileCheck %s --check-prefixes=X64,X64-SSE
; RUN: llc < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mattr=avx | FileCheck %s --check-prefixes=X64,X64-AVX
; RUN: llc < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mattr=avx512f | FileCheck %s --check-prefixes=X64,X64-AVX

; Note: This test checks that the lowering for atomics matches what we
; currently emit for non-atomics + the atomic restriction.  The presence of
; a particular lowering detail in these tests should not be read as requiring
; that detail for correctness unless it's related to the atomicity itself.
; (Specifically, there were reviewer questions about the lowering for halves
;  and their calling convention which remain unresolved.)

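; In the expected lowerings below, unordered atomics at or below the native
; width become plain moves: aligned x86 loads and stores of up to 8 bytes are
; atomic, provided the 8-byte case on i386 is done as a single SSE or x87
; access.  Anything wider (fp128) has to go through __sync_* libcalls.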
define void @store_half(half* %fptr, half %v) {
; X86-LABEL: store_half:
; X86:       # %bb.0:
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movw %ax, (%ecx)
; X86-NEXT:    retl
;
; X64-LABEL: store_half:
; X64:       # %bb.0:
; X64-NEXT:    movw %si, (%rdi)
; X64-NEXT:    retq
  store atomic half %v, half* %fptr unordered, align 2
  ret void
}

define void @store_float(float* %fptr, float %v) {
; X86-LABEL: store_float:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl %ecx, (%eax)
; X86-NEXT:    retl
;
; X64-SSE-LABEL: store_float:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movss %xmm0, (%rdi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: store_float:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovss %xmm0, (%rdi)
; X64-AVX-NEXT:    retq
  store atomic float %v, float* %fptr unordered, align 4
  ret void
}

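; An 8-byte atomic store on i386 must be a single 64-bit access: with SSE it
; can use a movlps/movsd store, and without SSE the value is bounced through
; the x87 stack (fildll/fistpll), whose aligned 64-bit memory operations the
; Pentium and later guarantee to be atomic.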
define void @store_double(double* %fptr, double %v) {
; X86-SSE1-LABEL: store_double:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT:    movlps %xmm0, (%eax)
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: store_double:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    movlps %xmm0, (%eax)
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: store_double:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vmovlps %xmm0, (%eax)
; X86-AVX-NEXT:    retl
;
; X86-NOSSE-LABEL: store_double:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    subl $12, %esp
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 16
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl %ecx, (%esp)
; X86-NOSSE-NEXT:    fildll (%esp)
; X86-NOSSE-NEXT:    fistpll (%eax)
; X86-NOSSE-NEXT:    addl $12, %esp
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT:    retl
;
; X64-SSE-LABEL: store_double:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movsd %xmm0, (%rdi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: store_double:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovsd %xmm0, (%rdi)
; X64-AVX-NEXT:    retq
  store atomic double %v, double* %fptr unordered, align 8
  ret void
}

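; fp128 is wider than any native x86 atomic access, so an atomic fp128 store
; is lowered to a __sync_lock_test_and_set_16 libcall, i.e. an atomic
; exchange whose old value is simply discarded.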
define void @store_fp128(fp128* %fptr, fp128 %v) {
; X86-SSE-LABEL: store_fp128:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    subl $36, %esp
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 36
; X86-SSE-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    pushl %eax
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    calll __sync_lock_test_and_set_16
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset -4
; X86-SSE-NEXT:    addl $56, %esp
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset -56
; X86-SSE-NEXT:    retl
;
; X86-AVX-LABEL: store_fp128:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    subl $44, %esp
; X86-AVX-NEXT:    .cfi_def_cfa_offset 48
; X86-AVX-NEXT:    vmovaps {{[0-9]+}}(%esp), %xmm0
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-AVX-NEXT:    vmovups %xmm0, {{[0-9]+}}(%esp)
; X86-AVX-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    movl %eax, (%esp)
; X86-AVX-NEXT:    calll __sync_lock_test_and_set_16
; X86-AVX-NEXT:    addl $40, %esp
; X86-AVX-NEXT:    .cfi_def_cfa_offset 4
; X86-AVX-NEXT:    retl
;
; X86-NOSSE-LABEL: store_fp128:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    subl $36, %esp
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 36
; X86-NOSSE-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    pushl %eax
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    calll __sync_lock_test_and_set_16
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset -4
; X86-NOSSE-NEXT:    addl $56, %esp
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset -56
; X86-NOSSE-NEXT:    retl
;
; X64-SSE-LABEL: store_fp128:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    subq $24, %rsp
; X64-SSE-NEXT:    .cfi_def_cfa_offset 32
; X64-SSE-NEXT:    movaps %xmm0, (%rsp)
; X64-SSE-NEXT:    movq (%rsp), %rsi
; X64-SSE-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
; X64-SSE-NEXT:    callq __sync_lock_test_and_set_16@PLT
; X64-SSE-NEXT:    addq $24, %rsp
; X64-SSE-NEXT:    .cfi_def_cfa_offset 8
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: store_fp128:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    subq $24, %rsp
; X64-AVX-NEXT:    .cfi_def_cfa_offset 32
; X64-AVX-NEXT:    vmovaps %xmm0, (%rsp)
; X64-AVX-NEXT:    movq (%rsp), %rsi
; X64-AVX-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
; X64-AVX-NEXT:    callq __sync_lock_test_and_set_16@PLT
; X64-AVX-NEXT:    addq $24, %rsp
; X64-AVX-NEXT:    .cfi_def_cfa_offset 8
; X64-AVX-NEXT:    retq
  store atomic fp128 %v, fp128* %fptr unordered, align 16
  ret void
}

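; half values are moved as their raw i16 bits; note that the load below
; returns those bits in an integer register (%eax), which is the unresolved
; calling-convention question mentioned at the top of the file.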
define half @load_half(half* %fptr) {
; X86-LABEL: load_half:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movzwl (%eax), %eax
; X86-NEXT:    retl
;
; X64-LABEL: load_half:
; X64:       # %bb.0:
; X64-NEXT:    movzwl (%rdi), %eax
; X64-NEXT:    retq
  %v = load atomic half, half* %fptr unordered, align 2
  ret half %v
}

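; On i386 a float is returned on the x87 stack, so the atomically loaded bits
; are staged through a stack slot and reloaded with flds; the load itself is
; still a single 32-bit access.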
define float @load_float(float* %fptr) {
; X86-SSE1-LABEL: load_float:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    pushl %eax
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 8
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    movl (%eax), %eax
; X86-SSE1-NEXT:    movl %eax, (%esp)
; X86-SSE1-NEXT:    flds (%esp)
; X86-SSE1-NEXT:    popl %eax
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: load_float:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %eax
; X86-SSE2-NEXT:    .cfi_def_cfa_offset 8
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE2-NEXT:    movss %xmm0, (%esp)
; X86-SSE2-NEXT:    flds (%esp)
; X86-SSE2-NEXT:    popl %eax
; X86-SSE2-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: load_float:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    pushl %eax
; X86-AVX-NEXT:    .cfi_def_cfa_offset 8
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX-NEXT:    vmovss %xmm0, (%esp)
; X86-AVX-NEXT:    flds (%esp)
; X86-AVX-NEXT:    popl %eax
; X86-AVX-NEXT:    .cfi_def_cfa_offset 4
; X86-AVX-NEXT:    retl
;
; X86-NOSSE-LABEL: load_float:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    pushl %eax
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 8
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl (%eax), %eax
; X86-NOSSE-NEXT:    movl %eax, (%esp)
; X86-NOSSE-NEXT:    flds (%esp)
; X86-NOSSE-NEXT:    popl %eax
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT:    retl
;
; X64-SSE-LABEL: load_float:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: load_float:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-AVX-NEXT:    retq
  %v = load atomic float, float* %fptr unordered, align 4
  ret float %v
}

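; SSE1 has no scalar 64-bit moves, so the i386 SSE1 lowering loads the double
; with a single movlps into a zeroed register and spills the two 32-bit
; halves separately before the x87 reload; SSE2 and AVX can use movsd
; directly.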
define double @load_double(double* %fptr) {
; X86-SSE1-LABEL: load_double:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    subl $12, %esp
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 16
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT:    movss %xmm0, (%esp)
; X86-SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X86-SSE1-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    fldl (%esp)
; X86-SSE1-NEXT:    addl $12, %esp
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: load_double:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    subl $12, %esp
; X86-SSE2-NEXT:    .cfi_def_cfa_offset 16
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    movlps %xmm0, (%esp)
; X86-SSE2-NEXT:    fldl (%esp)
; X86-SSE2-NEXT:    addl $12, %esp
; X86-SSE2-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: load_double:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    subl $12, %esp
; X86-AVX-NEXT:    .cfi_def_cfa_offset 16
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vmovlps %xmm0, (%esp)
; X86-AVX-NEXT:    fldl (%esp)
; X86-AVX-NEXT:    addl $12, %esp
; X86-AVX-NEXT:    .cfi_def_cfa_offset 4
; X86-AVX-NEXT:    retl
;
; X86-NOSSE-LABEL: load_double:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    subl $20, %esp
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 24
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    fildll (%eax)
; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl %eax, (%esp)
; X86-NOSSE-NEXT:    fldl (%esp)
; X86-NOSSE-NEXT:    addl $20, %esp
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT:    retl
;
; X64-SSE-LABEL: load_double:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: load_double:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X64-AVX-NEXT:    retq
  %v = load atomic double, double* %fptr unordered, align 8
  ret double %v
}

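; There is no native 16-byte atomic load either, so it is lowered to
; __sync_val_compare_and_swap_16 comparing and exchanging zero: if the memory
; happens to contain zero the same value is written back, and in every case
; the libcall returns the current contents.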
define fp128 @load_fp128(fp128* %fptr) {
; X86-SSE-LABEL: load_fp128:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    pushl %edi
; X86-SSE-NEXT:    .cfi_def_cfa_offset 8
; X86-SSE-NEXT:    pushl %esi
; X86-SSE-NEXT:    .cfi_def_cfa_offset 12
; X86-SSE-NEXT:    subl $20, %esp
; X86-SSE-NEXT:    .cfi_def_cfa_offset 32
; X86-SSE-NEXT:    .cfi_offset %esi, -12
; X86-SSE-NEXT:    .cfi_offset %edi, -8
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-SSE-NEXT:    subl $8, %esp
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 8
; X86-SSE-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    pushl $0
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    pushl $0
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    pushl $0
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    pushl $0
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    pushl $0
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    pushl $0
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    pushl $0
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    pushl $0
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    pushl %eax
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    calll __sync_val_compare_and_swap_16
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset -4
; X86-SSE-NEXT:    addl $44, %esp
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset -44
; X86-SSE-NEXT:    movl (%esp), %eax
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-SSE-NEXT:    movl %edi, 8(%esi)
; X86-SSE-NEXT:    movl %edx, 12(%esi)
; X86-SSE-NEXT:    movl %eax, (%esi)
; X86-SSE-NEXT:    movl %ecx, 4(%esi)
; X86-SSE-NEXT:    movl %esi, %eax
; X86-SSE-NEXT:    addl $20, %esp
; X86-SSE-NEXT:    .cfi_def_cfa_offset 12
; X86-SSE-NEXT:    popl %esi
; X86-SSE-NEXT:    .cfi_def_cfa_offset 8
; X86-SSE-NEXT:    popl %edi
; X86-SSE-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE-NEXT:    retl $4
;
; X86-AVX-LABEL: load_fp128:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    pushl %esi
; X86-AVX-NEXT:    .cfi_def_cfa_offset 8
; X86-AVX-NEXT:    subl $56, %esp
; X86-AVX-NEXT:    .cfi_def_cfa_offset 64
; X86-AVX-NEXT:    .cfi_offset %esi, -8
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X86-AVX-NEXT:    vmovups %ymm0, {{[0-9]+}}(%esp)
; X86-AVX-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-AVX-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    movl %eax, (%esp)
; X86-AVX-NEXT:    vzeroupper
; X86-AVX-NEXT:    calll __sync_val_compare_and_swap_16
; X86-AVX-NEXT:    subl $4, %esp
; X86-AVX-NEXT:    vmovups {{[0-9]+}}(%esp), %xmm0
; X86-AVX-NEXT:    vmovaps %xmm0, (%esi)
; X86-AVX-NEXT:    movl %esi, %eax
; X86-AVX-NEXT:    addl $56, %esp
; X86-AVX-NEXT:    .cfi_def_cfa_offset 8
; X86-AVX-NEXT:    popl %esi
; X86-AVX-NEXT:    .cfi_def_cfa_offset 4
; X86-AVX-NEXT:    retl $4
;
; X86-NOSSE-LABEL: load_fp128:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    pushl %edi
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 8
; X86-NOSSE-NEXT:    pushl %esi
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 12
; X86-NOSSE-NEXT:    subl $20, %esp
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 32
; X86-NOSSE-NEXT:    .cfi_offset %esi, -12
; X86-NOSSE-NEXT:    .cfi_offset %edi, -8
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NOSSE-NEXT:    subl $8, %esp
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 8
; X86-NOSSE-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    pushl $0
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    pushl $0
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    pushl $0
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    pushl $0
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    pushl $0
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    pushl $0
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    pushl $0
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    pushl $0
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    pushl %eax
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    calll __sync_val_compare_and_swap_16
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset -4
; X86-NOSSE-NEXT:    addl $44, %esp
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset -44
; X86-NOSSE-NEXT:    movl (%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NOSSE-NEXT:    movl %edi, 8(%esi)
; X86-NOSSE-NEXT:    movl %edx, 12(%esi)
; X86-NOSSE-NEXT:    movl %eax, (%esi)
; X86-NOSSE-NEXT:    movl %ecx, 4(%esi)
; X86-NOSSE-NEXT:    movl %esi, %eax
; X86-NOSSE-NEXT:    addl $20, %esp
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 12
; X86-NOSSE-NEXT:    popl %esi
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 8
; X86-NOSSE-NEXT:    popl %edi
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT:    retl $4
;
; X64-SSE-LABEL: load_fp128:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    subq $24, %rsp
; X64-SSE-NEXT:    .cfi_def_cfa_offset 32
; X64-SSE-NEXT:    xorl %esi, %esi
; X64-SSE-NEXT:    xorl %edx, %edx
; X64-SSE-NEXT:    xorl %ecx, %ecx
; X64-SSE-NEXT:    xorl %r8d, %r8d
; X64-SSE-NEXT:    callq __sync_val_compare_and_swap_16@PLT
; X64-SSE-NEXT:    movq %rdx, {{[0-9]+}}(%rsp)
; X64-SSE-NEXT:    movq %rax, (%rsp)
; X64-SSE-NEXT:    movaps (%rsp), %xmm0
; X64-SSE-NEXT:    addq $24, %rsp
; X64-SSE-NEXT:    .cfi_def_cfa_offset 8
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: load_fp128:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    subq $24, %rsp
; X64-AVX-NEXT:    .cfi_def_cfa_offset 32
; X64-AVX-NEXT:    xorl %esi, %esi
; X64-AVX-NEXT:    xorl %edx, %edx
; X64-AVX-NEXT:    xorl %ecx, %ecx
; X64-AVX-NEXT:    xorl %r8d, %r8d
; X64-AVX-NEXT:    callq __sync_val_compare_and_swap_16@PLT
; X64-AVX-NEXT:    movq %rdx, {{[0-9]+}}(%rsp)
; X64-AVX-NEXT:    movq %rax, (%rsp)
; X64-AVX-NEXT:    vmovaps (%rsp), %xmm0
; X64-AVX-NEXT:    addq $24, %rsp
; X64-AVX-NEXT:    .cfi_def_cfa_offset 8
; X64-AVX-NEXT:    retq
  %v = load atomic fp128, fp128* %fptr unordered, align 16
  ret fp128 %v
}

; Check the seq_cst lowering, since that's the interesting one from an
; ordering perspective on x86.

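; Under x86's strong (TSO) memory model only the seq_cst store needs extra
; ordering: it is either performed with an implicitly locked xchg, or as a
; plain store followed by a full barrier ("lock orl $0, (%esp)" is a cheaper
; equivalent of mfence).  seq_cst loads lower exactly like unordered ones.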
define void @store_float_seq_cst(float* %fptr, float %v) {
; X86-LABEL: store_float_seq_cst:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    xchgl %ecx, (%eax)
; X86-NEXT:    retl
;
; X64-SSE-LABEL: store_float_seq_cst:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movd %xmm0, %eax
; X64-SSE-NEXT:    xchgl %eax, (%rdi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: store_float_seq_cst:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovd %xmm0, %eax
; X64-AVX-NEXT:    xchgl %eax, (%rdi)
; X64-AVX-NEXT:    retq
  store atomic float %v, float* %fptr seq_cst, align 4
  ret void
}

define void @store_double_seq_cst(double* %fptr, double %v) {
; X86-SSE1-LABEL: store_double_seq_cst:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT:    movlps %xmm0, (%eax)
; X86-SSE1-NEXT:    lock orl $0, (%esp)
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: store_double_seq_cst:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    movlps %xmm0, (%eax)
; X86-SSE2-NEXT:    lock orl $0, (%esp)
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: store_double_seq_cst:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vmovlps %xmm0, (%eax)
; X86-AVX-NEXT:    lock orl $0, (%esp)
; X86-AVX-NEXT:    retl
;
; X86-NOSSE-LABEL: store_double_seq_cst:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    subl $12, %esp
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 16
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl %ecx, (%esp)
; X86-NOSSE-NEXT:    fildll (%esp)
; X86-NOSSE-NEXT:    fistpll (%eax)
; X86-NOSSE-NEXT:    lock orl $0, (%esp)
; X86-NOSSE-NEXT:    addl $12, %esp
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT:    retl
;
; X64-SSE-LABEL: store_double_seq_cst:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movq %xmm0, %rax
; X64-SSE-NEXT:    xchgq %rax, (%rdi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: store_double_seq_cst:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovq %xmm0, %rax
; X64-AVX-NEXT:    xchgq %rax, (%rdi)
; X64-AVX-NEXT:    retq
  store atomic double %v, double* %fptr seq_cst, align 8
  ret void
}

define float @load_float_seq_cst(float* %fptr) {
; X86-SSE1-LABEL: load_float_seq_cst:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    pushl %eax
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 8
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    movl (%eax), %eax
; X86-SSE1-NEXT:    movl %eax, (%esp)
; X86-SSE1-NEXT:    flds (%esp)
; X86-SSE1-NEXT:    popl %eax
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: load_float_seq_cst:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %eax
; X86-SSE2-NEXT:    .cfi_def_cfa_offset 8
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE2-NEXT:    movss %xmm0, (%esp)
; X86-SSE2-NEXT:    flds (%esp)
; X86-SSE2-NEXT:    popl %eax
; X86-SSE2-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: load_float_seq_cst:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    pushl %eax
; X86-AVX-NEXT:    .cfi_def_cfa_offset 8
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX-NEXT:    vmovss %xmm0, (%esp)
; X86-AVX-NEXT:    flds (%esp)
; X86-AVX-NEXT:    popl %eax
; X86-AVX-NEXT:    .cfi_def_cfa_offset 4
; X86-AVX-NEXT:    retl
;
; X86-NOSSE-LABEL: load_float_seq_cst:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    pushl %eax
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 8
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl (%eax), %eax
; X86-NOSSE-NEXT:    movl %eax, (%esp)
; X86-NOSSE-NEXT:    flds (%esp)
; X86-NOSSE-NEXT:    popl %eax
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT:    retl
;
; X64-SSE-LABEL: load_float_seq_cst:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: load_float_seq_cst:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-AVX-NEXT:    retq
  %v = load atomic float, float* %fptr seq_cst, align 4
  ret float %v
}

define double @load_double_seq_cst(double* %fptr) {
; X86-SSE1-LABEL: load_double_seq_cst:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    subl $12, %esp
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 16
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT:    movss %xmm0, (%esp)
; X86-SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X86-SSE1-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    fldl (%esp)
; X86-SSE1-NEXT:    addl $12, %esp
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: load_double_seq_cst:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    subl $12, %esp
; X86-SSE2-NEXT:    .cfi_def_cfa_offset 16
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    movlps %xmm0, (%esp)
; X86-SSE2-NEXT:    fldl (%esp)
; X86-SSE2-NEXT:    addl $12, %esp
; X86-SSE2-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: load_double_seq_cst:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    subl $12, %esp
; X86-AVX-NEXT:    .cfi_def_cfa_offset 16
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vmovlps %xmm0, (%esp)
; X86-AVX-NEXT:    fldl (%esp)
; X86-AVX-NEXT:    addl $12, %esp
; X86-AVX-NEXT:    .cfi_def_cfa_offset 4
; X86-AVX-NEXT:    retl
;
; X86-NOSSE-LABEL: load_double_seq_cst:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    subl $20, %esp
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 24
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    fildll (%eax)
; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl %eax, (%esp)
; X86-NOSSE-NEXT:    fldl (%esp)
; X86-NOSSE-NEXT:    addl $20, %esp
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT:    retl
;
; X64-SSE-LABEL: load_double_seq_cst:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: load_double_seq_cst:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X64-AVX-NEXT:    retq
  %v = load atomic double, double* %fptr seq_cst, align 8
  ret double %v
}
