; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i386-linux-generic -verify-machineinstrs -mattr=sse | FileCheck %s --check-prefixes=X86,X86-SSE,X86-SSE1
; RUN: llc < %s -mtriple=i386-linux-generic -verify-machineinstrs -mattr=sse2 | FileCheck %s --check-prefixes=X86,X86-SSE,X86-SSE2
; RUN: llc < %s -mtriple=i386-linux-generic -verify-machineinstrs -mattr=avx | FileCheck %s --check-prefixes=X86,X86-AVX
; RUN: llc < %s -mtriple=i386-linux-generic -verify-machineinstrs -mattr=avx512f | FileCheck %s --check-prefixes=X86,X86-AVX
; RUN: llc < %s -mtriple=i386-linux-generic -verify-machineinstrs | FileCheck %s --check-prefixes=X86,X86-NOSSE
; RUN: llc < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mattr=sse2 | FileCheck %s --check-prefixes=X64-SSE
; RUN: llc < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mattr=avx | FileCheck %s --check-prefixes=X64-AVX
; RUN: llc < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mattr=avx512f | FileCheck %s --check-prefixes=X64-AVX

; Note: This test checks that the lowering for atomics matches what we
; currently emit for non-atomics plus the atomicity restriction.  The
; presence of a particular lowering detail in these tests should not be
; read as requiring that detail for correctness unless it is related to
; the atomicity itself.  (Specifically, there were reviewer questions
; about the lowering for half values and their calling convention which
; remain unresolved.)
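;
; For example, a naturally aligned unordered atomic store such as
;   store atomic float %v, ptr %fptr unordered, align 4
; lowers to the same single movl (i386) or movss (x86-64) that a plain
; `store float` would produce, since an aligned 4 byte store is already
; atomic on x86 (see store_float below).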

define void @store_half(ptr %fptr, half %v) {
; X86-SSE1-LABEL: store_half:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE1-NEXT:    movw %ax, (%ecx)
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: store_half:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movw %cx, (%eax)
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: store_half:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT:    movw %cx, (%eax)
; X86-AVX-NEXT:    retl
;
; X86-NOSSE-LABEL: store_half:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movw %ax, (%ecx)
; X86-NOSSE-NEXT:    retl
;
; X64-SSE-LABEL: store_half:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    pextrw $0, %xmm0, %eax
; X64-SSE-NEXT:    movw %ax, (%rdi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: store_half:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpextrw $0, %xmm0, %eax
; X64-AVX-NEXT:    movw %ax, (%rdi)
; X64-AVX-NEXT:    retq
  store atomic half %v, ptr %fptr unordered, align 2
  ret void
}

define void @store_float(ptr %fptr, float %v) {
; X86-LABEL: store_float:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl %ecx, (%eax)
; X86-NEXT:    retl
;
; X64-SSE-LABEL: store_float:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movss %xmm0, (%rdi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: store_float:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovss %xmm0, (%rdi)
; X64-AVX-NEXT:    retq
  store atomic float %v, ptr %fptr unordered, align 4
  ret void
}

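; On 32-bit x86 without SSE, the 8 byte cases go through the x87
; fildll/fistpll pair, which performs the 64-bit transfer as a single
; load and a single store (see the X86-NOSSE check lines in
; store_double and load_double).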
define void @store_double(ptr %fptr, double %v) {
; X86-SSE1-LABEL: store_double:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT:    movlps %xmm0, (%eax)
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: store_double:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    movlps %xmm0, (%eax)
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: store_double:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vmovlps %xmm0, (%eax)
; X86-AVX-NEXT:    retl
;
; X86-NOSSE-LABEL: store_double:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    subl $12, %esp
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 16
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl %ecx, (%esp)
; X86-NOSSE-NEXT:    fildll (%esp)
; X86-NOSSE-NEXT:    fistpll (%eax)
; X86-NOSSE-NEXT:    addl $12, %esp
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT:    retl
;
; X64-SSE-LABEL: store_double:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movsd %xmm0, (%rdi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: store_double:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovsd %xmm0, (%rdi)
; X64-AVX-NEXT:    retq
  store atomic double %v, ptr %fptr unordered, align 8
  ret void
}

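; fp128 is 16 bytes, wider than any natively atomic access available in
; these configurations, so the store and load below are expanded to the
; __sync_lock_test_and_set_16 and __sync_val_compare_and_swap_16
; libcalls.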
define void @store_fp128(ptr %fptr, fp128 %v) {
; X86-SSE-LABEL: store_fp128:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    subl $36, %esp
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 36
; X86-SSE-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    pushl %eax
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    calll __sync_lock_test_and_set_16
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset -4
; X86-SSE-NEXT:    addl $56, %esp
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset -56
; X86-SSE-NEXT:    retl
;
; X86-AVX-LABEL: store_fp128:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    subl $44, %esp
; X86-AVX-NEXT:    .cfi_def_cfa_offset 48
; X86-AVX-NEXT:    vmovaps {{[0-9]+}}(%esp), %xmm0
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-AVX-NEXT:    vmovups %xmm0, {{[0-9]+}}(%esp)
; X86-AVX-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    movl %eax, (%esp)
; X86-AVX-NEXT:    calll __sync_lock_test_and_set_16
; X86-AVX-NEXT:    addl $40, %esp
; X86-AVX-NEXT:    .cfi_def_cfa_offset 4
; X86-AVX-NEXT:    retl
;
; X86-NOSSE-LABEL: store_fp128:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    subl $36, %esp
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 36
; X86-NOSSE-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    pushl %eax
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    calll __sync_lock_test_and_set_16
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset -4
; X86-NOSSE-NEXT:    addl $56, %esp
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset -56
; X86-NOSSE-NEXT:    retl
;
; X64-SSE-LABEL: store_fp128:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    subq $24, %rsp
; X64-SSE-NEXT:    .cfi_def_cfa_offset 32
; X64-SSE-NEXT:    movaps %xmm0, (%rsp)
; X64-SSE-NEXT:    movq (%rsp), %rsi
; X64-SSE-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
; X64-SSE-NEXT:    callq __sync_lock_test_and_set_16@PLT
; X64-SSE-NEXT:    addq $24, %rsp
; X64-SSE-NEXT:    .cfi_def_cfa_offset 8
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: store_fp128:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    subq $24, %rsp
; X64-AVX-NEXT:    .cfi_def_cfa_offset 32
; X64-AVX-NEXT:    vmovaps %xmm0, (%rsp)
; X64-AVX-NEXT:    movq (%rsp), %rsi
; X64-AVX-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
; X64-AVX-NEXT:    callq __sync_lock_test_and_set_16@PLT
; X64-AVX-NEXT:    addq $24, %rsp
; X64-AVX-NEXT:    .cfi_def_cfa_offset 8
; X64-AVX-NEXT:    retq
  store atomic fp128 %v, ptr %fptr unordered, align 16
  ret void
}

define half @load_half(ptr %fptr) {
; X86-SSE1-LABEL: load_half:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    movzwl (%eax), %eax
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: load_half:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movzwl (%eax), %eax
; X86-SSE2-NEXT:    pinsrw $0, %eax, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: load_half:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    movzwl (%eax), %eax
; X86-AVX-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X86-NOSSE-LABEL: load_half:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movzwl (%eax), %eax
; X86-NOSSE-NEXT:    retl
;
; X64-SSE-LABEL: load_half:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movzwl (%rdi), %eax
; X64-SSE-NEXT:    pinsrw $0, %eax, %xmm0
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: load_half:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movzwl (%rdi), %eax
; X64-AVX-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %v = load atomic half, ptr %fptr unordered, align 2
  ret half %v
}

define float @load_float(ptr %fptr) {
; X86-SSE1-LABEL: load_float:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    pushl %eax
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 8
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    movl (%eax), %eax
; X86-SSE1-NEXT:    movl %eax, (%esp)
; X86-SSE1-NEXT:    flds (%esp)
; X86-SSE1-NEXT:    popl %eax
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: load_float:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %eax
; X86-SSE2-NEXT:    .cfi_def_cfa_offset 8
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE2-NEXT:    movss %xmm0, (%esp)
; X86-SSE2-NEXT:    flds (%esp)
; X86-SSE2-NEXT:    popl %eax
; X86-SSE2-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: load_float:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    pushl %eax
; X86-AVX-NEXT:    .cfi_def_cfa_offset 8
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX-NEXT:    vmovss %xmm0, (%esp)
; X86-AVX-NEXT:    flds (%esp)
; X86-AVX-NEXT:    popl %eax
; X86-AVX-NEXT:    .cfi_def_cfa_offset 4
; X86-AVX-NEXT:    retl
;
; X86-NOSSE-LABEL: load_float:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    pushl %eax
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 8
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl (%eax), %eax
; X86-NOSSE-NEXT:    movl %eax, (%esp)
; X86-NOSSE-NEXT:    flds (%esp)
; X86-NOSSE-NEXT:    popl %eax
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT:    retl
;
; X64-SSE-LABEL: load_float:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: load_float:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-AVX-NEXT:    retq
  %v = load atomic float, ptr %fptr unordered, align 4
  ret float %v
}

define double @load_double(ptr %fptr) {
; X86-SSE1-LABEL: load_double:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    subl $12, %esp
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 16
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT:    movss %xmm0, (%esp)
; X86-SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X86-SSE1-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    fldl (%esp)
; X86-SSE1-NEXT:    addl $12, %esp
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: load_double:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    subl $12, %esp
; X86-SSE2-NEXT:    .cfi_def_cfa_offset 16
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    movlps %xmm0, (%esp)
; X86-SSE2-NEXT:    fldl (%esp)
; X86-SSE2-NEXT:    addl $12, %esp
; X86-SSE2-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: load_double:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    subl $12, %esp
; X86-AVX-NEXT:    .cfi_def_cfa_offset 16
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vmovlps %xmm0, (%esp)
; X86-AVX-NEXT:    fldl (%esp)
; X86-AVX-NEXT:    addl $12, %esp
; X86-AVX-NEXT:    .cfi_def_cfa_offset 4
; X86-AVX-NEXT:    retl
;
; X86-NOSSE-LABEL: load_double:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    subl $20, %esp
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 24
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    fildll (%eax)
; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl %eax, (%esp)
; X86-NOSSE-NEXT:    fldl (%esp)
; X86-NOSSE-NEXT:    addl $20, %esp
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT:    retl
;
; X64-SSE-LABEL: load_double:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: load_double:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X64-AVX-NEXT:    retq
  %v = load atomic double, ptr %fptr unordered, align 8
  ret double %v
}

define fp128 @load_fp128(ptr %fptr) {
; X86-SSE-LABEL: load_fp128:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    pushl %edi
; X86-SSE-NEXT:    .cfi_def_cfa_offset 8
; X86-SSE-NEXT:    pushl %esi
; X86-SSE-NEXT:    .cfi_def_cfa_offset 12
; X86-SSE-NEXT:    subl $20, %esp
; X86-SSE-NEXT:    .cfi_def_cfa_offset 32
; X86-SSE-NEXT:    .cfi_offset %esi, -12
; X86-SSE-NEXT:    .cfi_offset %edi, -8
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-SSE-NEXT:    subl $8, %esp
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 8
; X86-SSE-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    pushl $0
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    pushl $0
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    pushl $0
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    pushl $0
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    pushl $0
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    pushl $0
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    pushl $0
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    pushl $0
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    pushl %eax
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    calll __sync_val_compare_and_swap_16
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset -4
; X86-SSE-NEXT:    addl $44, %esp
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset -44
; X86-SSE-NEXT:    movl (%esp), %eax
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-SSE-NEXT:    movl %edi, 8(%esi)
; X86-SSE-NEXT:    movl %edx, 12(%esi)
; X86-SSE-NEXT:    movl %eax, (%esi)
; X86-SSE-NEXT:    movl %ecx, 4(%esi)
; X86-SSE-NEXT:    movl %esi, %eax
; X86-SSE-NEXT:    addl $20, %esp
; X86-SSE-NEXT:    .cfi_def_cfa_offset 12
; X86-SSE-NEXT:    popl %esi
; X86-SSE-NEXT:    .cfi_def_cfa_offset 8
; X86-SSE-NEXT:    popl %edi
; X86-SSE-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE-NEXT:    retl $4
;
; X86-AVX-LABEL: load_fp128:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    pushl %esi
; X86-AVX-NEXT:    .cfi_def_cfa_offset 8
; X86-AVX-NEXT:    subl $56, %esp
; X86-AVX-NEXT:    .cfi_def_cfa_offset 64
; X86-AVX-NEXT:    .cfi_offset %esi, -8
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X86-AVX-NEXT:    vmovups %ymm0, {{[0-9]+}}(%esp)
; X86-AVX-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-AVX-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    movl %eax, (%esp)
; X86-AVX-NEXT:    vzeroupper
; X86-AVX-NEXT:    calll __sync_val_compare_and_swap_16
; X86-AVX-NEXT:    subl $4, %esp
; X86-AVX-NEXT:    vmovups {{[0-9]+}}(%esp), %xmm0
; X86-AVX-NEXT:    vmovaps %xmm0, (%esi)
; X86-AVX-NEXT:    movl %esi, %eax
; X86-AVX-NEXT:    addl $56, %esp
; X86-AVX-NEXT:    .cfi_def_cfa_offset 8
; X86-AVX-NEXT:    popl %esi
; X86-AVX-NEXT:    .cfi_def_cfa_offset 4
; X86-AVX-NEXT:    retl $4
;
; X86-NOSSE-LABEL: load_fp128:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    pushl %edi
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 8
; X86-NOSSE-NEXT:    pushl %esi
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 12
; X86-NOSSE-NEXT:    subl $20, %esp
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 32
; X86-NOSSE-NEXT:    .cfi_offset %esi, -12
; X86-NOSSE-NEXT:    .cfi_offset %edi, -8
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NOSSE-NEXT:    subl $8, %esp
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 8
; X86-NOSSE-NEXT:    leal {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    pushl $0
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    pushl $0
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    pushl $0
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    pushl $0
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    pushl $0
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    pushl $0
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    pushl $0
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    pushl $0
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    pushl %eax
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-NOSSE-NEXT:    calll __sync_val_compare_and_swap_16
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset -4
; X86-NOSSE-NEXT:    addl $44, %esp
; X86-NOSSE-NEXT:    .cfi_adjust_cfa_offset -44
; X86-NOSSE-NEXT:    movl (%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NOSSE-NEXT:    movl %edi, 8(%esi)
; X86-NOSSE-NEXT:    movl %edx, 12(%esi)
; X86-NOSSE-NEXT:    movl %eax, (%esi)
; X86-NOSSE-NEXT:    movl %ecx, 4(%esi)
; X86-NOSSE-NEXT:    movl %esi, %eax
; X86-NOSSE-NEXT:    addl $20, %esp
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 12
; X86-NOSSE-NEXT:    popl %esi
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 8
; X86-NOSSE-NEXT:    popl %edi
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT:    retl $4
;
; X64-SSE-LABEL: load_fp128:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    subq $24, %rsp
; X64-SSE-NEXT:    .cfi_def_cfa_offset 32
; X64-SSE-NEXT:    xorl %esi, %esi
; X64-SSE-NEXT:    xorl %edx, %edx
; X64-SSE-NEXT:    xorl %ecx, %ecx
; X64-SSE-NEXT:    xorl %r8d, %r8d
; X64-SSE-NEXT:    callq __sync_val_compare_and_swap_16@PLT
; X64-SSE-NEXT:    movq %rdx, {{[0-9]+}}(%rsp)
; X64-SSE-NEXT:    movq %rax, (%rsp)
; X64-SSE-NEXT:    movaps (%rsp), %xmm0
; X64-SSE-NEXT:    addq $24, %rsp
; X64-SSE-NEXT:    .cfi_def_cfa_offset 8
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: load_fp128:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    subq $24, %rsp
; X64-AVX-NEXT:    .cfi_def_cfa_offset 32
; X64-AVX-NEXT:    xorl %esi, %esi
; X64-AVX-NEXT:    xorl %edx, %edx
; X64-AVX-NEXT:    xorl %ecx, %ecx
; X64-AVX-NEXT:    xorl %r8d, %r8d
; X64-AVX-NEXT:    callq __sync_val_compare_and_swap_16@PLT
; X64-AVX-NEXT:    movq %rdx, {{[0-9]+}}(%rsp)
; X64-AVX-NEXT:    movq %rax, (%rsp)
; X64-AVX-NEXT:    vmovaps (%rsp), %xmm0
; X64-AVX-NEXT:    addq $24, %rsp
; X64-AVX-NEXT:    .cfi_def_cfa_offset 8
; X64-AVX-NEXT:    retq
  %v = load atomic fp128, ptr %fptr unordered, align 16
  ret fp128 %v
}


; Check the seq_cst lowering as well, since that's the interesting one
; from an ordering perspective on x86.
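;
; A seq_cst store must also be ordered before any later load; the
; lowerings below get that either from an implicitly locked xchg, which
; doubles as a full fence, or from a plain store followed by a dummy
; `lock orl $0, (%esp)`.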

define void @store_float_seq_cst(ptr %fptr, float %v) {
; X86-LABEL: store_float_seq_cst:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    xchgl %ecx, (%eax)
; X86-NEXT:    retl
;
; X64-SSE-LABEL: store_float_seq_cst:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movd %xmm0, %eax
; X64-SSE-NEXT:    xchgl %eax, (%rdi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: store_float_seq_cst:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovd %xmm0, %eax
; X64-AVX-NEXT:    xchgl %eax, (%rdi)
; X64-AVX-NEXT:    retq
  store atomic float %v, ptr %fptr seq_cst, align 4
  ret void
}

define void @store_double_seq_cst(ptr %fptr, double %v) {
; X86-SSE1-LABEL: store_double_seq_cst:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT:    movlps %xmm0, (%eax)
; X86-SSE1-NEXT:    lock orl $0, (%esp)
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: store_double_seq_cst:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    movlps %xmm0, (%eax)
; X86-SSE2-NEXT:    lock orl $0, (%esp)
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: store_double_seq_cst:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vmovlps %xmm0, (%eax)
; X86-AVX-NEXT:    lock orl $0, (%esp)
; X86-AVX-NEXT:    retl
;
; X86-NOSSE-LABEL: store_double_seq_cst:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    subl $12, %esp
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 16
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NOSSE-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl %ecx, (%esp)
; X86-NOSSE-NEXT:    fildll (%esp)
; X86-NOSSE-NEXT:    fistpll (%eax)
; X86-NOSSE-NEXT:    lock orl $0, (%esp)
; X86-NOSSE-NEXT:    addl $12, %esp
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT:    retl
;
; X64-SSE-LABEL: store_double_seq_cst:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movq %xmm0, %rax
; X64-SSE-NEXT:    xchgq %rax, (%rdi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: store_double_seq_cst:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovq %xmm0, %rax
; X64-AVX-NEXT:    xchgq %rax, (%rdi)
; X64-AVX-NEXT:    retq
  store atomic double %v, ptr %fptr seq_cst, align 8
  ret void
}

define float @load_float_seq_cst(ptr %fptr) {
; X86-SSE1-LABEL: load_float_seq_cst:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    pushl %eax
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 8
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    movl (%eax), %eax
; X86-SSE1-NEXT:    movl %eax, (%esp)
; X86-SSE1-NEXT:    flds (%esp)
; X86-SSE1-NEXT:    popl %eax
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: load_float_seq_cst:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %eax
; X86-SSE2-NEXT:    .cfi_def_cfa_offset 8
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE2-NEXT:    movss %xmm0, (%esp)
; X86-SSE2-NEXT:    flds (%esp)
; X86-SSE2-NEXT:    popl %eax
; X86-SSE2-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: load_float_seq_cst:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    pushl %eax
; X86-AVX-NEXT:    .cfi_def_cfa_offset 8
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX-NEXT:    vmovss %xmm0, (%esp)
; X86-AVX-NEXT:    flds (%esp)
; X86-AVX-NEXT:    popl %eax
; X86-AVX-NEXT:    .cfi_def_cfa_offset 4
; X86-AVX-NEXT:    retl
;
; X86-NOSSE-LABEL: load_float_seq_cst:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    pushl %eax
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 8
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl (%eax), %eax
; X86-NOSSE-NEXT:    movl %eax, (%esp)
; X86-NOSSE-NEXT:    flds (%esp)
; X86-NOSSE-NEXT:    popl %eax
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT:    retl
;
; X64-SSE-LABEL: load_float_seq_cst:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: load_float_seq_cst:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-AVX-NEXT:    retq
  %v = load atomic float, ptr %fptr seq_cst, align 4
  ret float %v
}

define double @load_double_seq_cst(ptr %fptr) {
; X86-SSE1-LABEL: load_double_seq_cst:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    subl $12, %esp
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 16
; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE1-NEXT:    xorps %xmm0, %xmm0
; X86-SSE1-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X86-SSE1-NEXT:    movss %xmm0, (%esp)
; X86-SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X86-SSE1-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
; X86-SSE1-NEXT:    fldl (%esp)
; X86-SSE1-NEXT:    addl $12, %esp
; X86-SSE1-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE1-NEXT:    retl
;
; X86-SSE2-LABEL: load_double_seq_cst:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    subl $12, %esp
; X86-SSE2-NEXT:    .cfi_def_cfa_offset 16
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE2-NEXT:    movlps %xmm0, (%esp)
; X86-SSE2-NEXT:    fldl (%esp)
; X86-SSE2-NEXT:    addl $12, %esp
; X86-SSE2-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: load_double_seq_cst:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    subl $12, %esp
; X86-AVX-NEXT:    .cfi_def_cfa_offset 16
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vmovlps %xmm0, (%esp)
; X86-AVX-NEXT:    fldl (%esp)
; X86-AVX-NEXT:    addl $12, %esp
; X86-AVX-NEXT:    .cfi_def_cfa_offset 4
; X86-AVX-NEXT:    retl
;
; X86-NOSSE-LABEL: load_double_seq_cst:
; X86-NOSSE:       # %bb.0:
; X86-NOSSE-NEXT:    subl $20, %esp
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 24
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    fildll (%eax)
; X86-NOSSE-NEXT:    fistpll {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NOSSE-NEXT:    movl %eax, (%esp)
; X86-NOSSE-NEXT:    fldl (%esp)
; X86-NOSSE-NEXT:    addl $20, %esp
; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 4
; X86-NOSSE-NEXT:    retl
;
; X64-SSE-LABEL: load_double_seq_cst:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: load_double_seq_cst:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X64-AVX-NEXT:    retq
  %v = load atomic double, ptr %fptr seq_cst, align 8
  ret double %v
}
