; RUN: llc -mtriple=i686-pc-win32 -mattr=+sse2 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=X86
; RUN: llc -mtriple=x86_64-pc-win32 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=X64

; Test integer arguments.

; Zero-argument function: decorated name gets an @@0 suffix and the i32 zero
; return is materialized with the xor-zeroing idiom.
define x86_vectorcallcc i32 @test_int_1() {
  ret i32 0
}
; CHECK-LABEL: {{^}}test_int_1@@0:
; CHECK: xorl %eax, %eax

; First integer argument arrives in %ecx on both targets; the @@N decoration
; (argument byte count) differs between the 32- and 64-bit targets.
define x86_vectorcallcc i32 @test_int_2(i32 inreg %a) {
  ret i32 %a
}
; X86-LABEL: {{^}}test_int_2@@4:
; X64-LABEL: {{^}}test_int_2@@8:
; CHECK: movl %ecx, %eax

; A single i64 argument decorates as @@8 on both targets; the truncated low
; 32 bits are read from %ecx.
define x86_vectorcallcc i32 @test_int_3(i64 inreg %a) {
  %at = trunc i64 %a to i32
  ret i32 %at
}
; X86-LABEL: {{^}}test_int_3@@8:
; X64-LABEL: {{^}}test_int_3@@8:
; CHECK: movl %ecx, %eax

; Two integer arguments land in %ecx/%edx (%rcx/%rdx on x64); the sum is
; formed with a flag-preserving lea.
define x86_vectorcallcc i32 @test_int_4(i32 inreg %a, i32 inreg %b) {
  %s = add i32 %a, %b
  ret i32 %s
}
; X86-LABEL: {{^}}test_int_4@@8:
; X86: leal (%ecx,%edx), %eax
; X64-LABEL: {{^}}test_int_4@@16:
; X64: leal (%rcx,%rdx), %eax

; The \01 prefix suppresses IR-level name mangling, so the emitted symbol has
; no @@N vectorcall decoration.
define x86_vectorcallcc i32 @"\01test_int_5"(i32, i32) {
  ret i32 0
}
; CHECK-LABEL: {{^}}test_int_5:

; FP arguments are passed in XMM registers on both targets; returning the
; second argument copies %xmm1 to the %xmm0 return register.
define x86_vectorcallcc double @test_fp_1(double %a, double %b) {
  ret double %b
}
; CHECK-LABEL: {{^}}test_fp_1@@16:
; CHECK: movaps %xmm1, %xmm0

; The seventh FP argument overflows the available XMM argument registers and
; is loaded back from a stack slot.
define x86_vectorcallcc double @test_fp_2(
    double, double, double, double, double, double, double %r) {
  ret double %r
}
; CHECK-LABEL: {{^}}test_fp_2@@56:
; CHECK: movsd {{[0-9]+\(%[re]sp\)}}, %xmm0

; A four-double aggregate is returned entirely in %xmm0-%xmm3.
define x86_vectorcallcc {double, double, double, double} @test_fp_3() {
  ret {double, double, double, double}
        { double 0.0, double 0.0, double 0.0, double 0.0 }
}
; CHECK-LABEL: {{^}}test_fp_3@@0:
; CHECK: xorps %xmm0
; CHECK: xorps %xmm1
; CHECK: xorps %xmm2
; CHECK: xorps %xmm3

; FIXME: Returning via x87 isn't compatible, but its hard to structure the
; tablegen any other way.
; Five doubles exceed the four XMM return registers; the fifth element spills
; to the x87 stack (fldz) per the check lines below.
define x86_vectorcallcc {double, double, double, double, double} @test_fp_4() {
  ret {double, double, double, double, double}
        { double 0.0, double 0.0, double 0.0, double 0.0, double 0.0 }
}
; CHECK-LABEL: {{^}}test_fp_4@@0:
; CHECK: fldz
; CHECK: xorps %xmm0
; CHECK: xorps %xmm1
; CHECK: xorps %xmm2
; CHECK: xorps %xmm3

; Vector arguments are passed in XMM registers; returning the second argument
; copies %xmm1 to %xmm0.
define x86_vectorcallcc <16 x i8> @test_vec_1(<16 x i8> %a, <16 x i8> %b) {
  ret <16 x i8> %b
}
; CHECK-LABEL: {{^}}test_vec_1@@32:
; CHECK: movaps %xmm1, %xmm0

; The last vector argument overflows the XMM argument registers and is passed
; indirectly: its address is loaded from the stack on x64 (into %rax) or
; arrives in %ecx on x86, then the vector is loaded through that pointer.
; NOTE(review): the x64 check below previously used lowercase "x64:", which is
; not a registered FileCheck prefix, so it was never actually verified.
define x86_vectorcallcc <16 x i8> @test_vec_2(
    double, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> %r) {
  ret <16 x i8> %r
}
; CHECK-LABEL: {{^}}test_vec_2@@104:
; X64:           movq    {{[0-9]*}}(%rsp), %rax
; CHECK:         movaps (%{{rax|ecx}}), %xmm0

; Homogeneous vector aggregate (HVA) types used by the mixed-argument tests.
%struct.HVA5 = type { <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float> }
%struct.HVA4 = type { <4 x float>, <4 x float>, <4 x float>, <4 x float> }
%struct.HVA3 = type { <4 x float>, <4 x float>, <4 x float> }
%struct.HVA2 = type { <4 x float>, <4 x float> }

; An HVA passed by value between integer arguments: the aggregate is spilled
; to a stack alloca and its second element (%xmm1's slot at offset 16) is
; reloaded as the return value.
define x86_vectorcallcc <4 x float> @test_mixed_1(i32 %a, %struct.HVA4 inreg %bb, i32 %c) {
entry:
  %b = alloca %struct.HVA4, align 16
  store %struct.HVA4 %bb, %struct.HVA4* %b, align 16
  %w1 = getelementptr inbounds %struct.HVA4, %struct.HVA4* %b, i32 0, i32 1
  %0 = load <4 x float>, <4 x float>* %w1, align 16
  ret <4 x float> %0
}
; CHECK-LABEL: test_mixed_1
; CHECK:       movaps	%xmm1, 16(%{{(e|r)}}sp)
; CHECK:       movaps	16(%{{(e|r)}}sp), %xmm0
; CHECK:       ret{{q|l}}

; Vector argument following an HVA and a pointer: per the checks, %c round-trips
; through the stack on x86, while on x64 it arrives in %xmm2 and is moved to
; the %xmm0 return register.
define x86_vectorcallcc <4 x float> @test_mixed_2(%struct.HVA4 inreg %a, %struct.HVA4* %b, <4 x float> %c) {
entry:
  %c.addr = alloca <4 x float>, align 16
  store <4 x float> %c, <4 x float>* %c.addr, align 16
  %0 = load <4 x float>, <4 x float>* %c.addr, align 16
  ret <4 x float> %0
}
; CHECK-LABEL: test_mixed_2
; X86:         movaps  %xmm0, (%esp)
; X64:         movaps  %xmm2, %xmm0
; CHECK:       ret{{[ql]}}

; Pointer argument after five vector arguments: the first element of *%f is
; loaded through a GPR (the check regex admits %rax/%rcx/%eax/%ecx).
define x86_vectorcallcc <4 x float> @test_mixed_3(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, <4 x float> %e, %struct.HVA2* %f) {
entry:
  %x = getelementptr inbounds %struct.HVA2, %struct.HVA2* %f, i32 0, i32 0
  %0 = load <4 x float>, <4 x float>* %x, align 16
  ret <4 x float> %0
}
; CHECK-LABEL: test_mixed_3
; CHECK:       movaps	(%{{[re][ac]}}x), %xmm0
; CHECK:       ret{{[ql]}}

; HVA-by-value followed by a pointer: the second element of *%bb is loaded at
; offset 16, with the pointer in %eax on x86 and %rdx on x64 per the checks.
define x86_vectorcallcc <4 x float> @test_mixed_4(%struct.HVA4 inreg %a, %struct.HVA2* %bb, <4 x float> %c) {
entry:
  %y4 = getelementptr inbounds %struct.HVA2, %struct.HVA2* %bb, i32 0, i32 1
  %0 = load <4 x float>, <4 x float>* %y4, align 16
  ret <4 x float> %0
}
; CHECK-LABEL: test_mixed_4
; X86:         movaps	16(%eax), %xmm0
; X64:         movaps	16(%rdx), %xmm0
; CHECK:       ret{{[ql]}}

; Trailing HVA2 by value: per the checks its second element arrives in %xmm5,
; is spilled to the stack alloca at offset 16, and reloaded as the return.
define x86_vectorcallcc <4 x float> @test_mixed_5(%struct.HVA3 inreg %a, %struct.HVA3* %b, <4 x float> %c, %struct.HVA2 inreg %dd) {
entry:
  %d = alloca %struct.HVA2, align 16
  store %struct.HVA2 %dd, %struct.HVA2* %d, align 16
  %y5 = getelementptr inbounds %struct.HVA2, %struct.HVA2* %d, i32 0, i32 1
  %0 = load <4 x float>, <4 x float>* %y5, align 16
  ret <4 x float> %0
}
; CHECK-LABEL: test_mixed_5
; CHECK:       movaps	%xmm5, 16(%{{(e|r)}}sp)
; CHECK:       movaps	16(%{{(e|r)}}sp), %xmm0
; CHECK:       ret{{[ql]}}

; HVA4 return value: the four <4 x float> elements are copied from the stack
; temporary into the %xmm0-%xmm3 return registers.
define x86_vectorcallcc %struct.HVA4 @test_mixed_6(%struct.HVA4 inreg %a, %struct.HVA4* %b) {
entry:
  %retval = alloca %struct.HVA4, align 16
  %0 = bitcast %struct.HVA4* %retval to i8*
  %1 = bitcast %struct.HVA4* %b to i8*
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 64, i32 16, i1 false)
  %2 = load %struct.HVA4, %struct.HVA4* %retval, align 16
  ret %struct.HVA4 %2
}
; CHECK-LABEL: test_mixed_6
; CHECK:       movaps	(%{{[re]}}sp), %xmm0
; CHECK:       movaps	16(%{{[re]}}sp), %xmm1
; CHECK:       movaps	32(%{{[re]}}sp), %xmm2
; CHECK:       movaps	48(%{{[re]}}sp), %xmm3
; CHECK:       ret{{[ql]}}

; Memory intrinsics used by the aggregate-copy tests above and below
; (old-style signatures carrying an explicit i32 alignment operand).
declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1)
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i32, i1)

; HVA5 is too large for register return, so it goes out through the sret
; pointer (%rcx on x64, %eax on x86 per the checks); x64 also returns that
; pointer in %rax.
define x86_vectorcallcc void @test_mixed_7(%struct.HVA5* noalias sret %agg.result) {
entry:
  %a = alloca %struct.HVA5, align 16
  %0 = bitcast %struct.HVA5* %a to i8*
  call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 80, i32 16, i1 false)
  %1 = bitcast %struct.HVA5* %agg.result to i8*
  %2 = bitcast %struct.HVA5* %a to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 80, i32 16, i1 false)
  ret void
}
; CHECK-LABEL: test_mixed_7
; CHECK:       movaps	%xmm{{[0-9]}}, 64(%{{rcx|eax}})
; CHECK:       movaps	%xmm{{[0-9]}}, 48(%{{rcx|eax}})
; CHECK:       movaps	%xmm{{[0-9]}}, 32(%{{rcx|eax}})
; CHECK:       movaps	%xmm{{[0-9]}}, 16(%{{rcx|eax}})
; CHECK:       movaps	%xmm{{[0-9]}}, (%{{rcx|eax}})
; X64:         mov{{[ql]}}	%rcx, %rax
; CHECK:       ret{{[ql]}}

; Vector argument after an interleaved integer: per the checks %f arrives in
; %xmm4 on x86 but %xmm5 on x64, and is moved to the %xmm0 return register.
define x86_vectorcallcc <4 x float> @test_mixed_8(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, i32 %e, <4 x float> %f) {
entry:
  %f.addr = alloca <4 x float>, align 16
  store <4 x float> %f, <4 x float>* %f.addr, align 16
  %0 = load <4 x float>, <4 x float>* %f.addr, align 16
  ret <4 x float> %0
}
; CHECK-LABEL: test_mixed_8
; X86:         movaps	%xmm4, %xmm0
; X64:         movaps	%xmm5, %xmm0
; CHECK:       ret{{[ql]}}

; Homogeneous float aggregate (HFA) of four doubles, plus the external callee
; used by the call-lowering test below.
%struct.HFA4 = type { double, double, double, double }
declare x86_vectorcallcc double @test_mixed_9_callee(%struct.HFA4 %x, double %y)

; Call lowering: the incoming HFA in %xmm0-%xmm3 is shifted up one register
; (xmm3->xmm4, ..., xmm0->xmm1) to make room for the callee's argument layout.
; NOTE(review): the x86 check below previously used the unregistered prefix
; "X32" and the non-existent mnemonic "movasd", so it was never verified by
; FileCheck; it now uses the registered X86 prefix with a movaps/movapd regex
; mirroring the X64 line.
define x86_vectorcallcc double @test_mixed_9_caller(%struct.HFA4 inreg %b) {
entry:
  %call = call x86_vectorcallcc double @test_mixed_9_callee(%struct.HFA4 inreg %b, double 3.000000e+00)
  %add = fadd double 1.000000e+00, %call
  ret double %add
}
; CHECK-LABEL: test_mixed_9_caller
; CHECK:       movaps  %xmm3, %xmm4
; CHECK:       movaps  %xmm2, %xmm3
; CHECK:       movaps  %xmm1, %xmm2
; X86:         movap{{d|s}}  %xmm0, %xmm1
; X64:         movap{{d|s}}  %xmm5, %xmm1
; CHECK:       call{{l|q}}   test_mixed_9_callee@@40
; CHECK:       addsd   {{.*}}, %xmm0
; CHECK:       ret{{l|q}}
