1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=x86_64-linux-gnu < %s  | FileCheck %s
3
4; Basic test coverage for FREM
5
; frem on scalar half, result stored through %p3.
; Per the expected assembly below, each operand is widened with a call to
; __extendhfsf2, the remainder is computed by fmodf, and the result is
; narrowed with __truncsfhf2 before a 16-bit store (pextrw + movw).
6define void @frem_f16(half %a0, half %a1, ptr%p3) nounwind {
7; CHECK-LABEL: frem_f16:
8; CHECK:       # %bb.0:
9; CHECK-NEXT:    pushq %rbx
10; CHECK-NEXT:    subq $16, %rsp
11; CHECK-NEXT:    movq %rdi, %rbx
12; CHECK-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
13; CHECK-NEXT:    movaps %xmm1, %xmm0
14; CHECK-NEXT:    callq __extendhfsf2@PLT
15; CHECK-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
16; CHECK-NEXT:    movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
17; CHECK-NEXT:    # xmm0 = mem[0],zero,zero,zero
18; CHECK-NEXT:    callq __extendhfsf2@PLT
19; CHECK-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
20; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
21; CHECK-NEXT:    callq fmodf@PLT
22; CHECK-NEXT:    callq __truncsfhf2@PLT
23; CHECK-NEXT:    pextrw $0, %xmm0, %eax
24; CHECK-NEXT:    movw %ax, (%rbx)
25; CHECK-NEXT:    addq $16, %rsp
26; CHECK-NEXT:    popq %rbx
27; CHECK-NEXT:    retq
28  %frem = frem half %a0, %a1
29  store half %frem, ptr%p3
30  ret void
31}
32
; frem on scalar float, result stored through %p3.
; The expected assembly is a single fmodf libcall; the destination pointer is
; kept in %rbx across the call and the result is written with movss.
33define void @frem_f32(float %a0, float %a1, ptr%p3) nounwind {
34; CHECK-LABEL: frem_f32:
35; CHECK:       # %bb.0:
36; CHECK-NEXT:    pushq %rbx
37; CHECK-NEXT:    movq %rdi, %rbx
38; CHECK-NEXT:    callq fmodf@PLT
39; CHECK-NEXT:    movss %xmm0, (%rbx)
40; CHECK-NEXT:    popq %rbx
41; CHECK-NEXT:    retq
42  %frem = frem float %a0, %a1
43  store float %frem, ptr%p3
44  ret void
45}
46
; frem on scalar double, result stored through %p3.
; The expected assembly is a single fmod libcall; the destination pointer is
; kept in %rbx across the call and the result is written with movsd.
47define void @frem_f64(double %a0, double %a1, ptr%p3) nounwind {
48; CHECK-LABEL: frem_f64:
49; CHECK:       # %bb.0:
50; CHECK-NEXT:    pushq %rbx
51; CHECK-NEXT:    movq %rdi, %rbx
52; CHECK-NEXT:    callq fmod@PLT
53; CHECK-NEXT:    movsd %xmm0, (%rbx)
54; CHECK-NEXT:    popq %rbx
55; CHECK-NEXT:    retq
56  %frem = frem double %a0, %a1
57  store double %frem, ptr%p3
58  ret void
59}
60
; frem on scalar x86_fp80, result stored through %p3.
; In the expected assembly both operands are loaded from the stack with fldt
; and written back to the bottom of the new 32-byte frame with fstpt before
; the fmodl libcall; the x87 result is then stored to (%rbx) with fstpt.
61define void @frem_f80(x86_fp80 %a0, x86_fp80 %a1, ptr%p3) nounwind {
62; CHECK-LABEL: frem_f80:
63; CHECK:       # %bb.0:
64; CHECK-NEXT:    pushq %rbx
65; CHECK-NEXT:    subq $32, %rsp
66; CHECK-NEXT:    movq %rdi, %rbx
67; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
68; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
69; CHECK-NEXT:    fstpt {{[0-9]+}}(%rsp)
70; CHECK-NEXT:    fstpt (%rsp)
71; CHECK-NEXT:    callq fmodl@PLT
72; CHECK-NEXT:    fstpt (%rbx)
73; CHECK-NEXT:    addq $32, %rsp
74; CHECK-NEXT:    popq %rbx
75; CHECK-NEXT:    retq
76  %frem = frem x86_fp80 %a0, %a1
77  store x86_fp80 %frem, ptr%p3
78  ret void
79}
80
; frem on scalar fp128, result stored through %p3.
; The expected assembly is a single libcall whose result is taken from %xmm0
; and written with a 16-byte movaps.
; NOTE(review): the libcall recorded here is fmodl, the same symbol the
; x86_fp80 test in this file expects. Confirm that is the intended libcall
; for fp128 on this target; the assertions are autogenerated and only record
; what llc currently emits.
81define void @frem_f128(fp128 %a0, fp128 %a1, ptr%p3) nounwind {
82; CHECK-LABEL: frem_f128:
83; CHECK:       # %bb.0:
84; CHECK-NEXT:    pushq %rbx
85; CHECK-NEXT:    movq %rdi, %rbx
86; CHECK-NEXT:    callq fmodl@PLT
87; CHECK-NEXT:    movaps %xmm0, (%rbx)
88; CHECK-NEXT:    popq %rbx
89; CHECK-NEXT:    retq
90  %frem = frem fp128 %a0, %a1
91  store fp128 %frem, ptr%p3
92  ret void
93}
94
; frem on <16 x float>, result stored through %p3.
; The expected assembly scalarizes the operation: all eight incoming xmm
; registers are spilled, then fmodf is called sixteen times (once per lane,
; lanes selected with shufps/movhlps). Each 128-bit quarter is rebuilt with
; unpcklps/unpcklpd and the result is written as four 16-byte movaps stores
; at offsets 0, 16, 32 and 48 from %rbx.
95define void @frem_v16f32(<16 x float> %a0, <16 x float> %a1, ptr%p3) nounwind {
96; CHECK-LABEL: frem_v16f32:
97; CHECK:       # %bb.0:
98; CHECK-NEXT:    pushq %rbx
99; CHECK-NEXT:    subq $160, %rsp
100; CHECK-NEXT:    movq %rdi, %rbx
101; CHECK-NEXT:    movaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
102; CHECK-NEXT:    movaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
103; CHECK-NEXT:    movaps %xmm5, (%rsp) # 16-byte Spill
104; CHECK-NEXT:    movaps %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
105; CHECK-NEXT:    movaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
106; CHECK-NEXT:    movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
107; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
108; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
109; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
110; CHECK-NEXT:    movaps %xmm4, %xmm1
111; CHECK-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,3],xmm4[3,3]
112; CHECK-NEXT:    callq fmodf@PLT
113; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
114; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
115; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
116; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
117; CHECK-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
118; CHECK-NEXT:    callq fmodf@PLT
119; CHECK-NEXT:    unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
120; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
121; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
122; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
123; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
124; CHECK-NEXT:    callq fmodf@PLT
125; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
126; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
127; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
128; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
129; CHECK-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
130; CHECK-NEXT:    callq fmodf@PLT
131; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
132; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
133; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
134; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
135; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
136; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
137; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
138; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
139; CHECK-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,3,3,3]
140; CHECK-NEXT:    callq fmodf@PLT
141; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
142; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
143; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
144; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
145; CHECK-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
146; CHECK-NEXT:    callq fmodf@PLT
147; CHECK-NEXT:    unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
148; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
149; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
150; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
151; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
152; CHECK-NEXT:    callq fmodf@PLT
153; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
154; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
155; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
156; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
157; CHECK-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
158; CHECK-NEXT:    callq fmodf@PLT
159; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
160; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
161; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
162; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
163; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
164; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
165; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
166; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
167; CHECK-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,3,3,3]
168; CHECK-NEXT:    callq fmodf@PLT
169; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
170; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
171; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
172; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
173; CHECK-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
174; CHECK-NEXT:    callq fmodf@PLT
175; CHECK-NEXT:    unpcklps (%rsp), %xmm0 # 16-byte Folded Reload
176; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
177; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
178; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
179; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
180; CHECK-NEXT:    callq fmodf@PLT
181; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
182; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
183; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
184; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
185; CHECK-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
186; CHECK-NEXT:    callq fmodf@PLT
187; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
188; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
189; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
190; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
191; CHECK-NEXT:    movaps %xmm1, (%rsp) # 16-byte Spill
192; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
193; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
194; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
195; CHECK-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,3,3,3]
196; CHECK-NEXT:    callq fmodf@PLT
197; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
198; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
199; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
200; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
201; CHECK-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
202; CHECK-NEXT:    callq fmodf@PLT
203; CHECK-NEXT:    unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
204; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
205; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
206; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
207; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
208; CHECK-NEXT:    callq fmodf@PLT
209; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
210; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
211; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
212; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
213; CHECK-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
214; CHECK-NEXT:    callq fmodf@PLT
215; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
216; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
217; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
218; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
219; CHECK-NEXT:    movaps %xmm1, 48(%rbx)
220; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
221; CHECK-NEXT:    movaps %xmm0, 32(%rbx)
222; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
223; CHECK-NEXT:    movaps %xmm0, 16(%rbx)
224; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
225; CHECK-NEXT:    movaps %xmm0, (%rbx)
226; CHECK-NEXT:    addq $160, %rsp
227; CHECK-NEXT:    popq %rbx
228; CHECK-NEXT:    retq
229  %frem = frem <16 x float> %a0, %a1
230  store <16 x float> %frem, ptr%p3
231  ret void
232}
233
; frem on <8 x float>, result stored through %p3.
; The expected assembly scalarizes the operation: the four incoming xmm
; registers are spilled, fmodf is called eight times (once per lane), each
; 128-bit half is rebuilt with unpcklps/unpcklpd, and the result is written
; as two 16-byte movaps stores at offsets 0 and 16 from %rbx.
234define void @frem_v8f32(<8 x float> %a0, <8 x float> %a1, ptr%p3) nounwind {
235; CHECK-LABEL: frem_v8f32:
236; CHECK:       # %bb.0:
237; CHECK-NEXT:    pushq %rbx
238; CHECK-NEXT:    subq $96, %rsp
239; CHECK-NEXT:    movq %rdi, %rbx
240; CHECK-NEXT:    movaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
241; CHECK-NEXT:    movaps %xmm2, (%rsp) # 16-byte Spill
242; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
243; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
244; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
245; CHECK-NEXT:    movaps %xmm2, %xmm1
246; CHECK-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,3],xmm2[3,3]
247; CHECK-NEXT:    callq fmodf@PLT
248; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
249; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
250; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
251; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
252; CHECK-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
253; CHECK-NEXT:    callq fmodf@PLT
254; CHECK-NEXT:    unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
255; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
256; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
257; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
258; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
259; CHECK-NEXT:    callq fmodf@PLT
260; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
261; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
262; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
263; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
264; CHECK-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
265; CHECK-NEXT:    callq fmodf@PLT
266; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
267; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
268; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
269; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
270; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
271; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
272; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
273; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
274; CHECK-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,3,3,3]
275; CHECK-NEXT:    callq fmodf@PLT
276; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
277; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
278; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
279; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
280; CHECK-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
281; CHECK-NEXT:    callq fmodf@PLT
282; CHECK-NEXT:    unpcklps (%rsp), %xmm0 # 16-byte Folded Reload
283; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
284; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
285; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
286; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
287; CHECK-NEXT:    callq fmodf@PLT
288; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
289; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
290; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
291; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
292; CHECK-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
293; CHECK-NEXT:    callq fmodf@PLT
294; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
295; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
296; CHECK-NEXT:    unpcklpd (%rsp), %xmm1 # 16-byte Folded Reload
297; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
298; CHECK-NEXT:    movaps %xmm1, 16(%rbx)
299; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
300; CHECK-NEXT:    movaps %xmm0, (%rbx)
301; CHECK-NEXT:    addq $96, %rsp
302; CHECK-NEXT:    popq %rbx
303; CHECK-NEXT:    retq
304  %frem = frem <8 x float> %a0, %a1
305  store <8 x float> %frem, ptr%p3
306  ret void
307}
308
; frem on <4 x float>, result stored through %p3.
; The expected assembly scalarizes the operation: both operands are spilled,
; fmodf is called four times (once per lane), the lanes are recombined with
; unpcklps/unpcklpd, and the result is written with one 16-byte movaps store.
309define void @frem_v4f32(<4 x float> %a0, <4 x float> %a1, ptr%p3) nounwind {
310; CHECK-LABEL: frem_v4f32:
311; CHECK:       # %bb.0:
312; CHECK-NEXT:    pushq %rbx
313; CHECK-NEXT:    subq $64, %rsp
314; CHECK-NEXT:    movq %rdi, %rbx
315; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
316; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
317; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
318; CHECK-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,3,3,3]
319; CHECK-NEXT:    callq fmodf@PLT
320; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
321; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
322; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
323; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
324; CHECK-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
325; CHECK-NEXT:    callq fmodf@PLT
326; CHECK-NEXT:    unpcklps (%rsp), %xmm0 # 16-byte Folded Reload
327; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
328; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
329; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
330; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
331; CHECK-NEXT:    callq fmodf@PLT
332; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
333; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
334; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
335; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
336; CHECK-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
337; CHECK-NEXT:    callq fmodf@PLT
338; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
339; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
340; CHECK-NEXT:    unpcklpd (%rsp), %xmm1 # 16-byte Folded Reload
341; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
342; CHECK-NEXT:    movaps %xmm1, (%rbx)
343; CHECK-NEXT:    addq $64, %rsp
344; CHECK-NEXT:    popq %rbx
345; CHECK-NEXT:    retq
346  %frem = frem <4 x float> %a0, %a1
347  store <4 x float> %frem, ptr%p3
348  ret void
349}
350
; frem on <8 x double>, result stored through %p3.
; The expected assembly scalarizes the operation: all eight incoming xmm
; registers are spilled, fmod is called eight times (low lane directly, high
; lane extracted with movhlps), each pair of results is recombined with
; movlhps, and the result is written as four 16-byte movaps stores at offsets
; 0, 16, 32 and 48 from %rbx.
351define void @frem_v8f64(<8 x double> %a0, <8 x double> %a1, ptr%p3) nounwind {
352; CHECK-LABEL: frem_v8f64:
353; CHECK:       # %bb.0:
354; CHECK-NEXT:    pushq %rbx
355; CHECK-NEXT:    subq $144, %rsp
356; CHECK-NEXT:    movq %rdi, %rbx
357; CHECK-NEXT:    movaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
358; CHECK-NEXT:    movaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
359; CHECK-NEXT:    movaps %xmm5, (%rsp) # 16-byte Spill
360; CHECK-NEXT:    movaps %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
361; CHECK-NEXT:    movaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
362; CHECK-NEXT:    movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
363; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
364; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
365; CHECK-NEXT:    movaps %xmm4, %xmm1
366; CHECK-NEXT:    callq fmod@PLT
367; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
368; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
369; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
370; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
371; CHECK-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
372; CHECK-NEXT:    callq fmod@PLT
373; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
374; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
375; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
376; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
377; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
378; CHECK-NEXT:    callq fmod@PLT
379; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
380; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
381; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
382; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
383; CHECK-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
384; CHECK-NEXT:    callq fmod@PLT
385; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
386; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
387; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
388; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
389; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
390; CHECK-NEXT:    callq fmod@PLT
391; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
392; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
393; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
394; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
395; CHECK-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
396; CHECK-NEXT:    callq fmod@PLT
397; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
398; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
399; CHECK-NEXT:    movaps %xmm1, (%rsp) # 16-byte Spill
400; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
401; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
402; CHECK-NEXT:    callq fmod@PLT
403; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
404; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
405; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
406; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
407; CHECK-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
408; CHECK-NEXT:    callq fmod@PLT
409; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
410; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
411; CHECK-NEXT:    movaps %xmm1, 48(%rbx)
412; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
413; CHECK-NEXT:    movaps %xmm0, 32(%rbx)
414; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
415; CHECK-NEXT:    movaps %xmm0, 16(%rbx)
416; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
417; CHECK-NEXT:    movaps %xmm0, (%rbx)
418; CHECK-NEXT:    addq $144, %rsp
419; CHECK-NEXT:    popq %rbx
420; CHECK-NEXT:    retq
421  %frem = frem <8 x double> %a0, %a1
422  store <8 x double> %frem, ptr%p3
423  ret void
424}
425
; frem on <4 x double>, result stored through %p3.
; The expected assembly scalarizes the operation: the four incoming xmm
; registers are spilled, fmod is called four times (low lane directly, high
; lane extracted with movhlps), each pair of results is recombined with
; movlhps, and the result is written as two 16-byte movaps stores at offsets
; 0 and 16 from %rbx.
426define void @frem_v4f64(<4 x double> %a0, <4 x double> %a1, ptr%p3) nounwind {
427; CHECK-LABEL: frem_v4f64:
428; CHECK:       # %bb.0:
429; CHECK-NEXT:    pushq %rbx
430; CHECK-NEXT:    subq $80, %rsp
431; CHECK-NEXT:    movq %rdi, %rbx
432; CHECK-NEXT:    movaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
433; CHECK-NEXT:    movaps %xmm2, (%rsp) # 16-byte Spill
434; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
435; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
436; CHECK-NEXT:    movaps %xmm2, %xmm1
437; CHECK-NEXT:    callq fmod@PLT
438; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
439; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
440; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
441; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
442; CHECK-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
443; CHECK-NEXT:    callq fmod@PLT
444; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
445; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
446; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
447; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
448; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
449; CHECK-NEXT:    callq fmod@PLT
450; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
451; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
452; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
453; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
454; CHECK-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
455; CHECK-NEXT:    callq fmod@PLT
456; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
457; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
458; CHECK-NEXT:    movaps %xmm1, 16(%rbx)
459; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
460; CHECK-NEXT:    movaps %xmm0, (%rbx)
461; CHECK-NEXT:    addq $80, %rsp
462; CHECK-NEXT:    popq %rbx
463; CHECK-NEXT:    retq
464  %frem = frem <4 x double> %a0, %a1
465  store <4 x double> %frem, ptr%p3
466  ret void
467}
468
; frem on <2 x double>, result stored through %p3.
; The expected assembly scalarizes the operation: both operands are spilled,
; fmod is called twice (low lane directly, high lane extracted with movhlps),
; the two results are recombined with movlhps, and the result is written
; with one 16-byte movaps store.
469define void @frem_v2f64(<2 x double> %a0, <2 x double> %a1, ptr%p3) nounwind {
470; CHECK-LABEL: frem_v2f64:
471; CHECK:       # %bb.0:
472; CHECK-NEXT:    pushq %rbx
473; CHECK-NEXT:    subq $48, %rsp
474; CHECK-NEXT:    movq %rdi, %rbx
475; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
476; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
477; CHECK-NEXT:    callq fmod@PLT
478; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
479; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
480; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
481; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
482; CHECK-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
483; CHECK-NEXT:    callq fmod@PLT
484; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
485; CHECK-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
486; CHECK-NEXT:    movaps %xmm1, (%rbx)
487; CHECK-NEXT:    addq $48, %rsp
488; CHECK-NEXT:    popq %rbx
489; CHECK-NEXT:    retq
490  %frem = frem <2 x double> %a0, %a1
491  store <2 x double> %frem, ptr%p3
492  ret void
493}
494
495define void @frem_v32f16(<32 x half> %a0, <32 x half> %a1, ptr%p3) nounwind {
496; CHECK-LABEL: frem_v32f16:
497; CHECK:       # %bb.0:
498; CHECK-NEXT:    pushq %rbx
499; CHECK-NEXT:    subq $176, %rsp
500; CHECK-NEXT:    movq %rdi, %rbx
501; CHECK-NEXT:    movaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
502; CHECK-NEXT:    movaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
503; CHECK-NEXT:    movaps %xmm5, (%rsp) # 16-byte Spill
504; CHECK-NEXT:    movdqa %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
505; CHECK-NEXT:    movaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
506; CHECK-NEXT:    movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
507; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
508; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
509; CHECK-NEXT:    movdqa %xmm4, %xmm0
510; CHECK-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
511; CHECK-NEXT:    callq __extendhfsf2@PLT
512; CHECK-NEXT:    movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
513; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
514; CHECK-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
515; CHECK-NEXT:    callq __extendhfsf2@PLT
516; CHECK-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
517; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
518; CHECK-NEXT:    callq fmodf@PLT
519; CHECK-NEXT:    callq __truncsfhf2@PLT
520; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
521; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
522; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
523; CHECK-NEXT:    callq __extendhfsf2@PLT
524; CHECK-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
525; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
526; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
527; CHECK-NEXT:    callq __extendhfsf2@PLT
528; CHECK-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
529; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
530; CHECK-NEXT:    callq fmodf@PLT
531; CHECK-NEXT:    callq __truncsfhf2@PLT
532; CHECK-NEXT:    punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
533; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
534; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
535; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
536; CHECK-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
537; CHECK-NEXT:    callq __extendhfsf2@PLT
538; CHECK-NEXT:    movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
539; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
540; CHECK-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
541; CHECK-NEXT:    callq __extendhfsf2@PLT
542; CHECK-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
543; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
544; CHECK-NEXT:    callq fmodf@PLT
545; CHECK-NEXT:    callq __truncsfhf2@PLT
546; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
547; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
548; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
549; CHECK-NEXT:    callq __extendhfsf2@PLT
550; CHECK-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
551; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
552; CHECK-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1,1]
553; CHECK-NEXT:    callq __extendhfsf2@PLT
554; CHECK-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
555; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
556; CHECK-NEXT:    callq fmodf@PLT
557; CHECK-NEXT:    callq __truncsfhf2@PLT
558; CHECK-NEXT:    punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
559; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
560; CHECK-NEXT:    punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
561; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
562; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
563; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
564; CHECK-NEXT:    psrlq $48, %xmm0
565; CHECK-NEXT:    callq __extendhfsf2@PLT
566; CHECK-NEXT:    movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
567; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
568; CHECK-NEXT:    psrlq $48, %xmm0
569; CHECK-NEXT:    callq __extendhfsf2@PLT
570; CHECK-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
571; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
572; CHECK-NEXT:    callq fmodf@PLT
573; CHECK-NEXT:    callq __truncsfhf2@PLT
574; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
575; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
576; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
577; CHECK-NEXT:    callq __extendhfsf2@PLT
578; CHECK-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
579; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
580; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
581; CHECK-NEXT:    callq __extendhfsf2@PLT
582; CHECK-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
583; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
584; CHECK-NEXT:    callq fmodf@PLT
585; CHECK-NEXT:    callq __truncsfhf2@PLT
586; CHECK-NEXT:    punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
587; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
588; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
589; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
590; CHECK-NEXT:    callq __extendhfsf2@PLT
591; CHECK-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
592; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
593; CHECK-NEXT:    callq __extendhfsf2@PLT
594; CHECK-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
595; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
596; CHECK-NEXT:    callq fmodf@PLT
597; CHECK-NEXT:    callq __truncsfhf2@PLT
598; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
599; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
600; CHECK-NEXT:    psrld $16, %xmm0
601; CHECK-NEXT:    callq __extendhfsf2@PLT
602; CHECK-NEXT:    movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
603; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
604; CHECK-NEXT:    psrld $16, %xmm0
605; CHECK-NEXT:    callq __extendhfsf2@PLT
606; CHECK-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
607; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
608; CHECK-NEXT:    callq fmodf@PLT
609; CHECK-NEXT:    callq __truncsfhf2@PLT
610; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
611; CHECK-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
612; CHECK-NEXT:    punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
613; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
614; CHECK-NEXT:    punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
615; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
616; CHECK-NEXT:    movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
617; CHECK-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
618; CHECK-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
619; CHECK-NEXT:    callq __extendhfsf2@PLT
620; CHECK-NEXT:    movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
621; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
622; CHECK-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
623; CHECK-NEXT:    callq __extendhfsf2@PLT
624; CHECK-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
625; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
626; CHECK-NEXT:    callq fmodf@PLT
627; CHECK-NEXT:    callq __truncsfhf2@PLT
628; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
629; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
630; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
631; CHECK-NEXT:    callq __extendhfsf2@PLT
632; CHECK-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
633; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
634; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
635; CHECK-NEXT:    callq __extendhfsf2@PLT
636; CHECK-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
637; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
638; CHECK-NEXT:    callq fmodf@PLT
639; CHECK-NEXT:    callq __truncsfhf2@PLT
640; CHECK-NEXT:    punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
641; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
642; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
643; CHECK-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
644; CHECK-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
645; CHECK-NEXT:    callq __extendhfsf2@PLT
646; CHECK-NEXT:    movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
647; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
648; CHECK-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
649; CHECK-NEXT:    callq __extendhfsf2@PLT
650; CHECK-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
651; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
652; CHECK-NEXT:    callq fmodf@PLT
653; CHECK-NEXT:    callq __truncsfhf2@PLT
654; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
655; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
656; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
657; CHECK-NEXT:    callq __extendhfsf2@PLT
658; CHECK-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
659; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
660; CHECK-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1,1]
661; CHECK-NEXT:    callq __extendhfsf2@PLT
662; CHECK-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
663; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
664; CHECK-NEXT:    callq fmodf@PLT
665; CHECK-NEXT:    callq __truncsfhf2@PLT
666; CHECK-NEXT:    punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
667; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
668; CHECK-NEXT:    punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
669; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
670; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
671; CHECK-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
672; CHECK-NEXT:    psrlq $48, %xmm0
673; CHECK-NEXT:    callq __extendhfsf2@PLT
674; CHECK-NEXT:    movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
675; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
676; CHECK-NEXT:    psrlq $48, %xmm0
677; CHECK-NEXT:    callq __extendhfsf2@PLT
678; CHECK-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
679; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
680; CHECK-NEXT:    callq fmodf@PLT
681; CHECK-NEXT:    callq __truncsfhf2@PLT
682; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
683; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
684; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
685; CHECK-NEXT:    callq __extendhfsf2@PLT
686; CHECK-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
687; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
688; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
689; CHECK-NEXT:    callq __extendhfsf2@PLT
690; CHECK-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
691; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
692; CHECK-NEXT:    callq fmodf@PLT
693; CHECK-NEXT:    callq __truncsfhf2@PLT
694; CHECK-NEXT:    punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
695; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
696; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
697; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
698; CHECK-NEXT:    callq __extendhfsf2@PLT
699; CHECK-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
700; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
701; CHECK-NEXT:    callq __extendhfsf2@PLT
702; CHECK-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
703; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
704; CHECK-NEXT:    callq fmodf@PLT
705; CHECK-NEXT:    callq __truncsfhf2@PLT
706; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
707; CHECK-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
708; CHECK-NEXT:    psrld $16, %xmm0
709; CHECK-NEXT:    callq __extendhfsf2@PLT
710; CHECK-NEXT:    movd %xmm0, (%rsp) # 4-byte Folded Spill
711; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
712; CHECK-NEXT:    psrld $16, %xmm0
713; CHECK-NEXT:    callq __extendhfsf2@PLT
714; CHECK-NEXT:    movss (%rsp), %xmm1 # 4-byte Reload
715; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
716; CHECK-NEXT:    callq fmodf@PLT
717; CHECK-NEXT:    callq __truncsfhf2@PLT
718; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
719; CHECK-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
720; CHECK-NEXT:    punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
721; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
722; CHECK-NEXT:    punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
723; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
724; CHECK-NEXT:    movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
725; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
726; CHECK-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
727; CHECK-NEXT:    callq __extendhfsf2@PLT
728; CHECK-NEXT:    movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
729; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
730; CHECK-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
731; CHECK-NEXT:    callq __extendhfsf2@PLT
732; CHECK-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
733; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
734; CHECK-NEXT:    callq fmodf@PLT
735; CHECK-NEXT:    callq __truncsfhf2@PLT
736; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
737; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
738; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
739; CHECK-NEXT:    callq __extendhfsf2@PLT
740; CHECK-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
741; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
742; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
743; CHECK-NEXT:    callq __extendhfsf2@PLT
744; CHECK-NEXT:    movss (%rsp), %xmm1 # 4-byte Reload
745; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
746; CHECK-NEXT:    callq fmodf@PLT
747; CHECK-NEXT:    callq __truncsfhf2@PLT
748; CHECK-NEXT:    punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
749; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
750; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
751; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
752; CHECK-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
753; CHECK-NEXT:    callq __extendhfsf2@PLT
754; CHECK-NEXT:    movd %xmm0, (%rsp) # 4-byte Folded Spill
755; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
756; CHECK-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
757; CHECK-NEXT:    callq __extendhfsf2@PLT
758; CHECK-NEXT:    movss (%rsp), %xmm1 # 4-byte Reload
759; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
760; CHECK-NEXT:    callq fmodf@PLT
761; CHECK-NEXT:    callq __truncsfhf2@PLT
762; CHECK-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
763; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
764; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
765; CHECK-NEXT:    callq __extendhfsf2@PLT
766; CHECK-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
767; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
768; CHECK-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1,1]
769; CHECK-NEXT:    callq __extendhfsf2@PLT
770; CHECK-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
771; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
772; CHECK-NEXT:    callq fmodf@PLT
773; CHECK-NEXT:    callq __truncsfhf2@PLT
774; CHECK-NEXT:    punpcklwd (%rsp), %xmm0 # 16-byte Folded Reload
775; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
776; CHECK-NEXT:    punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
777; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
778; CHECK-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
779; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
780; CHECK-NEXT:    psrlq $48, %xmm0
781; CHECK-NEXT:    callq __extendhfsf2@PLT
782; CHECK-NEXT:    movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
783; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
784; CHECK-NEXT:    psrlq $48, %xmm0
785; CHECK-NEXT:    callq __extendhfsf2@PLT
786; CHECK-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
787; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
788; CHECK-NEXT:    callq fmodf@PLT
789; CHECK-NEXT:    callq __truncsfhf2@PLT
790; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
791; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
792; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
793; CHECK-NEXT:    callq __extendhfsf2@PLT
794; CHECK-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
795; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
796; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
797; CHECK-NEXT:    callq __extendhfsf2@PLT
798; CHECK-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
799; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
800; CHECK-NEXT:    callq fmodf@PLT
801; CHECK-NEXT:    callq __truncsfhf2@PLT
802; CHECK-NEXT:    punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
803; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
804; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
805; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
806; CHECK-NEXT:    callq __extendhfsf2@PLT
807; CHECK-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
808; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
809; CHECK-NEXT:    callq __extendhfsf2@PLT
810; CHECK-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
811; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
812; CHECK-NEXT:    callq fmodf@PLT
813; CHECK-NEXT:    callq __truncsfhf2@PLT
814; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
815; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
816; CHECK-NEXT:    psrld $16, %xmm0
817; CHECK-NEXT:    callq __extendhfsf2@PLT
818; CHECK-NEXT:    movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
819; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
820; CHECK-NEXT:    psrld $16, %xmm0
821; CHECK-NEXT:    callq __extendhfsf2@PLT
822; CHECK-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
823; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
824; CHECK-NEXT:    callq fmodf@PLT
825; CHECK-NEXT:    callq __truncsfhf2@PLT
826; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
827; CHECK-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
828; CHECK-NEXT:    punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
829; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
830; CHECK-NEXT:    punpcklqdq (%rsp), %xmm1 # 16-byte Folded Reload
831; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
832; CHECK-NEXT:    movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
833; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
834; CHECK-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
835; CHECK-NEXT:    callq __extendhfsf2@PLT
836; CHECK-NEXT:    movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
837; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
838; CHECK-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
839; CHECK-NEXT:    callq __extendhfsf2@PLT
840; CHECK-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
841; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
842; CHECK-NEXT:    callq fmodf@PLT
843; CHECK-NEXT:    callq __truncsfhf2@PLT
844; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
845; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
846; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
847; CHECK-NEXT:    callq __extendhfsf2@PLT
848; CHECK-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
849; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
850; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
851; CHECK-NEXT:    callq __extendhfsf2@PLT
852; CHECK-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
853; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
854; CHECK-NEXT:    callq fmodf@PLT
855; CHECK-NEXT:    callq __truncsfhf2@PLT
856; CHECK-NEXT:    punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
857; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
858; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
859; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
860; CHECK-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
861; CHECK-NEXT:    callq __extendhfsf2@PLT
862; CHECK-NEXT:    movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
863; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
864; CHECK-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
865; CHECK-NEXT:    callq __extendhfsf2@PLT
866; CHECK-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
867; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
868; CHECK-NEXT:    callq fmodf@PLT
869; CHECK-NEXT:    callq __truncsfhf2@PLT
870; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
871; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
872; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
873; CHECK-NEXT:    callq __extendhfsf2@PLT
874; CHECK-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
875; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
876; CHECK-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1,1]
877; CHECK-NEXT:    callq __extendhfsf2@PLT
878; CHECK-NEXT:    movss (%rsp), %xmm1 # 4-byte Reload
879; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
880; CHECK-NEXT:    callq fmodf@PLT
881; CHECK-NEXT:    callq __truncsfhf2@PLT
882; CHECK-NEXT:    punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
883; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
884; CHECK-NEXT:    punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
885; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
886; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
887; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
888; CHECK-NEXT:    psrlq $48, %xmm0
889; CHECK-NEXT:    callq __extendhfsf2@PLT
890; CHECK-NEXT:    movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
891; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
892; CHECK-NEXT:    psrlq $48, %xmm0
893; CHECK-NEXT:    callq __extendhfsf2@PLT
894; CHECK-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
895; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
896; CHECK-NEXT:    callq fmodf@PLT
897; CHECK-NEXT:    callq __truncsfhf2@PLT
898; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
899; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
900; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
901; CHECK-NEXT:    callq __extendhfsf2@PLT
902; CHECK-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
903; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
904; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
905; CHECK-NEXT:    callq __extendhfsf2@PLT
906; CHECK-NEXT:    movss (%rsp), %xmm1 # 4-byte Reload
907; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
908; CHECK-NEXT:    callq fmodf@PLT
909; CHECK-NEXT:    callq __truncsfhf2@PLT
910; CHECK-NEXT:    punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
911; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
912; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
913; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
914; CHECK-NEXT:    callq __extendhfsf2@PLT
915; CHECK-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
916; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
917; CHECK-NEXT:    callq __extendhfsf2@PLT
918; CHECK-NEXT:    movss (%rsp), %xmm1 # 4-byte Reload
919; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
920; CHECK-NEXT:    callq fmodf@PLT
921; CHECK-NEXT:    callq __truncsfhf2@PLT
922; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
923; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
924; CHECK-NEXT:    psrld $16, %xmm0
925; CHECK-NEXT:    callq __extendhfsf2@PLT
926; CHECK-NEXT:    movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
927; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
928; CHECK-NEXT:    psrld $16, %xmm0
929; CHECK-NEXT:    callq __extendhfsf2@PLT
930; CHECK-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
931; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
932; CHECK-NEXT:    callq fmodf@PLT
933; CHECK-NEXT:    callq __truncsfhf2@PLT
934; CHECK-NEXT:    movdqa (%rsp), %xmm1 # 16-byte Reload
935; CHECK-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
936; CHECK-NEXT:    punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
937; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
938; CHECK-NEXT:    punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
939; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
940; CHECK-NEXT:    movdqa %xmm1, 48(%rbx)
941; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
942; CHECK-NEXT:    movaps %xmm0, 32(%rbx)
943; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
944; CHECK-NEXT:    movaps %xmm0, 16(%rbx)
945; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
946; CHECK-NEXT:    movaps %xmm0, (%rbx)
947; CHECK-NEXT:    addq $176, %rsp
948; CHECK-NEXT:    popq %rbx
949; CHECK-NEXT:    retq
950  %frem = frem <32 x half> %a0, %a1
951  store <32 x half> %frem, ptr%p3
952  ret void
953}
954
955define void @frem_v16f16(<16 x half> %a0, <16 x half> %a1, ptr%p3) nounwind {
956; CHECK-LABEL: frem_v16f16:
957; CHECK:       # %bb.0:
958; CHECK-NEXT:    pushq %rbx
959; CHECK-NEXT:    subq $112, %rsp
960; CHECK-NEXT:    movq %rdi, %rbx
961; CHECK-NEXT:    movaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
962; CHECK-NEXT:    movdqa %xmm2, (%rsp) # 16-byte Spill
963; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
964; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
965; CHECK-NEXT:    movdqa %xmm2, %xmm0
966; CHECK-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
967; CHECK-NEXT:    callq __extendhfsf2@PLT
968; CHECK-NEXT:    movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
969; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
970; CHECK-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
971; CHECK-NEXT:    callq __extendhfsf2@PLT
972; CHECK-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
973; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
974; CHECK-NEXT:    callq fmodf@PLT
975; CHECK-NEXT:    callq __truncsfhf2@PLT
976; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
977; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
978; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
979; CHECK-NEXT:    callq __extendhfsf2@PLT
980; CHECK-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
981; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
982; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
983; CHECK-NEXT:    callq __extendhfsf2@PLT
984; CHECK-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
985; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
986; CHECK-NEXT:    callq fmodf@PLT
987; CHECK-NEXT:    callq __truncsfhf2@PLT
988; CHECK-NEXT:    punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
989; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
990; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
991; CHECK-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
992; CHECK-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
993; CHECK-NEXT:    callq __extendhfsf2@PLT
994; CHECK-NEXT:    movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
995; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
996; CHECK-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
997; CHECK-NEXT:    callq __extendhfsf2@PLT
998; CHECK-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
999; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
1000; CHECK-NEXT:    callq fmodf@PLT
1001; CHECK-NEXT:    callq __truncsfhf2@PLT
1002; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1003; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
1004; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
1005; CHECK-NEXT:    callq __extendhfsf2@PLT
1006; CHECK-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1007; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1008; CHECK-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1,1]
1009; CHECK-NEXT:    callq __extendhfsf2@PLT
1010; CHECK-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
1011; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
1012; CHECK-NEXT:    callq fmodf@PLT
1013; CHECK-NEXT:    callq __truncsfhf2@PLT
1014; CHECK-NEXT:    punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1015; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
1016; CHECK-NEXT:    punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1017; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
1018; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1019; CHECK-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
1020; CHECK-NEXT:    psrlq $48, %xmm0
1021; CHECK-NEXT:    callq __extendhfsf2@PLT
1022; CHECK-NEXT:    movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
1023; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1024; CHECK-NEXT:    psrlq $48, %xmm0
1025; CHECK-NEXT:    callq __extendhfsf2@PLT
1026; CHECK-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
1027; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
1028; CHECK-NEXT:    callq fmodf@PLT
1029; CHECK-NEXT:    callq __truncsfhf2@PLT
1030; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1031; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
1032; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
1033; CHECK-NEXT:    callq __extendhfsf2@PLT
1034; CHECK-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1035; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1036; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
1037; CHECK-NEXT:    callq __extendhfsf2@PLT
1038; CHECK-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
1039; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
1040; CHECK-NEXT:    callq fmodf@PLT
1041; CHECK-NEXT:    callq __truncsfhf2@PLT
1042; CHECK-NEXT:    punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1043; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
1044; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1045; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
1046; CHECK-NEXT:    callq __extendhfsf2@PLT
1047; CHECK-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1048; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1049; CHECK-NEXT:    callq __extendhfsf2@PLT
1050; CHECK-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
1051; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
1052; CHECK-NEXT:    callq fmodf@PLT
1053; CHECK-NEXT:    callq __truncsfhf2@PLT
1054; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1055; CHECK-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
1056; CHECK-NEXT:    psrld $16, %xmm0
1057; CHECK-NEXT:    callq __extendhfsf2@PLT
1058; CHECK-NEXT:    movd %xmm0, (%rsp) # 4-byte Folded Spill
1059; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1060; CHECK-NEXT:    psrld $16, %xmm0
1061; CHECK-NEXT:    callq __extendhfsf2@PLT
1062; CHECK-NEXT:    movss (%rsp), %xmm1 # 4-byte Reload
1063; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
1064; CHECK-NEXT:    callq fmodf@PLT
1065; CHECK-NEXT:    callq __truncsfhf2@PLT
1066; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
1067; CHECK-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
1068; CHECK-NEXT:    punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
1069; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
1070; CHECK-NEXT:    punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
1071; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
1072; CHECK-NEXT:    movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1073; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1074; CHECK-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1075; CHECK-NEXT:    callq __extendhfsf2@PLT
1076; CHECK-NEXT:    movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
1077; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1078; CHECK-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1079; CHECK-NEXT:    callq __extendhfsf2@PLT
1080; CHECK-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
1081; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
1082; CHECK-NEXT:    callq fmodf@PLT
1083; CHECK-NEXT:    callq __truncsfhf2@PLT
1084; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1085; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1086; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
1087; CHECK-NEXT:    callq __extendhfsf2@PLT
1088; CHECK-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
1089; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1090; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
1091; CHECK-NEXT:    callq __extendhfsf2@PLT
1092; CHECK-NEXT:    movss (%rsp), %xmm1 # 4-byte Reload
1093; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
1094; CHECK-NEXT:    callq fmodf@PLT
1095; CHECK-NEXT:    callq __truncsfhf2@PLT
1096; CHECK-NEXT:    punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1097; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
1098; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1099; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1100; CHECK-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1101; CHECK-NEXT:    callq __extendhfsf2@PLT
1102; CHECK-NEXT:    movd %xmm0, (%rsp) # 4-byte Folded Spill
1103; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1104; CHECK-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1105; CHECK-NEXT:    callq __extendhfsf2@PLT
1106; CHECK-NEXT:    movss (%rsp), %xmm1 # 4-byte Reload
1107; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
1108; CHECK-NEXT:    callq fmodf@PLT
1109; CHECK-NEXT:    callq __truncsfhf2@PLT
1110; CHECK-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
1111; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1112; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
1113; CHECK-NEXT:    callq __extendhfsf2@PLT
1114; CHECK-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1115; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1116; CHECK-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1,1]
1117; CHECK-NEXT:    callq __extendhfsf2@PLT
1118; CHECK-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
1119; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
1120; CHECK-NEXT:    callq fmodf@PLT
1121; CHECK-NEXT:    callq __truncsfhf2@PLT
1122; CHECK-NEXT:    punpcklwd (%rsp), %xmm0 # 16-byte Folded Reload
1123; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
1124; CHECK-NEXT:    punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1125; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
1126; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1127; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1128; CHECK-NEXT:    psrlq $48, %xmm0
1129; CHECK-NEXT:    callq __extendhfsf2@PLT
1130; CHECK-NEXT:    movd %xmm0, (%rsp) # 4-byte Folded Spill
1131; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1132; CHECK-NEXT:    psrlq $48, %xmm0
1133; CHECK-NEXT:    callq __extendhfsf2@PLT
1134; CHECK-NEXT:    movss (%rsp), %xmm1 # 4-byte Reload
1135; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
1136; CHECK-NEXT:    callq fmodf@PLT
1137; CHECK-NEXT:    callq __truncsfhf2@PLT
1138; CHECK-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
1139; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1140; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
1141; CHECK-NEXT:    callq __extendhfsf2@PLT
1142; CHECK-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1143; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1144; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
1145; CHECK-NEXT:    callq __extendhfsf2@PLT
1146; CHECK-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
1147; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
1148; CHECK-NEXT:    callq fmodf@PLT
1149; CHECK-NEXT:    callq __truncsfhf2@PLT
1150; CHECK-NEXT:    punpcklwd (%rsp), %xmm0 # 16-byte Folded Reload
1151; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
1152; CHECK-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
1153; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1154; CHECK-NEXT:    callq __extendhfsf2@PLT
1155; CHECK-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1156; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1157; CHECK-NEXT:    callq __extendhfsf2@PLT
1158; CHECK-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
1159; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
1160; CHECK-NEXT:    callq fmodf@PLT
1161; CHECK-NEXT:    callq __truncsfhf2@PLT
1162; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1163; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1164; CHECK-NEXT:    psrld $16, %xmm0
1165; CHECK-NEXT:    callq __extendhfsf2@PLT
1166; CHECK-NEXT:    movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
1167; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1168; CHECK-NEXT:    psrld $16, %xmm0
1169; CHECK-NEXT:    callq __extendhfsf2@PLT
1170; CHECK-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
1171; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
1172; CHECK-NEXT:    callq fmodf@PLT
1173; CHECK-NEXT:    callq __truncsfhf2@PLT
1174; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
1175; CHECK-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
1176; CHECK-NEXT:    punpckldq (%rsp), %xmm1 # 16-byte Folded Reload
1177; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
1178; CHECK-NEXT:    punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
1179; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
1180; CHECK-NEXT:    movdqa %xmm1, 16(%rbx)
1181; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1182; CHECK-NEXT:    movaps %xmm0, (%rbx)
1183; CHECK-NEXT:    addq $112, %rsp
1184; CHECK-NEXT:    popq %rbx
1185; CHECK-NEXT:    retq
1186  %frem = frem <16 x half> %a0, %a1
1187  store <16 x half> %frem, ptr%p3
1188  ret void
1189}
1190
; frem on <8 x half>; the result vector is stored through %p3.
; The expected x86-64 output below processes the vector one lane at a time:
; both operands of a lane are widened with __extendhfsf2, fmodf computes the
; remainder, and __truncsfhf2 narrows it back to half (8 fmodf calls in total).
; The eight results are merged with punpcklwd/punpckldq/punpcklqdq and written
; with a single movdqa to (%rbx), where %rbx is a copy of %rdi made on entry.
; The assertion lines are autogenerated (see the note on the first line of this
; file); regenerate them with that script rather than editing them by hand.
1191define void @frem_v8f16(<8 x half> %a0, <8 x half> %a1, ptr%p3) nounwind {
1192; CHECK-LABEL: frem_v8f16:
1193; CHECK:       # %bb.0:
1194; CHECK-NEXT:    pushq %rbx
1195; CHECK-NEXT:    subq $80, %rsp
1196; CHECK-NEXT:    movq %rdi, %rbx
1197; CHECK-NEXT:    movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1198; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1199; CHECK-NEXT:    movdqa %xmm1, %xmm0
1200; CHECK-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1201; CHECK-NEXT:    callq __extendhfsf2@PLT
1202; CHECK-NEXT:    movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
1203; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1204; CHECK-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1205; CHECK-NEXT:    callq __extendhfsf2@PLT
1206; CHECK-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
1207; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
1208; CHECK-NEXT:    callq fmodf@PLT
1209; CHECK-NEXT:    callq __truncsfhf2@PLT
1210; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1211; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1212; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
1213; CHECK-NEXT:    callq __extendhfsf2@PLT
1214; CHECK-NEXT:    movss %xmm0, (%rsp) # 4-byte Spill
1215; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1216; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
1217; CHECK-NEXT:    callq __extendhfsf2@PLT
1218; CHECK-NEXT:    movss (%rsp), %xmm1 # 4-byte Reload
1219; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
1220; CHECK-NEXT:    callq fmodf@PLT
1221; CHECK-NEXT:    callq __truncsfhf2@PLT
1222; CHECK-NEXT:    punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1223; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
1224; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1225; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1226; CHECK-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1227; CHECK-NEXT:    callq __extendhfsf2@PLT
1228; CHECK-NEXT:    movd %xmm0, (%rsp) # 4-byte Folded Spill
1229; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1230; CHECK-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1231; CHECK-NEXT:    callq __extendhfsf2@PLT
1232; CHECK-NEXT:    movss (%rsp), %xmm1 # 4-byte Reload
1233; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
1234; CHECK-NEXT:    callq fmodf@PLT
1235; CHECK-NEXT:    callq __truncsfhf2@PLT
1236; CHECK-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
1237; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1238; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
1239; CHECK-NEXT:    callq __extendhfsf2@PLT
1240; CHECK-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1241; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1242; CHECK-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1,1]
1243; CHECK-NEXT:    callq __extendhfsf2@PLT
1244; CHECK-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
1245; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
1246; CHECK-NEXT:    callq fmodf@PLT
1247; CHECK-NEXT:    callq __truncsfhf2@PLT
1248; CHECK-NEXT:    punpcklwd (%rsp), %xmm0 # 16-byte Folded Reload
1249; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
1250; CHECK-NEXT:    punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1251; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
1252; CHECK-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1253; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1254; CHECK-NEXT:    psrlq $48, %xmm0
1255; CHECK-NEXT:    callq __extendhfsf2@PLT
1256; CHECK-NEXT:    movd %xmm0, (%rsp) # 4-byte Folded Spill
1257; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1258; CHECK-NEXT:    psrlq $48, %xmm0
1259; CHECK-NEXT:    callq __extendhfsf2@PLT
1260; CHECK-NEXT:    movss (%rsp), %xmm1 # 4-byte Reload
1261; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
1262; CHECK-NEXT:    callq fmodf@PLT
1263; CHECK-NEXT:    callq __truncsfhf2@PLT
1264; CHECK-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
1265; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1266; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
1267; CHECK-NEXT:    callq __extendhfsf2@PLT
1268; CHECK-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1269; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1270; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
1271; CHECK-NEXT:    callq __extendhfsf2@PLT
1272; CHECK-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
1273; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
1274; CHECK-NEXT:    callq fmodf@PLT
1275; CHECK-NEXT:    callq __truncsfhf2@PLT
1276; CHECK-NEXT:    punpcklwd (%rsp), %xmm0 # 16-byte Folded Reload
1277; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
1278; CHECK-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
1279; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1280; CHECK-NEXT:    callq __extendhfsf2@PLT
1281; CHECK-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1282; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1283; CHECK-NEXT:    callq __extendhfsf2@PLT
1284; CHECK-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
1285; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
1286; CHECK-NEXT:    callq fmodf@PLT
1287; CHECK-NEXT:    callq __truncsfhf2@PLT
1288; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1289; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1290; CHECK-NEXT:    psrld $16, %xmm0
1291; CHECK-NEXT:    callq __extendhfsf2@PLT
1292; CHECK-NEXT:    movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
1293; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
1294; CHECK-NEXT:    psrld $16, %xmm0
1295; CHECK-NEXT:    callq __extendhfsf2@PLT
1296; CHECK-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
1297; CHECK-NEXT:    # xmm1 = mem[0],zero,zero,zero
1298; CHECK-NEXT:    callq fmodf@PLT
1299; CHECK-NEXT:    callq __truncsfhf2@PLT
1300; CHECK-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
1301; CHECK-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
1302; CHECK-NEXT:    punpckldq (%rsp), %xmm1 # 16-byte Folded Reload
1303; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
1304; CHECK-NEXT:    punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
1305; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
1306; CHECK-NEXT:    movdqa %xmm1, (%rbx)
1307; CHECK-NEXT:    addq $80, %rsp
1308; CHECK-NEXT:    popq %rbx
1309; CHECK-NEXT:    retq
1310  %frem = frem <8 x half> %a0, %a1
1311  store <8 x half> %frem, ptr%p3
1312  ret void
1313}
1314
; frem on <4 x x86_fp80>; the result vector is stored through %p3.
; In the expected x86-64 output below the eight x86_fp80 operands are read from
; the stack with fldt; six of them are first spilled to local slots. Before
; each of the four fmodl calls one operand pair is written with fstpt to (%rsp)
; and to a positive offset from %rsp. The first three results are spilled, the
; last is stored directly, and all four end up at 10-byte strides (offsets 30,
; 20, 10 and 0) from %rbx, which is a copy of %rdi made on entry.
; The assertion lines are autogenerated (see the note on the first line of this
; file); regenerate them with that script rather than editing them by hand.
1315define void @frem_v4f80(<4 x x86_fp80> %a0, <4 x x86_fp80> %a1, ptr%p3) nounwind {
1316; CHECK-LABEL: frem_v4f80:
1317; CHECK:       # %bb.0:
1318; CHECK-NEXT:    pushq %rbx
1319; CHECK-NEXT:    subq $128, %rsp
1320; CHECK-NEXT:    movq %rdi, %rbx
1321; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
1322; CHECK-NEXT:    fstpt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Spill
1323; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
1324; CHECK-NEXT:    fstpt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Spill
1325; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
1326; CHECK-NEXT:    fstpt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Spill
1327; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
1328; CHECK-NEXT:    fstpt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Spill
1329; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
1330; CHECK-NEXT:    fstpt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Spill
1331; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
1332; CHECK-NEXT:    fstpt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Spill
1333; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
1334; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
1335; CHECK-NEXT:    fstpt {{[0-9]+}}(%rsp)
1336; CHECK-NEXT:    fstpt (%rsp)
1337; CHECK-NEXT:    callq fmodl@PLT
1338; CHECK-NEXT:    fstpt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Spill
1339; CHECK-NEXT:    fldt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Reload
1340; CHECK-NEXT:    fstpt {{[0-9]+}}(%rsp)
1341; CHECK-NEXT:    fldt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Reload
1342; CHECK-NEXT:    fstpt (%rsp)
1343; CHECK-NEXT:    callq fmodl@PLT
1344; CHECK-NEXT:    fstpt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Spill
1345; CHECK-NEXT:    fldt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Reload
1346; CHECK-NEXT:    fstpt {{[0-9]+}}(%rsp)
1347; CHECK-NEXT:    fldt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Reload
1348; CHECK-NEXT:    fstpt (%rsp)
1349; CHECK-NEXT:    callq fmodl@PLT
1350; CHECK-NEXT:    fstpt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Spill
1351; CHECK-NEXT:    fldt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Reload
1352; CHECK-NEXT:    fstpt {{[0-9]+}}(%rsp)
1353; CHECK-NEXT:    fldt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Reload
1354; CHECK-NEXT:    fstpt (%rsp)
1355; CHECK-NEXT:    callq fmodl@PLT
1356; CHECK-NEXT:    fstpt 30(%rbx)
1357; CHECK-NEXT:    fldt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Reload
1358; CHECK-NEXT:    fstpt 20(%rbx)
1359; CHECK-NEXT:    fldt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Reload
1360; CHECK-NEXT:    fstpt 10(%rbx)
1361; CHECK-NEXT:    fldt {{[-0-9]+}}(%r{{[sb]}}p) # 10-byte Folded Reload
1362; CHECK-NEXT:    fstpt (%rbx)
1363; CHECK-NEXT:    addq $128, %rsp
1364; CHECK-NEXT:    popq %rbx
1365; CHECK-NEXT:    retq
1366  %frem = frem <4 x x86_fp80> %a0, %a1
1367  store <4 x x86_fp80> %frem, ptr%p3
1368  ret void
1369}
1370