1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=-f16c -fixup-byte-word-insts=1 \
3; RUN:   | FileCheck %s -check-prefixes=CHECK,CHECK-LIBCALL,BWON,BWON-NOF16C
4; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=-f16c  -fixup-byte-word-insts=0 \
5; RUN:   | FileCheck %s -check-prefixes=CHECK,CHECK-LIBCALL,BWOFF
6; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+f16c -fixup-byte-word-insts=1 \
7; RUN:    | FileCheck %s -check-prefixes=CHECK,BWON,BWON-F16C
8; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr +sse2 -fixup-byte-word-insts=0  \
9; RUN:    | FileCheck %s -check-prefixes=CHECK-I686
10
; Copies one half value from %in to %out with a plain load/store pair.
; Every checked configuration expects a 16-bit value moved through an integer
; register and no conversion call.
11define void @test_load_store(half* %in, half* %out) #0 {
12; BWON-LABEL: test_load_store:
13; BWON:       # %bb.0:
14; BWON-NEXT:    movzwl (%rdi), %eax
15; BWON-NEXT:    movw %ax, (%rsi)
16; BWON-NEXT:    retq
17;
18; BWOFF-LABEL: test_load_store:
19; BWOFF:       # %bb.0:
20; BWOFF-NEXT:    movw (%rdi), %ax
21; BWOFF-NEXT:    movw %ax, (%rsi)
22; BWOFF-NEXT:    retq
23;
24; CHECK-I686-LABEL: test_load_store:
25; CHECK-I686:       # %bb.0:
26; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
27; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %ecx
28; CHECK-I686-NEXT:    movw (%ecx), %cx
29; CHECK-I686-NEXT:    movw %cx, (%eax)
30; CHECK-I686-NEXT:    retl
; IR under test - load the half and store it back unchanged.
31  %val = load half, half* %in
32  store half %val, half* %out
33  ret void
34}
35
; Loads a half from %addr and returns its bit pattern as an i16.
; The checks expect a single 16-bit load into the return register; the bitcast
; itself produces no instruction.
36define i16 @test_bitcast_from_half(half* %addr) #0 {
37; BWON-LABEL: test_bitcast_from_half:
38; BWON:       # %bb.0:
39; BWON-NEXT:    movzwl (%rdi), %eax
40; BWON-NEXT:    retq
41;
42; BWOFF-LABEL: test_bitcast_from_half:
43; BWOFF:       # %bb.0:
44; BWOFF-NEXT:    movw (%rdi), %ax
45; BWOFF-NEXT:    retq
46;
47; CHECK-I686-LABEL: test_bitcast_from_half:
48; CHECK-I686:       # %bb.0:
49; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
50; CHECK-I686-NEXT:    movw (%eax), %ax
51; CHECK-I686-NEXT:    retl
; IR under test - load half, reinterpret the bits as i16, return them.
52  %val = load half, half* %addr
53  %val_int = bitcast half %val to i16
54  ret i16 %val_int
55}
56
; Reinterprets the i16 argument %in as a half and stores it to %addr.
; All three x86-64 runs share one expectation here, a single 16-bit store of
; the incoming argument register; the i686 run loads both arguments from the
; stack first.
57define void @test_bitcast_to_half(half* %addr, i16 %in) #0 {
58; CHECK-LABEL: test_bitcast_to_half:
59; CHECK:       # %bb.0:
60; CHECK-NEXT:    movw %si, (%rdi)
61; CHECK-NEXT:    retq
62;
63; CHECK-I686-LABEL: test_bitcast_to_half:
64; CHECK-I686:       # %bb.0:
65; CHECK-I686-NEXT:    movw {{[0-9]+}}(%esp), %ax
66; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %ecx
67; CHECK-I686-NEXT:    movw %ax, (%ecx)
68; CHECK-I686-NEXT:    retl
; IR under test - bitcast i16 to half, then store the half.
69  %val_fp = bitcast i16 %in to half
70  store half %val_fp, half* %addr
71  ret void
72}
73
; Loads a half from %addr and extends it to float.
; Runs without F16C expect a call to __gnu_h2f_ieee (a tail call on x86-64);
; the run with F16C expects an inline vcvtph2ps instead.
74define float @test_extend32(half* %addr) #0 {
75; CHECK-LIBCALL-LABEL: test_extend32:
76; CHECK-LIBCALL:       # %bb.0:
77; CHECK-LIBCALL-NEXT:    movzwl (%rdi), %edi
78; CHECK-LIBCALL-NEXT:    jmp __gnu_h2f_ieee@PLT # TAILCALL
79;
80; BWON-F16C-LABEL: test_extend32:
81; BWON-F16C:       # %bb.0:
82; BWON-F16C-NEXT:    movzwl (%rdi), %eax
83; BWON-F16C-NEXT:    vmovd %eax, %xmm0
84; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
85; BWON-F16C-NEXT:    retq
86;
87; CHECK-I686-LABEL: test_extend32:
88; CHECK-I686:       # %bb.0:
89; CHECK-I686-NEXT:    subl $12, %esp
90; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
91; CHECK-I686-NEXT:    movzwl (%eax), %eax
92; CHECK-I686-NEXT:    movl %eax, (%esp)
93; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
94; CHECK-I686-NEXT:    addl $12, %esp
95; CHECK-I686-NEXT:    retl
; IR under test - load half, fpext to float, return it.
96  %val16 = load half, half* %addr
97  %val32 = fpext half %val16 to float
98  ret float %val32
99}
100
; Loads a half from %addr and extends it to double.
; On x86-64 the checks expect a half-to-float step (the __gnu_h2f_ieee call, or
; vcvtph2ps with F16C) followed by a float-to-double convert. The i686 checks
; show only the __gnu_h2f_ieee call before returning.
101define double @test_extend64(half* %addr) #0 {
102; CHECK-LIBCALL-LABEL: test_extend64:
103; CHECK-LIBCALL:       # %bb.0:
104; CHECK-LIBCALL-NEXT:    pushq %rax
105; CHECK-LIBCALL-NEXT:    movzwl (%rdi), %edi
106; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee@PLT
107; CHECK-LIBCALL-NEXT:    cvtss2sd %xmm0, %xmm0
108; CHECK-LIBCALL-NEXT:    popq %rax
109; CHECK-LIBCALL-NEXT:    retq
110;
111; BWON-F16C-LABEL: test_extend64:
112; BWON-F16C:       # %bb.0:
113; BWON-F16C-NEXT:    movzwl (%rdi), %eax
114; BWON-F16C-NEXT:    vmovd %eax, %xmm0
115; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
116; BWON-F16C-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
117; BWON-F16C-NEXT:    retq
118;
119; CHECK-I686-LABEL: test_extend64:
120; CHECK-I686:       # %bb.0:
121; CHECK-I686-NEXT:    subl $12, %esp
122; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
123; CHECK-I686-NEXT:    movzwl (%eax), %eax
124; CHECK-I686-NEXT:    movl %eax, (%esp)
125; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
126; CHECK-I686-NEXT:    addl $12, %esp
127; CHECK-I686-NEXT:    retl
; IR under test - load half, fpext directly to double, return it.
; Despite its name, %val32 holds the double result here.
128  %val16 = load half, half* %addr
129  %val32 = fpext half %val16 to double
130  ret double %val32
131}
132
; Truncates the float argument %in to half and stores it to %addr.
; Runs without F16C expect a call to __gnu_f2h_ieee followed by a 16-bit store
; of the returned value; the run with F16C expects vcvtps2ph plus vpextrw
; straight to memory.
133define void @test_trunc32(float %in, half* %addr) #0 {
134; CHECK-LIBCALL-LABEL: test_trunc32:
135; CHECK-LIBCALL:       # %bb.0:
136; CHECK-LIBCALL-NEXT:    pushq %rbx
137; CHECK-LIBCALL-NEXT:    movq %rdi, %rbx
138; CHECK-LIBCALL-NEXT:    callq __gnu_f2h_ieee@PLT
139; CHECK-LIBCALL-NEXT:    movw %ax, (%rbx)
140; CHECK-LIBCALL-NEXT:    popq %rbx
141; CHECK-LIBCALL-NEXT:    retq
142;
143; BWON-F16C-LABEL: test_trunc32:
144; BWON-F16C:       # %bb.0:
145; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
146; BWON-F16C-NEXT:    vpextrw $0, %xmm0, (%rdi)
147; BWON-F16C-NEXT:    retq
148;
149; CHECK-I686-LABEL: test_trunc32:
150; CHECK-I686:       # %bb.0:
151; CHECK-I686-NEXT:    pushl %esi
152; CHECK-I686-NEXT:    subl $8, %esp
153; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %esi
154; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
155; CHECK-I686-NEXT:    movss %xmm0, (%esp)
156; CHECK-I686-NEXT:    calll __gnu_f2h_ieee
157; CHECK-I686-NEXT:    movw %ax, (%esi)
158; CHECK-I686-NEXT:    addl $8, %esp
159; CHECK-I686-NEXT:    popl %esi
160; CHECK-I686-NEXT:    retl
; IR under test - fptrunc float to half, then store the half.
161  %val16 = fptrunc float %in to half
162  store half %val16, half* %addr
163  ret void
164}
165
; Truncates the double argument %in to half and stores it to %addr.
; The x86-64 expectation uses the prefix shared by all three x86-64 runs, so a
; call to __truncdfhf2 is expected with and without F16C; the i686 run expects
; the same call.
166define void @test_trunc64(double %in, half* %addr) #0 {
167; CHECK-LABEL: test_trunc64:
168; CHECK:       # %bb.0:
169; CHECK-NEXT:    pushq %rbx
170; CHECK-NEXT:    movq %rdi, %rbx
171; CHECK-NEXT:    callq __truncdfhf2@PLT
172; CHECK-NEXT:    movw %ax, (%rbx)
173; CHECK-NEXT:    popq %rbx
174; CHECK-NEXT:    retq
175;
176; CHECK-I686-LABEL: test_trunc64:
177; CHECK-I686:       # %bb.0:
178; CHECK-I686-NEXT:    pushl %esi
179; CHECK-I686-NEXT:    subl $8, %esp
180; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %esi
181; CHECK-I686-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
182; CHECK-I686-NEXT:    movsd %xmm0, (%esp)
183; CHECK-I686-NEXT:    calll __truncdfhf2
184; CHECK-I686-NEXT:    movw %ax, (%esi)
185; CHECK-I686-NEXT:    addl $8, %esp
186; CHECK-I686-NEXT:    popl %esi
187; CHECK-I686-NEXT:    retl
; IR under test - fptrunc double to half, then store the half.
188  %val16 = fptrunc double %in to half
189  store half %val16, half* %addr
190  ret void
191}
192
; Loads a half from %p and converts it to a signed i64.
; On x86-64 the checks expect the half to be widened to float first (the
; __gnu_h2f_ieee call, or vcvtph2ps with F16C) and then converted with
; cvttss2si. On i686 the checks expect the libcall followed by an x87 fistpll
; sequence that temporarily rewrites the FPU control word.
193define i64 @test_fptosi_i64(half* %p) #0 {
194; CHECK-LIBCALL-LABEL: test_fptosi_i64:
195; CHECK-LIBCALL:       # %bb.0:
196; CHECK-LIBCALL-NEXT:    pushq %rax
197; CHECK-LIBCALL-NEXT:    movzwl (%rdi), %edi
198; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee@PLT
199; CHECK-LIBCALL-NEXT:    cvttss2si %xmm0, %rax
200; CHECK-LIBCALL-NEXT:    popq %rcx
201; CHECK-LIBCALL-NEXT:    retq
202;
203; BWON-F16C-LABEL: test_fptosi_i64:
204; BWON-F16C:       # %bb.0:
205; BWON-F16C-NEXT:    movzwl (%rdi), %eax
206; BWON-F16C-NEXT:    vmovd %eax, %xmm0
207; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
208; BWON-F16C-NEXT:    vcvttss2si %xmm0, %rax
209; BWON-F16C-NEXT:    retq
210;
211; CHECK-I686-LABEL: test_fptosi_i64:
212; CHECK-I686:       # %bb.0:
213; CHECK-I686-NEXT:    subl $28, %esp
214; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
215; CHECK-I686-NEXT:    movzwl (%eax), %eax
216; CHECK-I686-NEXT:    movl %eax, (%esp)
217; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
218; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
219; CHECK-I686-NEXT:    flds {{[0-9]+}}(%esp)
220; CHECK-I686-NEXT:    fnstcw {{[0-9]+}}(%esp)
221; CHECK-I686-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
222; CHECK-I686-NEXT:    orl $3072, %eax # imm = 0xC00
223; CHECK-I686-NEXT:    movw %ax, {{[0-9]+}}(%esp)
224; CHECK-I686-NEXT:    fldcw {{[0-9]+}}(%esp)
225; CHECK-I686-NEXT:    fistpll {{[0-9]+}}(%esp)
226; CHECK-I686-NEXT:    fldcw {{[0-9]+}}(%esp)
227; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
228; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %edx
229; CHECK-I686-NEXT:    addl $28, %esp
230; CHECK-I686-NEXT:    retl
; IR under test - 2-byte aligned half load, fptosi to i64, return it.
231  %a = load half, half* %p, align 2
232  %r = fptosi half %a to i64
233  ret i64 %r
234}
235
; Converts the signed i64 argument %a to half and stores it to %p.
; On x86-64 the checks expect an integer-to-float convert (cvtsi2ss) followed
; by the float-to-half step (the __gnu_f2h_ieee call, or vcvtps2ph with F16C).
; On i686 the checks expect an x87 fildll before the __gnu_f2h_ieee call.
236define void @test_sitofp_i64(i64 %a, half* %p) #0 {
237; CHECK-LIBCALL-LABEL: test_sitofp_i64:
238; CHECK-LIBCALL:       # %bb.0:
239; CHECK-LIBCALL-NEXT:    pushq %rbx
240; CHECK-LIBCALL-NEXT:    movq %rsi, %rbx
241; CHECK-LIBCALL-NEXT:    cvtsi2ss %rdi, %xmm0
242; CHECK-LIBCALL-NEXT:    callq __gnu_f2h_ieee@PLT
243; CHECK-LIBCALL-NEXT:    movw %ax, (%rbx)
244; CHECK-LIBCALL-NEXT:    popq %rbx
245; CHECK-LIBCALL-NEXT:    retq
246;
247; BWON-F16C-LABEL: test_sitofp_i64:
248; BWON-F16C:       # %bb.0:
249; BWON-F16C-NEXT:    vcvtsi2ss %rdi, %xmm0, %xmm0
250; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
251; BWON-F16C-NEXT:    vpextrw $0, %xmm0, (%rsi)
252; BWON-F16C-NEXT:    retq
253;
254; CHECK-I686-LABEL: test_sitofp_i64:
255; CHECK-I686:       # %bb.0:
256; CHECK-I686-NEXT:    pushl %esi
257; CHECK-I686-NEXT:    subl $24, %esp
258; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %esi
259; CHECK-I686-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
260; CHECK-I686-NEXT:    movlps %xmm0, {{[0-9]+}}(%esp)
261; CHECK-I686-NEXT:    fildll {{[0-9]+}}(%esp)
262; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
263; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
264; CHECK-I686-NEXT:    movss %xmm0, (%esp)
265; CHECK-I686-NEXT:    calll __gnu_f2h_ieee
266; CHECK-I686-NEXT:    movw %ax, (%esi)
267; CHECK-I686-NEXT:    addl $24, %esp
268; CHECK-I686-NEXT:    popl %esi
269; CHECK-I686-NEXT:    retl
; IR under test - sitofp i64 to half, then store the half.
270  %r = sitofp i64 %a to half
271  store half %r, half* %p
272  ret void
273}
274
; Loads a half from %p and converts it to an unsigned i64.
; On x86-64 the checks expect the half to be widened to float and then two
; signed cvttss2si converts (the second on the value minus a constant-pool
; operand) whose results are merged with sarq/andq/orq. On i686 the checks
; expect a compare against a constant, a conditional subtraction, an x87
; fistpll, and a final xor of a shifted flag into the high result word.
275define i64 @test_fptoui_i64(half* %p) #0 {
276; CHECK-LIBCALL-LABEL: test_fptoui_i64:
277; CHECK-LIBCALL:       # %bb.0:
278; CHECK-LIBCALL-NEXT:    pushq %rax
279; CHECK-LIBCALL-NEXT:    movzwl (%rdi), %edi
280; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee@PLT
281; CHECK-LIBCALL-NEXT:    cvttss2si %xmm0, %rcx
282; CHECK-LIBCALL-NEXT:    movq %rcx, %rdx
283; CHECK-LIBCALL-NEXT:    sarq $63, %rdx
284; CHECK-LIBCALL-NEXT:    subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
285; CHECK-LIBCALL-NEXT:    cvttss2si %xmm0, %rax
286; CHECK-LIBCALL-NEXT:    andq %rdx, %rax
287; CHECK-LIBCALL-NEXT:    orq %rcx, %rax
288; CHECK-LIBCALL-NEXT:    popq %rcx
289; CHECK-LIBCALL-NEXT:    retq
290;
291; BWON-F16C-LABEL: test_fptoui_i64:
292; BWON-F16C:       # %bb.0:
293; BWON-F16C-NEXT:    movzwl (%rdi), %eax
294; BWON-F16C-NEXT:    vmovd %eax, %xmm0
295; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
296; BWON-F16C-NEXT:    vcvttss2si %xmm0, %rcx
297; BWON-F16C-NEXT:    movq %rcx, %rdx
298; BWON-F16C-NEXT:    sarq $63, %rdx
299; BWON-F16C-NEXT:    vsubss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
300; BWON-F16C-NEXT:    vcvttss2si %xmm0, %rax
301; BWON-F16C-NEXT:    andq %rdx, %rax
302; BWON-F16C-NEXT:    orq %rcx, %rax
303; BWON-F16C-NEXT:    retq
304;
305; CHECK-I686-LABEL: test_fptoui_i64:
306; CHECK-I686:       # %bb.0:
307; CHECK-I686-NEXT:    subl $28, %esp
308; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
309; CHECK-I686-NEXT:    movzwl (%eax), %eax
310; CHECK-I686-NEXT:    movl %eax, (%esp)
311; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
312; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
313; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
314; CHECK-I686-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
315; CHECK-I686-NEXT:    ucomiss %xmm1, %xmm0
316; CHECK-I686-NEXT:    jae .LBB9_2
317; CHECK-I686-NEXT:  # %bb.1:
318; CHECK-I686-NEXT:    xorps %xmm1, %xmm1
319; CHECK-I686-NEXT:  .LBB9_2:
320; CHECK-I686-NEXT:    subss %xmm1, %xmm0
321; CHECK-I686-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
322; CHECK-I686-NEXT:    setae %al
323; CHECK-I686-NEXT:    flds {{[0-9]+}}(%esp)
324; CHECK-I686-NEXT:    fnstcw {{[0-9]+}}(%esp)
325; CHECK-I686-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
326; CHECK-I686-NEXT:    orl $3072, %ecx # imm = 0xC00
327; CHECK-I686-NEXT:    movw %cx, {{[0-9]+}}(%esp)
328; CHECK-I686-NEXT:    fldcw {{[0-9]+}}(%esp)
329; CHECK-I686-NEXT:    fistpll {{[0-9]+}}(%esp)
330; CHECK-I686-NEXT:    fldcw {{[0-9]+}}(%esp)
331; CHECK-I686-NEXT:    movzbl %al, %edx
332; CHECK-I686-NEXT:    shll $31, %edx
333; CHECK-I686-NEXT:    xorl {{[0-9]+}}(%esp), %edx
334; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
335; CHECK-I686-NEXT:    addl $28, %esp
336; CHECK-I686-NEXT:    retl
; IR under test - 2-byte aligned half load, fptoui to i64, return it.
337  %a = load half, half* %p, align 2
338  %r = fptoui half %a to i64
339  ret i64 %r
340}
341
; Converts the unsigned i64 argument %a to half and stores it to %p.
; On x86-64 the checks expect a branch on the sign bit of the input. The
; non-negative path converts directly with cvtsi2ss. The other path shifts the
; value right by one, ors the original low bit back in, converts, and doubles
; the result. Both paths then narrow to half (the __gnu_f2h_ieee call, or
; vcvtps2ph with F16C). On i686 the checks expect an x87 fildll plus an fadds
; from a constant-pool table indexed by the shifted-down top bit, followed by
; the __gnu_f2h_ieee call.
342define void @test_uitofp_i64(i64 %a, half* %p) #0 {
343; CHECK-LIBCALL-LABEL: test_uitofp_i64:
344; CHECK-LIBCALL:       # %bb.0:
345; CHECK-LIBCALL-NEXT:    pushq %rbx
346; CHECK-LIBCALL-NEXT:    movq %rsi, %rbx
347; CHECK-LIBCALL-NEXT:    testq %rdi, %rdi
348; CHECK-LIBCALL-NEXT:    js .LBB10_1
349; CHECK-LIBCALL-NEXT:  # %bb.2:
350; CHECK-LIBCALL-NEXT:    cvtsi2ss %rdi, %xmm0
351; CHECK-LIBCALL-NEXT:    jmp .LBB10_3
352; CHECK-LIBCALL-NEXT:  .LBB10_1:
353; CHECK-LIBCALL-NEXT:    movq %rdi, %rax
354; CHECK-LIBCALL-NEXT:    shrq %rax
355; CHECK-LIBCALL-NEXT:    andl $1, %edi
356; CHECK-LIBCALL-NEXT:    orq %rax, %rdi
357; CHECK-LIBCALL-NEXT:    cvtsi2ss %rdi, %xmm0
358; CHECK-LIBCALL-NEXT:    addss %xmm0, %xmm0
359; CHECK-LIBCALL-NEXT:  .LBB10_3:
360; CHECK-LIBCALL-NEXT:    callq __gnu_f2h_ieee@PLT
361; CHECK-LIBCALL-NEXT:    movw %ax, (%rbx)
362; CHECK-LIBCALL-NEXT:    popq %rbx
363; CHECK-LIBCALL-NEXT:    retq
364;
365; BWON-F16C-LABEL: test_uitofp_i64:
366; BWON-F16C:       # %bb.0:
367; BWON-F16C-NEXT:    testq %rdi, %rdi
368; BWON-F16C-NEXT:    js .LBB10_1
369; BWON-F16C-NEXT:  # %bb.2:
370; BWON-F16C-NEXT:    vcvtsi2ss %rdi, %xmm0, %xmm0
371; BWON-F16C-NEXT:    jmp .LBB10_3
372; BWON-F16C-NEXT:  .LBB10_1:
373; BWON-F16C-NEXT:    movq %rdi, %rax
374; BWON-F16C-NEXT:    shrq %rax
375; BWON-F16C-NEXT:    andl $1, %edi
376; BWON-F16C-NEXT:    orq %rax, %rdi
377; BWON-F16C-NEXT:    vcvtsi2ss %rdi, %xmm0, %xmm0
378; BWON-F16C-NEXT:    vaddss %xmm0, %xmm0, %xmm0
379; BWON-F16C-NEXT:  .LBB10_3:
380; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
381; BWON-F16C-NEXT:    vpextrw $0, %xmm0, (%rsi)
382; BWON-F16C-NEXT:    retq
383;
384; CHECK-I686-LABEL: test_uitofp_i64:
385; CHECK-I686:       # %bb.0:
386; CHECK-I686-NEXT:    pushl %esi
387; CHECK-I686-NEXT:    subl $24, %esp
388; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %esi
389; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
390; CHECK-I686-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
391; CHECK-I686-NEXT:    movlps %xmm0, {{[0-9]+}}(%esp)
392; CHECK-I686-NEXT:    shrl $31, %eax
393; CHECK-I686-NEXT:    fildll {{[0-9]+}}(%esp)
394; CHECK-I686-NEXT:    fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
395; CHECK-I686-NEXT:    fstps (%esp)
396; CHECK-I686-NEXT:    calll __gnu_f2h_ieee
397; CHECK-I686-NEXT:    movw %ax, (%esi)
398; CHECK-I686-NEXT:    addl $24, %esp
399; CHECK-I686-NEXT:    popl %esi
400; CHECK-I686-NEXT:    retl
; IR under test - uitofp i64 to half, then store the half.
401  %r = uitofp i64 %a to half
402  store half %r, half* %p
403  ret void
404}
405
; Loads a <4 x half> vector from %p and extends it to <4 x float>.
; Runs without F16C expect four separate __gnu_h2f_ieee calls, one per element,
; with the scalar results reassembled into a single vector register. The run
; with F16C expects one vcvtph2ps taking its operand directly from memory.
406define <4 x float> @test_extend32_vec4(<4 x half>* %p) #0 {
407; CHECK-LIBCALL-LABEL: test_extend32_vec4:
408; CHECK-LIBCALL:       # %bb.0:
409; CHECK-LIBCALL-NEXT:    subq $88, %rsp
410; CHECK-LIBCALL-NEXT:    movl (%rdi), %eax
411; CHECK-LIBCALL-NEXT:    movl 4(%rdi), %ecx
412; CHECK-LIBCALL-NEXT:    movl %eax, (%rsp)
413; CHECK-LIBCALL-NEXT:    movl %ecx, {{[0-9]+}}(%rsp)
414; CHECK-LIBCALL-NEXT:    movaps (%rsp), %xmm0
415; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
416; CHECK-LIBCALL-NEXT:    movdqa {{[0-9]+}}(%rsp), %xmm0
417; CHECK-LIBCALL-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
418; CHECK-LIBCALL-NEXT:    pextrw $1, %xmm0, %edi
419; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee@PLT
420; CHECK-LIBCALL-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
421; CHECK-LIBCALL-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
422; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %edi
423; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee@PLT
424; CHECK-LIBCALL-NEXT:    punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
425; CHECK-LIBCALL-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
426; CHECK-LIBCALL-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
427; CHECK-LIBCALL-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
428; CHECK-LIBCALL-NEXT:    pextrw $1, %xmm0, %edi
429; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee@PLT
430; CHECK-LIBCALL-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
431; CHECK-LIBCALL-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
432; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %edi
433; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee@PLT
434; CHECK-LIBCALL-NEXT:    punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
435; CHECK-LIBCALL-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
436; CHECK-LIBCALL-NEXT:    punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
437; CHECK-LIBCALL-NEXT:    # xmm0 = xmm0[0],mem[0]
438; CHECK-LIBCALL-NEXT:    addq $88, %rsp
439; CHECK-LIBCALL-NEXT:    retq
440;
441; BWON-F16C-LABEL: test_extend32_vec4:
442; BWON-F16C:       # %bb.0:
443; BWON-F16C-NEXT:    vcvtph2ps (%rdi), %xmm0
444; BWON-F16C-NEXT:    retq
445;
446; CHECK-I686-LABEL: test_extend32_vec4:
447; CHECK-I686:       # %bb.0:
448; CHECK-I686-NEXT:    subl $124, %esp
449; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
450; CHECK-I686-NEXT:    movl (%eax), %ecx
451; CHECK-I686-NEXT:    movl 4(%eax), %eax
452; CHECK-I686-NEXT:    movl %eax, {{[0-9]+}}(%esp)
453; CHECK-I686-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
454; CHECK-I686-NEXT:    movaps {{[0-9]+}}(%esp), %xmm0
455; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
456; CHECK-I686-NEXT:    movdqa {{[0-9]+}}(%esp), %xmm0
457; CHECK-I686-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
458; CHECK-I686-NEXT:    pextrw $1, %xmm0, %eax
459; CHECK-I686-NEXT:    movl %eax, (%esp)
460; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
461; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
462; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
463; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
464; CHECK-I686-NEXT:    movl %eax, (%esp)
465; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
466; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
467; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
468; CHECK-I686-NEXT:    pextrw $1, %xmm0, %eax
469; CHECK-I686-NEXT:    movl %eax, (%esp)
470; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
471; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
472; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
473; CHECK-I686-NEXT:    movl %eax, (%esp)
474; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
475; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
476; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
477; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
478; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
479; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
480; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
481; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
482; CHECK-I686-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
483; CHECK-I686-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
484; CHECK-I686-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
485; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
486; CHECK-I686-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
487; CHECK-I686-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
488; CHECK-I686-NEXT:    addl $124, %esp
489; CHECK-I686-NEXT:    retl
; IR under test - 8-byte aligned <4 x half> load, fpext to <4 x float>.
490  %a = load <4 x half>, <4 x half>* %p, align 8
491  %b = fpext <4 x half> %a to <4 x float>
492  ret <4 x float> %b
493}
494
; Loads a <4 x half> vector from %p and extends it to <4 x double>.
; x86-64 runs without F16C expect four __gnu_h2f_ieee calls, each followed by
; cvtss2sd, with the results paired into xmm0 and xmm1. The run with F16C
; expects vcvtph2ps from memory followed by vcvtps2pd into ymm0. The i686 run
; expects four libcalls whose x87 results are stored as doubles and reloaded
; into xmm0 and xmm1.
495define <4 x double> @test_extend64_vec4(<4 x half>* %p) #0 {
496; CHECK-LIBCALL-LABEL: test_extend64_vec4:
497; CHECK-LIBCALL:       # %bb.0:
498; CHECK-LIBCALL-NEXT:    pushq %rbp
499; CHECK-LIBCALL-NEXT:    pushq %r14
500; CHECK-LIBCALL-NEXT:    pushq %rbx
501; CHECK-LIBCALL-NEXT:    subq $32, %rsp
502; CHECK-LIBCALL-NEXT:    movzwl 4(%rdi), %r14d
503; CHECK-LIBCALL-NEXT:    movzwl 6(%rdi), %ebp
504; CHECK-LIBCALL-NEXT:    movzwl (%rdi), %ebx
505; CHECK-LIBCALL-NEXT:    movzwl 2(%rdi), %edi
506; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee@PLT
507; CHECK-LIBCALL-NEXT:    cvtss2sd %xmm0, %xmm0
508; CHECK-LIBCALL-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
509; CHECK-LIBCALL-NEXT:    movl %ebx, %edi
510; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee@PLT
511; CHECK-LIBCALL-NEXT:    cvtss2sd %xmm0, %xmm0
512; CHECK-LIBCALL-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
513; CHECK-LIBCALL-NEXT:    # xmm0 = xmm0[0],mem[0]
514; CHECK-LIBCALL-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
515; CHECK-LIBCALL-NEXT:    movl %ebp, %edi
516; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee@PLT
517; CHECK-LIBCALL-NEXT:    cvtss2sd %xmm0, %xmm0
518; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
519; CHECK-LIBCALL-NEXT:    movl %r14d, %edi
520; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee@PLT
521; CHECK-LIBCALL-NEXT:    cvtss2sd %xmm0, %xmm1
522; CHECK-LIBCALL-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
523; CHECK-LIBCALL-NEXT:    # xmm1 = xmm1[0],mem[0]
524; CHECK-LIBCALL-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
525; CHECK-LIBCALL-NEXT:    addq $32, %rsp
526; CHECK-LIBCALL-NEXT:    popq %rbx
527; CHECK-LIBCALL-NEXT:    popq %r14
528; CHECK-LIBCALL-NEXT:    popq %rbp
529; CHECK-LIBCALL-NEXT:    retq
530;
531; BWON-F16C-LABEL: test_extend64_vec4:
532; BWON-F16C:       # %bb.0:
533; BWON-F16C-NEXT:    vcvtph2ps (%rdi), %xmm0
534; BWON-F16C-NEXT:    vcvtps2pd %xmm0, %ymm0
535; BWON-F16C-NEXT:    retq
536;
537; CHECK-I686-LABEL: test_extend64_vec4:
538; CHECK-I686:       # %bb.0:
539; CHECK-I686-NEXT:    pushl %ebx
540; CHECK-I686-NEXT:    pushl %edi
541; CHECK-I686-NEXT:    pushl %esi
542; CHECK-I686-NEXT:    subl $64, %esp
543; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
544; CHECK-I686-NEXT:    movzwl 6(%eax), %esi
545; CHECK-I686-NEXT:    movzwl (%eax), %edi
546; CHECK-I686-NEXT:    movzwl 2(%eax), %ebx
547; CHECK-I686-NEXT:    movzwl 4(%eax), %eax
548; CHECK-I686-NEXT:    movl %eax, (%esp)
549; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
550; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
551; CHECK-I686-NEXT:    movl %ebx, (%esp)
552; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
553; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
554; CHECK-I686-NEXT:    movl %edi, (%esp)
555; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
556; CHECK-I686-NEXT:    movl %esi, (%esp)
557; CHECK-I686-NEXT:    fstpl {{[0-9]+}}(%esp)
558; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
559; CHECK-I686-NEXT:    fstpl {{[0-9]+}}(%esp)
560; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
561; CHECK-I686-NEXT:    fstpl {{[0-9]+}}(%esp)
562; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
563; CHECK-I686-NEXT:    fstpl {{[0-9]+}}(%esp)
564; CHECK-I686-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
565; CHECK-I686-NEXT:    movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
566; CHECK-I686-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
567; CHECK-I686-NEXT:    movhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1]
568; CHECK-I686-NEXT:    addl $64, %esp
569; CHECK-I686-NEXT:    popl %esi
570; CHECK-I686-NEXT:    popl %edi
571; CHECK-I686-NEXT:    popl %ebx
572; CHECK-I686-NEXT:    retl
; IR under test - 8-byte aligned <4 x half> load, fpext to <4 x double>.
573  %a = load <4 x half>, <4 x half>* %p, align 8
574  %b = fpext <4 x half> %a to <4 x double>
575  ret <4 x double> %b
576}
577
; Truncates the <4 x float> argument %a to <4 x half> and stores it to %p.
; Runs without F16C expect four __gnu_f2h_ieee calls, one per lane, followed by
; four 16-bit stores. The two x86-64 libcall expectations differ only in how
; each intermediate result is copied out of the return register, movl with the
; byte/word fixup enabled versus movw with it disabled. The run with F16C
; expects a single vcvtps2ph that writes straight to memory.
578define void @test_trunc32_vec4(<4 x float> %a, <4 x half>* %p) #0 {
579; BWON-NOF16C-LABEL: test_trunc32_vec4:
580; BWON-NOF16C:       # %bb.0:
581; BWON-NOF16C-NEXT:    pushq %rbp
582; BWON-NOF16C-NEXT:    pushq %r15
583; BWON-NOF16C-NEXT:    pushq %r14
584; BWON-NOF16C-NEXT:    pushq %rbx
585; BWON-NOF16C-NEXT:    subq $24, %rsp
586; BWON-NOF16C-NEXT:    movq %rdi, %rbx
587; BWON-NOF16C-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
588; BWON-NOF16C-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
589; BWON-NOF16C-NEXT:    callq __gnu_f2h_ieee@PLT
590; BWON-NOF16C-NEXT:    movl %eax, %r14d
591; BWON-NOF16C-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
592; BWON-NOF16C-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
593; BWON-NOF16C-NEXT:    callq __gnu_f2h_ieee@PLT
594; BWON-NOF16C-NEXT:    movl %eax, %r15d
595; BWON-NOF16C-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
596; BWON-NOF16C-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
597; BWON-NOF16C-NEXT:    callq __gnu_f2h_ieee@PLT
598; BWON-NOF16C-NEXT:    movl %eax, %ebp
599; BWON-NOF16C-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
600; BWON-NOF16C-NEXT:    callq __gnu_f2h_ieee@PLT
601; BWON-NOF16C-NEXT:    movw %ax, (%rbx)
602; BWON-NOF16C-NEXT:    movw %bp, 6(%rbx)
603; BWON-NOF16C-NEXT:    movw %r15w, 4(%rbx)
604; BWON-NOF16C-NEXT:    movw %r14w, 2(%rbx)
605; BWON-NOF16C-NEXT:    addq $24, %rsp
606; BWON-NOF16C-NEXT:    popq %rbx
607; BWON-NOF16C-NEXT:    popq %r14
608; BWON-NOF16C-NEXT:    popq %r15
609; BWON-NOF16C-NEXT:    popq %rbp
610; BWON-NOF16C-NEXT:    retq
611;
612; BWOFF-LABEL: test_trunc32_vec4:
613; BWOFF:       # %bb.0:
614; BWOFF-NEXT:    pushq %rbp
615; BWOFF-NEXT:    pushq %r15
616; BWOFF-NEXT:    pushq %r14
617; BWOFF-NEXT:    pushq %rbx
618; BWOFF-NEXT:    subq $24, %rsp
619; BWOFF-NEXT:    movq %rdi, %rbx
620; BWOFF-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
621; BWOFF-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
622; BWOFF-NEXT:    callq __gnu_f2h_ieee@PLT
623; BWOFF-NEXT:    movw %ax, %r14w
624; BWOFF-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
625; BWOFF-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
626; BWOFF-NEXT:    callq __gnu_f2h_ieee@PLT
627; BWOFF-NEXT:    movw %ax, %r15w
628; BWOFF-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
629; BWOFF-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
630; BWOFF-NEXT:    callq __gnu_f2h_ieee@PLT
631; BWOFF-NEXT:    movw %ax, %bp
632; BWOFF-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
633; BWOFF-NEXT:    callq __gnu_f2h_ieee@PLT
634; BWOFF-NEXT:    movw %ax, (%rbx)
635; BWOFF-NEXT:    movw %bp, 6(%rbx)
636; BWOFF-NEXT:    movw %r15w, 4(%rbx)
637; BWOFF-NEXT:    movw %r14w, 2(%rbx)
638; BWOFF-NEXT:    addq $24, %rsp
639; BWOFF-NEXT:    popq %rbx
640; BWOFF-NEXT:    popq %r14
641; BWOFF-NEXT:    popq %r15
642; BWOFF-NEXT:    popq %rbp
643; BWOFF-NEXT:    retq
644;
645; BWON-F16C-LABEL: test_trunc32_vec4:
646; BWON-F16C:       # %bb.0:
647; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, (%rdi)
648; BWON-F16C-NEXT:    retq
649;
650; CHECK-I686-LABEL: test_trunc32_vec4:
651; CHECK-I686:       # %bb.0:
652; CHECK-I686-NEXT:    pushl %ebp
653; CHECK-I686-NEXT:    pushl %ebx
654; CHECK-I686-NEXT:    pushl %edi
655; CHECK-I686-NEXT:    pushl %esi
656; CHECK-I686-NEXT:    subl $44, %esp
657; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
658; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %ebp
659; CHECK-I686-NEXT:    movaps %xmm0, %xmm1
660; CHECK-I686-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1]
661; CHECK-I686-NEXT:    movss %xmm1, (%esp)
662; CHECK-I686-NEXT:    calll __gnu_f2h_ieee
663; CHECK-I686-NEXT:    movw %ax, %si
664; CHECK-I686-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
665; CHECK-I686-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
666; CHECK-I686-NEXT:    movss %xmm0, (%esp)
667; CHECK-I686-NEXT:    calll __gnu_f2h_ieee
668; CHECK-I686-NEXT:    movw %ax, %di
669; CHECK-I686-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
670; CHECK-I686-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
671; CHECK-I686-NEXT:    movss %xmm0, (%esp)
672; CHECK-I686-NEXT:    calll __gnu_f2h_ieee
673; CHECK-I686-NEXT:    movw %ax, %bx
674; CHECK-I686-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
675; CHECK-I686-NEXT:    movss %xmm0, (%esp)
676; CHECK-I686-NEXT:    calll __gnu_f2h_ieee
677; CHECK-I686-NEXT:    movw %ax, (%ebp)
678; CHECK-I686-NEXT:    movw %bx, 6(%ebp)
679; CHECK-I686-NEXT:    movw %di, 4(%ebp)
680; CHECK-I686-NEXT:    movw %si, 2(%ebp)
681; CHECK-I686-NEXT:    addl $44, %esp
682; CHECK-I686-NEXT:    popl %esi
683; CHECK-I686-NEXT:    popl %edi
684; CHECK-I686-NEXT:    popl %ebx
685; CHECK-I686-NEXT:    popl %ebp
686; CHECK-I686-NEXT:    retl
; IR under test - fptrunc <4 x float> to <4 x half>, then store the vector.
687  %v = fptrunc <4 x float> %a to <4 x half>
688  store <4 x half> %v, <4 x half>* %p
689  ret void
690}
691
692define void @test_trunc64_vec4(<4 x double> %a, <4 x half>* %p) #0 {
693; BWON-NOF16C-LABEL: test_trunc64_vec4:
694; BWON-NOF16C:       # %bb.0:
695; BWON-NOF16C-NEXT:    pushq %rbp
696; BWON-NOF16C-NEXT:    pushq %r15
697; BWON-NOF16C-NEXT:    pushq %r14
698; BWON-NOF16C-NEXT:    pushq %rbx
699; BWON-NOF16C-NEXT:    subq $40, %rsp
700; BWON-NOF16C-NEXT:    movq %rdi, %rbx
701; BWON-NOF16C-NEXT:    movaps %xmm1, (%rsp) # 16-byte Spill
702; BWON-NOF16C-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
703; BWON-NOF16C-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
704; BWON-NOF16C-NEXT:    callq __truncdfhf2@PLT
705; BWON-NOF16C-NEXT:    movl %eax, %r14d
706; BWON-NOF16C-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
707; BWON-NOF16C-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
708; BWON-NOF16C-NEXT:    callq __truncdfhf2@PLT
709; BWON-NOF16C-NEXT:    movl %eax, %r15d
710; BWON-NOF16C-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
711; BWON-NOF16C-NEXT:    callq __truncdfhf2@PLT
712; BWON-NOF16C-NEXT:    movl %eax, %ebp
713; BWON-NOF16C-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
714; BWON-NOF16C-NEXT:    callq __truncdfhf2@PLT
715; BWON-NOF16C-NEXT:    movw %ax, 4(%rbx)
716; BWON-NOF16C-NEXT:    movw %bp, (%rbx)
717; BWON-NOF16C-NEXT:    movw %r15w, 6(%rbx)
718; BWON-NOF16C-NEXT:    movw %r14w, 2(%rbx)
719; BWON-NOF16C-NEXT:    addq $40, %rsp
720; BWON-NOF16C-NEXT:    popq %rbx
721; BWON-NOF16C-NEXT:    popq %r14
722; BWON-NOF16C-NEXT:    popq %r15
723; BWON-NOF16C-NEXT:    popq %rbp
724; BWON-NOF16C-NEXT:    retq
725;
726; BWOFF-LABEL: test_trunc64_vec4:
727; BWOFF:       # %bb.0:
728; BWOFF-NEXT:    pushq %rbp
729; BWOFF-NEXT:    pushq %r15
730; BWOFF-NEXT:    pushq %r14
731; BWOFF-NEXT:    pushq %rbx
732; BWOFF-NEXT:    subq $40, %rsp
733; BWOFF-NEXT:    movq %rdi, %rbx
734; BWOFF-NEXT:    movaps %xmm1, (%rsp) # 16-byte Spill
735; BWOFF-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
736; BWOFF-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
737; BWOFF-NEXT:    callq __truncdfhf2@PLT
738; BWOFF-NEXT:    movw %ax, %r14w
739; BWOFF-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
740; BWOFF-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
741; BWOFF-NEXT:    callq __truncdfhf2@PLT
742; BWOFF-NEXT:    movw %ax, %r15w
743; BWOFF-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
744; BWOFF-NEXT:    callq __truncdfhf2@PLT
745; BWOFF-NEXT:    movw %ax, %bp
746; BWOFF-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
747; BWOFF-NEXT:    callq __truncdfhf2@PLT
748; BWOFF-NEXT:    movw %ax, 4(%rbx)
749; BWOFF-NEXT:    movw %bp, (%rbx)
750; BWOFF-NEXT:    movw %r15w, 6(%rbx)
751; BWOFF-NEXT:    movw %r14w, 2(%rbx)
752; BWOFF-NEXT:    addq $40, %rsp
753; BWOFF-NEXT:    popq %rbx
754; BWOFF-NEXT:    popq %r14
755; BWOFF-NEXT:    popq %r15
756; BWOFF-NEXT:    popq %rbp
757; BWOFF-NEXT:    retq
758;
759; BWON-F16C-LABEL: test_trunc64_vec4:
760; BWON-F16C:       # %bb.0:
761; BWON-F16C-NEXT:    pushq %rbp
762; BWON-F16C-NEXT:    pushq %r15
763; BWON-F16C-NEXT:    pushq %r14
764; BWON-F16C-NEXT:    pushq %rbx
765; BWON-F16C-NEXT:    subq $56, %rsp
766; BWON-F16C-NEXT:    movq %rdi, %rbx
767; BWON-F16C-NEXT:    vmovupd %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
768; BWON-F16C-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
769; BWON-F16C-NEXT:    vzeroupper
770; BWON-F16C-NEXT:    callq __truncdfhf2@PLT
771; BWON-F16C-NEXT:    movl %eax, %r14d
772; BWON-F16C-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
773; BWON-F16C-NEXT:    vextractf128 $1, %ymm0, %xmm0
774; BWON-F16C-NEXT:    vmovapd %xmm0, (%rsp) # 16-byte Spill
775; BWON-F16C-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
776; BWON-F16C-NEXT:    vzeroupper
777; BWON-F16C-NEXT:    callq __truncdfhf2@PLT
778; BWON-F16C-NEXT:    movl %eax, %r15d
779; BWON-F16C-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
780; BWON-F16C-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
781; BWON-F16C-NEXT:    vzeroupper
782; BWON-F16C-NEXT:    callq __truncdfhf2@PLT
783; BWON-F16C-NEXT:    movl %eax, %ebp
784; BWON-F16C-NEXT:    vmovaps (%rsp), %xmm0 # 16-byte Reload
785; BWON-F16C-NEXT:    callq __truncdfhf2@PLT
786; BWON-F16C-NEXT:    movw %ax, 4(%rbx)
787; BWON-F16C-NEXT:    movw %bp, (%rbx)
788; BWON-F16C-NEXT:    movw %r15w, 6(%rbx)
789; BWON-F16C-NEXT:    movw %r14w, 2(%rbx)
790; BWON-F16C-NEXT:    addq $56, %rsp
791; BWON-F16C-NEXT:    popq %rbx
792; BWON-F16C-NEXT:    popq %r14
793; BWON-F16C-NEXT:    popq %r15
794; BWON-F16C-NEXT:    popq %rbp
795; BWON-F16C-NEXT:    retq
796;
797; CHECK-I686-LABEL: test_trunc64_vec4:
798; CHECK-I686:       # %bb.0:
799; CHECK-I686-NEXT:    pushl %ebp
800; CHECK-I686-NEXT:    pushl %ebx
801; CHECK-I686-NEXT:    pushl %edi
802; CHECK-I686-NEXT:    pushl %esi
803; CHECK-I686-NEXT:    subl $60, %esp
804; CHECK-I686-NEXT:    movaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
805; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
806; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %ebp
807; CHECK-I686-NEXT:    movlps %xmm0, (%esp)
808; CHECK-I686-NEXT:    calll __truncdfhf2
809; CHECK-I686-NEXT:    movw %ax, %si
810; CHECK-I686-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
811; CHECK-I686-NEXT:    movhps %xmm0, (%esp)
812; CHECK-I686-NEXT:    calll __truncdfhf2
813; CHECK-I686-NEXT:    movw %ax, %di
814; CHECK-I686-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
815; CHECK-I686-NEXT:    movlps %xmm0, (%esp)
816; CHECK-I686-NEXT:    calll __truncdfhf2
817; CHECK-I686-NEXT:    movw %ax, %bx
818; CHECK-I686-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
819; CHECK-I686-NEXT:    movhps %xmm0, (%esp)
820; CHECK-I686-NEXT:    calll __truncdfhf2
821; CHECK-I686-NEXT:    movw %ax, 6(%ebp)
822; CHECK-I686-NEXT:    movw %bx, 4(%ebp)
823; CHECK-I686-NEXT:    movw %di, 2(%ebp)
824; CHECK-I686-NEXT:    movw %si, (%ebp)
825; CHECK-I686-NEXT:    addl $60, %esp
826; CHECK-I686-NEXT:    popl %esi
827; CHECK-I686-NEXT:    popl %edi
828; CHECK-I686-NEXT:    popl %ebx
829; CHECK-I686-NEXT:    popl %ebp
830; CHECK-I686-NEXT:    retl
831  %v = fptrunc <4 x double> %a to <4 x half>
832  store <4 x half> %v, <4 x half>* %p
833  ret void
834}
835
; External function returning float; it is only declared here so that the test
; below has a call whose result arrives via the target's float return path.
836declare float @test_floatret();
837
838; On i686, if SSE2 is available, the return value from test_floatret is loaded
839; to f80 and then rounded to f32.  The DAG combiner should not combine this
840; fp_round and the subsequent fptrunc from float to half.
841define half @test_f80trunc_nodagcombine() #0 {
842; CHECK-LIBCALL-LABEL: test_f80trunc_nodagcombine:
843; CHECK-LIBCALL:       # %bb.0:
844; CHECK-LIBCALL-NEXT:    pushq %rax
845; CHECK-LIBCALL-NEXT:    callq test_floatret@PLT
846; CHECK-LIBCALL-NEXT:    callq __gnu_f2h_ieee@PLT
847; CHECK-LIBCALL-NEXT:    popq %rcx
848; CHECK-LIBCALL-NEXT:    retq
849;
850; BWON-F16C-LABEL: test_f80trunc_nodagcombine:
851; BWON-F16C:       # %bb.0:
852; BWON-F16C-NEXT:    pushq %rax
853; BWON-F16C-NEXT:    callq test_floatret@PLT
854; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
855; BWON-F16C-NEXT:    vmovd %xmm0, %eax
856; BWON-F16C-NEXT:    # kill: def $ax killed $ax killed $eax
857; BWON-F16C-NEXT:    popq %rcx
858; BWON-F16C-NEXT:    retq
859;
860; CHECK-I686-LABEL: test_f80trunc_nodagcombine:
861; CHECK-I686:       # %bb.0:
862; CHECK-I686-NEXT:    subl $12, %esp
863; CHECK-I686-NEXT:    calll test_floatret@PLT
864; CHECK-I686-NEXT:    fstps (%esp)
865; CHECK-I686-NEXT:    calll __gnu_f2h_ieee
866; CHECK-I686-NEXT:    addl $12, %esp
867; CHECK-I686-NEXT:    retl
; Call the external function and truncate its float result to half.  In the
; i686 expectations above the x87 result is first stored as a 4-byte float
; (fstps) and only then handed to __gnu_f2h_ieee, i.e. the float->half step
; stays separate.  On x86-64 the same truncation is expected as a
; __gnu_f2h_ieee call without F16C and as vcvtps2ph with F16C.
868  %1 = call float @test_floatret()
869  %2 = fptrunc float %1 to half
870  ret half %2
871}
872
873
874
875
; Converts the i32 argument %a to half, adds it to the half loaded from %b, and
; returns the sum extended to float.
;
; The expectations below round to half twice: once after the int->float
; conversion and once after the add (__gnu_f2h_ieee, or vcvtps2ph $4 with
; F16C).  Each rounding is followed by an extension back to float
; (__gnu_h2f_ieee, or vcvtph2ps), and the add itself is performed in single
; precision (addss / vaddss).
876define float @test_sitofp_fadd_i32(i32 %a, half* %b) #0 {
877; CHECK-LIBCALL-LABEL: test_sitofp_fadd_i32:
878; CHECK-LIBCALL:       # %bb.0:
879; CHECK-LIBCALL-NEXT:    pushq %rbx
880; CHECK-LIBCALL-NEXT:    subq $16, %rsp
881; CHECK-LIBCALL-NEXT:    movzwl (%rsi), %ebx
882; CHECK-LIBCALL-NEXT:    cvtsi2ss %edi, %xmm0
883; CHECK-LIBCALL-NEXT:    callq __gnu_f2h_ieee@PLT
884; CHECK-LIBCALL-NEXT:    movzwl %ax, %edi
885; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee@PLT
886; CHECK-LIBCALL-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
887; CHECK-LIBCALL-NEXT:    movl %ebx, %edi
888; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee@PLT
889; CHECK-LIBCALL-NEXT:    addss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
890; CHECK-LIBCALL-NEXT:    callq __gnu_f2h_ieee@PLT
891; CHECK-LIBCALL-NEXT:    movzwl %ax, %edi
892; CHECK-LIBCALL-NEXT:    addq $16, %rsp
893; CHECK-LIBCALL-NEXT:    popq %rbx
894; CHECK-LIBCALL-NEXT:    jmp __gnu_h2f_ieee@PLT # TAILCALL
895;
896; BWON-F16C-LABEL: test_sitofp_fadd_i32:
897; BWON-F16C:       # %bb.0:
898; BWON-F16C-NEXT:    movzwl (%rsi), %eax
899; BWON-F16C-NEXT:    vcvtsi2ss %edi, %xmm0, %xmm0
900; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
901; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
902; BWON-F16C-NEXT:    vmovd %eax, %xmm1
903; BWON-F16C-NEXT:    vcvtph2ps %xmm1, %xmm1
904; BWON-F16C-NEXT:    vaddss %xmm0, %xmm1, %xmm0
905; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
906; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
907; BWON-F16C-NEXT:    retq
908;
909; CHECK-I686-LABEL: test_sitofp_fadd_i32:
910; CHECK-I686:       # %bb.0:
911; CHECK-I686-NEXT:    pushl %edi
912; CHECK-I686-NEXT:    pushl %esi
913; CHECK-I686-NEXT:    subl $20, %esp
914; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
915; CHECK-I686-NEXT:    movzwl (%eax), %edi
916; CHECK-I686-NEXT:    cvtsi2ssl {{[0-9]+}}(%esp), %xmm0
917; CHECK-I686-NEXT:    movss %xmm0, (%esp)
918; CHECK-I686-NEXT:    calll __gnu_f2h_ieee
919; CHECK-I686-NEXT:    movw %ax, %si
920; CHECK-I686-NEXT:    movl %edi, (%esp)
921; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
922; CHECK-I686-NEXT:    movzwl %si, %eax
923; CHECK-I686-NEXT:    movl %eax, (%esp)
924; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
925; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
926; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
927; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
928; CHECK-I686-NEXT:    addss {{[0-9]+}}(%esp), %xmm0
929; CHECK-I686-NEXT:    movss %xmm0, (%esp)
930; CHECK-I686-NEXT:    calll __gnu_f2h_ieee
931; CHECK-I686-NEXT:    movzwl %ax, %eax
932; CHECK-I686-NEXT:    movl %eax, (%esp)
933; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
934; CHECK-I686-NEXT:    addl $20, %esp
935; CHECK-I686-NEXT:    popl %esi
936; CHECK-I686-NEXT:    popl %edi
937; CHECK-I686-NEXT:    retl
; %tmp0 is the half operand from memory, %tmp1 the integer converted to half;
; the half sum %tmp2 is widened to float for the return value.
938  %tmp0 = load half, half* %b
939  %tmp1 = sitofp i32 %a to half
940  %tmp2 = fadd half %tmp0, %tmp1
941  %tmp3 = fpext half %tmp2 to float
942  ret float %tmp3
943}
944
; Compares the half argument against 0xH0000 with `fcmp une` (true when the
; operands are unordered or not equal) and converts the i1 result to half with
; uitofp.
;
; The expectations below extend the argument to float for the compare
; (__gnu_h2f_ieee, or vcvtph2ps with F16C) and then pick between 0 and the
; immediate 0x3C00 (the half encoding of 1.0) using cmovne plus cmovp, so the
; result is produced as an integer bit pattern; no float->half conversion
; appears in any of the expected sequences.
; NOTE(review): the function name presumably refers to LLVM bug report 40273;
; the report itself is not visible from this file - confirm before relying on it.
945define half @PR40273(half) #0 {
946; CHECK-LIBCALL-LABEL: PR40273:
947; CHECK-LIBCALL:       # %bb.0:
948; CHECK-LIBCALL-NEXT:    pushq %rax
949; CHECK-LIBCALL-NEXT:    movzwl %di, %edi
950; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee@PLT
951; CHECK-LIBCALL-NEXT:    xorl %eax, %eax
952; CHECK-LIBCALL-NEXT:    xorps %xmm1, %xmm1
953; CHECK-LIBCALL-NEXT:    ucomiss %xmm1, %xmm0
954; CHECK-LIBCALL-NEXT:    movl $15360, %ecx # imm = 0x3C00
955; CHECK-LIBCALL-NEXT:    cmovnel %ecx, %eax
956; CHECK-LIBCALL-NEXT:    cmovpl %ecx, %eax
957; CHECK-LIBCALL-NEXT:    # kill: def $ax killed $ax killed $eax
958; CHECK-LIBCALL-NEXT:    popq %rcx
959; CHECK-LIBCALL-NEXT:    retq
960;
961; BWON-F16C-LABEL: PR40273:
962; BWON-F16C:       # %bb.0:
963; BWON-F16C-NEXT:    movzwl %di, %eax
964; BWON-F16C-NEXT:    vmovd %eax, %xmm0
965; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
966; BWON-F16C-NEXT:    xorl %eax, %eax
967; BWON-F16C-NEXT:    vxorps %xmm1, %xmm1, %xmm1
968; BWON-F16C-NEXT:    vucomiss %xmm1, %xmm0
969; BWON-F16C-NEXT:    movl $15360, %ecx # imm = 0x3C00
970; BWON-F16C-NEXT:    cmovnel %ecx, %eax
971; BWON-F16C-NEXT:    cmovpl %ecx, %eax
972; BWON-F16C-NEXT:    # kill: def $ax killed $ax killed $eax
973; BWON-F16C-NEXT:    retq
974;
975; CHECK-I686-LABEL: PR40273:
976; CHECK-I686:       # %bb.0:
977; CHECK-I686-NEXT:    subl $12, %esp
978; CHECK-I686-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
979; CHECK-I686-NEXT:    movl %eax, (%esp)
980; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
981; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
982; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
983; CHECK-I686-NEXT:    xorl %eax, %eax
984; CHECK-I686-NEXT:    xorps %xmm1, %xmm1
985; CHECK-I686-NEXT:    ucomiss %xmm1, %xmm0
986; CHECK-I686-NEXT:    movl $15360, %ecx # imm = 0x3C00
987; CHECK-I686-NEXT:    cmovnel %ecx, %eax
988; CHECK-I686-NEXT:    cmovpl %ecx, %eax
989; CHECK-I686-NEXT:    # kill: def $ax killed $ax killed $eax
990; CHECK-I686-NEXT:    addl $12, %esp
991; CHECK-I686-NEXT:    retl
; %0 is the unnamed half parameter; %2 is the i1 compare result, %3 its half
; conversion.
992  %2 = fcmp une half %0, 0xH0000
993  %3 = uitofp i1 %2 to half
994  ret half %3
995}
996
; Attribute group #0 is the one referenced by the `#0` on the function
; definitions in this file; it marks them nounwind.
997attributes #0 = { nounwind }
998