1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=-f16c -fixup-byte-word-insts=1 \
3; RUN:   | FileCheck %s -check-prefixes=CHECK,CHECK-LIBCALL,BWON
4; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=-f16c  -fixup-byte-word-insts=0 \
5; RUN:   | FileCheck %s -check-prefixes=CHECK,CHECK-LIBCALL,BWOFF
6; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+f16c -fixup-byte-word-insts=1 \
7; RUN:    | FileCheck %s -check-prefixes=CHECK,BWON,BWON-F16C
8; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr +sse2 -fixup-byte-word-insts=0  \
9; RUN:    | FileCheck %s -check-prefixes=CHECK-I686
10
; Copies one half value from %in to %out (load followed by store).
; Expected codegen never calls a runtime helper: the x86-64 runs without F16C
; and the i686 run move the 16 bits through %xmm0 with pinsrw/pextrw plus a
; 16-bit store, while the F16C run uses vpinsrw and a vpextrw straight to memory.
11define void @test_load_store(ptr %in, ptr %out) #0 {
12; CHECK-LIBCALL-LABEL: test_load_store:
13; CHECK-LIBCALL:       # %bb.0:
14; CHECK-LIBCALL-NEXT:    pinsrw $0, (%rdi), %xmm0
15; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %eax
16; CHECK-LIBCALL-NEXT:    movw %ax, (%rsi)
17; CHECK-LIBCALL-NEXT:    retq
18;
19; BWON-F16C-LABEL: test_load_store:
20; BWON-F16C:       # %bb.0:
21; BWON-F16C-NEXT:    vpinsrw $0, (%rdi), %xmm0, %xmm0
22; BWON-F16C-NEXT:    vpextrw $0, %xmm0, (%rsi)
23; BWON-F16C-NEXT:    retq
24;
25; CHECK-I686-LABEL: test_load_store:
26; CHECK-I686:       # %bb.0:
27; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
28; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %ecx
29; CHECK-I686-NEXT:    pinsrw $0, (%ecx), %xmm0
30; CHECK-I686-NEXT:    pextrw $0, %xmm0, %ecx
31; CHECK-I686-NEXT:    movw %cx, (%eax)
32; CHECK-I686-NEXT:    retl
33  %val = load half, ptr %in
34  store half %val, ptr %out
35  ret void
36}
37
; Loads a half and returns its bit pattern as an i16 (load + bitcast).
; Expected codegen is a single 16-bit integer load with no vector register
; involved. The runs with -fixup-byte-word-insts=1 expect the zero-extending
; movzwl form; the runs with -fixup-byte-word-insts=0 (including the i686 run)
; expect a plain movw.
38define i16 @test_bitcast_from_half(ptr %addr) #0 {
39; BWON-LABEL: test_bitcast_from_half:
40; BWON:       # %bb.0:
41; BWON-NEXT:    movzwl (%rdi), %eax
42; BWON-NEXT:    retq
43;
44; BWOFF-LABEL: test_bitcast_from_half:
45; BWOFF:       # %bb.0:
46; BWOFF-NEXT:    movw (%rdi), %ax
47; BWOFF-NEXT:    retq
48;
49; CHECK-I686-LABEL: test_bitcast_from_half:
50; CHECK-I686:       # %bb.0:
51; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
52; CHECK-I686-NEXT:    movw (%eax), %ax
53; CHECK-I686-NEXT:    retl
54  %val = load half, ptr %addr
55  %val_int = bitcast half %val to i16
56  ret i16 %val_int
57}
58
; Reinterprets the i16 argument %in as a half and stores it to %addr
; (bitcast + store).
; Expected codegen is a plain 16-bit integer store: all x86-64 runs share one
; expectation (movw from %si), and the i686 run first loads the argument and
; the pointer from the stack.
59define void @test_bitcast_to_half(ptr %addr, i16 %in) #0 {
60; CHECK-LABEL: test_bitcast_to_half:
61; CHECK:       # %bb.0:
62; CHECK-NEXT:    movw %si, (%rdi)
63; CHECK-NEXT:    retq
64;
65; CHECK-I686-LABEL: test_bitcast_to_half:
66; CHECK-I686:       # %bb.0:
67; CHECK-I686-NEXT:    movw {{[0-9]+}}(%esp), %ax
68; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %ecx
69; CHECK-I686-NEXT:    movw %ax, (%ecx)
70; CHECK-I686-NEXT:    retl
71  %val_fp = bitcast i16 %in to half
72  store half %val_fp, ptr %addr
73  ret void
74}
75
; Loads a half and extends it to float (fpext half -> float).
; Expected codegen:
;   - x86-64 without F16C: the value is placed in %xmm0 and __extendhfsf2 is
;     reached through a tail call (jmp).
;   - x86-64 with F16C: inline conversion with vcvtph2ps, no call.
;   - i686: the half is written to the outgoing stack slot and __extendhfsf2
;     is called.
76define float @test_extend32(ptr %addr) #0 {
77; CHECK-LIBCALL-LABEL: test_extend32:
78; CHECK-LIBCALL:       # %bb.0:
79; CHECK-LIBCALL-NEXT:    pinsrw $0, (%rdi), %xmm0
80; CHECK-LIBCALL-NEXT:    jmp __extendhfsf2@PLT # TAILCALL
81;
82; BWON-F16C-LABEL: test_extend32:
83; BWON-F16C:       # %bb.0:
84; BWON-F16C-NEXT:    movzwl (%rdi), %eax
85; BWON-F16C-NEXT:    vmovd %eax, %xmm0
86; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
87; BWON-F16C-NEXT:    retq
88;
89; CHECK-I686-LABEL: test_extend32:
90; CHECK-I686:       # %bb.0:
91; CHECK-I686-NEXT:    subl $12, %esp
92; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
93; CHECK-I686-NEXT:    pinsrw $0, (%eax), %xmm0
94; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
95; CHECK-I686-NEXT:    movw %ax, (%esp)
96; CHECK-I686-NEXT:    calll __extendhfsf2
97; CHECK-I686-NEXT:    addl $12, %esp
98; CHECK-I686-NEXT:    retl
99  %val16 = load half, ptr %addr
100  %val32 = fpext half %val16 to float
101  ret float %val32
102}
103
; Loads a half and extends it to double (fpext half -> double).
; Expected codegen goes through float in every configuration:
;   - x86-64 without F16C: call __extendhfsf2, then cvtss2sd.
;   - x86-64 with F16C: vcvtph2ps, then vcvtss2sd.
;   - i686: only the __extendhfsf2 call is expected; the checks show no
;     separate float-to-double instruction after it.
; NOTE: the result value is named %val32 even though it has type double.
104define double @test_extend64(ptr %addr) #0 {
105; CHECK-LIBCALL-LABEL: test_extend64:
106; CHECK-LIBCALL:       # %bb.0:
107; CHECK-LIBCALL-NEXT:    pushq %rax
108; CHECK-LIBCALL-NEXT:    pinsrw $0, (%rdi), %xmm0
109; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
110; CHECK-LIBCALL-NEXT:    cvtss2sd %xmm0, %xmm0
111; CHECK-LIBCALL-NEXT:    popq %rax
112; CHECK-LIBCALL-NEXT:    retq
113;
114; BWON-F16C-LABEL: test_extend64:
115; BWON-F16C:       # %bb.0:
116; BWON-F16C-NEXT:    movzwl (%rdi), %eax
117; BWON-F16C-NEXT:    vmovd %eax, %xmm0
118; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
119; BWON-F16C-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
120; BWON-F16C-NEXT:    retq
121;
122; CHECK-I686-LABEL: test_extend64:
123; CHECK-I686:       # %bb.0:
124; CHECK-I686-NEXT:    subl $12, %esp
125; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
126; CHECK-I686-NEXT:    pinsrw $0, (%eax), %xmm0
127; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
128; CHECK-I686-NEXT:    movw %ax, (%esp)
129; CHECK-I686-NEXT:    calll __extendhfsf2
130; CHECK-I686-NEXT:    addl $12, %esp
131; CHECK-I686-NEXT:    retl
132  %val16 = load half, ptr %addr
133  %val32 = fpext half %val16 to double
134  ret double %val32
135}
136
; Truncates the float argument %in to half and stores it to %addr
; (fptrunc float -> half).
; Expected codegen:
;   - x86-64 without F16C: call __truncsfhf2; the destination pointer is kept
;     in %rbx across the call, then the result is extracted and stored as
;     16 bits.
;   - x86-64 with F16C: inline vcvtps2ph $4 followed by a 16-bit store.
;   - i686: the float is copied to the outgoing stack slot, __truncsfhf2 is
;     called, and the pointer is kept in %esi across the call.
137define void @test_trunc32(float %in, ptr %addr) #0 {
138; CHECK-LIBCALL-LABEL: test_trunc32:
139; CHECK-LIBCALL:       # %bb.0:
140; CHECK-LIBCALL-NEXT:    pushq %rbx
141; CHECK-LIBCALL-NEXT:    movq %rdi, %rbx
142; CHECK-LIBCALL-NEXT:    callq __truncsfhf2@PLT
143; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %eax
144; CHECK-LIBCALL-NEXT:    movw %ax, (%rbx)
145; CHECK-LIBCALL-NEXT:    popq %rbx
146; CHECK-LIBCALL-NEXT:    retq
147;
148; BWON-F16C-LABEL: test_trunc32:
149; BWON-F16C:       # %bb.0:
150; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
151; BWON-F16C-NEXT:    vmovd %xmm0, %eax
152; BWON-F16C-NEXT:    movw %ax, (%rdi)
153; BWON-F16C-NEXT:    retq
154;
155; CHECK-I686-LABEL: test_trunc32:
156; CHECK-I686:       # %bb.0:
157; CHECK-I686-NEXT:    pushl %esi
158; CHECK-I686-NEXT:    subl $8, %esp
159; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %esi
160; CHECK-I686-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
161; CHECK-I686-NEXT:    movd %xmm0, (%esp)
162; CHECK-I686-NEXT:    calll __truncsfhf2
163; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
164; CHECK-I686-NEXT:    movw %ax, (%esi)
165; CHECK-I686-NEXT:    addl $8, %esp
166; CHECK-I686-NEXT:    popl %esi
167; CHECK-I686-NEXT:    retl
168  %val16 = fptrunc float %in to half
169  store half %val16, ptr %addr
170  ret void
171}
172
; Truncates the double argument %in to half and stores it to %addr
; (fptrunc double -> half).
; Expected codegen:
;   - x86-64 without F16C and i686: a direct double-to-half call to
;     __truncdfhf2, followed by a 16-bit store of the result.
;   - x86-64 with F16C: a two-step inline sequence, vcvtsd2ss to float and
;     then vcvtps2ph $4 to half.
173define void @test_trunc64(double %in, ptr %addr) #0 {
174; CHECK-LIBCALL-LABEL: test_trunc64:
175; CHECK-LIBCALL:       # %bb.0:
176; CHECK-LIBCALL-NEXT:    pushq %rbx
177; CHECK-LIBCALL-NEXT:    movq %rdi, %rbx
178; CHECK-LIBCALL-NEXT:    callq __truncdfhf2@PLT
179; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %eax
180; CHECK-LIBCALL-NEXT:    movw %ax, (%rbx)
181; CHECK-LIBCALL-NEXT:    popq %rbx
182; CHECK-LIBCALL-NEXT:    retq
183;
184; BWON-F16C-LABEL: test_trunc64:
185; BWON-F16C:       # %bb.0:
186; BWON-F16C-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0
187; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
188; BWON-F16C-NEXT:    vmovd %xmm0, %eax
189; BWON-F16C-NEXT:    movw %ax, (%rdi)
190; BWON-F16C-NEXT:    retq
191;
192; CHECK-I686-LABEL: test_trunc64:
193; CHECK-I686:       # %bb.0:
194; CHECK-I686-NEXT:    pushl %esi
195; CHECK-I686-NEXT:    subl $8, %esp
196; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %esi
197; CHECK-I686-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
198; CHECK-I686-NEXT:    movq %xmm0, (%esp)
199; CHECK-I686-NEXT:    calll __truncdfhf2
200; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
201; CHECK-I686-NEXT:    movw %ax, (%esi)
202; CHECK-I686-NEXT:    addl $8, %esp
203; CHECK-I686-NEXT:    popl %esi
204; CHECK-I686-NEXT:    retl
205  %val16 = fptrunc double %in to half
206  store half %val16, ptr %addr
207  ret void
208}
209
; Loads a half and converts it to a signed 64-bit integer (fptosi half -> i64).
; Expected codegen:
;   - x86-64 without F16C: call __extendhfsf2, then cvttss2si into %rax.
;   - x86-64 with F16C: vcvtph2ps, then vcvttss2si into %rax.
;   - i686: the half is passed on the stack to a single __fixhfdi call.
210define i64 @test_fptosi_i64(ptr %p) #0 {
211; CHECK-LIBCALL-LABEL: test_fptosi_i64:
212; CHECK-LIBCALL:       # %bb.0:
213; CHECK-LIBCALL-NEXT:    pushq %rax
214; CHECK-LIBCALL-NEXT:    pinsrw $0, (%rdi), %xmm0
215; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
216; CHECK-LIBCALL-NEXT:    cvttss2si %xmm0, %rax
217; CHECK-LIBCALL-NEXT:    popq %rcx
218; CHECK-LIBCALL-NEXT:    retq
219;
220; BWON-F16C-LABEL: test_fptosi_i64:
221; BWON-F16C:       # %bb.0:
222; BWON-F16C-NEXT:    movzwl (%rdi), %eax
223; BWON-F16C-NEXT:    vmovd %eax, %xmm0
224; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
225; BWON-F16C-NEXT:    vcvttss2si %xmm0, %rax
226; BWON-F16C-NEXT:    retq
227;
228; CHECK-I686-LABEL: test_fptosi_i64:
229; CHECK-I686:       # %bb.0:
230; CHECK-I686-NEXT:    subl $12, %esp
231; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
232; CHECK-I686-NEXT:    pinsrw $0, (%eax), %xmm0
233; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
234; CHECK-I686-NEXT:    movw %ax, (%esp)
235; CHECK-I686-NEXT:    calll __fixhfdi
236; CHECK-I686-NEXT:    addl $12, %esp
237; CHECK-I686-NEXT:    retl
238  %a = load half, ptr %p, align 2
239  %r = fptosi half %a to i64
240  ret i64 %r
241}
242
; Converts the signed 64-bit integer %a to half and stores it to %p
; (sitofp i64 -> half).
; Expected codegen converts to float first, then narrows to half:
;   - x86-64 without F16C: cvtsi2ss, then a call to __truncsfhf2.
;   - x86-64 with F16C: vcvtsi2ss, then vcvtps2ph $4.
;   - i686: the integer goes through the x87 unit (fildll / fstps) to produce
;     a float, which is then passed to __truncsfhf2.
243define void @test_sitofp_i64(i64 %a, ptr %p) #0 {
244; CHECK-LIBCALL-LABEL: test_sitofp_i64:
245; CHECK-LIBCALL:       # %bb.0:
246; CHECK-LIBCALL-NEXT:    pushq %rbx
247; CHECK-LIBCALL-NEXT:    movq %rsi, %rbx
248; CHECK-LIBCALL-NEXT:    cvtsi2ss %rdi, %xmm0
249; CHECK-LIBCALL-NEXT:    callq __truncsfhf2@PLT
250; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %eax
251; CHECK-LIBCALL-NEXT:    movw %ax, (%rbx)
252; CHECK-LIBCALL-NEXT:    popq %rbx
253; CHECK-LIBCALL-NEXT:    retq
254;
255; BWON-F16C-LABEL: test_sitofp_i64:
256; BWON-F16C:       # %bb.0:
257; BWON-F16C-NEXT:    vcvtsi2ss %rdi, %xmm0, %xmm0
258; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
259; BWON-F16C-NEXT:    vmovd %xmm0, %eax
260; BWON-F16C-NEXT:    movw %ax, (%rsi)
261; BWON-F16C-NEXT:    retq
262;
263; CHECK-I686-LABEL: test_sitofp_i64:
264; CHECK-I686:       # %bb.0:
265; CHECK-I686-NEXT:    pushl %esi
266; CHECK-I686-NEXT:    subl $24, %esp
267; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %esi
268; CHECK-I686-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
269; CHECK-I686-NEXT:    movlps %xmm0, {{[0-9]+}}(%esp)
270; CHECK-I686-NEXT:    fildll {{[0-9]+}}(%esp)
271; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
272; CHECK-I686-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
273; CHECK-I686-NEXT:    movd %xmm0, (%esp)
274; CHECK-I686-NEXT:    calll __truncsfhf2
275; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
276; CHECK-I686-NEXT:    movw %ax, (%esi)
277; CHECK-I686-NEXT:    addl $24, %esp
278; CHECK-I686-NEXT:    popl %esi
279; CHECK-I686-NEXT:    retl
280  %r = sitofp i64 %a to half
281  store half %r, ptr %p
282  ret void
283}
284
; Loads a half and converts it to an unsigned 64-bit integer
; (fptoui half -> i64).
; Expected codegen:
;   - x86-64 (both with and without F16C): the half is first widened to float
;     (__extendhfsf2 call or vcvtph2ps), then the unsigned conversion is
;     open-coded with two truncating signed conversions, a subtraction of a
;     constant-pool value, and a sarq/andq/orq merge of the two results.
;   - i686: the half is passed on the stack to a single __fixunshfdi call.
285define i64 @test_fptoui_i64(ptr %p) #0 {
286; CHECK-LIBCALL-LABEL: test_fptoui_i64:
287; CHECK-LIBCALL:       # %bb.0:
288; CHECK-LIBCALL-NEXT:    pushq %rax
289; CHECK-LIBCALL-NEXT:    pinsrw $0, (%rdi), %xmm0
290; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
291; CHECK-LIBCALL-NEXT:    cvttss2si %xmm0, %rcx
292; CHECK-LIBCALL-NEXT:    movq %rcx, %rdx
293; CHECK-LIBCALL-NEXT:    sarq $63, %rdx
294; CHECK-LIBCALL-NEXT:    subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
295; CHECK-LIBCALL-NEXT:    cvttss2si %xmm0, %rax
296; CHECK-LIBCALL-NEXT:    andq %rdx, %rax
297; CHECK-LIBCALL-NEXT:    orq %rcx, %rax
298; CHECK-LIBCALL-NEXT:    popq %rcx
299; CHECK-LIBCALL-NEXT:    retq
300;
301; BWON-F16C-LABEL: test_fptoui_i64:
302; BWON-F16C:       # %bb.0:
303; BWON-F16C-NEXT:    movzwl (%rdi), %eax
304; BWON-F16C-NEXT:    vmovd %eax, %xmm0
305; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
306; BWON-F16C-NEXT:    vcvttss2si %xmm0, %rcx
307; BWON-F16C-NEXT:    movq %rcx, %rdx
308; BWON-F16C-NEXT:    sarq $63, %rdx
309; BWON-F16C-NEXT:    vsubss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
310; BWON-F16C-NEXT:    vcvttss2si %xmm0, %rax
311; BWON-F16C-NEXT:    andq %rdx, %rax
312; BWON-F16C-NEXT:    orq %rcx, %rax
313; BWON-F16C-NEXT:    retq
314;
315; CHECK-I686-LABEL: test_fptoui_i64:
316; CHECK-I686:       # %bb.0:
317; CHECK-I686-NEXT:    subl $12, %esp
318; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
319; CHECK-I686-NEXT:    pinsrw $0, (%eax), %xmm0
320; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
321; CHECK-I686-NEXT:    movw %ax, (%esp)
322; CHECK-I686-NEXT:    calll __fixunshfdi
323; CHECK-I686-NEXT:    addl $12, %esp
324; CHECK-I686-NEXT:    retl
325  %a = load half, ptr %p, align 2
326  %r = fptoui half %a to i64
327  ret i64 %r
328}
329
; Converts the unsigned 64-bit integer %a to half and stores it to %p
; (uitofp i64 -> half).
; Expected codegen converts to float first, then narrows to half:
;   - x86-64 (both with and without F16C): branch on the sign bit (testq/js).
;     The non-negative path is a direct signed conversion. The other path
;     shifts the value right by one while OR-ing the low bit back in, converts,
;     and doubles the result. The float is then narrowed with __truncsfhf2 or
;     vcvtps2ph $4.
;   - i686: fildll loads the integer, then fadds adds an entry from a
;     constant-pool table indexed by the top bit of the high word (shrl $31),
;     and the resulting float is passed to __truncsfhf2.
; The .LBB10_* labels in the checks embed this function's position in the
; file, so they change if functions are added or removed before it.
330define void @test_uitofp_i64(i64 %a, ptr %p) #0 {
331; CHECK-LIBCALL-LABEL: test_uitofp_i64:
332; CHECK-LIBCALL:       # %bb.0:
333; CHECK-LIBCALL-NEXT:    pushq %rbx
334; CHECK-LIBCALL-NEXT:    movq %rsi, %rbx
335; CHECK-LIBCALL-NEXT:    testq %rdi, %rdi
336; CHECK-LIBCALL-NEXT:    js .LBB10_1
337; CHECK-LIBCALL-NEXT:  # %bb.2:
338; CHECK-LIBCALL-NEXT:    cvtsi2ss %rdi, %xmm0
339; CHECK-LIBCALL-NEXT:    jmp .LBB10_3
340; CHECK-LIBCALL-NEXT:  .LBB10_1:
341; CHECK-LIBCALL-NEXT:    movq %rdi, %rax
342; CHECK-LIBCALL-NEXT:    shrq %rax
343; CHECK-LIBCALL-NEXT:    andl $1, %edi
344; CHECK-LIBCALL-NEXT:    orq %rax, %rdi
345; CHECK-LIBCALL-NEXT:    cvtsi2ss %rdi, %xmm0
346; CHECK-LIBCALL-NEXT:    addss %xmm0, %xmm0
347; CHECK-LIBCALL-NEXT:  .LBB10_3:
348; CHECK-LIBCALL-NEXT:    callq __truncsfhf2@PLT
349; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %eax
350; CHECK-LIBCALL-NEXT:    movw %ax, (%rbx)
351; CHECK-LIBCALL-NEXT:    popq %rbx
352; CHECK-LIBCALL-NEXT:    retq
353;
354; BWON-F16C-LABEL: test_uitofp_i64:
355; BWON-F16C:       # %bb.0:
356; BWON-F16C-NEXT:    testq %rdi, %rdi
357; BWON-F16C-NEXT:    js .LBB10_1
358; BWON-F16C-NEXT:  # %bb.2:
359; BWON-F16C-NEXT:    vcvtsi2ss %rdi, %xmm0, %xmm0
360; BWON-F16C-NEXT:    jmp .LBB10_3
361; BWON-F16C-NEXT:  .LBB10_1:
362; BWON-F16C-NEXT:    movq %rdi, %rax
363; BWON-F16C-NEXT:    shrq %rax
364; BWON-F16C-NEXT:    andl $1, %edi
365; BWON-F16C-NEXT:    orq %rax, %rdi
366; BWON-F16C-NEXT:    vcvtsi2ss %rdi, %xmm0, %xmm0
367; BWON-F16C-NEXT:    vaddss %xmm0, %xmm0, %xmm0
368; BWON-F16C-NEXT:  .LBB10_3:
369; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
370; BWON-F16C-NEXT:    vmovd %xmm0, %eax
371; BWON-F16C-NEXT:    movw %ax, (%rsi)
372; BWON-F16C-NEXT:    retq
373;
374; CHECK-I686-LABEL: test_uitofp_i64:
375; CHECK-I686:       # %bb.0:
376; CHECK-I686-NEXT:    pushl %esi
377; CHECK-I686-NEXT:    subl $24, %esp
378; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %esi
379; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
380; CHECK-I686-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
381; CHECK-I686-NEXT:    movq %xmm0, {{[0-9]+}}(%esp)
382; CHECK-I686-NEXT:    shrl $31, %eax
383; CHECK-I686-NEXT:    fildll {{[0-9]+}}(%esp)
384; CHECK-I686-NEXT:    fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
385; CHECK-I686-NEXT:    fstps (%esp)
386; CHECK-I686-NEXT:    calll __truncsfhf2
387; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
388; CHECK-I686-NEXT:    movw %ax, (%esi)
389; CHECK-I686-NEXT:    addl $24, %esp
390; CHECK-I686-NEXT:    popl %esi
391; CHECK-I686-NEXT:    retl
392  %r = uitofp i64 %a to half
393  store half %r, ptr %p
394  ret void
395}
396
; Loads a <4 x half> vector (align 8) and extends it to <4 x float>.
; Expected codegen:
;   - x86-64 without F16C: the vector is scalarized. Each element is loaded
;     with pinsrw, converted by its own __extendhfsf2 call (four calls, with
;     spills around them), and the results are repacked into %xmm0 with
;     unpcklps / unpcklpd.
;   - x86-64 with F16C: a single vcvtph2ps with a memory operand.
;   - i686: four __extendhfsf2 calls with each half passed on the stack; the
;     results are spilled or stored from the x87 stack (fstpt / fstps) and
;     repacked with movss / unpcklps / movlhps.
397define <4 x float> @test_extend32_vec4(ptr %p) #0 {
398; CHECK-LIBCALL-LABEL: test_extend32_vec4:
399; CHECK-LIBCALL:       # %bb.0:
400; CHECK-LIBCALL-NEXT:    subq $72, %rsp
401; CHECK-LIBCALL-NEXT:    pinsrw $0, (%rdi), %xmm0
402; CHECK-LIBCALL-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
403; CHECK-LIBCALL-NEXT:    pinsrw $0, 2(%rdi), %xmm0
404; CHECK-LIBCALL-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
405; CHECK-LIBCALL-NEXT:    pinsrw $0, 4(%rdi), %xmm0
406; CHECK-LIBCALL-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
407; CHECK-LIBCALL-NEXT:    pinsrw $0, 6(%rdi), %xmm0
408; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
409; CHECK-LIBCALL-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
410; CHECK-LIBCALL-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
411; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
412; CHECK-LIBCALL-NEXT:    unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
413; CHECK-LIBCALL-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
414; CHECK-LIBCALL-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
415; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
416; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
417; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
418; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
419; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
420; CHECK-LIBCALL-NEXT:    unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
421; CHECK-LIBCALL-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
422; CHECK-LIBCALL-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
423; CHECK-LIBCALL-NEXT:    # xmm0 = xmm0[0],mem[0]
424; CHECK-LIBCALL-NEXT:    addq $72, %rsp
425; CHECK-LIBCALL-NEXT:    retq
426;
427; BWON-F16C-LABEL: test_extend32_vec4:
428; BWON-F16C:       # %bb.0:
429; BWON-F16C-NEXT:    vcvtph2ps (%rdi), %xmm0
430; BWON-F16C-NEXT:    retq
431;
432; CHECK-I686-LABEL: test_extend32_vec4:
433; CHECK-I686:       # %bb.0:
434; CHECK-I686-NEXT:    pushl %esi
435; CHECK-I686-NEXT:    subl $88, %esp
436; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
437; CHECK-I686-NEXT:    pinsrw $0, (%eax), %xmm0
438; CHECK-I686-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
439; CHECK-I686-NEXT:    pinsrw $0, 6(%eax), %xmm0
440; CHECK-I686-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
441; CHECK-I686-NEXT:    pinsrw $0, 4(%eax), %xmm0
442; CHECK-I686-NEXT:    pinsrw $0, 2(%eax), %xmm1
443; CHECK-I686-NEXT:    pextrw $0, %xmm1, %eax
444; CHECK-I686-NEXT:    movw %ax, (%esp)
445; CHECK-I686-NEXT:    pextrw $0, %xmm0, %esi
446; CHECK-I686-NEXT:    calll __extendhfsf2
447; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
448; CHECK-I686-NEXT:    movw %si, (%esp)
449; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
450; CHECK-I686-NEXT:    pextrw $0, %xmm0, %esi
451; CHECK-I686-NEXT:    calll __extendhfsf2
452; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
453; CHECK-I686-NEXT:    movw %si, (%esp)
454; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
455; CHECK-I686-NEXT:    pextrw $0, %xmm0, %esi
456; CHECK-I686-NEXT:    calll __extendhfsf2
457; CHECK-I686-NEXT:    movw %si, (%esp)
458; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
459; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
460; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
461; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
462; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
463; CHECK-I686-NEXT:    calll __extendhfsf2
464; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
465; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
466; CHECK-I686-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
467; CHECK-I686-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
468; CHECK-I686-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
469; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
470; CHECK-I686-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
471; CHECK-I686-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
472; CHECK-I686-NEXT:    addl $88, %esp
473; CHECK-I686-NEXT:    popl %esi
474; CHECK-I686-NEXT:    retl
475  %a = load <4 x half>, ptr %p, align 8
476  %b = fpext <4 x half> %a to <4 x float>
477  ret <4 x float> %b
478}
479
; Loads a <4 x half> vector (align 8) and extends it to <4 x double>.
; Expected codegen:
;   - x86-64 without F16C: the vector is scalarized. Each element gets its own
;     __extendhfsf2 call followed by cvtss2sd, and the four doubles are paired
;     with unpcklpd into %xmm0 and %xmm1.
;   - x86-64 with F16C: vcvtph2ps from memory, then vcvtps2pd into %ymm0.
;   - i686: four __extendhfsf2 calls with each half passed on the stack; the
;     results are written out as doubles with fstpl and loaded into %xmm0 and
;     %xmm1 with movsd / movhps.
480define <4 x double> @test_extend64_vec4(ptr %p) #0 {
481; CHECK-LIBCALL-LABEL: test_extend64_vec4:
482; CHECK-LIBCALL:       # %bb.0:
483; CHECK-LIBCALL-NEXT:    subq $72, %rsp
484; CHECK-LIBCALL-NEXT:    pinsrw $0, 4(%rdi), %xmm0
485; CHECK-LIBCALL-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
486; CHECK-LIBCALL-NEXT:    pinsrw $0, 6(%rdi), %xmm0
487; CHECK-LIBCALL-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
488; CHECK-LIBCALL-NEXT:    pinsrw $0, (%rdi), %xmm0
489; CHECK-LIBCALL-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
490; CHECK-LIBCALL-NEXT:    pinsrw $0, 2(%rdi), %xmm0
491; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
492; CHECK-LIBCALL-NEXT:    cvtss2sd %xmm0, %xmm0
493; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
494; CHECK-LIBCALL-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
495; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
496; CHECK-LIBCALL-NEXT:    cvtss2sd %xmm0, %xmm0
497; CHECK-LIBCALL-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
498; CHECK-LIBCALL-NEXT:    # xmm0 = xmm0[0],mem[0]
499; CHECK-LIBCALL-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
500; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
501; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
502; CHECK-LIBCALL-NEXT:    cvtss2sd %xmm0, %xmm0
503; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
504; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
505; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
506; CHECK-LIBCALL-NEXT:    cvtss2sd %xmm0, %xmm1
507; CHECK-LIBCALL-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
508; CHECK-LIBCALL-NEXT:    # xmm1 = xmm1[0],mem[0]
509; CHECK-LIBCALL-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
510; CHECK-LIBCALL-NEXT:    addq $72, %rsp
511; CHECK-LIBCALL-NEXT:    retq
512;
513; BWON-F16C-LABEL: test_extend64_vec4:
514; BWON-F16C:       # %bb.0:
515; BWON-F16C-NEXT:    vcvtph2ps (%rdi), %xmm0
516; BWON-F16C-NEXT:    vcvtps2pd %xmm0, %ymm0
517; BWON-F16C-NEXT:    retq
518;
519; CHECK-I686-LABEL: test_extend64_vec4:
520; CHECK-I686:       # %bb.0:
521; CHECK-I686-NEXT:    pushl %esi
522; CHECK-I686-NEXT:    subl $104, %esp
523; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
524; CHECK-I686-NEXT:    pinsrw $0, 6(%eax), %xmm0
525; CHECK-I686-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
526; CHECK-I686-NEXT:    pinsrw $0, (%eax), %xmm0
527; CHECK-I686-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
528; CHECK-I686-NEXT:    pinsrw $0, 2(%eax), %xmm0
529; CHECK-I686-NEXT:    pinsrw $0, 4(%eax), %xmm1
530; CHECK-I686-NEXT:    pextrw $0, %xmm1, %eax
531; CHECK-I686-NEXT:    movw %ax, (%esp)
532; CHECK-I686-NEXT:    pextrw $0, %xmm0, %esi
533; CHECK-I686-NEXT:    calll __extendhfsf2
534; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
535; CHECK-I686-NEXT:    movw %si, (%esp)
536; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
537; CHECK-I686-NEXT:    pextrw $0, %xmm0, %esi
538; CHECK-I686-NEXT:    calll __extendhfsf2
539; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
540; CHECK-I686-NEXT:    movw %si, (%esp)
541; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
542; CHECK-I686-NEXT:    pextrw $0, %xmm0, %esi
543; CHECK-I686-NEXT:    calll __extendhfsf2
544; CHECK-I686-NEXT:    movw %si, (%esp)
545; CHECK-I686-NEXT:    fstpl {{[0-9]+}}(%esp)
546; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
547; CHECK-I686-NEXT:    fstpl {{[0-9]+}}(%esp)
548; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
549; CHECK-I686-NEXT:    fstpl {{[0-9]+}}(%esp)
550; CHECK-I686-NEXT:    calll __extendhfsf2
551; CHECK-I686-NEXT:    fstpl {{[0-9]+}}(%esp)
552; CHECK-I686-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
553; CHECK-I686-NEXT:    movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
554; CHECK-I686-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
555; CHECK-I686-NEXT:    movhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1]
556; CHECK-I686-NEXT:    addl $104, %esp
557; CHECK-I686-NEXT:    popl %esi
558; CHECK-I686-NEXT:    retl
559  %a = load <4 x half>, ptr %p, align 8
560  %b = fpext <4 x half> %a to <4 x double>
561  ret <4 x double> %b
562}
563
; Truncates the <4 x float> argument %a to <4 x half> and stores it to %p.
; Expected codegen:
;   - x86-64 without F16C and i686: the vector is scalarized. Each lane is
;     shuffled into position and passed to its own __truncsfhf2 call (four
;     calls, with spills in between), and the four results are written with
;     16-bit stores at byte offsets 0, 2, 4 and 6 from the pointer.
;   - x86-64 with F16C: a single vcvtps2ph $4 with a memory destination.
564define void @test_trunc32_vec4(<4 x float> %a, ptr %p) #0 {
565; CHECK-LIBCALL-LABEL: test_trunc32_vec4:
566; CHECK-LIBCALL:       # %bb.0:
567; CHECK-LIBCALL-NEXT:    pushq %rbx
568; CHECK-LIBCALL-NEXT:    subq $64, %rsp
569; CHECK-LIBCALL-NEXT:    movq %rdi, %rbx
570; CHECK-LIBCALL-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
571; CHECK-LIBCALL-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
572; CHECK-LIBCALL-NEXT:    callq __truncsfhf2@PLT
573; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
574; CHECK-LIBCALL-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
575; CHECK-LIBCALL-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
576; CHECK-LIBCALL-NEXT:    callq __truncsfhf2@PLT
577; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
578; CHECK-LIBCALL-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
579; CHECK-LIBCALL-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
580; CHECK-LIBCALL-NEXT:    callq __truncsfhf2@PLT
581; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
582; CHECK-LIBCALL-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
583; CHECK-LIBCALL-NEXT:    callq __truncsfhf2@PLT
584; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %eax
585; CHECK-LIBCALL-NEXT:    movw %ax, (%rbx)
586; CHECK-LIBCALL-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
587; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %eax
588; CHECK-LIBCALL-NEXT:    movw %ax, 6(%rbx)
589; CHECK-LIBCALL-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
590; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %eax
591; CHECK-LIBCALL-NEXT:    movw %ax, 4(%rbx)
592; CHECK-LIBCALL-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
593; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %eax
594; CHECK-LIBCALL-NEXT:    movw %ax, 2(%rbx)
595; CHECK-LIBCALL-NEXT:    addq $64, %rsp
596; CHECK-LIBCALL-NEXT:    popq %rbx
597; CHECK-LIBCALL-NEXT:    retq
598;
599; BWON-F16C-LABEL: test_trunc32_vec4:
600; BWON-F16C:       # %bb.0:
601; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, (%rdi)
602; BWON-F16C-NEXT:    retq
603;
604; CHECK-I686-LABEL: test_trunc32_vec4:
605; CHECK-I686:       # %bb.0:
606; CHECK-I686-NEXT:    pushl %esi
607; CHECK-I686-NEXT:    subl $88, %esp
608; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
609; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %esi
610; CHECK-I686-NEXT:    movaps %xmm0, %xmm1
611; CHECK-I686-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1]
612; CHECK-I686-NEXT:    movss %xmm1, (%esp)
613; CHECK-I686-NEXT:    calll __truncsfhf2
614; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
615; CHECK-I686-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
616; CHECK-I686-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
617; CHECK-I686-NEXT:    movss %xmm0, (%esp)
618; CHECK-I686-NEXT:    calll __truncsfhf2
619; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
620; CHECK-I686-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
621; CHECK-I686-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
622; CHECK-I686-NEXT:    movss %xmm0, (%esp)
623; CHECK-I686-NEXT:    calll __truncsfhf2
624; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
625; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
626; CHECK-I686-NEXT:    movd %xmm0, (%esp)
627; CHECK-I686-NEXT:    calll __truncsfhf2
628; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
629; CHECK-I686-NEXT:    movw %ax, (%esi)
630; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
631; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
632; CHECK-I686-NEXT:    movw %ax, 6(%esi)
633; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
634; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
635; CHECK-I686-NEXT:    movw %ax, 4(%esi)
636; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
637; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
638; CHECK-I686-NEXT:    movw %ax, 2(%esi)
639; CHECK-I686-NEXT:    addl $88, %esp
640; CHECK-I686-NEXT:    popl %esi
641; CHECK-I686-NEXT:    retl
642  %v = fptrunc <4 x float> %a to <4 x half>
643  store <4 x half> %v, ptr %p
644  ret void
645}
646
; Truncates the <4 x double> argument %a to <4 x half> and stores it to %p.
; Expected codegen is scalarized in every configuration:
;   - x86-64 without F16C and i686: four __truncdfhf2 calls, one per lane
;     (the input arrives in %xmm0 and %xmm1), with the results written by
;     16-bit stores at byte offsets 0, 2, 4 and 6 from the pointer.
;   - x86-64 with F16C: the input arrives in %ymm0; each lane is converted
;     inline with vcvtsd2ss followed by vcvtps2ph $4, stored with movw, and
;     vzeroupper is expected before the return.
647define void @test_trunc64_vec4(<4 x double> %a, ptr %p) #0 {
648; CHECK-LIBCALL-LABEL: test_trunc64_vec4:
649; CHECK-LIBCALL:       # %bb.0:
650; CHECK-LIBCALL-NEXT:    pushq %rbx
651; CHECK-LIBCALL-NEXT:    subq $64, %rsp
652; CHECK-LIBCALL-NEXT:    movq %rdi, %rbx
653; CHECK-LIBCALL-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
654; CHECK-LIBCALL-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
655; CHECK-LIBCALL-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
656; CHECK-LIBCALL-NEXT:    callq __truncdfhf2@PLT
657; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
658; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
659; CHECK-LIBCALL-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
660; CHECK-LIBCALL-NEXT:    callq __truncdfhf2@PLT
661; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
662; CHECK-LIBCALL-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
663; CHECK-LIBCALL-NEXT:    callq __truncdfhf2@PLT
664; CHECK-LIBCALL-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
665; CHECK-LIBCALL-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
666; CHECK-LIBCALL-NEXT:    callq __truncdfhf2@PLT
667; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %eax
668; CHECK-LIBCALL-NEXT:    movw %ax, 4(%rbx)
669; CHECK-LIBCALL-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
670; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %eax
671; CHECK-LIBCALL-NEXT:    movw %ax, (%rbx)
672; CHECK-LIBCALL-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
673; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %eax
674; CHECK-LIBCALL-NEXT:    movw %ax, 6(%rbx)
675; CHECK-LIBCALL-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
676; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %eax
677; CHECK-LIBCALL-NEXT:    movw %ax, 2(%rbx)
678; CHECK-LIBCALL-NEXT:    addq $64, %rsp
679; CHECK-LIBCALL-NEXT:    popq %rbx
680; CHECK-LIBCALL-NEXT:    retq
681;
682; BWON-F16C-LABEL: test_trunc64_vec4:
683; BWON-F16C:       # %bb.0:
684; BWON-F16C-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
685; BWON-F16C-NEXT:    vcvtsd2ss %xmm1, %xmm1, %xmm1
686; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm1, %xmm1
687; BWON-F16C-NEXT:    vmovd %xmm1, %eax
688; BWON-F16C-NEXT:    vextractf128 $1, %ymm0, %xmm1
689; BWON-F16C-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
690; BWON-F16C-NEXT:    vcvtsd2ss %xmm2, %xmm2, %xmm2
691; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm2, %xmm2
692; BWON-F16C-NEXT:    vmovd %xmm2, %ecx
693; BWON-F16C-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0
694; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
695; BWON-F16C-NEXT:    vmovd %xmm0, %edx
696; BWON-F16C-NEXT:    vcvtsd2ss %xmm1, %xmm1, %xmm0
697; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
698; BWON-F16C-NEXT:    vmovd %xmm0, %esi
699; BWON-F16C-NEXT:    movw %si, 4(%rdi)
700; BWON-F16C-NEXT:    movw %dx, (%rdi)
701; BWON-F16C-NEXT:    movw %cx, 6(%rdi)
702; BWON-F16C-NEXT:    movw %ax, 2(%rdi)
703; BWON-F16C-NEXT:    vzeroupper
704; BWON-F16C-NEXT:    retq
705;
706; CHECK-I686-LABEL: test_trunc64_vec4:
707; CHECK-I686:       # %bb.0:
708; CHECK-I686-NEXT:    pushl %esi
709; CHECK-I686-NEXT:    subl $88, %esp
710; CHECK-I686-NEXT:    movaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
711; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
712; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %esi
713; CHECK-I686-NEXT:    movlps %xmm0, (%esp)
714; CHECK-I686-NEXT:    calll __truncdfhf2
715; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
716; CHECK-I686-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
717; CHECK-I686-NEXT:    movhps %xmm0, (%esp)
718; CHECK-I686-NEXT:    calll __truncdfhf2
719; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
720; CHECK-I686-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
721; CHECK-I686-NEXT:    movlps %xmm0, (%esp)
722; CHECK-I686-NEXT:    calll __truncdfhf2
723; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
724; CHECK-I686-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
725; CHECK-I686-NEXT:    movhps %xmm0, (%esp)
726; CHECK-I686-NEXT:    calll __truncdfhf2
727; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
728; CHECK-I686-NEXT:    movw %ax, 6(%esi)
729; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
730; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
731; CHECK-I686-NEXT:    movw %ax, 4(%esi)
732; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
733; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
734; CHECK-I686-NEXT:    movw %ax, 2(%esi)
735; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
736; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
737; CHECK-I686-NEXT:    movw %ax, (%esi)
738; CHECK-I686-NEXT:    addl $88, %esp
739; CHECK-I686-NEXT:    popl %esi
740; CHECK-I686-NEXT:    retl
741  %v = fptrunc <4 x double> %a to <4 x half>
742  store <4 x half> %v, ptr %p
743  ret void
744}
745
746declare float @test_floatret();
747
748; On i686, if SSE2 is available, the return value from test_floatret is loaded
749; to f80 and then rounded to f32.  The DAG combiner should not combine this
750; fp_round and the subsequent fptrunc from float to half.
; Calls @test_floatret and truncates the returned float to half.  Expected
; output: the F16C configuration converts inline with vcvtps2ph, the other
; x86-64 configurations call __truncsfhf2, and the i686 configuration stores
; the call result with fstps before calling __truncsfhf2.
751define half @test_f80trunc_nodagcombine() #0 {
752; CHECK-LIBCALL-LABEL: test_f80trunc_nodagcombine:
753; CHECK-LIBCALL:       # %bb.0:
754; CHECK-LIBCALL-NEXT:    pushq %rax
755; CHECK-LIBCALL-NEXT:    callq test_floatret@PLT
756; CHECK-LIBCALL-NEXT:    callq __truncsfhf2@PLT
757; CHECK-LIBCALL-NEXT:    popq %rax
758; CHECK-LIBCALL-NEXT:    retq
759;
760; BWON-F16C-LABEL: test_f80trunc_nodagcombine:
761; BWON-F16C:       # %bb.0:
762; BWON-F16C-NEXT:    pushq %rax
763; BWON-F16C-NEXT:    callq test_floatret@PLT
764; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
765; BWON-F16C-NEXT:    vmovd %xmm0, %eax
766; BWON-F16C-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
767; BWON-F16C-NEXT:    popq %rax
768; BWON-F16C-NEXT:    retq
769;
770; CHECK-I686-LABEL: test_f80trunc_nodagcombine:
771; CHECK-I686:       # %bb.0:
772; CHECK-I686-NEXT:    subl $12, %esp
773; CHECK-I686-NEXT:    calll test_floatret@PLT
774; CHECK-I686-NEXT:    fstps (%esp)
775; CHECK-I686-NEXT:    calll __truncsfhf2
776; CHECK-I686-NEXT:    addl $12, %esp
777; CHECK-I686-NEXT:    retl
778  %1 = call float @test_floatret()
779  %2 = fptrunc float %1 to half
780  ret half %2
781}
782
783
784
785
; Loads a half from %b, converts the i32 %a to half, adds the two in half
; precision and returns the sum extended to float.  Expected output: without
; F16C every half<->float step is a __truncsfhf2 / __extendhfsf2 call; with
; F16C the same steps are vcvtps2ph / vcvtph2ps instructions.
786define float @test_sitofp_fadd_i32(i32 %a, ptr %b) #0 {
787; CHECK-LIBCALL-LABEL: test_sitofp_fadd_i32:
788; CHECK-LIBCALL:       # %bb.0:
789; CHECK-LIBCALL-NEXT:    subq $40, %rsp
790; CHECK-LIBCALL-NEXT:    pinsrw $0, (%rsi), %xmm0
791; CHECK-LIBCALL-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
792; CHECK-LIBCALL-NEXT:    xorps %xmm0, %xmm0
793; CHECK-LIBCALL-NEXT:    cvtsi2ss %edi, %xmm0
794; CHECK-LIBCALL-NEXT:    callq __truncsfhf2@PLT
795; CHECK-LIBCALL-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
796; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
797; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
798; CHECK-LIBCALL-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
799; CHECK-LIBCALL-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
800; CHECK-LIBCALL-NEXT:    # xmm0 = mem[0],zero,zero,zero
801; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
802; CHECK-LIBCALL-NEXT:    addss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
803; CHECK-LIBCALL-NEXT:    callq __truncsfhf2@PLT
804; CHECK-LIBCALL-NEXT:    addq $40, %rsp
805; CHECK-LIBCALL-NEXT:    jmp __extendhfsf2@PLT # TAILCALL
806;
807; BWON-F16C-LABEL: test_sitofp_fadd_i32:
808; BWON-F16C:       # %bb.0:
809; BWON-F16C-NEXT:    movzwl (%rsi), %eax
810; BWON-F16C-NEXT:    vcvtsi2ss %edi, %xmm0, %xmm0
811; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
812; BWON-F16C-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
813; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
814; BWON-F16C-NEXT:    vmovd %eax, %xmm1
815; BWON-F16C-NEXT:    vcvtph2ps %xmm1, %xmm1
816; BWON-F16C-NEXT:    vaddss %xmm0, %xmm1, %xmm0
817; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
818; BWON-F16C-NEXT:    vmovd %xmm0, %eax
819; BWON-F16C-NEXT:    movzwl %ax, %eax
820; BWON-F16C-NEXT:    vmovd %eax, %xmm0
821; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
822; BWON-F16C-NEXT:    retq
823;
824; CHECK-I686-LABEL: test_sitofp_fadd_i32:
825; CHECK-I686:       # %bb.0:
826; CHECK-I686-NEXT:    subl $60, %esp
827; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
828; CHECK-I686-NEXT:    pinsrw $0, (%eax), %xmm0
829; CHECK-I686-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
830; CHECK-I686-NEXT:    xorps %xmm0, %xmm0
831; CHECK-I686-NEXT:    cvtsi2ssl {{[0-9]+}}(%esp), %xmm0
832; CHECK-I686-NEXT:    movss %xmm0, (%esp)
833; CHECK-I686-NEXT:    calll __truncsfhf2
834; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
835; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
836; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
837; CHECK-I686-NEXT:    movw %ax, (%esp)
838; CHECK-I686-NEXT:    calll __extendhfsf2
839; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
840; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
841; CHECK-I686-NEXT:    movw %ax, (%esp)
842; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
843; CHECK-I686-NEXT:    calll __extendhfsf2
844; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
845; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
846; CHECK-I686-NEXT:    addss {{[0-9]+}}(%esp), %xmm0
847; CHECK-I686-NEXT:    movss %xmm0, (%esp)
848; CHECK-I686-NEXT:    calll __truncsfhf2
849; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
850; CHECK-I686-NEXT:    movw %ax, (%esp)
851; CHECK-I686-NEXT:    calll __extendhfsf2
852; CHECK-I686-NEXT:    addl $60, %esp
853; CHECK-I686-NEXT:    retl
854  %tmp0 = load half, ptr %b
855  %tmp1 = sitofp i32 %a to half
856  %tmp2 = fadd half %tmp0, %tmp1
857  %tmp3 = fpext half %tmp2 to float
858  ret float %tmp3
859}
860
; Compares the unnamed half argument against 0xH0000 with `fcmp une` and
; converts the i1 result to half with uitofp.  Every expected sequence extends
; the argument to float, does a ucomiss against zero, and then picks between
; 0 and 0x3C00 with cmovne/cmovp before inserting the value into xmm0.
861define half @PR40273(half) #0 {
862; CHECK-LIBCALL-LABEL: PR40273:
863; CHECK-LIBCALL:       # %bb.0:
864; CHECK-LIBCALL-NEXT:    pushq %rax
865; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
866; CHECK-LIBCALL-NEXT:    xorl %eax, %eax
867; CHECK-LIBCALL-NEXT:    xorps %xmm1, %xmm1
868; CHECK-LIBCALL-NEXT:    ucomiss %xmm1, %xmm0
869; CHECK-LIBCALL-NEXT:    movl $15360, %ecx # imm = 0x3C00
870; CHECK-LIBCALL-NEXT:    cmovnel %ecx, %eax
871; CHECK-LIBCALL-NEXT:    cmovpl %ecx, %eax
872; CHECK-LIBCALL-NEXT:    pinsrw $0, %eax, %xmm0
873; CHECK-LIBCALL-NEXT:    popq %rax
874; CHECK-LIBCALL-NEXT:    retq
875;
876; BWON-F16C-LABEL: PR40273:
877; BWON-F16C:       # %bb.0:
878; BWON-F16C-NEXT:    vpextrw $0, %xmm0, %eax
879; BWON-F16C-NEXT:    movzwl %ax, %eax
880; BWON-F16C-NEXT:    vmovd %eax, %xmm0
881; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
882; BWON-F16C-NEXT:    xorl %eax, %eax
883; BWON-F16C-NEXT:    vxorps %xmm1, %xmm1, %xmm1
884; BWON-F16C-NEXT:    vucomiss %xmm1, %xmm0
885; BWON-F16C-NEXT:    movl $15360, %ecx # imm = 0x3C00
886; BWON-F16C-NEXT:    cmovnel %ecx, %eax
887; BWON-F16C-NEXT:    cmovpl %ecx, %eax
888; BWON-F16C-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
889; BWON-F16C-NEXT:    retq
890;
891; CHECK-I686-LABEL: PR40273:
892; CHECK-I686:       # %bb.0:
893; CHECK-I686-NEXT:    subl $12, %esp
894; CHECK-I686-NEXT:    pinsrw $0, {{[0-9]+}}(%esp), %xmm0
895; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
896; CHECK-I686-NEXT:    movw %ax, (%esp)
897; CHECK-I686-NEXT:    calll __extendhfsf2
898; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
899; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
900; CHECK-I686-NEXT:    xorl %eax, %eax
901; CHECK-I686-NEXT:    xorps %xmm1, %xmm1
902; CHECK-I686-NEXT:    ucomiss %xmm1, %xmm0
903; CHECK-I686-NEXT:    movl $15360, %ecx # imm = 0x3C00
904; CHECK-I686-NEXT:    cmovnel %ecx, %eax
905; CHECK-I686-NEXT:    cmovpl %ecx, %eax
906; CHECK-I686-NEXT:    pinsrw $0, %eax, %xmm0
907; CHECK-I686-NEXT:    addl $12, %esp
908; CHECK-I686-NEXT:    retl
; uitofp of the i1 compare result: false -> 0.0, true -> 1.0 (0x3C00 as half).
909  %2 = fcmp une half %0, 0xH0000
910  %3 = uitofp i1 %2 to half
911  ret half %3
912}
913
; Branches on `fcmp oeq half 0xH0000, %0`: %if.then returns and %if.end is
; unreachable.  Every expected sequence extends the argument to float, does a
; ucomiss against zero, combines setp/setne with orb and jumps to the %if.end
; block with jne.  This function is not marked #0, and its expected output
; includes .cfi directives.
914define dso_local void @brcond(half %0) {
915; CHECK-LIBCALL-LABEL: brcond:
916; CHECK-LIBCALL:       # %bb.0: # %entry
917; CHECK-LIBCALL-NEXT:    pushq %rax
918; CHECK-LIBCALL-NEXT:    .cfi_def_cfa_offset 16
919; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
920; CHECK-LIBCALL-NEXT:    xorps %xmm1, %xmm1
921; CHECK-LIBCALL-NEXT:    ucomiss %xmm1, %xmm0
922; CHECK-LIBCALL-NEXT:    setp %al
923; CHECK-LIBCALL-NEXT:    setne %cl
924; CHECK-LIBCALL-NEXT:    orb %al, %cl
925; CHECK-LIBCALL-NEXT:    jne .LBB18_2
926; CHECK-LIBCALL-NEXT:  # %bb.1: # %if.then
927; CHECK-LIBCALL-NEXT:    popq %rax
928; CHECK-LIBCALL-NEXT:    .cfi_def_cfa_offset 8
929; CHECK-LIBCALL-NEXT:    retq
930; CHECK-LIBCALL-NEXT:  .LBB18_2: # %if.end
931;
932; BWON-F16C-LABEL: brcond:
933; BWON-F16C:       # %bb.0: # %entry
934; BWON-F16C-NEXT:    vpextrw $0, %xmm0, %eax
935; BWON-F16C-NEXT:    movzwl %ax, %eax
936; BWON-F16C-NEXT:    vmovd %eax, %xmm0
937; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
938; BWON-F16C-NEXT:    vxorps %xmm1, %xmm1, %xmm1
939; BWON-F16C-NEXT:    vucomiss %xmm1, %xmm0
940; BWON-F16C-NEXT:    setp %al
941; BWON-F16C-NEXT:    setne %cl
942; BWON-F16C-NEXT:    orb %al, %cl
943; BWON-F16C-NEXT:    jne .LBB18_2
944; BWON-F16C-NEXT:  # %bb.1: # %if.then
945; BWON-F16C-NEXT:    retq
946; BWON-F16C-NEXT:  .LBB18_2: # %if.end
947;
948; CHECK-I686-LABEL: brcond:
949; CHECK-I686:       # %bb.0: # %entry
950; CHECK-I686-NEXT:    subl $12, %esp
951; CHECK-I686-NEXT:    .cfi_def_cfa_offset 16
952; CHECK-I686-NEXT:    pinsrw $0, {{[0-9]+}}(%esp), %xmm0
953; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
954; CHECK-I686-NEXT:    movw %ax, (%esp)
955; CHECK-I686-NEXT:    calll __extendhfsf2
956; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
957; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
958; CHECK-I686-NEXT:    xorps %xmm1, %xmm1
959; CHECK-I686-NEXT:    ucomiss %xmm1, %xmm0
960; CHECK-I686-NEXT:    setp %al
961; CHECK-I686-NEXT:    setne %cl
962; CHECK-I686-NEXT:    orb %al, %cl
963; CHECK-I686-NEXT:    jne .LBB18_2
964; CHECK-I686-NEXT:  # %bb.1: # %if.then
965; CHECK-I686-NEXT:    addl $12, %esp
966; CHECK-I686-NEXT:    .cfi_def_cfa_offset 4
967; CHECK-I686-NEXT:    retl
968; CHECK-I686-NEXT:  .LBB18_2: # %if.end
969entry:
970  %cmp = fcmp oeq half 0xH0000, %0
971  br i1 %cmp, label %if.then, label %if.end
972
973if.then:                                          ; preds = %entry
974  ret void
975
976if.end:                                           ; preds = %entry
977  unreachable
978}
979
; Calls llvm.sqrt.f16 on the half argument.  Every expected sequence extends
; the half to float (__extendhfsf2 or vcvtph2ps), takes the square root with
; sqrtss / vsqrtss, and truncates the result back to half (__truncsfhf2 or
; vcvtps2ph).  This function is not marked #0, and its expected output
; includes .cfi directives in the configurations that adjust the stack.
980define half @test_sqrt(half %0) {
981; CHECK-LIBCALL-LABEL: test_sqrt:
982; CHECK-LIBCALL:       # %bb.0: # %entry
983; CHECK-LIBCALL-NEXT:    pushq %rax
984; CHECK-LIBCALL-NEXT:    .cfi_def_cfa_offset 16
985; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
986; CHECK-LIBCALL-NEXT:    sqrtss %xmm0, %xmm0
987; CHECK-LIBCALL-NEXT:    callq __truncsfhf2@PLT
988; CHECK-LIBCALL-NEXT:    popq %rax
989; CHECK-LIBCALL-NEXT:    .cfi_def_cfa_offset 8
990; CHECK-LIBCALL-NEXT:    retq
991;
992; BWON-F16C-LABEL: test_sqrt:
993; BWON-F16C:       # %bb.0: # %entry
994; BWON-F16C-NEXT:    vpextrw $0, %xmm0, %eax
995; BWON-F16C-NEXT:    movzwl %ax, %eax
996; BWON-F16C-NEXT:    vmovd %eax, %xmm0
997; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
998; BWON-F16C-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
999; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
1000; BWON-F16C-NEXT:    vmovd %xmm0, %eax
1001; BWON-F16C-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
1002; BWON-F16C-NEXT:    retq
1003;
1004; CHECK-I686-LABEL: test_sqrt:
1005; CHECK-I686:       # %bb.0: # %entry
1006; CHECK-I686-NEXT:    subl $12, %esp
1007; CHECK-I686-NEXT:    .cfi_def_cfa_offset 16
1008; CHECK-I686-NEXT:    pinsrw $0, {{[0-9]+}}(%esp), %xmm0
1009; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
1010; CHECK-I686-NEXT:    movw %ax, (%esp)
1011; CHECK-I686-NEXT:    calll __extendhfsf2
1012; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
1013; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1014; CHECK-I686-NEXT:    sqrtss %xmm0, %xmm0
1015; CHECK-I686-NEXT:    movss %xmm0, (%esp)
1016; CHECK-I686-NEXT:    calll __truncsfhf2
1017; CHECK-I686-NEXT:    addl $12, %esp
1018; CHECK-I686-NEXT:    .cfi_def_cfa_offset 4
1019; CHECK-I686-NEXT:    retl
1020entry:
1021  %1 = call half @llvm.sqrt.f16(half %0)
1022  ret half %1
1023}
1024
1025declare half @llvm.sqrt.f16(half)
1026
; Takes llvm.fabs.f16 of an undef half, compares it (`fcmp ole`) against
; 0xH4800, selects between 0.0 and `fmul float undef, 0.0` on that compare,
; truncates the selected float to half and stores it through an undef pointer.
; Every expected sequence ends with a 16-bit store whose address register is
; whatever happens to be in (r|e)ax, matching the undef store address.
1027define void @main.158() local_unnamed_addr #0 {
1028; CHECK-LIBCALL-LABEL: main.158:
1029; CHECK-LIBCALL:       # %bb.0: # %entry
1030; CHECK-LIBCALL-NEXT:    pushq %rax
1031; CHECK-LIBCALL-NEXT:    xorps %xmm0, %xmm0
1032; CHECK-LIBCALL-NEXT:    callq __truncsfhf2@PLT
1033; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
1034; CHECK-LIBCALL-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1035; CHECK-LIBCALL-NEXT:    ucomiss %xmm0, %xmm1
1036; CHECK-LIBCALL-NEXT:    xorps %xmm0, %xmm0
1037; CHECK-LIBCALL-NEXT:    jae .LBB20_2
1038; CHECK-LIBCALL-NEXT:  # %bb.1: # %entry
1039; CHECK-LIBCALL-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1040; CHECK-LIBCALL-NEXT:  .LBB20_2: # %entry
1041; CHECK-LIBCALL-NEXT:    callq __truncsfhf2@PLT
1042; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %eax
1043; CHECK-LIBCALL-NEXT:    movw %ax, (%rax)
1044; CHECK-LIBCALL-NEXT:    popq %rax
1045; CHECK-LIBCALL-NEXT:    retq
1046;
1047; BWON-F16C-LABEL: main.158:
1048; BWON-F16C:       # %bb.0: # %entry
1049; BWON-F16C-NEXT:    vxorps %xmm0, %xmm0, %xmm0
1050; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
1051; BWON-F16C-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1052; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
1053; BWON-F16C-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1054; BWON-F16C-NEXT:    vucomiss %xmm0, %xmm1
1055; BWON-F16C-NEXT:    vxorps %xmm0, %xmm0, %xmm0
1056; BWON-F16C-NEXT:    jae .LBB20_2
1057; BWON-F16C-NEXT:  # %bb.1: # %entry
1058; BWON-F16C-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1059; BWON-F16C-NEXT:  .LBB20_2: # %entry
1060; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
1061; BWON-F16C-NEXT:    vmovd %xmm0, %eax
1062; BWON-F16C-NEXT:    movw %ax, (%rax)
1063; BWON-F16C-NEXT:    retq
1064;
1065; CHECK-I686-LABEL: main.158:
1066; CHECK-I686:       # %bb.0: # %entry
1067; CHECK-I686-NEXT:    subl $12, %esp
1068; CHECK-I686-NEXT:    pxor %xmm0, %xmm0
1069; CHECK-I686-NEXT:    movd %xmm0, (%esp)
1070; CHECK-I686-NEXT:    calll __truncsfhf2
1071; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
1072; CHECK-I686-NEXT:    movw %ax, (%esp)
1073; CHECK-I686-NEXT:    calll __extendhfsf2
1074; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
1075; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1076; CHECK-I686-NEXT:    ucomiss {{[0-9]+}}(%esp), %xmm0
1077; CHECK-I686-NEXT:    xorps %xmm0, %xmm0
1078; CHECK-I686-NEXT:    jae .LBB20_2
1079; CHECK-I686-NEXT:  # %bb.1: # %entry
1080; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1081; CHECK-I686-NEXT:  .LBB20_2: # %entry
1082; CHECK-I686-NEXT:    movss %xmm0, (%esp)
1083; CHECK-I686-NEXT:    calll __truncsfhf2
1084; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
1085; CHECK-I686-NEXT:    movw %ax, (%eax)
1086; CHECK-I686-NEXT:    addl $12, %esp
1087; CHECK-I686-NEXT:    retl
; NOTE(review): %1 and the %multiply.95 -> %add.82 -> %multiply.68 ->
; %subtract.65 chain below do not feed the stored value; presumably leftovers
; of a reduced reproducer -- confirm before cleaning them up, since the
; expected assembly above was generated from this exact IR.
1088entry:
1089  %0 = tail call half @llvm.fabs.f16(half undef)
1090  %1 = fpext half %0 to float
1091  %compare.2 = fcmp ole half %0, 0xH4800
1092  %multiply.95 = fmul float %1, 5.000000e-01
1093  %add.82 = fadd float %multiply.95, -2.000000e+00
1094  %multiply.68 = fmul float %add.82, 0.000000e+00
1095  %subtract.65 = fsub float %multiply.68, 0.000000e+00
1096  %multiply.57 = fmul float undef, 0.000000e+00
1097  %2 = select i1 %compare.2, float 0.000000e+00, float %multiply.57
1098  %3 = fptrunc float %2 to half
1099  store half %3, ptr undef, align 2
1100  ret void
1101}
1102
; Loads a half through an undef pointer and splats it twice: once as
; <4 x half> and once, after a bitcast to i16, as <4 x i16>.  The stored
; <4 x half> takes 0xH7E00 in lanes where `fcmp uno` of the half splat against
; zero is true, and otherwise the bitcast of %5, which is derived from the
; integer splat through two selects with undef conditions.  The expected
; sequences perform one scalar self-compare (ucomiss %xmm0, %xmm0) and use
; cmovp to substitute 0x7E00 into each extracted lane.  This function is not
; marked #0, and its expected output includes .cfi directives.
1103define void @main.45() local_unnamed_addr {
1104; CHECK-LIBCALL-LABEL: main.45:
1105; CHECK-LIBCALL:       # %bb.0: # %entry
1106; CHECK-LIBCALL-NEXT:    pushq %rbp
1107; CHECK-LIBCALL-NEXT:    .cfi_def_cfa_offset 16
1108; CHECK-LIBCALL-NEXT:    pushq %r15
1109; CHECK-LIBCALL-NEXT:    .cfi_def_cfa_offset 24
1110; CHECK-LIBCALL-NEXT:    pushq %r14
1111; CHECK-LIBCALL-NEXT:    .cfi_def_cfa_offset 32
1112; CHECK-LIBCALL-NEXT:    pushq %rbx
1113; CHECK-LIBCALL-NEXT:    .cfi_def_cfa_offset 40
1114; CHECK-LIBCALL-NEXT:    pushq %rax
1115; CHECK-LIBCALL-NEXT:    .cfi_def_cfa_offset 48
1116; CHECK-LIBCALL-NEXT:    .cfi_offset %rbx, -40
1117; CHECK-LIBCALL-NEXT:    .cfi_offset %r14, -32
1118; CHECK-LIBCALL-NEXT:    .cfi_offset %r15, -24
1119; CHECK-LIBCALL-NEXT:    .cfi_offset %rbp, -16
1120; CHECK-LIBCALL-NEXT:    pinsrw $0, (%rax), %xmm0
1121; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %eax
1122; CHECK-LIBCALL-NEXT:    movd %eax, %xmm1
1123; CHECK-LIBCALL-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
1124; CHECK-LIBCALL-NEXT:    movq %xmm1, %rbx
1125; CHECK-LIBCALL-NEXT:    movq %rbx, %r14
1126; CHECK-LIBCALL-NEXT:    shrq $48, %r14
1127; CHECK-LIBCALL-NEXT:    movq %rbx, %r15
1128; CHECK-LIBCALL-NEXT:    shrq $32, %r15
1129; CHECK-LIBCALL-NEXT:    movl %ebx, %ebp
1130; CHECK-LIBCALL-NEXT:    shrl $16, %ebp
1131; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
1132; CHECK-LIBCALL-NEXT:    ucomiss %xmm0, %xmm0
1133; CHECK-LIBCALL-NEXT:    movl $32256, %eax # imm = 0x7E00
1134; CHECK-LIBCALL-NEXT:    cmovpl %eax, %ebp
1135; CHECK-LIBCALL-NEXT:    cmovpl %eax, %r15d
1136; CHECK-LIBCALL-NEXT:    cmovpl %eax, %r14d
1137; CHECK-LIBCALL-NEXT:    cmovpl %eax, %ebx
1138; CHECK-LIBCALL-NEXT:    movw %bx, (%rax)
1139; CHECK-LIBCALL-NEXT:    movw %r14w, (%rax)
1140; CHECK-LIBCALL-NEXT:    movw %r15w, (%rax)
1141; CHECK-LIBCALL-NEXT:    movw %bp, (%rax)
1142; CHECK-LIBCALL-NEXT:    addq $8, %rsp
1143; CHECK-LIBCALL-NEXT:    .cfi_def_cfa_offset 40
1144; CHECK-LIBCALL-NEXT:    popq %rbx
1145; CHECK-LIBCALL-NEXT:    .cfi_def_cfa_offset 32
1146; CHECK-LIBCALL-NEXT:    popq %r14
1147; CHECK-LIBCALL-NEXT:    .cfi_def_cfa_offset 24
1148; CHECK-LIBCALL-NEXT:    popq %r15
1149; CHECK-LIBCALL-NEXT:    .cfi_def_cfa_offset 16
1150; CHECK-LIBCALL-NEXT:    popq %rbp
1151; CHECK-LIBCALL-NEXT:    .cfi_def_cfa_offset 8
1152; CHECK-LIBCALL-NEXT:    retq
1153;
1154; BWON-F16C-LABEL: main.45:
1155; BWON-F16C:       # %bb.0: # %entry
1156; BWON-F16C-NEXT:    movzwl (%rax), %eax
1157; BWON-F16C-NEXT:    vmovd %eax, %xmm0
1158; BWON-F16C-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
1159; BWON-F16C-NEXT:    vmovq %xmm1, %rax
1160; BWON-F16C-NEXT:    movq %rax, %rcx
1161; BWON-F16C-NEXT:    shrq $48, %rcx
1162; BWON-F16C-NEXT:    movq %rax, %rdx
1163; BWON-F16C-NEXT:    shrq $32, %rdx
1164; BWON-F16C-NEXT:    movl %eax, %esi
1165; BWON-F16C-NEXT:    shrl $16, %esi
1166; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
1167; BWON-F16C-NEXT:    vucomiss %xmm0, %xmm0
1168; BWON-F16C-NEXT:    movl $32256, %edi # imm = 0x7E00
1169; BWON-F16C-NEXT:    cmovpl %edi, %esi
1170; BWON-F16C-NEXT:    cmovpl %edi, %edx
1171; BWON-F16C-NEXT:    cmovpl %edi, %ecx
1172; BWON-F16C-NEXT:    cmovpl %edi, %eax
1173; BWON-F16C-NEXT:    movw %ax, (%rax)
1174; BWON-F16C-NEXT:    movw %cx, (%rax)
1175; BWON-F16C-NEXT:    movw %dx, (%rax)
1176; BWON-F16C-NEXT:    movw %si, (%rax)
1177; BWON-F16C-NEXT:    retq
1178;
1179; CHECK-I686-LABEL: main.45:
1180; CHECK-I686:       # %bb.0: # %entry
1181; CHECK-I686-NEXT:    pushl %edi
1182; CHECK-I686-NEXT:    .cfi_def_cfa_offset 8
1183; CHECK-I686-NEXT:    pushl %esi
1184; CHECK-I686-NEXT:    .cfi_def_cfa_offset 12
1185; CHECK-I686-NEXT:    subl $20, %esp
1186; CHECK-I686-NEXT:    .cfi_def_cfa_offset 32
1187; CHECK-I686-NEXT:    .cfi_offset %esi, -12
1188; CHECK-I686-NEXT:    .cfi_offset %edi, -8
1189; CHECK-I686-NEXT:    pinsrw $0, (%eax), %xmm0
1190; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
1191; CHECK-I686-NEXT:    movd %eax, %xmm0
1192; CHECK-I686-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
1193; CHECK-I686-NEXT:    movd %xmm0, %esi
1194; CHECK-I686-NEXT:    movl %esi, %edi
1195; CHECK-I686-NEXT:    shrl $16, %edi
1196; CHECK-I686-NEXT:    movw %ax, (%esp)
1197; CHECK-I686-NEXT:    calll __extendhfsf2
1198; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
1199; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1200; CHECK-I686-NEXT:    ucomiss %xmm0, %xmm0
1201; CHECK-I686-NEXT:    movl $32256, %eax # imm = 0x7E00
1202; CHECK-I686-NEXT:    cmovpl %eax, %esi
1203; CHECK-I686-NEXT:    cmovpl %eax, %edi
1204; CHECK-I686-NEXT:    movw %di, (%eax)
1205; CHECK-I686-NEXT:    movw %si, (%eax)
1206; CHECK-I686-NEXT:    addl $20, %esp
1207; CHECK-I686-NEXT:    .cfi_def_cfa_offset 12
1208; CHECK-I686-NEXT:    popl %esi
1209; CHECK-I686-NEXT:    .cfi_def_cfa_offset 8
1210; CHECK-I686-NEXT:    popl %edi
1211; CHECK-I686-NEXT:    .cfi_def_cfa_offset 4
1212; CHECK-I686-NEXT:    retl
1213entry:
1214  %0 = load half, ptr undef, align 8
1215  %1 = bitcast half %0 to i16
1216  %broadcast.splatinsert = insertelement <4 x half> poison, half %0, i64 0
1217  %broadcast.splat = shufflevector <4 x half> %broadcast.splatinsert, <4 x half> poison, <4 x i32> zeroinitializer
1218  %broadcast.splatinsert13 = insertelement <4 x i16> poison, i16 %1, i64 0
1219  %broadcast.splat14 = shufflevector <4 x i16> %broadcast.splatinsert13, <4 x i16> poison, <4 x i32> zeroinitializer
1220  %2 = fcmp uno <4 x half> %broadcast.splat, zeroinitializer
1221  %3 = add <4 x i16> zeroinitializer, %broadcast.splat14
1222  %4 = select i1 undef, <4 x i16> undef, <4 x i16> %3
1223  %5 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> %4
1224  %6 = bitcast <4 x i16> %5 to <4 x half>
1225  %7 = select <4 x i1> %2, <4 x half> <half 0xH7E00, half 0xH7E00, half 0xH7E00, half 0xH7E00>, <4 x half> %6
1226  store <4 x half> %7, ptr undef, align 16
1227  ret void
1228}
1229
; Calls llvm.copysign.f16(%x, %y).  Every expected sequence is integer bit
; manipulation only: one operand is ANDed with 0x8000, the other with 0x7FFF,
; the two are ORed and the result is inserted into xmm0 with (v)pinsrw.  No
; half<->float conversion call or instruction appears in any configuration.
1230define half @fcopysign(half %x, half %y) {
1231; CHECK-LIBCALL-LABEL: fcopysign:
1232; CHECK-LIBCALL:       # %bb.0:
1233; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm1, %eax
1234; CHECK-LIBCALL-NEXT:    andl $-32768, %eax # imm = 0x8000
1235; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %ecx
1236; CHECK-LIBCALL-NEXT:    andl $32767, %ecx # imm = 0x7FFF
1237; CHECK-LIBCALL-NEXT:    orl %eax, %ecx
1238; CHECK-LIBCALL-NEXT:    pinsrw $0, %ecx, %xmm0
1239; CHECK-LIBCALL-NEXT:    retq
1240;
1241; BWON-F16C-LABEL: fcopysign:
1242; BWON-F16C:       # %bb.0:
1243; BWON-F16C-NEXT:    vpextrw $0, %xmm1, %eax
1244; BWON-F16C-NEXT:    andl $-32768, %eax # imm = 0x8000
1245; BWON-F16C-NEXT:    vpextrw $0, %xmm0, %ecx
1246; BWON-F16C-NEXT:    andl $32767, %ecx # imm = 0x7FFF
1247; BWON-F16C-NEXT:    orl %eax, %ecx
1248; BWON-F16C-NEXT:    vpinsrw $0, %ecx, %xmm0, %xmm0
1249; BWON-F16C-NEXT:    retq
1250;
1251; CHECK-I686-LABEL: fcopysign:
1252; CHECK-I686:       # %bb.0:
1253; CHECK-I686-NEXT:    movl $-32768, %eax # imm = 0x8000
1254; CHECK-I686-NEXT:    andl {{[0-9]+}}(%esp), %eax
1255; CHECK-I686-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
1256; CHECK-I686-NEXT:    andl $32767, %ecx # imm = 0x7FFF
1257; CHECK-I686-NEXT:    orl %eax, %ecx
1258; CHECK-I686-NEXT:    pinsrw $0, %ecx, %xmm0
1259; CHECK-I686-NEXT:    retl
1260  %a = call half @llvm.copysign.f16(half %x, half %y)
1261  ret half %a
1262}
1263
1264declare half @llvm.fabs.f16(half)
1265declare half @llvm.copysign.f16(half, half)
1266
1267attributes #0 = { nounwind }
1268